Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_Merge.hpp
1// @HEADER
2// ***********************************************************************
3//
4// Tpetra: Templated Linear Algebra Services Package
5// Copyright (2008) Sandia Corporation
6//
7// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
8// the U.S. Government retains certain rights in this software.
9//
10// Redistribution and use in source and binary forms, with or without
11// modification, are permitted provided that the following conditions are
12// met:
13//
14// 1. Redistributions of source code must retain the above copyright
15// notice, this list of conditions and the following disclaimer.
16//
17// 2. Redistributions in binary form must reproduce the above copyright
18// notice, this list of conditions and the following disclaimer in the
19// documentation and/or other materials provided with the distribution.
20//
21// 3. Neither the name of the Corporation nor the names of the
22// contributors may be used to endorse or promote products derived from
23// this software without specific prior written permission.
24//
25// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36//
37// Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38//
39// ************************************************************************
40// @HEADER
41
42#ifndef TPETRA_DETAILS_MERGE_HPP
43#define TPETRA_DETAILS_MERGE_HPP
44
45#include "TpetraCore_config.h"
46#include "Teuchos_TestForException.hpp"
47#include <algorithm> // std::sort
48#include <utility> // std::pair, std::make_pair
49#include <stdexcept>
50
51namespace Tpetra {
52namespace Details {
53
72template<class OrdinalType, class IndexType>
73IndexType
74countMergeUnsortedIndices (const OrdinalType curInds[],
75 const IndexType numCurInds,
76 const OrdinalType inputInds[],
77 const IndexType numInputInds)
78{
79 IndexType mergeCount = 0;
80
81 if (numCurInds <= numInputInds) {
82 // More input than current entries, so iterate linearly over
83 // input and scan current entries repeatedly.
84 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
85 const OrdinalType inVal = inputInds[inPos];
86 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
87 if (curInds[curPos] == inVal) {
88 ++mergeCount;
89 }
90 }
91 }
92 }
93 else { // numCurInds > numInputInds
94 // More current entries than input, so iterate linearly over
95 // current entries and scan input repeatedly.
96 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
97 const OrdinalType curVal = curInds[curPos];
98 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
99 if (inputInds[inPos] == curVal) {
100 ++mergeCount;
101 }
102 }
103 }
104 }
105
106#ifdef HAVE_TPETRA_DEBUG
107 TEUCHOS_TEST_FOR_EXCEPTION
108 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
109 mergeCount << " > numInputInds = " << numInputInds << ".");
110#endif // HAVE_TPETRA_DEBUG
111 return mergeCount;
112}
113
131template<class OrdinalType, class IndexType>
132IndexType
133countMergeSortedIndices (const OrdinalType curInds[],
134 const IndexType numCurInds,
135 const OrdinalType inputInds[],
136 const IndexType numInputInds)
137{
138 // Only count possible merges; don't merge yet. If the row
139 // doesn't have enough space, we want to return without side
140 // effects.
141 IndexType curPos = 0;
142 IndexType inPos = 0;
143 IndexType mergeCount = 0;
144 while (inPos < numInputInds && curPos < numCurInds) {
145 const OrdinalType inVal = inputInds[inPos];
146 const OrdinalType curVal = curInds[curPos];
147
148 if (curVal == inVal) { // can merge
149 ++mergeCount;
150 ++inPos; // go on to next input
151 } else if (curVal < inVal) {
152 ++curPos; // go on to next row entry
153 } else { // curVal > inVal
154 ++inPos; // can't merge it ever, since row entries sorted
155 }
156 }
157
158#ifdef HAVE_TPETRA_DEBUG
159 TEUCHOS_TEST_FOR_EXCEPTION
160 (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
161 " > numInputInds = " << numInputInds << ".");
162 TEUCHOS_TEST_FOR_EXCEPTION
163 (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
164 " > numCurInds = " << numCurInds << ".");
165 TEUCHOS_TEST_FOR_EXCEPTION
166 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
167 mergeCount << " > numInputInds = " << numInputInds << ".");
168#endif // HAVE_TPETRA_DEBUG
169
170 // At this point, 2 situations are possible:
171 //
172 // 1. inPos == numInputInds: We looked at all inputs. Some
173 // (mergeCount of them) could have been merged.
174 // 2. inPos < numInputInds: We didn't get to look at all inputs.
175 // Since the inputs are sorted, we know that those inputs we
176 // didn't examine weren't mergeable.
177 //
178 // Either way, mergeCount gives the number of mergeable inputs.
179 return mergeCount;
180}
181
182
203template<class OrdinalType, class IndexType>
204std::pair<bool, IndexType>
205mergeSortedIndices (OrdinalType curInds[],
206 const IndexType midPos,
207 const IndexType endPos,
208 const OrdinalType inputInds[],
209 const IndexType numInputInds)
210{
211 // Optimize for the following cases, in decreasing order of
212 // optimization concern:
213 //
214 // a. Current row has allocated space but no entries
215 // b. All input indices already in the graph
216 //
217 // If the row has insufficient space for a merge, don't do
218 // anything! Just return an upper bound on the number of extra
219 // entries required to fit everything. This imposes extra cost,
220 // but correctly supports the count, allocate, fill, compute
221 // pattern. (If some entries were merged in and others weren't,
222 // how would you know which got merged in? CrsGraph insert is
223 // idempotent, but CrsMatrix insert does a += on the value and
224 // is therefore not idempotent.)
225 if (midPos == 0) {
226 // Current row has no entries, but may have preallocated space.
227 if (endPos >= numInputInds) {
228 // Sufficient space for new entries; copy directly.
229 for (IndexType k = 0; k < numInputInds; ++k) {
230 curInds[k] = inputInds[k];
231 }
232 std::sort (curInds, curInds + numInputInds);
233 return std::make_pair (true, numInputInds);
234 }
235 else { // not enough space
236 return std::make_pair (false, numInputInds);
237 }
238 }
239 else { // current row contains indices, requiring merge
240 // Only count possible merges; don't merge yet. If the row
241 // doesn't have enough space, we want to return without side
242 // effects.
243 const IndexType mergeCount =
244 countMergeSortedIndices<OrdinalType, IndexType> (curInds, midPos,
245 inputInds,
246 numInputInds);
247 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
248 const IndexType newRowLen = midPos + extraSpaceNeeded;
249 if (newRowLen > endPos) {
250 return std::make_pair (false, newRowLen);
251 }
252 else { // we have enough space; merge in
253 IndexType curPos = 0;
254 IndexType inPos = 0;
255 IndexType newPos = midPos;
256 while (inPos < numInputInds && curPos < midPos) {
257 const OrdinalType inVal = inputInds[inPos];
258 const OrdinalType curVal = curInds[curPos];
259
260 if (curVal == inVal) { // can merge
261 ++inPos; // merge and go on to next input
262 } else if (curVal < inVal) {
263 ++curPos; // go on to next row entry
264 } else { // curVal > inVal
265 // The input doesn't exist in the row.
266 // Copy it to the end; we'll sort it in later.
267 curInds[newPos] = inVal;
268 ++newPos;
269 ++inPos; // move on to next input
270 }
271 }
272
273 // If any inputs remain, and the current row has space for them,
274 // then copy them in. We'll sort them later.
275 for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
276 curInds[newPos] = inputInds[inPos];
277 }
278
279#ifdef HAVE_TPETRA_DEBUG
280 TEUCHOS_TEST_FOR_EXCEPTION
281 (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
282 << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
283 " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
284 "developers.");
285#endif // HAVE_TPETRA_DEBUG
286
287 if (newPos != midPos) { // new entries at end; sort them in
288 // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
289 // be faster (linear time) just to iterate backwards
290 // through the current entries, pushing them over to make
291 // room for unmerged input. However, I'm not so worried
292 // about the asymptotics here, because dense rows in a
293 // sparse matrix are ungood anyway.
294 std::sort (curInds, curInds + newPos);
295 }
296 return std::make_pair (true, newPos);
297 }
298 }
299}
300
301
323template<class OrdinalType, class IndexType>
324std::pair<bool, IndexType>
325mergeUnsortedIndices (OrdinalType curInds[],
326 const IndexType midPos,
327 const IndexType endPos,
328 const OrdinalType inputInds[],
329 const IndexType numInputInds)
330{
331 // Optimize for the following cases, in decreasing order of
332 // optimization concern:
333 //
334 // a. Current row has allocated space but no entries
335 // b. All input indices already in the graph
336 //
337 // If the row has insufficient space for a merge, don't do
338 // anything! Just return an upper bound on the number of extra
339 // entries required to fit everything. This imposes extra cost,
340 // but correctly supports the count, allocate, fill, compute
341 // pattern. (If some entries were merged in and others weren't,
342 // how would you know which got merged in? CrsGraph insert is
343 // idempotent, but CrsMatrix insert does a += on the value and
344 // is therefore not idempotent.)
345 if (midPos == 0) {
346 // Current row has no entries, but may have preallocated space.
347 if (endPos >= numInputInds) {
348 // Sufficient space for new entries; copy directly.
349 for (IndexType k = 0; k < numInputInds; ++k) {
350 curInds[k] = inputInds[k];
351 }
352 return std::make_pair (true, numInputInds);
353 }
354 else { // not enough space
355 return std::make_pair (false, numInputInds);
356 }
357 }
358 else { // current row contains indices, requiring merge
359 // Only count possible merges; don't merge yet. If the row
360 // doesn't have enough space, we want to return without side
361 // effects.
362 const IndexType mergeCount =
363 countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
364 inputInds,
365 numInputInds);
366 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
367 const IndexType newRowLen = midPos + extraSpaceNeeded;
368 if (newRowLen > endPos) {
369 return std::make_pair (false, newRowLen);
370 }
371 else { // we have enough space; merge in
372 // Iterate linearly over input. Scan current entries
373 // repeatedly. Add new entries at end.
374 IndexType newPos = midPos;
375 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
376 const OrdinalType inVal = inputInds[inPos];
377 bool merged = false;
378 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
379 if (curInds[curPos] == inVal) {
380 merged = true;
381 }
382 }
383 if (! merged) {
384 curInds[newPos] = inVal;
385 ++newPos;
386 }
387 }
388 return std::make_pair (true, newPos);
389 }
390 }
391}
392
417template<class OrdinalType, class ValueType, class IndexType>
418std::pair<bool, IndexType>
419mergeUnsortedIndicesAndValues (OrdinalType curInds[],
420 ValueType curVals[],
421 const IndexType midPos,
422 const IndexType endPos,
423 const OrdinalType inputInds[],
424 const ValueType inputVals[],
425 const IndexType numInputInds)
426{
427 // Optimize for the following cases, in decreasing order of
428 // optimization concern:
429 //
430 // a. Current row has allocated space but no entries
431 // b. All input indices already in the graph
432 //
433 // If the row has insufficient space for a merge, don't do
434 // anything! Just return an upper bound on the number of extra
435 // entries required to fit everything. This imposes extra cost,
436 // but correctly supports the count, allocate, fill, compute
437 // pattern. (If some entries were merged in and others weren't,
438 // how would you know which got merged in? CrsGraph insert is
439 // idempotent, but CrsMatrix insert does a += on the value and
440 // is therefore not idempotent.)
441 if (midPos == 0) {
442 // Current row has no entries, but may have preallocated space.
443 if (endPos >= numInputInds) {
444 // Sufficient space for new entries; copy directly.
445 for (IndexType k = 0; k < numInputInds; ++k) {
446 curInds[k] = inputInds[k];
447 curVals[k] = inputVals[k];
448 }
449 return std::make_pair (true, numInputInds);
450 }
451 else { // not enough space
452 return std::make_pair (false, numInputInds);
453 }
454 }
455 else { // current row contains indices, requiring merge
456 // Only count possible merges; don't merge yet. If the row
457 // doesn't have enough space, we want to return without side
458 // effects.
459 const IndexType mergeCount =
460 countMergeUnsortedIndices<OrdinalType, IndexType> (curInds, midPos,
461 inputInds,
462 numInputInds);
463 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
464 const IndexType newRowLen = midPos + extraSpaceNeeded;
465 if (newRowLen > endPos) {
466 return std::make_pair (false, newRowLen);
467 }
468 else { // we have enough space; merge in
469 // Iterate linearly over input. Scan current entries
470 // repeatedly. Add new entries at end.
471 IndexType newPos = midPos;
472 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
473 const OrdinalType inInd = inputInds[inPos];
474 bool merged = false;
475 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
476 if (curInds[curPos] == inInd) {
477 merged = true;
478 curVals[curPos] += inputVals[inPos];
479 }
480 }
481 if (! merged) {
482 curInds[newPos] = inInd;
483 curVals[newPos] = inputVals[inPos];
484 ++newPos;
485 }
486 }
487 return std::make_pair (true, newPos);
488 }
489 }
490}
491
492} // namespace Details
493} // namespace Tpetra
494
495#endif // TPETRA_DETAILS_MERGE_HPP
Implementation details of Tpetra.
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row's column indices and corresponding...
Namespace Tpetra contains the class and methods constituting the Tpetra library.