COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
Friends.h
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.6 -------------------------------------------------*/
4/* date: 6/15/2017 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc --------------------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30#ifndef _FRIENDS_H_
31#define _FRIENDS_H_
32
33#include <iostream>
34#include "SpMat.h" // Best to include the base class first
35#include "SpHelper.h"
36#include "StackEntry.h"
37#include "Isect.h"
38#include "Deleter.h"
39#include "SpImpl.h"
40#include "SpParHelper.h"
41#include "Compare.h"
42#include "CombBLAS.h"
43#include "PreAllocatedSPA.h"
44
45namespace combblas {
46
47template <class IU, class NU>
48class SpTuples;
49
50template <class IU, class NU>
51class SpDCCols;
52
53template <class IU, class NU>
54class Dcsc;
55
56/*************************************************************************************************/
57/**************************** SHARED ADDRESS SPACE FRIEND FUNCTIONS ******************************/
58/****************************** MULTITHREADED LOGIC ALSO GOES HERE *******************************/
59/*************************************************************************************************/
60
61
63template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
64void dcsc_gespmv (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
65{
66 if(A.nnz > 0)
67 {
68 for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
69 {
70 IU colid = A.dcsc->jc[j];
71 for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
72 {
73 IU rowid = A.dcsc->ir[i];
74 SR::axpy(A.dcsc->numx[i], x[colid], y[rowid]);
75 }
76 }
77 }
78}
79
81template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
83{
84 if(A.nnz > 0)
85 {
86 int nthreads=1;
87 #ifdef _OPENMP
88 #pragma omp parallel
89 {
90 nthreads = omp_get_num_threads();
91 }
92 #endif
93
94 IU nlocrows = A.getnrow();
95 LHS ** tomerge = SpHelper::allocate2D<LHS>(nthreads, nlocrows);
96 auto id = SR::id();
97
98 for(int i=0; i<nthreads; ++i)
99 {
100 std::fill_n(tomerge[i], nlocrows, id);
101 }
102
103 #pragma omp parallel for
104 for(IU j =0; j<A.dcsc->nzc; ++j) // for all nonzero columns
105 {
106 int curthread = 1;
107 #ifdef _OPENMP
108 curthread = omp_get_thread_num();
109 #endif
110
112
113 IU colid = A.dcsc->jc[j];
114 for(IU i = A.dcsc->cp[j]; i< A.dcsc->cp[j+1]; ++i)
115 {
116 IU rowid = A.dcsc->ir[i];
117 SR::axpy(A.dcsc->numx[i], x[colid], loc2merge[rowid]);
118 }
119 }
120
121 #pragma omp parallel for
122 for(IU j=0; j < nlocrows; ++j)
123 {
124 for(int i=0; i< nthreads; ++i)
125 {
126 y[j] = SR::add(y[j], tomerge[i][j]);
127 }
128 }
130 }
131}
132
133
134
135
139 template <typename SR, typename IU, typename NU, typename RHS, typename LHS>
140 void dcsc_gespmv_threaded (const SpDCCols<IU, NU> & A, const RHS * x, LHS * y)
141 {
142 if(A.nnz > 0)
143 {
144 int splits = A.getnsplit();
145 if(splits > 0)
146 {
147 IU nlocrows = A.getnrow();
148 IU perpiece = nlocrows / splits;
149 std::vector<int> disp(splits, 0);
150 for(int i=1; i<splits; ++i)
151 disp[i] = disp[i-1] + perpiece;
152#ifdef _OPENMP
153#pragma omp parallel for
154#endif
155 for(int s=0; s<splits; ++s)
156 {
157 Dcsc<IU, NU> * dcsc = A.GetInternal(s);
158 for(IU j =0; j<dcsc->nzc; ++j) // for all nonzero columns
159 {
160 IU colid = dcsc->jc[j];
161 for(IU i = dcsc->cp[j]; i< dcsc->cp[j+1]; ++i)
162 {
163 IU rowid = dcsc->ir[i] + disp[s];
164 SR::axpy(dcsc->numx[i], x[colid], y[rowid]);
165 }
166 }
167 }
168 }
169 else
170 {
172 }
173 }
174 }
175
176
181template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
183 int32_t * & sendindbuf, OVT * & sendnumbuf, int * & sdispls, int p_c, PreAllocatedSPA<OVT> & SPA)
184{
185 // FACTS: Split boundaries (for multithreaded execution) are independent of recipient boundaries
186 // Two splits might create output to the same recipient (needs to be merged)
187 // However, each split's output is distinct (no duplicate elimination is needed after merge)
188
189 sdispls = new int[p_c](); // initialize to zero (as all indy might be empty)
190 if(A.getnnz() > 0 && nnzx > 0)
191 {
192 int splits = A.getnsplit();
193 if(splits > 0)
194 {
195 int32_t nlocrows = static_cast<int32_t>(A.getnrow());
196 int32_t perpiece = nlocrows / splits;
197 std::vector< std::vector< int32_t > > indy(splits);
198 std::vector< std::vector< OVT > > numy(splits);
199
200 // Parallelize with OpenMP
201 #ifdef _OPENMP
202 #pragma omp parallel for // num_threads(6)
203 #endif
204 for(int i=0; i<splits; ++i)
205 {
206 if(SPA.initialized)
207 {
208 if(i != splits-1)
209 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
210 else
211 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece, SPA.V_localy[i], SPA.V_isthere[i], SPA.V_inds[i]);
212 }
213 else
214 {
215 if(i != splits-1)
216 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
217 else
218 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
219 }
220 }
221
222 std::vector<int> accum(splits+1, 0);
223 for(int i=0; i<splits; ++i)
224 accum[i+1] = accum[i] + indy[i].size();
225
226 sendindbuf = new int32_t[accum[splits]];
227 sendnumbuf = new OVT[accum[splits]];
228 int32_t perproc = nlocrows / p_c;
229 int32_t last_rec = p_c-1;
230
231 // keep recipients of last entries in each split (-1 for an empty split)
232 // so that we can delete indy[] and numy[] contents as soon as they are processed
233 std::vector<int32_t> end_recs(splits);
234 for(int i=0; i<splits; ++i)
235 {
236 if(indy[i].empty())
237 end_recs[i] = -1;
238 else
239 end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
240 }
241 #ifdef _OPENMP
242 #pragma omp parallel for // num_threads(6)
243 #endif
244 for(int i=0; i<splits; ++i)
245 {
246 if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
247 {
248 // FACT: Data is sorted, so if the recipient of begin is the same as the owner of end,
249 // then the whole data is sent to the same processor
250 int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
251
252 // We have to test the previous "split", to see if we are marking a "recipient head"
253 // set displacement markers for the completed (previous) buffers only
254 if(i != 0)
255 {
256 int k = i-1;
257 while (k >= 0 && end_recs[k] == -1) k--; // loop backwards until seeing an non-empty split
258 if(k >= 0) // we found a non-empty split
259 {
260 std::fill(sdispls+end_recs[k]+1, sdispls+beg_rec+1, accum[i]); // last entry to be set is sdispls[beg_rec]
261 }
262 // else fill sdispls[1...beg_rec] with zero (already done)
263 }
264 // else set sdispls[0] to zero (already done)
265 if(beg_rec == end_recs[i]) // fast case
266 {
267 std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), std::bind2nd(std::minus<int32_t>(), perproc*beg_rec));
268 std::copy(indy[i].begin(), indy[i].end(), sendindbuf+accum[i]);
269 std::copy(numy[i].begin(), numy[i].end(), sendnumbuf+accum[i]);
270 }
271 else // slow case
272 {
273 // FACT: No matter how many splits or threads, there will be only one "recipient head"
274 // Therefore there are no race conditions for marking send displacements (sdispls)
275 int end = indy[i].size();
276 for(int cur=0; cur< end; ++cur)
277 {
278 int32_t cur_rec = std::min( indy[i][cur] / perproc, last_rec);
279 while(beg_rec != cur_rec)
280 {
281 sdispls[++beg_rec] = accum[i] + cur; // first entry to be set is sdispls[beg_rec+1]
282 }
283 sendindbuf[ accum[i] + cur ] = indy[i][cur] - perproc*beg_rec; // convert to receiver's local index
284 sendnumbuf[ accum[i] + cur ] = numy[i][cur];
285 }
286 }
287 std::vector<int32_t>().swap(indy[i]);
288 std::vector<OVT>().swap(numy[i]);
289 bool lastnonzero = true; // am I the last nonzero split?
290 for(int k=i+1; k < splits; ++k)
291 {
292 if(end_recs[k] != -1)
293 lastnonzero = false;
294 }
295 if(lastnonzero)
296 std::fill(sdispls+end_recs[i]+1, sdispls+p_c, accum[i+1]);
297 } // end_if(!indy[i].empty)
298 } // end parallel for
299 return accum[splits];
300 }
301 else
302 {
303 std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
304 return 0;
305 }
306 }
307 else
308 {
311 return 0;
312 }
313}
314
315
322template <typename SR, typename IU, typename NUM, typename DER, typename IVT, typename OVT>
324 int32_t * sendindbuf, OVT * sendnumbuf, int * cnts, int * dspls, int p_c)
325{
326 if(A.getnnz() > 0 && nnzx > 0)
327 {
328 int splits = A.getnsplit();
329 if(splits > 0)
330 {
331 std::vector< std::vector<int32_t> > indy(splits);
332 std::vector< std::vector< OVT > > numy(splits);
333 int32_t nlocrows = static_cast<int32_t>(A.getnrow());
334 int32_t perpiece = nlocrows / splits;
335
336 #ifdef _OPENMP
337 #pragma omp parallel for
338 #endif
339 for(int i=0; i<splits; ++i)
340 {
341 if(i != splits-1)
342 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), perpiece, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
343 else
344 SpMXSpV_ForThreading<SR>(*(A.GetInternal(i)), nlocrows - perpiece*i, indx, numx, nnzx, indy[i], numy[i], i*perpiece);
345 }
346
347 int32_t perproc = nlocrows / p_c;
348 int32_t last_rec = p_c-1;
349
350 // keep recipients of last entries in each split (-1 for an empty split)
351 // so that we can delete indy[] and numy[] contents as soon as they are processed
352 std::vector<int32_t> end_recs(splits);
353 for(int i=0; i<splits; ++i)
354 {
355 if(indy[i].empty())
356 end_recs[i] = -1;
357 else
358 end_recs[i] = std::min(indy[i].back() / perproc, last_rec);
359 }
360
361 int ** loc_rec_cnts = new int *[splits];
362 #ifdef _OPENMP
363 #pragma omp parallel for
364 #endif
365 for(int i=0; i<splits; ++i)
366 {
367 loc_rec_cnts[i] = new int[p_c](); // thread-local recipient data
368 if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
369 {
370 int32_t cur_rec = std::min( indy[i].front() / perproc, last_rec);
371 int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
372 for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
373 {
374
375 if( ( (*it) >= lastdata ) && cur_rec != last_rec )
376 {
377 cur_rec = std::min( (*it) / perproc, last_rec);
378 lastdata = (cur_rec+1) * perproc;
379 }
380 ++loc_rec_cnts[i][cur_rec];
381 }
382 }
383 }
384 #ifdef _OPENMP
385 #pragma omp parallel for
386 #endif
387 for(int i=0; i<splits; ++i)
388 {
389 if(!indy[i].empty()) // guarantee that .begin() and .end() are not null
390 {
391 // FACT: Data is sorted, so if the recipient of begin is the same as the owner of end,
392 // then the whole data is sent to the same processor
393 int32_t beg_rec = std::min( indy[i].front() / perproc, last_rec);
394 int32_t alreadysent = 0; // already sent per recipient
395 for(int before = i-1; before >= 0; before--)
397
398 if(beg_rec == end_recs[i]) // fast case
399 {
400 std::transform(indy[i].begin(), indy[i].end(), indy[i].begin(), std::bind2nd(std::minus<int32_t>(), perproc*beg_rec));
401 std::copy(indy[i].begin(), indy[i].end(), sendindbuf + dspls[beg_rec] + alreadysent);
402 std::copy(numy[i].begin(), numy[i].end(), sendnumbuf + dspls[beg_rec] + alreadysent);
403 }
404 else // slow case
405 {
407 int32_t lastdata = (cur_rec+1) * perproc; // one past last entry that goes to this current recipient
408 for(typename std::vector<int32_t>::iterator it = indy[i].begin(); it != indy[i].end(); ++it)
409 {
410 if( ( (*it) >= lastdata ) && cur_rec != last_rec )
411 {
412 cur_rec = std::min( (*it) / perproc, last_rec);
413 lastdata = (cur_rec+1) * perproc;
414
415 // if this split switches to a new recipient after sending some data
416 // then it's sure that no data has been sent to that recipient yet
417 alreadysent = 0;
418 }
419 sendindbuf[ dspls[cur_rec] + alreadysent ] = (*it) - perproc*cur_rec; // convert to receiver's local index
420 sendnumbuf[ dspls[cur_rec] + (alreadysent++) ] = *(numy[i].begin() + (it-indy[i].begin()));
421 }
422 }
423 }
424 }
425 // Deallocated rec counts serially once all threads complete
426 for(int i=0; i< splits; ++i)
427 {
428 for(int j=0; j< p_c; ++j)
429 cnts[j] += loc_rec_cnts[i][j];
430 delete [] loc_rec_cnts[i];
431 }
432 delete [] loc_rec_cnts;
433 }
434 else
435 {
436 std::cout << "Something is wrong, splits should be nonzero for multithreaded execution" << std::endl;
437 }
438 }
439}
440
444template <typename SR, typename MIND, typename VIND, typename DER, typename NUM, typename IVT, typename OVT>
445void generic_gespmv (const SpMat<MIND,NUM,DER> & A, const VIND * indx, const IVT * numx, VIND nnzx, std::vector<VIND> & indy, std::vector<OVT> & numy, PreAllocatedSPA<OVT> & SPA)
446{
447 if(A.getnnz() > 0 && nnzx > 0)
448 {
449 if(A.getnsplit() > 0)
450 {
451 std::cout << "Call dcsc_gespmv_threaded instead" << std::endl;
452 }
453 else
454 {
455 SpMXSpV<SR>(*(A.GetInternal()), (VIND) A.getnrow(), indx, numx, nnzx, indy, numy, SPA);
456 }
457 }
458}
459
463template <typename SR, typename IU, typename DER, typename NUM, typename IVT, typename OVT>
464void generic_gespmv (const SpMat<IU,NUM,DER> & A, const int32_t * indx, const IVT * numx, int32_t nnzx,
465 int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c, bool indexisvalue)
466{
467 if(A.getnnz() > 0 && nnzx > 0)
468 {
469 if(A.getnsplit() > 0)
470 {
471 SpParHelper::Print("Call dcsc_gespmv_threaded instead\n");
472 }
473 else
474 {
475 SpMXSpV<SR>(*(A.GetInternal()), (int32_t) A.getnrow(), indx, numx, nnzx, indy, numy, cnts, dspls, p_c);
476 }
477 }
478}
479
480
481template<typename IU>
483{
484 if(A.m < numsplits)
485 {
486 std::cerr<< "Warning: Matrix is too small to be splitted for multithreading" << std::endl;
487 return;
488 }
489 A.splits = numsplits;
490 IU perpiece = A.m / A.splits;
491 std::vector<IU> prevcolids(A.splits, -1); // previous column id's are set to -1
492 std::vector<IU> nzcs(A.splits, 0);
493 std::vector<IU> nnzs(A.splits, 0);
494 std::vector < std::vector < std::pair<IU,IU> > > colrowpairs(A.splits);
495 if(A.nnz > 0 && A.dcsc != NULL)
496 {
497 for(IU i=0; i< A.dcsc->nzc; ++i)
498 {
499 for(IU j = A.dcsc->cp[i]; j< A.dcsc->cp[i+1]; ++j)
500 {
501 IU colid = A.dcsc->jc[i];
502 IU rowid = A.dcsc->ir[j];
503 IU owner = std::min(rowid / perpiece, static_cast<IU>(A.splits-1));
504 colrowpairs[owner].push_back(std::make_pair(colid, rowid - owner*perpiece));
505
506 if(prevcolids[owner] != colid)
507 {
508 prevcolids[owner] = colid;
509 ++nzcs[owner];
510 }
511 ++nnzs[owner];
512 }
513 }
514 }
515 delete A.dcsc; // claim memory
516 //copy(nzcs.begin(), nzcs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
517 //copy(nnzs.begin(), nnzs.end(), ostream_iterator<IU>(cout," " )); cout << endl;
518 A.dcscarr = new Dcsc<IU,bool>*[A.splits];
519
520 // To be parallelized with OpenMP
521 for(int i=0; i< A.splits; ++i)
522 {
523 sort(colrowpairs[i].begin(), colrowpairs[i].end()); // sort w.r.t. columns
524 if(nzcs[i]>0)
525 {
526 A.dcscarr[i] = new Dcsc<IU,bool>(nnzs[i],nzcs[i]);
527 std::fill(A.dcscarr[i]->numx, A.dcscarr[i]->numx+nnzs[i], static_cast<bool>(1));
528 IU curnzc = 0; // number of nonzero columns constructed so far
529 IU cindex = colrowpairs[i][0].first;
530 IU rindex = colrowpairs[i][0].second;
531
532 A.dcscarr[i]->ir[0] = rindex;
533 A.dcscarr[i]->jc[curnzc] = cindex;
534 A.dcscarr[i]->cp[curnzc++] = 0;
535
536 for(IU j=1; j<nnzs[i]; ++j)
537 {
538 cindex = colrowpairs[i][j].first;
539 rindex = colrowpairs[i][j].second;
540
541 A.dcscarr[i]->ir[j] = rindex;
542 if(cindex != A.dcscarr[i]->jc[curnzc-1])
543 {
544 A.dcscarr[i]->jc[curnzc] = cindex;
545 A.dcscarr[i]->cp[curnzc++] = j;
546 }
547 }
548 A.dcscarr[i]->cp[curnzc] = nnzs[i];
549 }
550 else
551 {
552 A.dcscarr[i] = new Dcsc<IU,bool>();
553 }
554 }
555}
556
557
564template<class SR, class NUO, class IU, class NU1, class NU2>
566 (const SpDCCols<IU, NU1> & A,
567 const SpDCCols<IU, NU2> & B,
568 bool clearA = false, bool clearB = false)
569{
570 IU mdim = A.m;
571 IU ndim = B.m; // B is already transposed
572
573 if(A.isZero() || B.isZero())
574 {
575 if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
576 if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
577 return new SpTuples< IU, NUO >(0, mdim, ndim); // just return an empty matrix
578 }
580 SpHelper::SpIntersect(*(A.dcsc), *(B.dcsc), cols, rows, isect1, isect2, itr1, itr2);
581
582 IU kisect = static_cast<IU>(itr1-isect1); // size of the intersection ((itr1-isect1) == (itr2-isect2))
583 if(kisect == 0)
584 {
585 if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
586 if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
588 return new SpTuples< IU, NUO >(0, mdim, ndim);
589 }
590
592
593 IU cnz = SpHelper::SpCartesian< SR > (*(A.dcsc), *(B.dcsc), kisect, isect1, isect2, multstack);
595
596 if(clearA) delete const_cast<SpDCCols<IU, NU1> *>(&A);
597 if(clearB) delete const_cast<SpDCCols<IU, NU2> *>(&B);
598 return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
599}
600
607template<class SR, class NUO, class IU, class NU1, class NU2>
609 (const SpDCCols<IU, NU1> & A,
610 const SpDCCols<IU, NU2> & B,
611 bool clearA = false, bool clearB = false)
612{
613 IU mdim = A.m;
614 IU ndim = B.n;
615 if(A.isZero() || B.isZero())
616 {
617 return new SpTuples<IU, NUO>(0, mdim, ndim);
618 }
620 IU cnz = SpHelper::SpColByCol< SR > (*(A.dcsc), *(B.dcsc), A.n, multstack);
621
622 if(clearA)
623 delete const_cast<SpDCCols<IU, NU1> *>(&A);
624 if(clearB)
625 delete const_cast<SpDCCols<IU, NU2> *>(&B);
626
627 return new SpTuples<IU, NUO> (cnz, mdim, ndim, multstack);
628}
629
630
631template<class SR, class NUO, class IU, class NU1, class NU2>
633 (const SpDCCols<IU, NU1> & A,
634 const SpDCCols<IU, NU2> & B,
635 bool clearA = false, bool clearB = false)
636{
637 IU mdim = A.n;
638 IU ndim = B.m;
639 std::cout << "Tuples_AtXBt function has not been implemented yet !" << std::endl;
640
641 return new SpTuples<IU, NUO> (0, mdim, ndim);
642}
643
644template<class SR, class NUO, class IU, class NU1, class NU2>
646 (const SpDCCols<IU, NU1> & A,
647 const SpDCCols<IU, NU2> & B,
648 bool clearA = false, bool clearB = false)
649{
650 IU mdim = A.n;
651 IU ndim = B.n;
652 std::cout << "Tuples_AtXBn function has not been implemented yet !" << std::endl;
653
654 return new SpTuples<IU, NUO> (0, mdim, ndim);
655}
656
657// Performs a balanced merge of the array of SpTuples
658// Assumes the input parameters are already column sorted
659template<class SR, class IU, class NU>
660SpTuples<IU,NU> MergeAll( const std::vector<SpTuples<IU,NU> *> & ArrSpTups, IU mstar = 0, IU nstar = 0, bool delarrs = false )
661{
662 int hsize = ArrSpTups.size();
663 if(hsize == 0)
664 {
665 return SpTuples<IU,NU>(0, mstar,nstar);
666 }
667 else
668 {
669 mstar = ArrSpTups[0]->m;
670 nstar = ArrSpTups[0]->n;
671 }
672 for(int i=1; i< hsize; ++i)
673 {
674 if((mstar != ArrSpTups[i]->m) || nstar != ArrSpTups[i]->n)
675 {
676 std::cerr << "Dimensions do not match on MergeAll()" << std::endl;
677 return SpTuples<IU,NU>(0,0,0);
678 }
679 }
680 if(hsize > 1)
681 {
683 std::tuple<IU, IU, int> * heap = new std::tuple<IU, IU, int> [hsize]; // (rowindex, colindex, source-id)
684 IU * curptr = new IU[hsize];
685 std::fill_n(curptr, hsize, static_cast<IU>(0));
686 IU estnnz = 0;
687
688 for(int i=0; i< hsize; ++i)
689 {
690 estnnz += ArrSpTups[i]->getnnz();
691 heap[i] = std::make_tuple(std::get<0>(ArrSpTups[i]->tuples[0]), std::get<1>(ArrSpTups[i]->tuples[0]), i);
692 }
693 std::make_heap(heap, heap+hsize, std::not2(heapcomp));
694
695 std::tuple<IU, IU, NU> * ntuples = new std::tuple<IU,IU,NU>[estnnz];
696 IU cnz = 0;
697
698 while(hsize > 0)
699 {
700 std::pop_heap(heap, heap + hsize, std::not2(heapcomp)); // result is stored in heap[hsize-1]
701 int source = std::get<2>(heap[hsize-1]);
702
703 if( (cnz != 0) &&
704 ((std::get<0>(ntuples[cnz-1]) == std::get<0>(heap[hsize-1])) && (std::get<1>(ntuples[cnz-1]) == std::get<1>(heap[hsize-1]))) )
705 {
706 std::get<2>(ntuples[cnz-1]) = SR::add(std::get<2>(ntuples[cnz-1]), ArrSpTups[source]->numvalue(curptr[source]++));
707 }
708 else
709 {
710 ntuples[cnz++] = ArrSpTups[source]->tuples[curptr[source]++];
711 }
712
713 if(curptr[source] != ArrSpTups[source]->getnnz()) // That array has not been depleted
714 {
715 heap[hsize-1] = std::make_tuple(std::get<0>(ArrSpTups[source]->tuples[curptr[source]]),
716 std::get<1>(ArrSpTups[source]->tuples[curptr[source]]), source);
717 std::push_heap(heap, heap+hsize, std::not2(heapcomp));
718 }
719 else
720 {
721 --hsize;
722 }
723 }
726
727 if(delarrs)
728 {
729 for(size_t i=0; i<ArrSpTups.size(); ++i)
730 delete ArrSpTups[i];
731 }
733 }
734 else
735 {
737 if(delarrs)
738 delete ArrSpTups[0];
739 return ret;
740 }
741}
742
743
747template <typename IU, typename NU1, typename NU2>
749{
751 IU estnzc, estnz;
752 estnzc = A.nzc;
753 estnz = A.nz;
754
756
757 IU curnzc = 0;
758 IU curnz = 0;
759 IU i = 0;
760 IU j = 0;
761 temp.cp[0] = 0;
762
763 while(i< A.nzc && B != NULL && j< B->nzc)
764 {
765 if(A.jc[i] > B->jc[j]) ++j;
766 else if(A.jc[i] < B->jc[j])
767 {
768 temp.jc[curnzc++] = A.jc[i++];
769 for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
770 {
771 temp.ir[curnz] = A.ir[k];
772 temp.numx[curnz++] = A.numx[k];
773 }
774 temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
775 }
776 else
777 {
778 IU ii = A.cp[i];
779 IU jj = B->cp[j];
780 IU prevnz = curnz;
781 while (ii < A.cp[i+1] && jj < B->cp[j+1])
782 {
783 if (A.ir[ii] > B->ir[jj]) ++jj;
784 else if (A.ir[ii] < B->ir[jj])
785 {
786 temp.ir[curnz] = A.ir[ii];
787 temp.numx[curnz++] = A.numx[ii++];
788 }
789 else // eliminate those existing nonzeros
790 {
791 ++ii;
792 ++jj;
793 }
794 }
795 while (ii < A.cp[i+1])
796 {
797 temp.ir[curnz] = A.ir[ii];
798 temp.numx[curnz++] = A.numx[ii++];
799 }
800
801 if(prevnz < curnz) // at least one nonzero exists in this column
802 {
803 temp.jc[curnzc++] = A.jc[i];
804 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
805 }
806 ++i;
807 ++j;
808 }
809 }
810 while(i< A.nzc)
811 {
812 temp.jc[curnzc++] = A.jc[i++];
813 for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
814 {
815 temp.ir[curnz] = A.ir[k];
816 temp.numx[curnz++] = A.numx[k];
817 }
818 temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
819 }
820
821 temp.Resize(curnzc, curnz);
822 return temp;
823}
824
825
833template <typename IU, typename NU1, typename NU2>
835{
837 IU estnzc, estnz;
838 if(exclude)
839 {
840 return combblas::SetDifference(A, B); // call set difference for this version
841 }
842 else // A = A .* B
843 {
844 estnzc = std::min(A.nzc, B->nzc);
845 estnz = std::min(A.nz, B->nz);
846
848
849 IU curnzc = 0;
850 IU curnz = 0;
851 IU i = 0;
852 IU j = 0;
853 temp.cp[0] = 0;
854
855 while(i< A.nzc && B != NULL && j<B->nzc)
856 {
857 if(A.jc[i] > B->jc[j]) ++j;
858 else if(A.jc[i] < B->jc[j]) ++i;
859 else
860 {
861 IU ii = A.cp[i];
862 IU jj = B->cp[j];
863 IU prevnz = curnz;
864 while (ii < A.cp[i+1] && jj < B->cp[j+1])
865 {
866 if (A.ir[ii] < B->ir[jj]) ++ii;
867 else if (A.ir[ii] > B->ir[jj]) ++jj;
868 else
869 {
870 temp.ir[curnz] = A.ir[ii];
871 temp.numx[curnz++] = A.numx[ii++] * B->numx[jj++];
872 }
873 }
874 if(prevnz < curnz) // at least one nonzero exists in this column
875 {
876 temp.jc[curnzc++] = A.jc[i];
877 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
878 }
879 ++i;
880 ++j;
881 }
882 }
883
884 temp.Resize(curnzc, curnz);
885 return temp;
886 }
887}
888
889template <typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
891{
892 //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
893 IU estnzc, estnz;
894 if(notB)
895 {
896 estnzc = A.nzc;
897 estnz = A.nz;
898 }
899 else
900 {
901 estnzc = std::min(A.nzc, B->nzc);
902 estnz = std::min(A.nz, B->nz);
903 }
904
906
907 IU curnzc = 0;
908 IU curnz = 0;
909 IU i = 0;
910 IU j = 0;
911 temp.cp[0] = 0;
912
913 if(!notB) // A = A .* B
914 {
915 while(i< A.nzc && B != NULL && j<B->nzc)
916 {
917 if(A.jc[i] > B->jc[j]) ++j;
918 else if(A.jc[i] < B->jc[j]) ++i;
919 else
920 {
921 IU ii = A.cp[i];
922 IU jj = B->cp[j];
923 IU prevnz = curnz;
924 while (ii < A.cp[i+1] && jj < B->cp[j+1])
925 {
926 if (A.ir[ii] < B->ir[jj]) ++ii;
927 else if (A.ir[ii] > B->ir[jj]) ++jj;
928 else
929 {
930 temp.ir[curnz] = A.ir[ii];
931 temp.numx[curnz++] = __binary_op(A.numx[ii++], B->numx[jj++]);
932 }
933 }
934 if(prevnz < curnz) // at least one nonzero exists in this column
935 {
936 temp.jc[curnzc++] = A.jc[i];
937 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
938 }
939 ++i;
940 ++j;
941 }
942 }
943 }
944 else // A = A .* not(B)
945 {
946 while(i< A.nzc && B != NULL && j< B->nzc)
947 {
948 if(A.jc[i] > B->jc[j]) ++j;
949 else if(A.jc[i] < B->jc[j])
950 {
951 temp.jc[curnzc++] = A.jc[i++];
952 for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
953 {
954 temp.ir[curnz] = A.ir[k];
955 temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
956 }
957 temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
958 }
959 else
960 {
961 IU ii = A.cp[i];
962 IU jj = B->cp[j];
963 IU prevnz = curnz;
964 while (ii < A.cp[i+1] && jj < B->cp[j+1])
965 {
966 if (A.ir[ii] > B->ir[jj]) ++jj;
967 else if (A.ir[ii] < B->ir[jj])
968 {
969 temp.ir[curnz] = A.ir[ii];
970 temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
971 }
972 else // eliminate those existing nonzeros
973 {
974 ++ii;
975 ++jj;
976 }
977 }
978 while (ii < A.cp[i+1])
979 {
980 temp.ir[curnz] = A.ir[ii];
981 temp.numx[curnz++] = __binary_op(A.numx[ii++], defaultBVal);
982 }
983
984 if(prevnz < curnz) // at least one nonzero exists in this column
985 {
986 temp.jc[curnzc++] = A.jc[i];
987 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
988 }
989 ++i;
990 ++j;
991 }
992 }
993 while(i< A.nzc)
994 {
995 temp.jc[curnzc++] = A.jc[i++];
996 for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
997 {
998 temp.ir[curnz] = A.ir[k];
999 temp.numx[curnz++] = __binary_op(A.numx[k], defaultBVal);
1000 }
1001 temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1002 }
1003 }
1004
1005 temp.Resize(curnzc, curnz);
1006 return temp;
1007}
1008
1009
1010template<typename IU, typename NU1, typename NU2>
1012{
1014 assert(A.m == B.m);
1015 assert(A.n == B.n);
1016
1018 if(A.nnz > 0 && B.nnz > 0)
1019 {
1020 tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), B.dcsc, exclude));
1021 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1022 }
1023 else if (A.nnz > 0 && exclude) // && B.nnz == 0
1024 {
1025 tdcsc = new Dcsc<IU, N_promote>(EWiseMult(*(A.dcsc), (const Dcsc<IU,NU2>*)NULL, exclude));
1026 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1027 }
1028 else
1029 {
1030 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1031 }
1032}
1033
1034
1035template<typename N_promote, typename IU, typename NU1, typename NU2, typename _BinaryOperation>
1037{
1038 //typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
1039 assert(A.m == B.m);
1040 assert(A.n == B.n);
1041
1043 if(A.nnz > 0 && B.nnz > 0)
1044 {
1046 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1047 }
1048 else if (A.nnz > 0 && notB) // && B.nnz == 0
1049 {
1051 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1052 }
1053 else
1054 {
1055 return SpDCCols<IU, N_promote> (A.m , A.n, tdcsc);
1056 }
1057}
1058
1068template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
1070{
1071 if (Ap == NULL && Bp == NULL)
1072 return Dcsc<IU,RETT>(0, 0);
1073
1074 if (Ap == NULL && Bp != NULL)
1075 {
1076 if (!allowANulls)
1077 return Dcsc<IU,RETT>(0, 0);
1078
1079 const Dcsc<IU,NU2> & B = *Bp;
1080 IU estnzc = B.nzc;
1081 IU estnz = B.nz;
1083
1084 IU curnzc = 0;
1085 IU curnz = 0;
1086 //IU i = 0;
1087 IU j = 0;
1088 temp.cp[0] = 0;
1089 while(j<B.nzc)
1090 {
1091 // Based on the if statement below which handles A null values.
1092 j++;
1093 IU prevnz = curnz;
1094 temp.jc[curnzc++] = B.jc[j-1];
1095 for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1096 {
1097 if (do_op(ANullVal, B.numx[k], true, false))
1098 {
1099 temp.ir[curnz] = B.ir[k];
1100 temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1101 }
1102 }
1103 //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1104 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1105 }
1106 temp.Resize(curnzc, curnz);
1107 return temp;
1108 }
1109
1110 if (Ap != NULL && Bp == NULL)
1111 {
1112 if (!allowBNulls)
1113 return Dcsc<IU,RETT>(0, 0);
1114
1115 const Dcsc<IU,NU1> & A = *Ap;
1116 IU estnzc = A.nzc;
1117 IU estnz = A.nz;
1119
1120 IU curnzc = 0;
1121 IU curnz = 0;
1122 IU i = 0;
1123 //IU j = 0;
1124 temp.cp[0] = 0;
1125 while(i< A.nzc)
1126 {
1127 i++;
1128 IU prevnz = curnz;
1129 temp.jc[curnzc++] = A.jc[i-1];
1130 for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1131 {
1132 if (do_op(A.numx[k], BNullVal, false, true))
1133 {
1134 temp.ir[curnz] = A.ir[k];
1135 temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1136 }
1137 }
1138 //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1139 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1140 }
1141 temp.Resize(curnzc, curnz);
1142 return temp;
1143 }
1144
1145 // both A and B are non-NULL at this point
1146 const Dcsc<IU,NU1> & A = *Ap;
1147 const Dcsc<IU,NU2> & B = *Bp;
1148
1149 IU estnzc = A.nzc + B.nzc;
1150 IU estnz = A.nz + B.nz;
1152
1153 IU curnzc = 0;
1154 IU curnz = 0;
1155 IU i = 0;
1156 IU j = 0;
1157 temp.cp[0] = 0;
1158 while(i< A.nzc && j<B.nzc)
1159 {
1160 if(A.jc[i] > B.jc[j])
1161 {
1162 j++;
1163 if (allowANulls)
1164 {
1165 IU prevnz = curnz;
1166 temp.jc[curnzc++] = B.jc[j-1];
1167 for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1168 {
1169 if (do_op(ANullVal, B.numx[k], true, false))
1170 {
1171 temp.ir[curnz] = B.ir[k];
1172 temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1173 }
1174 }
1175 //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1176 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1177 }
1178 }
1179 else if(A.jc[i] < B.jc[j])
1180 {
1181 i++;
1182 if (allowBNulls)
1183 {
1184 IU prevnz = curnz;
1185 temp.jc[curnzc++] = A.jc[i-1];
1186 for(IU k = A.cp[i-1]; k< A.cp[i]; k++)
1187 {
1188 if (do_op(A.numx[k], BNullVal, false, true))
1189 {
1190 temp.ir[curnz] = A.ir[k];
1191 temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1192 }
1193 }
1194 //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1195 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1196 }
1197 }
1198 else
1199 {
1200 temp.jc[curnzc++] = A.jc[i];
1201 IU ii = A.cp[i];
1202 IU jj = B.cp[j];
1203 IU prevnz = curnz;
1204 while (ii < A.cp[i+1] && jj < B.cp[j+1])
1205 {
1206 if (A.ir[ii] < B.ir[jj])
1207 {
1208 if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1209 {
1210 temp.ir[curnz] = A.ir[ii];
1211 temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1212 }
1213 else
1214 ii++;
1215 }
1216 else if (A.ir[ii] > B.ir[jj])
1217 {
1218 if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1219 {
1220 temp.ir[curnz] = B.ir[jj];
1221 temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1222 }
1223 else
1224 jj++;
1225 }
1226 else
1227 {
1228 if (allowIntersect && do_op(A.numx[ii], B.numx[jj], false, false))
1229 {
1230 temp.ir[curnz] = A.ir[ii];
1231 temp.numx[curnz++] = __binary_op(A.numx[ii++], B.numx[jj++], false, false); // might include zeros
1232 }
1233 else
1234 {
1235 ii++;
1236 jj++;
1237 }
1238 }
1239 }
1240 while (ii < A.cp[i+1])
1241 {
1242 if (allowBNulls && do_op(A.numx[ii], BNullVal, false, true))
1243 {
1244 temp.ir[curnz] = A.ir[ii];
1245 temp.numx[curnz++] = __binary_op(A.numx[ii++], BNullVal, false, true);
1246 }
1247 else
1248 ii++;
1249 }
1250 while (jj < B.cp[j+1])
1251 {
1252 if (allowANulls && do_op(ANullVal, B.numx[jj], true, false))
1253 {
1254 temp.ir[curnz] = B.ir[jj];
1255 temp.numx[curnz++] = __binary_op(ANullVal, B.numx[jj++], true, false);
1256 }
1257 else
1258 jj++;
1259 }
1260 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1261 ++i;
1262 ++j;
1263 }
1264 }
1265 while(allowBNulls && i< A.nzc) // remaining A elements after B ran out
1266 {
1267 IU prevnz = curnz;
1268 temp.jc[curnzc++] = A.jc[i++];
1269 for(IU k = A.cp[i-1]; k< A.cp[i]; ++k)
1270 {
1271 if (do_op(A.numx[k], BNullVal, false, true))
1272 {
1273 temp.ir[curnz] = A.ir[k];
1274 temp.numx[curnz++] = __binary_op(A.numx[k], BNullVal, false, true);
1275 }
1276 }
1277 //temp.cp[curnzc] = temp.cp[curnzc-1] + (A.cp[i] - A.cp[i-1]);
1278 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1279 }
1280 while(allowANulls && j < B.nzc) // remaining B elements after A ran out
1281 {
1282 IU prevnz = curnz;
1283 temp.jc[curnzc++] = B.jc[j++];
1284 for(IU k = B.cp[j-1]; k< B.cp[j]; ++k)
1285 {
1286 if (do_op(ANullVal, B.numx[k], true, false))
1287 {
1288 temp.ir[curnz] = B.ir[k];
1289 temp.numx[curnz++] = __binary_op(ANullVal, B.numx[k], true, false);
1290 }
1291 }
1292 //temp.cp[curnzc] = temp.cp[curnzc-1] + (B.cp[j] - B.cp[j-1]);
1293 temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
1294 }
1295 temp.Resize(curnzc, curnz);
1296 return temp;
1297}
1298
1299template <typename RETT, typename IU, typename NU1, typename NU2, typename _BinaryOperation, typename _BinaryPredicate>
1308
1309
1310}
1311
1312#endif
Definition test.cpp:53
static void SpIntersect(const Dcsc< IT, NT1 > &Adcsc, const Dcsc< IT, NT2 > &Bdcsc, Isect< IT > *&cols, Isect< IT > *&rows, Isect< IT > *&isect1, Isect< IT > *&isect2, Isect< IT > *&itr1, Isect< IT > *&itr2)
Definition SpHelper.h:346
static void deallocate2D(T **array, I m)
Definition SpHelper.h:249
static void ShrinkArray(NT *&array, IT newsize)
Definition SpHelper.h:274
static void Print(const std::string &s)
int size
Definition common.h:20
void generic_gespmv_threaded_setbuffers(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *sendindbuf, OVT *sendnumbuf, int *cnts, int *dspls, int p_c)
Definition Friends.h:323
void dcsc_gespmv_threaded(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
Definition Friends.h:140
SpTuples< IU, NUO > * Tuples_AtXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
Definition Friends.h:633
void BooleanRowSplit(SpDCCols< IU, bool > &A, int numsplits)
Definition Friends.h:482
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > SetDifference(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B)
Definition Friends.h:748
SpTuples< IU, NUO > * Tuples_AtXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
Definition Friends.h:646
SpTuples< IU, NUO > * Tuples_AnXBn(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
Definition Friends.h:609
int generic_gespmv_threaded(const SpMat< IU, NUM, DER > &A, const int32_t *indx, const IVT *numx, int32_t nnzx, int32_t *&sendindbuf, OVT *&sendnumbuf, int *&sdispls, int p_c, PreAllocatedSPA< OVT > &SPA)
Definition Friends.h:182
void DeleteAll(A arr1)
Definition Deleter.h:48
Dcsc< IU, N_promote > EWiseApply(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, _BinaryOperation __binary_op, bool notB, const NU2 &defaultBVal)
Definition Friends.h:890
void dcsc_gespmv_threaded_nosplit(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector (multithreaded version)
Definition Friends.h:82
SpTuples< IU, NUO > * Tuples_AnXBt(const SpDCCols< IU, NU1 > &A, const SpDCCols< IU, NU2 > &B, bool clearA=false, bool clearB=false)
Definition Friends.h:566
void generic_gespmv(const SpMat< MIND, NUM, DER > &A, const VIND *indx, const IVT *numx, VIND nnzx, std::vector< VIND > &indy, std::vector< OVT > &numy, PreAllocatedSPA< OVT > &SPA)
Definition Friends.h:445
SpTuples< IU, NU > MergeAll(const std::vector< SpTuples< IU, NU > * > &ArrSpTups, IU mstar=0, IU nstar=0, bool delarrs=false)
Definition Friends.h:660
void dcsc_gespmv(const SpDCCols< IU, NU > &A, const RHS *x, LHS *y)
SpMV with dense vector.
Definition Friends.h:64
Dcsc< IU, typename promote_trait< NU1, NU2 >::T_promote > EWiseMult(const Dcsc< IU, NU1 > &A, const Dcsc< IU, NU2 > *B, bool exclude)
Definition Friends.h:834
double A
signed int int32_t
Definition stdint.h:77