COMBINATORIAL_BLAS 1.6
 
Loading...
Searching...
No Matches
PreAllocatedSPA.h
Go to the documentation of this file.
1/****************************************************************/
2/* Parallel Combinatorial BLAS Library (for Graph Computations) */
3/* version 1.6 -------------------------------------------------*/
4/* date: 6/15/2017 ---------------------------------------------*/
5/* authors: Ariful Azad, Aydin Buluc --------------------------*/
6/****************************************************************/
7/*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30#ifndef _PRE_ALLOCATED_SPA_H
31#define _PRE_ALLOCATED_SPA_H
32#include "BitMap.h"
33
34namespace combblas {
35
41template <class OVT > // output value type
43{
44public:
45 PreAllocatedSPA():initialized(false) {}; // hide default constructor
46
47 template <class LMAT>
48 PreAllocatedSPA(LMAT & A):initialized(true) // the one and only constructor
49 {
50 int64_t mA = A.getnrow();
51 if( A.getnsplit() > 0) // multithreaded
52 {
53 int64_t perpiece = mA / A.getnsplit();
54 for(int i=0; i<A.getnsplit(); ++i)
55 {
56 if(i != A.getnsplit()-1)
57 {
58 V_isthere.push_back(BitMap(perpiece));
59 V_localy.push_back(std::vector<OVT>(perpiece));
60
61 std::vector<bool> isthere(perpiece, false);
62 for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
63 {
64 for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
65 {
66 size_t rowid = nzit.rowid();
67 if(!isthere[rowid]) isthere[rowid] = true;
68 }
69 }
70 size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
71 V_inds.push_back(std::vector<uint32_t>(maxvector));
72 }
73 else
74 {
75 V_isthere.push_back(BitMap(mA - i*perpiece));
76 V_localy.push_back(std::vector<OVT>(mA - i*perpiece));
77
78 std::vector<bool> isthere(mA - i*perpiece, false);
79 for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
80 {
81 for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
82 {
83 size_t rowid = nzit.rowid();
84 if(!isthere[rowid]) isthere[rowid] = true;
85 }
86 }
87 size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
88 V_inds.push_back(std::vector<uint32_t>(maxvector));
89 }
90 }
91 }
92 else // single threaded
93 {
94 V_isthere.push_back(BitMap(mA));
95 V_localy.push_back(std::vector<OVT>(mA));
96
97 std::vector<bool> isthere(mA, false);
98 for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
99 {
100 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
101 {
102 size_t rowid = nzit.rowid();
103 if(!isthere[rowid]) isthere[rowid] = true;
104 }
105 }
106 size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
107 V_inds.push_back(std::vector<uint32_t>(maxvector));
108
109 }
110 };
111
112 // for manual splitting. just a hack. need to be fixed
113
114 template <class LMAT>
116 {
117 buckets = splits;
118 int64_t mA = A.getnrow();
119 V_isthere.push_back(BitMap(mA));
120 V_localy.push_back(std::vector<OVT>(mA));
121 V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads
122
123
124
125
126
127 std::vector<int32_t> nnzSplitA(buckets,0);
128 int32_t rowPerSplit = mA / splits;
129
130
131 //per thread because writing vector<bool> is not thread safe
132 for(int i=0; i<splits-1; i++)
133 V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
134 V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));
135
136
137 //vector<bool> isthere(mA, false);
138 for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
139 {
140 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
141 {
142 size_t rowid = nzit.rowid();
143 //if(!isthere[rowid]) isthere[rowid] = true;
144 size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
146 }
147 }
148
149
150 // prefix sum
151 disp.resize(splits+1);
152 disp[0] = 0;
153 for(int i=0; i<splits; i++)
154 {
155 disp[i+1] = disp[i] + nnzSplitA[i];
156 }
157
158 indSplitA.resize(disp[splits]);
159 numSplitA.resize(disp[splits]);
160
161
162 };
163
164 // initialize an uninitialized SPA
165 template <class LMAT>
166 void Init(LMAT & A, int splits) // not done for DCSC matrices with A.getnsplit()
167 {
168 if(!initialized)
169 {
170 initialized = true;
171 buckets = splits;
172 int64_t mA = A.getnrow();
173 V_isthere.push_back(BitMap(mA));
174 V_localy.push_back(std::vector<OVT>(mA));
175 V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads
176
177 std::vector<int32_t> nnzSplitA(buckets,0);
178 int32_t rowPerSplit = mA / splits;
179
180 for(int i=0; i<splits-1; i++)
181 V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
182 V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));
183
184 //vector<bool> isthere(mA, false);
185 for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
186 {
187 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
188 {
189 size_t rowid = nzit.rowid();
190 //if(!isthere[rowid]) isthere[rowid] = true;
191 size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
193 }
194 }
195
196
197 // prefix sum
198 disp.resize(splits+1);
199 disp[0] = 0;
200 for(int i=0; i<splits; i++)
201 {
202 disp[i+1] = disp[i] + nnzSplitA[i];
203 }
204
205 indSplitA.resize(disp[splits]);
206 numSplitA.resize(disp[splits]);
207 }
208 };
209
210 int buckets; // number of buckets
211 std::vector< std::vector<uint32_t> > V_inds; // ABAB: is this big enough?
212 std::vector< BitMap > V_isthere;
213 std::vector< std::vector<bool> > V_isthereBool; // for thread safe access
214 std::vector< std::vector<OVT> > V_localy;
216 std::vector<int32_t> indSplitA;
217 std::vector<OVT> numSplitA;
218 std::vector<uint32_t> disp;
219};
220
221}
222
223#endif
224
std::vector< uint32_t > disp
std::vector< int32_t > indSplitA
PreAllocatedSPA(LMAT &A, int splits)
std::vector< OVT > numSplitA
std::vector< std::vector< uint32_t > > V_inds
std::vector< std::vector< bool > > V_isthereBool
std::vector< BitMap > V_isthere
std::vector< std::vector< OVT > > V_localy
void Init(LMAT &A, int splits)
double A
signed int int32_t
Definition stdint.h:77