COMBINATORIAL_BLAS  1.6
PreAllocatedSPA.h
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _PRE_ALLOCATED_SPA_H
31 #define _PRE_ALLOCATED_SPA_H
32 #include "BitMap.h"
33 
34 namespace combblas {
35 
41 template <class OVT > // output value type
43 {
44 public:
45  PreAllocatedSPA():initialized(false) {}; // hide default constructor
46 
47  template <class LMAT>
48  PreAllocatedSPA(LMAT & A):initialized(true) // the one and only constructor
49  {
50  int64_t mA = A.getnrow();
51  if( A.getnsplit() > 0) // multithreaded
52  {
53  int64_t perpiece = mA / A.getnsplit();
54  for(int i=0; i<A.getnsplit(); ++i)
55  {
56  if(i != A.getnsplit()-1)
57  {
58  V_isthere.push_back(BitMap(perpiece));
59  V_localy.push_back(std::vector<OVT>(perpiece));
60 
61  std::vector<bool> isthere(perpiece, false);
62  for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
63  {
64  for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
65  {
66  size_t rowid = nzit.rowid();
67  if(!isthere[rowid]) isthere[rowid] = true;
68  }
69  }
70  size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
71  V_inds.push_back(std::vector<uint32_t>(maxvector));
72  }
73  else
74  {
75  V_isthere.push_back(BitMap(mA - i*perpiece));
76  V_localy.push_back(std::vector<OVT>(mA - i*perpiece));
77 
78  std::vector<bool> isthere(mA - i*perpiece, false);
79  for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
80  {
81  for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
82  {
83  size_t rowid = nzit.rowid();
84  if(!isthere[rowid]) isthere[rowid] = true;
85  }
86  }
87  size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
88  V_inds.push_back(std::vector<uint32_t>(maxvector));
89  }
90  }
91  }
92  else // single threaded
93  {
94  V_isthere.push_back(BitMap(mA));
95  V_localy.push_back(std::vector<OVT>(mA));
96 
97  std::vector<bool> isthere(mA, false);
98  for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
99  {
100  for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
101  {
102  size_t rowid = nzit.rowid();
103  if(!isthere[rowid]) isthere[rowid] = true;
104  }
105  }
106  size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
107  V_inds.push_back(std::vector<uint32_t>(maxvector));
108 
109  }
110  };
111 
112  // for manual splitting. just a hack. need to be fixed
113 
114  template <class LMAT>
115  PreAllocatedSPA(LMAT & A, int splits):initialized(true)
116  {
117  buckets = splits;
118  int64_t mA = A.getnrow();
119  V_isthere.push_back(BitMap(mA));
120  V_localy.push_back(std::vector<OVT>(mA));
121  V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads
122 
123 
124 
125 
126 
127  std::vector<int32_t> nnzSplitA(buckets,0);
128  int32_t rowPerSplit = mA / splits;
129 
130 
131  //per thread because writing vector<bool> is not thread safe
132  for(int i=0; i<splits-1; i++)
133  V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
134  V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));
135 
136 
137  //vector<bool> isthere(mA, false);
138  for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
139  {
140  for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
141  {
142  size_t rowid = nzit.rowid();
143  //if(!isthere[rowid]) isthere[rowid] = true;
144  size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
145  nnzSplitA[splitId]++;
146  }
147  }
148 
149 
150  // prefix sum
151  disp.resize(splits+1);
152  disp[0] = 0;
153  for(int i=0; i<splits; i++)
154  {
155  disp[i+1] = disp[i] + nnzSplitA[i];
156  }
157 
158  indSplitA.resize(disp[splits]);
159  numSplitA.resize(disp[splits]);
160 
161 
162  };
163 
164  // initialize an uninitialized SPA
165  template <class LMAT>
166  void Init(LMAT & A, int splits) // not done for DCSC matrices with A.getnsplit()
167  {
168  if(!initialized)
169  {
170  initialized = true;
171  buckets = splits;
172  int64_t mA = A.getnrow();
173  V_isthere.push_back(BitMap(mA));
174  V_localy.push_back(std::vector<OVT>(mA));
175  V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads
176 
177  std::vector<int32_t> nnzSplitA(buckets,0);
178  int32_t rowPerSplit = mA / splits;
179 
180  for(int i=0; i<splits-1; i++)
181  V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
182  V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));
183 
184  //vector<bool> isthere(mA, false);
185  for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
186  {
187  for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
188  {
189  size_t rowid = nzit.rowid();
190  //if(!isthere[rowid]) isthere[rowid] = true;
191  size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
192  nnzSplitA[splitId]++;
193  }
194  }
195 
196 
197  // prefix sum
198  disp.resize(splits+1);
199  disp[0] = 0;
200  for(int i=0; i<splits; i++)
201  {
202  disp[i+1] = disp[i] + nnzSplitA[i];
203  }
204 
205  indSplitA.resize(disp[splits]);
206  numSplitA.resize(disp[splits]);
207  }
208  };
209 
210  int buckets; // number of buckets
211  std::vector< std::vector<uint32_t> > V_inds; // ABAB: is this big enough?
212  std::vector< BitMap > V_isthere;
213  std::vector< std::vector<bool> > V_isthereBool; // for thread safe access
214  std::vector< std::vector<OVT> > V_localy;
216  std::vector<int32_t> indSplitA;
217  std::vector<OVT> numSplitA;
218  std::vector<uint32_t> disp;
219 };
220 
221 }
222 
223 #endif
224 
std::vector< std::vector< bool > > V_isthereBool
std::vector< BitMap > V_isthere
std::vector< int32_t > indSplitA
std::vector< std::vector< OVT > > V_localy
double A
long int64_t
Definition: compat.h:21
std::vector< std::vector< uint32_t > > V_inds
void Init(LMAT &A, int splits)
std::vector< uint32_t > disp
std::vector< OVT > numSplitA
Definition: CCGrid.h:4
PreAllocatedSPA(LMAT &A, int splits)