COMBINATORIAL_BLAS  1.6
CC.h
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #include <mpi.h>
31 
32 // These macros should be defined before stdint.h is included
33 #ifndef __STDC_CONSTANT_MACROS
34 #define __STDC_CONSTANT_MACROS
35 #endif
36 #ifndef __STDC_LIMIT_MACROS
37 #define __STDC_LIMIT_MACROS
38 #endif
39 #include <stdint.h>
40 
41 #include <sys/time.h>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <sstream>
46 #include <ctime>
47 #include <cmath>
48 #include "CombBLAS/CombBLAS.h"
49 
54 namespace combblas {
55 
56 template <typename T1, typename T2>
57 struct Select2ndMinSR
58 {
60  static T_promote id(){ return std::numeric_limits<T_promote>::max(); };
61  static bool returnedSAID() { return false; }
62  static MPI_Op mpi_op() { return MPI_MIN; };
63 
64  static T_promote add(const T_promote & arg1, const T_promote & arg2)
65  {
66  return std::min(arg1, arg2);
67  }
68 
69  static T_promote multiply(const T1 & arg1, const T2 & arg2)
70  {
71  return static_cast<T_promote> (arg2);
72  }
73 
74  static void axpy(const T1 a, const T2 & x, T_promote & y)
75  {
76  y = add(y, multiply(a, x));
77  }
78 };
79 
80 
81 
82 template <typename IT, typename NT, typename DER>
84 {
85  // FullyDistVec doesn't support "bool" due to crippled vector<bool> semantics
86  //TODO: change the value of star entries to grandfathers so it will be IT as well.
87 
88  FullyDistVec<IT,short> star(A.getcommgrid(), A.getnrow(), 1); // all initialized to true
89  FullyDistVec<IT, IT> grandfather = father(father); // find grandparents
90 
91 
92 
93  grandfather.EWiseOut(father,
94  [](IT pv, IT gpv) -> short { return static_cast<short>(pv == gpv); },
95  star); // remove some stars
96 
97  // nostars
98  FullyDistSpVec<IT,short> nonstar = star.Find([](short isStar){return isStar==0;});
99  // grandfathers of nonstars
100  FullyDistSpVec<IT, IT> nonstarGF = EWiseApply<IT>(nonstar, grandfather,
101  [](short isStar, IT gf){return gf;},
102  [](short isStar, IT gf){return true;},
103  false, static_cast<short>(0));
104  // grandfather pointing to a grandchild
105  FullyDistSpVec<IT, IT> gfNonstar = nonstarGF.Invert(nonstarGF.TotalLength()); // for duplicates, keep the first one
106 
107 
108 
109 
110  // star(GF) = 0
111  star.EWiseApply(gfNonstar, [](short isStar, IT x){return static_cast<short>(0);},
112  false, static_cast<IT>(0));
113 
114  // at this point vertices at level 1 (children of the root) can still be stars
115  FullyDistVec<IT,short> starFather = star(father);
116  star.EWiseApply(starFather, std::multiplies<short>());
117 
118  /* alternative approach (used in the Matlab code)
119  // fathers of nonstars
120  FullyDistSpVec<IT, IT> nonstarF = EWiseApply<IT>(nonstar, father,[](short isStar, IT f){return f;}, [](short isStar, IT f){return true;},false, static_cast<short>(0));
121  // father pointing to a child
122  FullyDistSpVec<IT, IT> fNonstar = nonstarF.Invert(nonstarF.TotalLength());
123  // star(F) = 0
124  star.EWiseApply(fNonstar, [](short isStar, IT x){return static_cast<short>(0);},false, static_cast<IT>(0));
125  star = star(father);
126  */
127  return star;
128 }
129 
130 template <typename IT, typename NT, typename DER>
132 {
133  FullyDistVec<IT,short> stars = StarCheck(A, father);
134 
135  FullyDistVec<IT, IT> minNeighborFather ( A.getcommgrid());
136  minNeighborFather = SpMV<Select2ndMinSR<NT, IT>>(A, father); // value is the minimum of all neighbors' fathers
137  FullyDistSpVec<IT, std::pair<IT, IT>> hooks(A.getcommgrid(), A.getnrow());
138  // create entries belonging to stars
139  hooks = EWiseApply<std::pair<IT, IT>>(hooks, stars,
140  [](std::pair<IT, IT> x, short isStar){return std::make_pair(0,0);},
141  [](std::pair<IT, IT> x, short isStar){return isStar==1;},
142  true, {0,0});
143 
144 
145  // include father information
146 
147  hooks = EWiseApply<std::pair<IT, IT>>(hooks, father,
148  [](std::pair<IT, IT> x, IT f){return std::make_pair(f,0);},
149  [](std::pair<IT, IT> x, IT f){return true;},
150  false, {0,0});
151 
152  //keep entries with father>minNeighborFather and insert minNeighborFather information
153  hooks = EWiseApply<std::pair<IT, IT>>(hooks, minNeighborFather,
154  [](std::pair<IT, IT> x, IT mnf){return std::make_pair(std::get<0>(x), mnf);},
155  [](std::pair<IT, IT> x, IT mnf){return std::get<0>(x) > mnf;},
156  false, {0,0});
157  //Invert
158  FullyDistSpVec<IT, std::pair<IT, IT>> starhooks= hooks.Invert(hooks.TotalLength(),
159  [](std::pair<IT, IT> val, IT ind){return std::get<0>(val);},
160  [](std::pair<IT, IT> val, IT ind){return std::make_pair(ind, std::get<1>(val));},
161  [](std::pair<IT, IT> val1, std::pair<IT, IT> val2){return val2;} );
162  // allowing the last vertex to pick the parent of the root of a star gives the correct output!!
163  // [](pair<IT, IT> val1, pair<IT, IT> val2){return val1;} does not give the correct output. why?
164 
165 
166  // drop the index informaiton
167  FullyDistSpVec<IT, IT> finalhooks = EWiseApply<IT>(starhooks, father,
168  [](std::pair<IT, IT> x, IT f){return std::get<1>(x);},
169  [](std::pair<IT, IT> x, IT f){return true;},
170  false, {0,0});
171  father.Set(finalhooks);
172 }
173 
174 template <typename IT, typename NT, typename DER>
176 {
177  FullyDistVec<IT,short> stars = StarCheck(A, father);
178 
179  FullyDistVec<IT, IT> minNeighborFather ( A.getcommgrid());
180  minNeighborFather = SpMV<Select2ndMinSR<NT, IT>>(A, father); // value is the minimum of all neighbors' fathers
181 
182  FullyDistSpVec<IT, std::pair<IT, IT>> hooks(A.getcommgrid(), A.getnrow());
183  // create entries belonging to stars
184  hooks = EWiseApply<std::pair<IT, IT>>(hooks, stars,
185  [](std::pair<IT, IT> x, short isStar){return std::make_pair(0,0);},
186  [](std::pair<IT, IT> x, short isStar){return isStar==1;},
187  true, {0,0});
188 
189 
190  // include father information
191 
192  hooks = EWiseApply<std::pair<IT, IT>>(hooks, father,
193  [](std::pair<IT, IT> x, IT f){return std::make_pair(f,0);},
194  [](std::pair<IT, IT> x, IT f){return true;},
195  false, {0,0});
196 
197  //keep entries with father!minNeighborFather and insert minNeighborFather information
198  hooks = EWiseApply<std::pair<IT, IT>>(hooks, minNeighborFather,
199  [](std::pair<IT, IT> x, IT mnf){return std::make_pair(std::get<0>(x), mnf);},
200  [](std::pair<IT, IT> x, IT mnf){return std::get<0>(x) != mnf;},
201  false, {0,0});
202  //Invert
203  FullyDistSpVec<IT, std::pair<IT, IT>> starhooks= hooks.Invert(hooks.TotalLength(),
204  [](std::pair<IT, IT> val, IT ind){return std::get<0>(val);},
205  [](std::pair<IT, IT> val, IT ind){return std::make_pair(ind, std::get<1>(val));},
206  [](std::pair<IT, IT> val1, std::pair<IT, IT> val2){return val1;} );
207 
208 
209  // drop the index informaiton
210  FullyDistSpVec<IT, IT> finalhooks = EWiseApply<IT>(starhooks, father,
211  [](std::pair<IT, IT> x, IT f){return std::get<1>(x);},
212  [](std::pair<IT, IT> x, IT f){return true;},
213  false, {0,0});
214  father.Set(finalhooks);
215 }
216 
217 template <typename IT>
219 {
220  FullyDistVec<IT, IT> grandfather = father(father);
221  father = grandfather; // we can do it unconditionally because it is trivially true for stars
222 }
223 
224 template <typename IT, typename NT, typename DER>
226 {
227  FullyDistVec<IT, IT> minNeighborCCLabel ( A.getcommgrid());
228  minNeighborCCLabel = SpMV<Select2ndMinSR<NT, IT>>(A, cclabel);
229  return minNeighborCCLabel==cclabel;
230 }
231 
232 
233 // works only on P=1
234 template <typename IT, typename NT, typename DER>
236 {
237  DER* spSeq = A.seqptr(); // local submatrix
238 
239  for(auto colit = spSeq->begcol(); colit != spSeq->endcol(); ++colit) // iterate over columns
240  {
241  IT j = colit.colid(); // local numbering
242  for(auto nzit = spSeq->begnz(colit); nzit < spSeq->endnz(colit); ++nzit)
243  {
244  IT i = nzit.rowid();
245  if( cclabel[i] != cclabel[j])
246  {
247  std::cout << i << " (" << father[i] << ", "<< cclabel[i] << ") & "<< j << "("<< father[j] << ", " << cclabel[j] << ")\n";
248  }
249  }
250 
251  }
252 }
253 
254 // Input:
255 // father: father of each vertex. Father is essentilly the root of the star which a vertex belongs to.
256 // father of the root is itself
257 // Output:
258 // cclabel: connected components are incrementally labeled
259 // returns the number of connected components
260 // Example: input = [0, 0, 2, 3, 0, 2], output = (0, 0, 1, 2, 0, 1), return 3
261 template <typename IT>
263 {
264  cclabel = father;
265  cclabel.ApplyInd([](IT val, IT ind){return val==ind ? -1 : val;});
266  FullyDistSpVec<IT, IT> roots (cclabel, bind2nd(std::equal_to<IT>(), -1));
267  roots.nziota(0);
268  cclabel.Set(roots);
269  cclabel = cclabel(father);
270  return roots.getnnz();
271 }
272 
273 // Compute strongly connected components
274 // If you need weakly connected components, symmetricize the matrix beforehand
275 template <typename IT, typename NT, typename DER>
277 {
278  A.AddLoops(1); // needed for isolated vertices
279  FullyDistVec<IT,IT> father(A.getcommgrid());
280  father.iota(A.getnrow(), 0); // father(i)=i initially
281  IT nonstars = 1;
282  int iteration = 0;
283  std::ostringstream outs;
284  while (true)
285  {
286 #ifdef TIMING
287  double t1 = MPI_Wtime();
288 #endif
289  ConditionalHook(A, father);
290  UnconditionalHook(A, father);
291  Shortcut(father);
292  FullyDistVec<IT,short> stars = StarCheck(A, father);
293  nonstars = stars.Reduce(std::plus<IT>(), static_cast<IT>(0), [](short isStar){return static_cast<IT>(isStar==0);});
294  if(nonstars==0)
295  {
296  if(neigborsInSameCC(A, father)) break;
297 
298  }
299  iteration++;
300 #ifdef CC_TIMING
301  double t2 = MPI_Wtime();
302  outs.str("");
303  outs.clear();
304  outs << "Iteration: " << iteration << " Non stars: " << nonstars;
305  outs << " Time: " << t2 - t1;
306  outs<< endl;
307  SpParHelper::Print(outs.str());
308 #endif
309  }
310 
311  FullyDistVec<IT, IT> cc(father.getcommgrid());
312  nCC = LabelCC(father, cc);
313 
314  // TODO: Print to file
315  //PrintCC(cc, nCC);
316  //Correctness(A, cc, nCC, father);
317 
318  return cc;
319 }
320 
321 
322 template <typename IT>
324 {
325  for(IT i=0; i< nCC; i++)
326  {
327  FullyDistVec<IT, IT> ith = CC.FindInds(bind2nd(std::equal_to<IT>(), i));
328  ith.DebugPrint();
329  }
330 }
331 
332 // Print the size of the first 4 clusters
333 template <typename IT>
335 {
336  FullyDistSpVec<IT, IT> cc1 = cc.Find([](IT label){return label==0;});
337  FullyDistSpVec<IT, IT> cc2 = cc.Find([](IT label){return label==1;});
338  FullyDistSpVec<IT, IT> cc3 = cc.Find([](IT label){return label==2;});
339  FullyDistSpVec<IT, IT> cc4 = cc.Find([](IT label){return label==3;});
340 
341  std::ostringstream outs;
342  outs.str("");
343  outs.clear();
344  outs << "Size of the first component: " << cc1.getnnz() << std::endl;
345  outs << "Size of the second component: " << cc2.getnnz() << std::endl;
346  outs << "Size of the third component: " << cc3.getnnz() << std::endl;
347  outs << "Size of the fourth component: " << cc4.getnnz() << std::endl;
348 }
349 
350 
351 template <typename IT>
353 {
354  FullyDistVec<IT, IT> ccSizes(CC.getcommgrid(), nCC, 0);
355  for(IT i=0; i< nCC; i++)
356  {
357  FullyDistSpVec<IT, IT> ith = CC.Find(bind2nd(std::equal_to<IT>(), i));
358  ccSizes.SetElement(i, ith.getnnz());
359  }
360 
361  IT largestCCSise = ccSizes.Reduce(maximum<IT>(), static_cast<IT>(0));
362 
363 
364  const IT * locCCSizes = ccSizes.GetLocArr();
365  int numBins = 200;
366  std::vector<IT> localHist(numBins,0);
367  for(IT i=0; i< ccSizes.LocArrSize(); i++)
368  {
369  IT bin = (locCCSizes[i]*(numBins-1))/largestCCSise;
370  localHist[bin]++;
371  }
372 
373  std::vector<IT> globalHist(numBins,0);
374  MPI_Comm world = CC.getcommgrid()->GetWorld();
375  MPI_Reduce(localHist.data(), globalHist.data(), numBins, MPIType<IT>(), MPI_SUM, 0, world);
376 
377 
378  int myrank;
379  MPI_Comm_rank(world,&myrank);
380  if(myrank==0)
381  {
382  std::cout << "The largest component size: " << largestCCSise << std::endl;
383  std::ofstream output;
384  output.open("hist.txt", std::ios_base::app );
385  std::copy(globalHist.begin(), globalHist.end(), std::ostream_iterator<IT> (output, " "));
386  output << std::endl;
387  output.close();
388  }
389 
390 
391  //ccSizes.PrintToFile("histCC.txt");
392 }
393 
394 }
395 
void EWiseOut(const FullyDistVec< IT, NT > &rhs, _BinaryOperation __binary_op, FullyDistVec< IT, OUT > &result)
std::shared_ptr< CommGrid > getcommgrid() const
Definition: SpParMat.h:275
static MPI_Op mpi_op()
Definition: CC.h:62
Compute the maximum of two values.
Definition: Operations.h:154
static void axpy(const T1 a, const T2 &x, T_promote &y)
Definition: CC.h:74
IT LabelCC(FullyDistVec< IT, IT > &father, FullyDistVec< IT, IT > &cclabel)
Definition: CC.h:262
void Set(const FullyDistSpVec< IT, NT > &rhs)
FullyDistVec< IT, IT > FindInds(_Predicate pred) const
Return the indices where pred is true.
promote_trait< T1, T2 >::T_promote T_promote
Definition: CC.h:59
void nziota(NT first)
iota over existing nonzero entries
static bool returnedSAID()
Definition: CC.h:61
FullyDistSpVec< IT, NT > Find(_Predicate pred) const
Return the elements for which pred is true.
std::shared_ptr< CommGrid > getcommgrid() const
Definition: FullyDistVec.h:257
static T_promote add(const T_promote &arg1, const T_promote &arg2)
Definition: CC.h:64
static T_promote multiply(const T1 &arg1, const T2 &arg2)
Definition: CC.h:69
void ConditionalHook(const SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &father)
Definition: CC.h:131
FullyDistVec< IT, IT > CC(SpParMat< IT, NT, DER > &A, IT &nCC)
Definition: CC.h:276
void AddLoops(NT loopval, bool replaceExisting=false)
Definition: SpParMat.cpp:2714
FullyDistSpVec< IT, NT > Invert(IT globallen)
bool neigborsInSameCC(const SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &cclabel)
Definition: CC.h:225
void HistCC(FullyDistVec< IT, IT > CC, IT nCC)
Definition: CC.h:352
void Correctness(const SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &cclabel, IT nCC, FullyDistVec< IT, IT > father)
Definition: CC.h:235
double A
static void Print(const std::string &s)
void UnconditionalHook(const SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &father)
Definition: CC.h:175
void iota(IT globalsize, NT first)
static T_promote id()
Definition: CC.h:60
void PrintCC(FullyDistVec< IT, IT > CC, IT nCC)
Definition: CC.h:323
void First4Clust(FullyDistVec< IT, IT > &cc)
Definition: CC.h:334
void Shortcut(FullyDistVec< IT, IT > &father)
Definition: CC.h:218
static T2 add(const T2 &arg1, const T2 &arg2)
Definition: RestrictionOp.h:31
static T2 multiply(const T1 &arg1, const T2 &arg2)
Definition: RestrictionOp.h:36
void ApplyInd(_BinaryOperation __binary_op)
Definition: FullyDistVec.h:188
Definition: CCGrid.h:4
NT Reduce(_BinaryOperation __binary_op, NT identity) const
FullyDistVec< IT, short > StarCheck(const SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &father)
Definition: CC.h:83
IT getnrow() const
Definition: SpParMat.cpp:685