1 #ifndef BP_MAXIMUM_MATCHING_H 2 #define BP_MAXIMUM_MATCHING_H 4 #include "../CombBLAS.h" 26 template <
class IT,
class DER>
30 IT procsPerRow = ri.commGrid->GetGridCols();
31 IT procsPerCol = ri.commGrid->GetGridRows();
34 IT global_nrow = ri.TotalLength();
35 IT global_ncol = ncol;
36 IT m_perprocrow = global_nrow / procsPerRow;
37 IT n_perproccol = global_ncol / procsPerCol;
44 std::vector< std::vector<IT> > rowid(procsPerRow);
45 std::vector< std::vector<IT> > colid(procsPerRow);
47 IT locvec = ri.arr.size();
48 IT roffset = ri.RowLenUntil();
49 for(
typename std::vector<IT>::size_type i=0; i< (unsigned)locvec; ++i)
51 if(ri.arr[i]>=0 && ri.arr[i]<ncol)
53 IT rowrec = (n_perproccol!=0) ? std::min(ri.arr[i] / n_perproccol, procsPerRow-1) : (procsPerRow-1);
55 rowid[rowrec].push_back( i + roffset);
56 colid[rowrec].push_back(ri.arr[i] - (rowrec * n_perproccol));
63 int * sendcnt =
new int[procsPerRow];
64 int * recvcnt =
new int[procsPerRow];
65 for(IT i=0; i<procsPerRow; ++i)
67 sendcnt[i] = rowid[i].size();
70 MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, ri.commGrid->GetRowWorld());
72 int * sdispls =
new int[procsPerRow]();
73 int * rdispls =
new int[procsPerRow]();
74 partial_sum(sendcnt, sendcnt+procsPerRow-1, sdispls+1);
75 partial_sum(recvcnt, recvcnt+procsPerRow-1, rdispls+1);
76 IT p_nnz = accumulate(recvcnt,recvcnt+procsPerRow, static_cast<IT>(0));
79 IT * p_rows =
new IT[p_nnz];
80 IT * p_cols =
new IT[p_nnz];
81 IT * senddata =
new IT[locvec];
82 for(
int i=0; i<procsPerRow; ++i)
84 copy(rowid[i].begin(), rowid[i].end(), senddata+sdispls[i]);
85 std::vector<IT>().swap(rowid[i]);
87 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_rows, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
89 for(
int i=0; i<procsPerRow; ++i)
91 copy(colid[i].begin(), colid[i].end(), senddata+sdispls[i]);
92 std::vector<IT>().swap(colid[i]);
94 MPI_Alltoallv(senddata, sendcnt, sdispls, MPIType<IT>(), p_cols, recvcnt, rdispls, MPIType<IT>(), ri.commGrid->GetRowWorld());
97 std::tuple<IT,IT,bool> * p_tuples =
new std::tuple<IT,IT,bool>[p_nnz];
98 for(IT i=0; i< p_nnz; ++i)
100 p_tuples[i] = make_tuple(p_rows[i], p_cols[i], 1);
106 IT local_nrow = ri.MyRowLength();
107 int my_proccol = ri.commGrid->GetRankInProcRow();
108 IT local_ncol = (my_proccol<(procsPerCol-1))? (n_perproccol) : (global_ncol - (n_perproccol*(procsPerCol-1)));
112 DER_IT * PSeq =
new DER_IT();
113 PSeq->Create( p_nnz, local_nrow, local_ncol, p_tuples);
129 template <
typename IT>
133 IT nrow = mateRow2Col.TotalLength();
134 IT ncol = mateCol2Row.TotalLength();
139 while(col.getnnz()!=0)
143 row = EWiseApply<IT>(row, parentsRow,
144 [](IT root, IT parent){
return parent;},
145 [](IT root, IT parent){
return true;},
148 col = row.Invert(ncol);
149 nextcol = EWiseApply<IT>(col, mateCol2Row,
150 [](IT child, IT mate){
return mate;},
151 [](IT child, IT mate){
return mate!=-1;},
153 mateRow2Col.
Set(row);
154 mateCol2Row.Set(col);
168 template <
typename IT>
171 MPI_Win win_mateRow2Col, win_mateCol2Row, win_parentsRow;
172 MPI_Win_create((IT*)mateRow2Col.
GetLocArr(), mateRow2Col.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateRow2Col.commGrid->GetWorld(), &win_mateRow2Col);
173 MPI_Win_create((IT*)mateCol2Row.
GetLocArr(), mateCol2Row.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, mateCol2Row.commGrid->GetWorld(), &win_mateCol2Row);
174 MPI_Win_create((IT*)parentsRow.
GetLocArr(), parentsRow.
LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, parentsRow.commGrid->GetWorld(), &win_parentsRow);
177 IT* leaves_ptr = (IT*) leaves.
GetLocArr();
182 IT row, col=100, nextrow;
183 int owner_row, owner_col;
184 IT locind_row, locind_col;
187 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
193 row = *(leaves_ptr+i);
197 owner_row = mateRow2Col.Owner(row, locind_row);
198 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_parentsRow);
199 MPI_Get(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_parentsRow);
200 MPI_Win_unlock(owner_row, win_parentsRow);
202 owner_col = mateCol2Row.Owner(col, locind_col);
203 MPI_Win_lock(MPI_LOCK_SHARED, owner_col, 0, win_mateCol2Row);
204 MPI_Fetch_and_op(&row, &nextrow, MPIType<IT>(), owner_col, locind_col, MPI_REPLACE, win_mateCol2Row);
205 MPI_Win_unlock(owner_col, win_mateCol2Row);
207 MPI_Win_lock(MPI_LOCK_SHARED, owner_row, 0, win_mateRow2Col);
208 MPI_Put(&col, 1, MPIType<IT>(), owner_row, locind_row, 1, MPIType<IT>(), win_mateRow2Col);
209 MPI_Win_unlock(owner_row, win_mateRow2Col);
219 MPI_Win_free(&win_mateRow2Col);
220 MPI_Win_free(&win_mateCol2Row);
221 MPI_Win_free(&win_parentsRow);
230 template <
typename IT,
typename NT,
typename DER>
241 nthreads = omp_get_num_threads();
246 double tstart = MPI_Wtime();
248 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
249 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
258 std::vector<std::vector<double> > timing;
259 std::vector<int> layers;
260 std::vector<int64_t> phaseMatched;
261 double t1, time_search, time_augment, time_phase;
270 MPI_Win_create((IT*)leaves.GetLocArr(), leaves.LocArrSize() *
sizeof(IT),
sizeof(IT), MPI_INFO_NULL, A.
getcommgrid()->GetWorld(), &winLeaves);
275 time_phase = MPI_Wtime();
277 std::vector<double> phase_timing(8,0);
278 leaves.
Apply ( [](IT val){
return (IT) -1;});
281 fringeCol = EWiseApply<VertexType>(fringeCol, mateCol2Row,
282 [](VertexType vtx, IT mate){
return vtx;},
283 [](VertexType vtx, IT mate){
return mate==-1;},
293 fringeCol.ApplyInd([](VertexType vtx, IT idx){
return VertexType(idx,idx);});
297 numUnmatchedCol = fringeCol.getnnz();
301 time_search = MPI_Wtime();
302 while(fringeCol.getnnz() > 0)
309 SpMV<WeightMaxMMSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
311 SpMV<Select2ndMinSR<NT, VertexType>>(
A, fringeCol, fringeRow,
false, SPA);
312 phase_timing[0] += MPI_Wtime()-t1;
319 fringeRow = EWiseApply<VertexType>(fringeRow, parentsRow,
320 [](VertexType vtx, IT parent){
return vtx;},
321 [](VertexType vtx, IT parent){
return parent==-1;},
322 false, VertexType());
325 parentsRow.EWiseApply(fringeRow,
326 [](IT dval, VertexType svtx){
return svtx.
parent;},
327 [](IT dval, VertexType svtx){
return true;},
328 false, VertexType());
331 umFringeRow = EWiseApply<IT>(fringeRow, mateRow2Col,
332 [](VertexType vtx, IT mate){
return vtx.root;},
333 [](VertexType vtx, IT mate){
return mate==-1;},
334 false, VertexType());
336 phase_timing[1] += MPI_Wtime()-t1;
339 IT nnz_umFringeRow = umFringeRow.getnnz();
342 if(nnz_umFringeRow >0)
357 temp1 = umFringeRow.
Invert(ncol);
362 phase_timing[2] += MPI_Wtime()-t1;
368 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
369 [](VertexType vtx, IT mate){
return VertexType(mate, vtx.root);},
370 [](VertexType vtx, IT mate){
return mate!=-1;},
371 false, VertexType());
374 if(nnz_umFringeRow>0 &&
prune)
376 fringeRow.FilterByVal (umFringeRow,[](VertexType vtx){
return vtx.root;},
false);
378 double tprune = MPI_Wtime()-t1;
379 phase_timing[3] += tprune;
384 fringeCol = fringeRow.Invert(ncol,
385 [](VertexType& vtx,
const IT & index){
return vtx.
parent;},
386 [](VertexType& vtx,
const IT & index){
return vtx;},
387 [](VertexType& vtx1, VertexType& vtx2){
return vtx1;});
388 phase_timing[4] += MPI_Wtime()-t1;
394 time_search = MPI_Wtime() - time_search;
395 phase_timing[5] += time_search;
397 IT numMatchedCol = leaves.
Count([](IT leaf){
return leaf!=-1;});
398 phaseMatched.push_back(numMatchedCol);
399 time_augment = MPI_Wtime();
400 if (numMatchedCol== 0) matched =
false;
404 if(numMatchedCol < (2* nprocs * nprocs))
405 AugmentPath(mateRow2Col, mateCol2Row,parentsRow, leaves);
407 AugmentLevel(mateRow2Col, mateCol2Row,parentsRow, leaves);
409 time_augment = MPI_Wtime() - time_augment;
410 phase_timing[6] += time_augment;
412 time_phase = MPI_Wtime() - time_phase;
413 phase_timing[7] += time_phase;
414 timing.push_back(phase_timing);
416 layers.push_back(layer);
421 MPI_Win_free(&winLeaves);
433 std::cout <<
"****** maximum matching runtime ********\n";
434 std::cout << std::endl;
435 std::cout <<
"========================================================================\n";
436 std::cout <<
" BFS Search \n";
437 std::cout <<
"===================== ==================================================\n";
438 std::cout <<
"Phase Layer Match SpMV EWOpp CmUqL Prun CmMC BFS Aug Total\n";
439 std::cout <<
"===================== ===================================================\n";
441 std::vector<double> totalTimes(timing[0].
size(),0);
442 int nphases = timing.size();
443 for(
int i=0; i<timing.size(); i++)
445 printf(
" %3d %3d %8lld ", i+1, layers[i], phaseMatched[i]);
446 for(
int j=0; j<timing[i].size(); j++)
448 totalTimes[j] += timing[i][j];
450 printf(
"%.2lf ", timing[i][j]);
456 std::cout <<
"-----------------------------------------------------------------------\n";
457 std::cout <<
"Phase Layer UnMat SpMV EWOpp CmUqL Prun CmMC BFS Aug Total \n";
458 std::cout <<
"-----------------------------------------------------------------------\n";
460 combTime = totalTimes.back();
461 printf(
" %3d %3d %8lld ", nphases, totalLayer/nphases, numUnmatchedCol);
462 for(
int j=0; j<totalTimes.size()-1; j++)
464 printf(
"%.2lf ", totalTimes[j]);
466 printf(
"%.2lf\n", combTime);
470 IT matchedRow = mateRow2Col.
Count([](IT mate){
return mate!=-1;});
473 std::cout <<
"***Final Maximum Matching***\n";
474 std::cout <<
"***Total-Rows Matched-Rows Total Time***\n";
475 printf(
"%lld %lld %lf \n",nrows, matchedRow, combTime);
476 printf(
"matched rows: %lld , which is: %lf percent \n",matchedRow, 100*(
double)matchedRow/(nrows));
477 std::cout <<
"-------------------------------------------------------\n\n";
std::shared_ptr< CommGrid > getcommgrid() const
void Set(const FullyDistSpVec< IT, NT > &rhs)
std::shared_ptr< CommGrid > getcommgrid() const
FullyDistSpVec< IT, NT > Invert(IT globallen)
IT Count(_Predicate pred) const
Return the number of elements for which pred is true.
void AugmentLevel(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
void AugmentPath(FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, FullyDistVec< IT, IT > &parentsRow, FullyDistVec< IT, IT > &leaves)
SpParMat< IT, bool, DER > PermMat(const FullyDistVec< IT, IT > &ri, const IT ncol)
void ApplyInd(_BinaryOperation __binary_op)
void Apply(_UnaryOperation __unary_op)
const NT * GetLocArr() const
IT Count(_Predicate pred) const
Return the number of elements for which pred is true.
void maximumMatching(SpParMat< IT, NT, DER > &A, FullyDistVec< IT, IT > &mateRow2Col, FullyDistVec< IT, IT > &mateCol2Row, bool prune=true, bool randMM=false, bool maximizeWeight=false)