30 #ifndef _PAR_FRIENDS_EXT_H_ 31 #define _PAR_FRIENDS_EXT_H_ 42 template <
class IT,
class NT,
class DER>
57 template <
typename SR,
typename IU,
typename NU1,
typename NU2,
typename UDERA,
typename UDERB>
58 SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,
typename promote_trait<UDERA,UDERB>::T_promote>
Mult_AnXBn_ActiveTarget 67 std::cout<<
"Can not multiply, dimensions does not match"<<std::endl;
71 int stages, Aoffset, Boffset;
72 std::shared_ptr<CommGrid> GridC =
ProductGrid((A.commGrid).get(), (B.commGrid).
get(), stages, Aoffset, Boffset);
74 IU C_m = A.spSeq->getnrow();
75 IU C_n = B.spSeq->getncol();
78 (A.spSeq)->Split( A1seq, A2seq);
81 const_cast< UDERB*
>(B.spSeq)->Transpose();
84 (B.spSeq)->Split( B1seq, B2seq);
87 std::vector<MPI_Win> rowwins1, rowwins2, colwins1, colwins2;
100 IU ** ARecvSizes1 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
101 IU ** ARecvSizes2 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
102 IU ** BRecvSizes1 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
103 IU ** BRecvSizes2 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
111 UDERA * ARecv1, * ARecv2;
112 UDERB * BRecv1, * BRecv2;
113 std::vector< SpTuples<IU,N_promote> *> tomerge;
115 MPI_Group row_group, col_group;
116 MPI_Comm_group((A.commGrid)->GetRowWorld(), &row_group);
117 MPI_Comm_group((B.commGrid)->GetColWorld(), &col_group);
119 int Aself = (A.commGrid)->GetRankInProcRow();
120 int Bself = (B.commGrid)->GetRankInProcCol();
123 MPI_Barrier(GridC->GetWorld());
126 GridC->OpenDebugFile(
"deb", oput);
127 oput <<
"A1seq: " << A1seq.getnrow() <<
" " << A1seq.getncol() <<
" " << A1seq.getnnz() << std::endl;
128 oput <<
"A2seq: " << A2seq.getnrow() <<
" " << A2seq.getncol() <<
" " << A2seq.getnnz() << std::endl;
129 oput <<
"B1seq: " << B1seq.getnrow() <<
" " << B1seq.getncol() <<
" " << B1seq.getnnz() << std::endl;
130 oput <<
"B2seq: " << B2seq.getnrow() <<
" " << B2seq.getncol() <<
" " << B2seq.getnnz() << std::endl;
132 MPI_Barrier(GridC->GetWorld());
140 MPI_Barrier(GridC->GetWorld());
142 MPI_Barrier(GridC->GetWorld());
144 int Aowner = (0+Aoffset) % stages;
145 int Bowner = (0+Boffset) % stages;
149 for(
int j=0; j< rowwins1.size(); ++j)
150 rowwins1[j].Complete();
155 for(
int j=0; j< colwins1.size(); ++j)
156 colwins1[j].Complete();
158 for(
int i = 1; i < stages; ++i)
172 tomerge.push_back(C_cont);
176 MPI_Barrier(GridC->GetWorld());
179 bool remoteA =
false;
180 bool remoteB =
false;
183 for(
int j=0; j< rowwins2.size(); ++j)
184 rowwins2[j].Complete();
188 for(
int j=0; j< colwins2.size(); ++j)
189 colwins2[j].Complete();
193 MPI_Barrier(GridC->GetWorld());
196 Aowner = (i+Aoffset) % stages;
197 Bowner = (i+Boffset) % stages;
205 MPI_Barrier(GridC->GetWorld());
209 C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2,
false,
true);
210 if(!C_cont->isZero())
211 tomerge.push_back(C_cont);
215 MPI_Barrier(GridC->GetWorld());
221 for(
int j=0; j< rowwins1.size(); ++j)
222 rowwins1[j].Complete();
223 for(
int j=0; j< colwins1.size(); ++j)
224 colwins1[j].Complete();
228 MPI_Barrier(GridC->GetWorld());
237 tomerge.push_back(C_cont);
240 for(
int j=0; j< rowwins2.size(); ++j)
241 rowwins2[j].Complete();
243 for(
int j=0; j< colwins2.size(); ++j)
244 colwins2[j].Complete();
246 C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2,
false,
true);
248 tomerge.push_back(C_cont);
258 DER_promote *
C =
new DER_promote(MergeAll<SR>(tomerge, C_m, C_n),
false, NULL);
259 for(
int i=0; i<tomerge.size(); ++i)
270 (A.spSeq)->Merge(A1seq, A2seq);
271 (B.spSeq)->Merge(B1seq, B2seq);
273 MPI_Group_free(&row_group);
274 MPI_Group_free(&col_group);
275 const_cast< UDERB*
>(B.spSeq)->Transpose();
// Second distributed-multiply routine in this file.
// NOTE(review): presumably Mult_AnXBn_PassiveTarget -- the line carrying the function name
// and parameter list is not visible in this listing, nor are braces and several statements
// between the numbered fragments.
// Visible flow: A.spSeq and B.spSeq are each Split() into two halves (B is transposed
// first), partial products from MultiplyReturnTuples are collected in `tomerge`, combined by
// MergeAll, and finally the halves are Merge()d back and B is transposed again.
290 template <
typename SR,
typename IU,
typename NU1,
typename NU2,
typename UDERA,
typename UDERB>
300 std::cout<<
"Can not multiply, dimensions does not match"<<std::endl;
// stages, Aoffset and Boffset are passed to ProductGrid as output arguments.
304 int stages, Aoffset, Boffset;
305 std::shared_ptr<CommGrid> GridC =
ProductGrid((A.commGrid).get(), (B.commGrid).
get(), stages, Aoffset, Boffset);
// Dimensions later handed to MergeAll; C_n is read before B is transposed below.
307 IU C_m = A.spSeq->getnrow();
308 IU C_n = B.spSeq->getncol();
311 (A.spSeq)->Split( A1seq, A2seq);
// The cast removes constness from B.spSeq so that Transpose() can be called on it.
314 const_cast< UDERB*
>(B.spSeq)->Transpose();
317 (B.spSeq)->Split( B1seq, B2seq);
// Window handles; presumably one vector per half of A (row) and per half of B (col).
320 std::vector<MPI_Win> rowwins1, rowwins2, colwins1, colwins2;
// Size tables allocated as esscount x stages, one per half of each operand.
326 IU ** ARecvSizes1 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
327 IU ** ARecvSizes2 = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
328 IU ** BRecvSizes1 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
329 IU ** BRecvSizes2 = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
// Receive pointers are declared without initializers at this point.
337 UDERA * ARecv1, * ARecv2;
338 UDERB * BRecv1, * BRecv2;
// Partial products accumulated across stages; merged into the result at the end.
339 std::vector< SpTuples<IU,N_promote> *> tomerge;
// Groups of A's row communicator and B's column communicator; freed near the end.
341 MPI_Group row_group, col_group;
342 MPI_Comm_group((A.commGrid)->GetRowWorld(), &row_group);
343 MPI_Comm_group((B.commGrid)->GetColWorld(), &col_group);
345 int Aself = (A.commGrid)->GetRankInProcRow();
346 int Bself = (B.commGrid)->GetRankInProcCol();
// Owners for stage 0.
348 int Aowner = (0+Aoffset) % stages;
349 int Bowner = (0+Boffset) % stages;
// Remaining stages; stage 0 was set up above.
// NOTE(review): loop braces are not visible, so the extent of the loop body is unknown.
360 for(
int i = 1; i < stages; ++i)
365 tomerge.push_back(C_cont);
367 bool remoteA =
false;
368 bool remoteB =
false;
// Owners for stage i.
376 Aowner = (i+Aoffset) % stages;
377 Bowner = (i+Boffset) % stages;
// Product of the second halves, queued for the final merge.
384 C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2,
false,
true);
386 tomerge.push_back(C_cont);
402 tomerge.push_back(C_cont);
410 C_cont = MultiplyReturnTuples<SR>(*ARecv2, *BRecv2,
false,
true);
412 tomerge.push_back(C_cont);
// Build the local piece of the result from all partial products.
422 DER_promote *
C =
new DER_promote(MergeAll<SR>(tomerge, C_m, C_n),
false, NULL);
// Walks every collected partial product (loop body not visible in this listing).
423 for(
int i=0; i<tomerge.size(); ++i)
// Reassemble the operands from their halves.
433 (A.spSeq)->Merge(A1seq, A2seq);
434 (B.spSeq)->Merge(B1seq, B2seq);
436 MPI_Group_free(&row_group);
437 MPI_Group_free(&col_group);
// Second transpose of B; presumably restores the orientation changed at line 314.
438 const_cast< UDERB*
>(B.spSeq)->Transpose();
// Third distributed-multiply routine in this file; it synchronizes with MPI_Win_fence.
// NOTE(review): presumably Mult_AnXBn_Fence -- the line carrying the function name and
// parameter list is not visible in this listing, nor are braces, branch headers and several
// statements between the numbered fragments.
// Visible flow: B is transposed, two sets of windows (current / next) are fenced in turn
// while the piece for the following stage is fetched, per-stage products are collected in
// `tomerge`, received pieces that are not the locally owned one are deleted, the windows are
// freed, the result is built with MergeAll, and B is transposed again.
447 template <
typename SR,
typename IU,
typename NU1,
typename NU2,
typename UDERA,
typename UDERB>
456 std::cout<<
"Can not multiply, dimensions does not match"<<std::endl;
// stages, Aoffset and Boffset are passed to ProductGrid as output arguments.
461 int stages, Aoffset, Boffset;
462 std::shared_ptr<CommGrid> GridC =
ProductGrid((A.commGrid).get(), (B.commGrid).
get(), stages, Aoffset, Boffset);
465 GridC->OpenDebugFile(
"deb", oput);
// The cast removes constness from B.spSeq so that Transpose() can be called on it.
466 const_cast< UDERB*
>(B.spSeq)->Transpose();
// Two window sets per direction, used alternately ("current" and "next").
469 std::vector<MPI_Win> rowwindows, colwindows;
470 std::vector<MPI_Win> rowwinnext, colwinnext;
// Size tables allocated as esscount x stages; indexed below as [j][owner].
476 IU ** ARecvSizes = SpHelper::allocate2D<IU>(UDERA::esscount, stages);
477 IU ** BRecvSizes = SpHelper::allocate2D<IU>(UDERB::esscount, stages);
482 UDERA * ARecv, * ARecvNext;
483 UDERB * BRecv, * BRecvNext;
// Partial products accumulated across stages; merged into the result at the end.
484 std::vector< SpTuples<IU,N_promote> *> tomerge;
// Opening fences on all four window sets (MPI_MODE_NOPRECEDE assertion).
487 for(
int j=0; j< rowwindows.size(); ++j)
488 MPI_Win_fence(MPI_MODE_NOPRECEDE, rowwindows[j]);
489 for(
int j=0; j< colwindows.size(); ++j)
490 MPI_Win_fence(MPI_MODE_NOPRECEDE, colwindows[j]);
492 for(
int j=0; j< rowwinnext.size(); ++j)
493 MPI_Win_fence(MPI_MODE_NOPRECEDE, rowwinnext[j]);
494 for(
int j=0; j< colwinnext.size(); ++j)
495 MPI_Win_fence(MPI_MODE_NOPRECEDE, colwinnext[j]);
// Owners for stage 0. A's owner is compared with this process's rank in its processor
// row, B's owner with its rank in its processor column.
498 int Aownind = (0+Aoffset) % stages;
499 int Bownind = (0+Boffset) % stages;
500 if(Aownind == (A.commGrid)->GetRankInProcRow())
// Copy the owner's column of the size table into a flat vector.
506 std::vector<IU> ess1(UDERA::esscount);
507 for(
int j=0; j< UDERA::esscount; ++j)
509 ess1[j] = ARecvSizes[j][Aownind];
513 oput <<
"For A (out), Fetching " << (
void*)rowwindows[0] << std::endl;
516 if(Bownind == (B.commGrid)->GetRankInProcCol())
522 std::vector<IU> ess2(UDERB::esscount);
523 for(
int j=0; j< UDERB::esscount; ++j)
525 ess2[j] = BRecvSizes[j][Bownind];
529 oput <<
"For B (out), Fetching " << (
void*)colwindows[0] << std::endl;
// Remember which owners the currently held pieces came from.
533 int Aownprev = Aownind;
534 int Bownprev = Bownind;
// Remaining stages.
// NOTE(review): the visible statements suggest two alternating branches (fetch through the
// "next" windows while fencing the current ones, then the reverse); the branch headers
// themselves are not visible here.
536 for(
int i = 1; i < stages; ++i)
538 Aownind = (i+Aoffset) % stages;
539 Bownind = (i+Boffset) % stages;
543 if(Aownind == (A.commGrid)->GetRankInProcRow())
549 std::vector<IU> ess1(UDERA::esscount);
550 for(
int j=0; j< UDERA::esscount; ++j)
552 ess1[j] = ARecvSizes[j][Aownind];
554 ARecvNext =
new UDERA();
556 oput <<
"For A, Fetching " << (
void*) rowwinnext[0] << std::endl;
560 if(Bownind == (B.commGrid)->GetRankInProcCol())
566 std::vector<IU> ess2(UDERB::esscount);
567 for(
int j=0; j< UDERB::esscount; ++j)
569 ess2[j] = BRecvSizes[j][Bownind];
571 BRecvNext =
new UDERB();
573 oput <<
"For B, Fetching " << (
void*)colwinnext[0] << std::endl;
577 oput <<
"Fencing " << (
void*) rowwindows[0] << std::endl;
578 oput <<
"Fencing " << (
void*) colwindows[0] << std::endl;
580 for(
int j=0; j< rowwindows.size(); ++j)
581 MPI_Win_fence(MPI_MODE_NOSTORE, rowwindows[j]);
582 for(
int j=0; j< colwindows.size(); ++j)
583 MPI_Win_fence(MPI_MODE_NOSTORE, colwindows[j]);
587 tomerge.push_back(C_cont);
// Delete a piece only when its previous owner was a different process.
589 if(Aownprev != (A.commGrid)->GetRankInProcRow())
delete ARecv;
590 if(Bownprev != (B.commGrid)->GetRankInProcCol())
delete BRecv;
598 if(Aownind == (A.commGrid)->GetRankInProcRow())
604 std::vector<IU> ess1(UDERA::esscount);
605 for(
int j=0; j< UDERA::esscount; ++j)
607 ess1[j] = ARecvSizes[j][Aownind];
611 oput <<
"For A, Fetching " << (
void*) rowwindows[0] << std::endl;
615 if(Bownind == (B.commGrid)->GetRankInProcCol())
621 std::vector<IU> ess2(UDERB::esscount);
622 for(
int j=0; j< UDERB::esscount; ++j)
624 ess2[j] = BRecvSizes[j][Bownind];
628 oput <<
"For B, Fetching " << (
void*)colwindows[0] << std::endl;
// Log both window sets that are fenced next: the row one, then the column one. (The
// second message previously repeated rowwinnext[0].)
632 oput <<
"Fencing " << (
void*) rowwinnext[0] << std::endl;
633 oput <<
"Fencing " << (
void*) colwinnext[0] << std::endl;
635 for(
int j=0; j< rowwinnext.size(); ++j)
636 MPI_Win_fence(MPI_MODE_NOSTORE, rowwinnext[j]);
637 for(
int j=0; j< colwinnext.size(); ++j)
638 MPI_Win_fence(MPI_MODE_NOSTORE, colwinnext[j]);
642 tomerge.push_back(C_cont);
645 if(Aownprev != (A.commGrid)->GetRankInProcRow())
delete ARecvNext;
646 if(Bownprev != (B.commGrid)->GetRankInProcCol())
delete BRecvNext;
// Closing fences (MPI_MODE_NOSUCCEED assertion) on the current window set.
656 oput <<
"Fencing " << (
void*) rowwindows[0] << std::endl;
657 oput <<
"Fencing " << (
void*) colwindows[0] << std::endl;
659 for(
int j=0; j< rowwindows.size(); ++j)
660 MPI_Win_fence(MPI_MODE_NOSUCCEED, rowwindows[j]);
661 for(
int j=0; j< colwindows.size(); ++j)
662 MPI_Win_fence(MPI_MODE_NOSUCCEED, colwindows[j]);
666 tomerge.push_back(C_cont);
// B's owner index is a rank within the processor column, as in every other B-side test
// in this routine, so it is compared with GetRankInProcCol() (was GetRankInProcRow()).
668 if(Aownprev != (A.commGrid)->GetRankInProcRow())
delete ARecv;
669 if(Bownprev != (B.commGrid)->GetRankInProcCol())
delete BRecv;
// Closing fences (MPI_MODE_NOSUCCEED assertion) on the "next" window set.
673 oput <<
"Fencing " << (
void*) rowwinnext[0] << std::endl;
674 oput <<
"Fencing " << (
void*) colwinnext[0] << std::endl;
676 for(
int j=0; j< rowwinnext.size(); ++j)
677 MPI_Win_fence(MPI_MODE_NOSUCCEED, rowwinnext[j]);
678 for(
int j=0; j< colwinnext.size(); ++j)
679 MPI_Win_fence(MPI_MODE_NOSUCCEED, colwinnext[j]);
683 tomerge.push_back(C_cont);
// Same column-rank comparison for B as above (was GetRankInProcRow()).
685 if(Aownprev != (A.commGrid)->GetRankInProcRow())
delete ARecvNext;
686 if(Bownprev != (B.commGrid)->GetRankInProcCol())
delete BRecvNext;
// Free both window sets; each loop indexes the "next" vector with the bound of the
// current one, so the two vectors are assumed to have equal length.
688 for(
int i=0; i< rowwindows.size(); ++i)
690 MPI_Win_free(&rowwindows[i]);
691 MPI_Win_free(&rowwinnext[i]);
693 for(
int i=0; i< colwindows.size(); ++i)
695 MPI_Win_free(&colwindows[i]);
696 MPI_Win_free(&colwinnext[i]);
698 MPI_Barrier(GridC->GetWorld());
// Build the local piece of the result from all partial products.
// NOTE(review): C_n is read while B is still transposed (the second transpose is at
// line 710) -- confirm this is the intended dimension.
700 IU C_m = A.spSeq->getnrow();
701 IU C_n = B.spSeq->getncol();
702 DER_promote *
C =
new DER_promote(MergeAll<SR>(tomerge, C_m, C_n),
false, NULL);
// Walks every collected partial product (loop body not visible in this listing).
703 for(
int i=0; i<tomerge.size(); ++i)
// Second transpose of B; presumably restores the orientation changed at line 466.
710 const_cast< UDERB*
>(B.spSeq)->Transpose();
static void LockNFetch(DER *&Matrix, int owner, std::vector< MPI_Win > &arrwin, MPI_Group &group, IT **sizes)
static void AccessNFetch(DER *&Matrix, int owner, std::vector< MPI_Win > &arrwin, MPI_Group &group, IT **sizes)
static void GetSetSizes(const SpMat< IT, NT, DER > &Matrix, IT **&sizes, MPI_Comm &comm1d)
shared_ptr< CommGrid > ProductGrid(CommGrid *gridA, CommGrid *gridB, int &innerdim, int &Aoffset, int &Boffset)
static void WaitNFree(std::vector< MPI_Win > &arrwin)
static void FetchMatrix(SpMat< IT, NT, DER > &MRecv, const std::vector< IT > &essentials, std::vector< MPI_Win > &arrwin, int ownind)
SpParMat< IU, typename promote_trait< NU1, NU2 >::T_promote, typename promote_trait< UDERA, UDERB >::T_promote > Mult_AnXBn_ActiveTarget(const SpParMat< IU, NU1, UDERA > &A, const SpParMat< IU, NU2, UDERB > &B)
static void Print(const std::string &s)
static void UnlockWindows(int ownind, std::vector< MPI_Win > &arrwin)
static void PostExposureEpoch(int self, std::vector< MPI_Win > &arrwin, MPI_Group &group)
static void deallocate2D(T **array, I m)
SpParMat< IU, typename promote_trait< NU1, NU2 >::T_promote, typename promote_trait< UDERA, UDERB >::T_promote > Mult_AnXBn_Fence(const SpParMat< IU, NU1, UDERA > &A, const SpParMat< IU, NU2, UDERB > &B)
static void FreeWindows(std::vector< MPI_Win > &arrwin)
static void SetWindows(MPI_Comm &comm1d, const SpMat< IT, NT, DER > &Matrix, std::vector< MPI_Win > &arrwin)
SpParMat< IU, typename promote_trait< NU1, NU2 >::T_promote, typename promote_trait< UDERA, UDERB >::T_promote > Mult_AnXBn_PassiveTarget(const SpParMat< IU, NU1, UDERA > &A, const SpParMat< IU, NU2, UDERB > &B)