#ifndef __STDC_CONSTANT_MACROS
#define __STDC_CONSTANT_MACROS
#endif
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif

// ... (includes and the HipMCLParam definition are elided in this excerpt)

void ShowParam(HipMCLParam & param)
{
    ostringstream runinfo;
    runinfo << "\n======================================" << endl;
    runinfo << "Running HipMCL with the parameters: " << endl;
    runinfo << "======================================" << endl;
    runinfo << "Input/Output file" << endl;
    runinfo << "    input filename: " << param.ifilename << endl;
    runinfo << "    input file type: ";
    if(param.isInputMM)
    {
        runinfo << " Matrix Market" << endl;
        runinfo << "    Base of the input matrix: " << param.base << endl;
    }
    else runinfo << " Labeled Triples format" << endl;
    runinfo << "    Output filename: " << param.ofilename << endl;

    runinfo << "Preprocessing" << endl;
    runinfo << "    Remove isolated vertices? : ";
    if(param.remove_isolated) runinfo << "yes" << endl;
    else runinfo << "no" << endl;

    runinfo << "    Randomly permute vertices? : ";
    if(param.randpermute) runinfo << "yes" << endl;
    else runinfo << "no" << endl;

    runinfo << "Inflation: " << param.inflation << endl;

    runinfo << "Pruning" << endl;
    runinfo << "    Prunelimit: " << param.prunelimit << endl;
    runinfo << "    Recover number: " << param.recover_num << endl;
    runinfo << "    Recover percent: " << ceil(param.recover_pct * 100) << endl;
    runinfo << "    Selection number: " << param.select << endl;

    runinfo << "HipMCL optimization" << endl;
    runinfo << "    Number of phases: " << param.phases << endl;
    runinfo << "    Memory available per process: ";
    if(param.perProcessMem > 0) runinfo << param.perProcessMem << " GB" << endl;
    else runinfo << "not provided" << endl;
    if(param.isDoublePrecision) runinfo << "Using double precision floating point" << endl;
    else runinfo << "Using single precision floating point" << endl;
    if(param.is64bInt) runinfo << "Using 64 bit indexing" << endl;
    else runinfo << "Using 32 bit indexing" << endl;

    runinfo << "Debugging" << endl;
    runinfo << "    Show matrices after major steps? : ";
    if(param.show) runinfo << "yes" << endl;
    else runinfo << "no" << endl;

    runinfo << "======================================" << endl;
    SpParHelper::Print(runinfo.str());
}
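
// Command-line parsing: flags that take a value (-M, -I, -p, -S, -R, -pct,
// -base, -rand, -phases, -per-process-mem) read argv[i + 1]; the long "--"
// switches are boolean toggles on HipMCLParam.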
void ProcessParam(int argc, char* argv[], HipMCLParam & param)
{
    for (int i = 1; i < argc; i++)
    {
        if (strcmp(argv[i], "-M") == 0) {
            param.ifilename = string(argv[i + 1]);
        }
        else if (strcmp(argv[i], "--matrix-market") == 0) {
            param.isInputMM = true;
        }
        else if (strcmp(argv[i], "-o") == 0) {
            param.ofilename = string(argv[i + 1]);
        }
        else if (strcmp(argv[i], "--show") == 0) {
            param.show = true;
        }
        else if (strcmp(argv[i], "--remove-isolated") == 0) {
            param.remove_isolated = true;
        }
        else if (strcmp(argv[i], "--tournament-select") == 0) {
            param.kselectVersion = 1;   // tournament-based k-select kernel
        }
        else if (strcmp(argv[i], "--quick-select") == 0) {
            param.kselectVersion = 2;   // quickselect-based k-select kernel
        }
        else if (strcmp(argv[i], "-I") == 0) {
            param.inflation = atof(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-p") == 0) {
            param.prunelimit = atof(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-S") == 0) {
            param.select = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-R") == 0) {
            param.recover_num = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-pct") == 0) {
            // stored as a fraction; ShowParam prints ceil(recover_pct * 100)
            param.recover_pct = atof(argv[i + 1]);
            if (param.recover_pct > 1) param.recover_pct /= 100.0;
        }
        else if (strcmp(argv[i], "-base") == 0) {
            param.base = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-rand") == 0) {
            param.randpermute = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-phases") == 0) {
            param.phases = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "-per-process-mem") == 0) {
            param.perProcessMem = atoi(argv[i + 1]);
        }
        else if (strcmp(argv[i], "--single-precision") == 0) {
            param.isDoublePrecision = false;
        }
        else if (strcmp(argv[i], "--32bit-index") == 0) {
            param.is64bInt = false;
        }
    }
}
void ShowOptions()
{
    ostringstream runinfo;

    runinfo << "Usage: ./hipmcl -M <input filename> -I <inflation> (required)" << endl;

    runinfo << "======================================" << endl;
    runinfo << "     Detailed parameter options       " << endl;
    runinfo << "======================================" << endl;

    runinfo << "Input/Output file" << endl;
    runinfo << " -M <input file name (labeled triples format)> (mandatory)" << endl;
    runinfo << " --matrix-market : if provided, the input file is in the matrix market format (default: the file is in labeled triples format)" << endl;
    runinfo << " -base <index of the first vertex in the matrix market file, 0|1> (default: 1)" << endl;
    runinfo << " -o <output filename> (default: input_file_name.hipmcl)" << endl;

    runinfo << "Inflation" << endl;
    runinfo << " -I <inflation> (mandatory)" << endl;

    runinfo << "Preprocessing" << endl;
    runinfo << " -rand <randomly permute vertices> (default: 0)" << endl;
    runinfo << " --remove-isolated : if provided, remove isolated vertices (default: don't remove isolated vertices)" << endl;

    runinfo << "Pruning" << endl;
    runinfo << " -p <cutoff> (default: 1/10000)" << endl;
    runinfo << " -R <recovery number> (default: 1400)" << endl;
    runinfo << " -pct <recovery pct> (default: 90)" << endl;
    runinfo << " -S <selection number> (default: 1100)" << endl;

    runinfo << "HipMCL optimization" << endl;
    runinfo << " -phases <number of phases> (default: 1)" << endl;
    runinfo << " -per-process-mem <memory (GB) available per process> (default: 0, number of phases is not estimated)" << endl;
    runinfo << " --single-precision (if not provided, use double precision floating point numbers)" << endl;
    runinfo << " --32bit-index (if not provided, use 64 bit indexing for vertex ids)" << endl;

    runinfo << "Debugging" << endl;
    runinfo << " --show : show information about matrices after major steps (default: do not show matrices)" << endl;

    runinfo << "======================================" << endl;
    runinfo << "           A few examples             " << endl;
    runinfo << "======================================" << endl;
    runinfo << "Example with a graph in labeled triples format on a laptop with 8GB memory and 8 cores:\nexport OMP_NUM_THREADS=8\nbin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 8" << endl;
    runinfo << "Same as above with 4 processes and 2 threads per process:\nexport OMP_NUM_THREADS=2\nmpirun -np 4 bin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 2" << endl;
    runinfo << "Example with a graph in matrix market format:\nbin/hipmcl -M data/sevenvertex.mtx --matrix-market -base 1 -I 2 -per-process-mem 8" << endl;
    runinfo << "Example on the NERSC/Edison system with 16 nodes and 24 threads per node:\nsrun -N 16 -n 16 -c 24 bin/hipmcl -M data/hep-th.mtx --matrix-market -base 1 -I 2 -per-process-mem 64 -o hep-th.hipmcl" << endl;
    SpParHelper::Print(runinfo.str());
}
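
// The templated helpers below implement the MCL kernels on a distributed
// sparse matrix: making columns stochastic, the "chaos" convergence metric,
// inflation (entrywise power followed by column renormalization), loop
// adjustment, and the preprocessing steps (isolated-vertex removal and
// random symmetric permutation).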
template <typename IT, typename NT, typename DER>
void MakeColStochastic(SpParMat<IT,NT,DER> & A)
{
    // ... (each column is scaled by the inverse of its column sum)
}

// ... (another templated helper is elided in this excerpt)

template <typename IT, typename NT, typename DER>
NT Chaos(SpParMat<IT,NT,DER> & A)
{
    // ...
    // weight each column's maximum by that column's nonzero count
    colmaxs.EWiseApply(nnzPerColumn, multiplies<NT>());
    // ...
}

template <typename IT, typename NT, typename DER>
void Inflate(SpParMat<IT,NT,DER> & A, double power)
{
    // ... (entries are raised to `power`, then the columns are renormalized)
}

template <typename IT, typename NT, typename DER>
void AdjustLoops(SpParMat<IT,NT,DER> & A)
{
    // ...
    // entries left at the sentinel numeric_limits<NT>::min() become unit loops
    A.Apply([](NT val){ return val == numeric_limits<NT>::min() ? 1.0 : val; });
    ostringstream outs;
    outs << "Adjusting loops" << endl;
    SpParHelper::Print(outs.str());
}

template <typename IT, typename NT, typename DER>
void RemoveIsolated(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
{
    // ... (nonisov holds the indices of the non-isolated vertices)
    IT numIsolated = A.getnrow() - nonisov.TotalLength();
    ostringstream outs;
    outs << "Number of isolated vertices: " << numIsolated << endl;
    SpParHelper::Print(outs.str());
    // induce the submatrix on the non-isolated vertices, in place
    A(nonisov, nonisov, true);
    SpParHelper::Print("Removed isolated vertices.\n");
}

template <typename IT, typename NT, typename DER>
void RandPermute(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
{
    // ... (a square matrix gets a random permutation applied to rows and columns)
    SpParHelper::Print("Applied symmetric permutation.\n");
    // ...
    else
        SpParHelper::Print("Rectangular matrix: Can not apply symmetric permutation.\n");
}

template <typename IT, typename NT, typename DER>
FullyDistVec<IT, IT> HipMCL(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
{
    // ... (loops are adjusted and the matrix is made column-stochastic)
    SpParHelper::Print("Made stochastic\n");
    double t1 = MPI_Wtime();
    // expansion, fused with pruning/selection/recovery so the full A^2 never
    // has to be held in memory at once; the work is split into param.phases phases
    A = MemEfficientSpGEMM<PTFF, NT, DER>(A, A, param.phases, param.prunelimit,
            (IT)param.select, (IT)param.recover_num, param.recover_pct,
            param.kselectVersion, param.perProcessMem);
    // ...
    tExpand += (MPI_Wtime() - t1);
    // ...
    SpParHelper::Print("After expansion\n");
    // ...
    double tInflate1 = MPI_Wtime();
    // ... (Inflate(A, param.inflation))
    tInflate += (MPI_Wtime() - tInflate1);
    // ...
    SpParHelper::Print("After inflation\n");
    // ...
    double newbalance = A.LoadImbalance();
    double t3 = MPI_Wtime();
    ostringstream s;
    s << "Iteration# " << setw(3) << it << " : "
      << " chaos: " << setprecision(3) << chaos
      << " load-balance: " << newbalance
      << " Time: " << (t3 - t1) << endl;
    SpParHelper::Print(s.str());
    // ...
    double tcc1 = MPI_Wtime();
    // ... (connected components of the converged matrix are found here)
    double tcc = MPI_Wtime() - tcc1;
    // ...
    int myrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    // ... (rank 0 prints the detailed timing breakdown)
    cout << "================detailed timing==================" << endl;
    // ...
    cout << "Inflation: " << tInflate << endl;
    cout << "Component: " << tcc << endl;
    cout << "File I/O: " << tIO << endl;
    cout << "=================================================" << endl;
    // ... (the cluster assignment vector is returned)
}
template <typename IT, typename NT, typename DER>
void Symmetricize(SpParMat<IT,NT,DER> & A)
{
    // ... (if A differs from its transpose, it is combined with A^T)
    SpParHelper::Print("Symmetricizing an unsymmetric input matrix.\n");
    // ...
}

template <typename GIT, typename LIT, typename NT>
void MainBody(HipMCLParam & param)
{
    // ...
    SpParHelper::Print("Reading input file......\n");
    // ...
    double tIO1 = MPI_Wtime();
    // ... (ParallelReadMM for matrix market input, ReadGeneralizedTuples otherwise)
    tIO = MPI_Wtime() - tIO1;
    ostringstream outs;
    outs << " : took " << tIO << " seconds" << endl;
    SpParHelper::Print(outs.str());
    // ...
    outs << "Number of vertices: " << nv << " number of edges: " << nnz << endl;
    outs << "Load balance: " << balance << endl;
    SpParHelper::Print(outs.str());
    // ...
    double tstart = MPI_Wtime();
    // ... (preprocessing, the HipMCL iteration, and cluster output happen here)
    double tend = MPI_Wtime();
    ostringstream s2;
    s2 << "Number of clusters: " << nclusters << endl;
    s2 << "Total time: " << (tend - tstart) << endl;
    s2 << "=================================================\n" << endl;
    SpParHelper::Print(s2.str());
}
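
// main() instantiates MainBody over the global index type (GIT), the local
// index type (LIT), and the floating-point value type (NT), as chosen by the
// --single-precision and --32bit-index flags.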
int main(int argc, char* argv[])
{
    int provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
    if (provided < MPI_THREAD_SERIALIZED)
    {
        printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
    }
    int nthreads = 1;
#ifdef THREADED
#pragma omp parallel
    {
        nthreads = omp_get_num_threads();
    }
#endif
    int nprocs, myrank;
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
    HipMCLParam param;
    InitParam(param);
    ProcessParam(argc, argv, param);
    // ... (when -M or -I is absent, the run aborts after printing:)
    SpParHelper::Print("Required options are missing.\n");
    // ... (rank 0 reports the process grid)
    cout << "\nProcess Grid used (pr x pc x threads): " << sqrt(nprocs)
         << " x " << sqrt(nprocs) << " x " << nthreads << endl;
    // ... (warn when -per-process-mem was not given)
    cout << "******** Number of phases will not be estimated as -per-process-mem option is not supplied. It is highly recommended that you provide -per-process-mem option for large-scale runs. ***********" << endl;
    // dispatch on value precision and index width
    if (param.isDoublePrecision)
    {
        if (param.is64bInt) MainBody<int64_t, int64_t, double>(param);
        else MainBody<int32_t, int32_t, double>(param);
    }
    else
    {
        if (param.is64bInt) MainBody<int64_t, int64_t, float>(param);
        else MainBody<int32_t, int32_t, float>(param);
    }
    MPI_Finalize();
    return 0;
}