COMBINATORIAL_BLAS  1.6
RefGen21.h
Go to the documentation of this file.
1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 
35 #ifndef _REF_GEN_2_1_H_
36 #define _REF_GEN_2_1_H_
37 
38 #ifdef _STDINT_H
39  #undef _STDINT_H
40 #endif
41 #ifdef _GCC_STDINT_H // for cray
42  #undef _GCC_STDINT_H // original stdint does #include_next<"/opt/gcc/4.5.2/snos/lib/gcc/x86_64-suse-linux/4.5.2/include/stdint-gcc.h">
43 #endif
44 
45 #ifndef __STDC_CONSTANT_MACROS
46 #define __STDC_CONSTANT_MACROS
47 #endif
48 #ifndef __STDC_LIMIT_MACROS
49 #define __STDC_LIMIT_MACROS
50 #endif
51 #include <stdint.h>
52 #include <inttypes.h>
53 #include <errno.h>
54 
55 #include <vector>
56 #include <limits>
57 #include "SpDefs.h"
58 #include "StackEntry.h"
59 #include "promote.h"
60 #include "Isect.h"
61 #include "HeapEntry.h"
62 #include "SpImpl.h"
65 
66 namespace combblas {
67 
68 
69 /* Initiator settings: for faster random number generation, the initiator
70  * probabilities are defined as fractions (a = INITIATOR_A_NUMERATOR /
71  * INITIATOR_DENOMINATOR, b = c = INITIATOR_BC_NUMERATOR /
72  * INITIATOR_DENOMINATOR, d = 1 - a - b - c. */
73 #define INITIATOR_A_NUMERATOR 5700
74 #define INITIATOR_BC_NUMERATOR 1900
75 #define INITIATOR_DENOMINATOR 10000
76 
77 /* If this macro is defined to a non-zero value, use SPK_NOISE_LEVEL /
78  * INITIATOR_DENOMINATOR as the noise parameter to use in introducing noise
79  * into the graph parameters. The approach used is from "A Hitchhiker's Guide
80  * to Choosing Parameters of Stochastic Kronecker Graphs" by C. Seshadhri, Ali
81  * Pinar, and Tamara G. Kolda (http://arxiv.org/abs/1102.5046v1), except that
82  * the adjustment here is chosen based on the current level being processed
83  * rather than being chosen randomly. */
84 #define SPK_NOISE_LEVEL 0
85 /* #define SPK_NOISE_LEVEL 1000 -- in INITIATOR_DENOMINATOR units */
86 
87 
88 class RefGen21
89 {
90 public:
91 
92  /* Spread the two 64-bit numbers into five nonzero values in the correct range (2 parameter version) */
93  static void make_mrg_seed_short(uint64_t userseed, uint_fast32_t* seed)
94  {
95  seed[0] = (userseed & 0x3FFFFFFF) + 1;
96  seed[1] = ((userseed >> 30) & 0x3FFFFFFF) + 1;
97  seed[2] = (userseed & 0x3FFFFFFF) + 1;
98  seed[3] = ((userseed >> 30) & 0x3FFFFFFF) + 1;
99  seed[4] = ((userseed >> 60) << 4) + (userseed >> 60) + 1;
100  }
101 
102  static int generate_4way_bernoulli(mrg_state* st, int level, int nlevels)
103  {
104  /* Generator a pseudorandom number in the range [0, INITIATOR_DENOMINATOR) without modulo bias. */
105  static const uint32_t limit = (UINT32_C(0xFFFFFFFF) % INITIATOR_DENOMINATOR);
106  uint32_t val = mrg_get_uint_orig(st);
107  if (/* Unlikely */ val < limit) {
108  do
109  {
110  val = mrg_get_uint_orig(st);
111  }
112  while (val < limit);
113  }
114  #if SPK_NOISE_LEVEL == 0
115  int spk_noise_factor = 0;
116  #else
117  int spk_noise_factor = 2 * SPK_NOISE_LEVEL * level / nlevels - SPK_NOISE_LEVEL;
118  #endif
119  int adjusted_bc_numerator = INITIATOR_BC_NUMERATOR + spk_noise_factor;
120  val %= INITIATOR_DENOMINATOR;
121  if ((signed)val < adjusted_bc_numerator) return 1;
122  val -= adjusted_bc_numerator;
123  if ((signed)val < adjusted_bc_numerator) return 2;
124  val -= adjusted_bc_numerator;
125  #if SPK_NOISE_LEVEL == 0
126  if (val < INITIATOR_A_NUMERATOR) return 0;
127  #else
128  if (val < INITIATOR_A_NUMERATOR * (INITIATOR_DENOMINATOR - 2 * INITIATOR_BC_NUMERATOR) / (INITIATOR_DENOMINATOR - 2 * adjusted_bc_numerator)) return 0;
129  #endif
130  return 3;
131  }
132 
133  /* Reverse bits in a number; this should be optimized for performance
134  * (including using bit- or byte-reverse intrinsics if your platform has them).
135  * */
136  static inline uint64_t bitreverse(uint64_t x)
137  {
138  #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
139  #define USE_GCC_BYTESWAP /* __builtin_bswap* are in 4.3 but not 4.2 */
140  #endif
141 
142  #ifdef FAST_64BIT_ARITHMETIC
143 
144  /* 64-bit code */
145  #ifdef USE_GCC_BYTESWAP
146  x = __builtin_bswap64(x);
147  #else
148  x = (x >> 32) | (x << 32);
149  x = ((x >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((x & UINT64_C(0x0000FFFF0000FFFF)) << 16);
150  x = ((x >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((x & UINT64_C(0x00FF00FF00FF00FF)) << 8);
151  #endif
152  x = ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((x & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4);
153  x = ((x >> 2) & UINT64_C(0x3333333333333333)) | ((x & UINT64_C(0x3333333333333333)) << 2);
154  x = ((x >> 1) & UINT64_C(0x5555555555555555)) | ((x & UINT64_C(0x5555555555555555)) << 1);
155  return x;
156 
157  #else
158 
159  /* 32-bit code */
160  uint32_t h = (uint32_t)(x >> 32);
161  uint32_t l = (uint32_t)(x & UINT32_MAX);
162  #ifdef USE_GCC_BYTESWAP
163  h = __builtin_bswap32(h);
164  l = __builtin_bswap32(l);
165  #else
166  h = (h >> 16) | (h << 16);
167  l = (l >> 16) | (l << 16);
168  h = ((h >> 8) & UINT32_C(0x00FF00FF)) | ((h & UINT32_C(0x00FF00FF)) << 8);
169  l = ((l >> 8) & UINT32_C(0x00FF00FF)) | ((l & UINT32_C(0x00FF00FF)) << 8);
170  #endif
171  h = ((h >> 4) & UINT32_C(0x0F0F0F0F)) | ((h & UINT32_C(0x0F0F0F0F)) << 4);
172  l = ((l >> 4) & UINT32_C(0x0F0F0F0F)) | ((l & UINT32_C(0x0F0F0F0F)) << 4);
173  h = ((h >> 2) & UINT32_C(0x33333333)) | ((h & UINT32_C(0x33333333)) << 2);
174  l = ((l >> 2) & UINT32_C(0x33333333)) | ((l & UINT32_C(0x33333333)) << 2);
175  h = ((h >> 1) & UINT32_C(0x55555555)) | ((h & UINT32_C(0x55555555)) << 1);
176  l = ((l >> 1) & UINT32_C(0x55555555)) | ((l & UINT32_C(0x55555555)) << 1);
177  return ((uint64_t)l << 32) | h; /* Swap halves */
178 
179  #endif
180  }
181 
182 
183  /* Apply a permutation to scramble vertex numbers; a randomly generated
184  * permutation is not used because applying it at scale is too expensive. */
185  static inline int64_t scramble(int64_t v0, int lgN, uint64_t val0, uint64_t val1)
186  {
187  uint64_t v = (uint64_t)v0;
188  v += val0 + val1;
189  v *= (val0 | UINT64_C(0x4519840211493211));
190  v = (RefGen21::bitreverse(v) >> (64 - lgN));
191  assert ((v >> lgN) == 0);
192  v *= (val1 | UINT64_C(0x3050852102C843A5));
193  v = (RefGen21::bitreverse(v) >> (64 - lgN));
194  assert ((v >> lgN) == 0);
195  return (int64_t)v;
196  }
197 
198  /* Make a single graph edge using a pre-set MRG state. */
199  static void make_one_edge(int64_t nverts, int level, int lgN, mrg_state* st, packed_edge* result, uint64_t val0, uint64_t val1)
200  {
201  int64_t base_src = 0, base_tgt = 0;
202  while (nverts > 1)
203  {
204  int square = generate_4way_bernoulli(st, level, lgN);
205  int src_offset = square / 2;
206  int tgt_offset = square % 2;
207  assert (base_src <= base_tgt);
208  if (base_src == base_tgt)
209  {
210  /* Clip-and-flip for undirected graph */
211  if (src_offset > tgt_offset) {
212  int temp = src_offset;
213  src_offset = tgt_offset;
214  tgt_offset = temp;
215  }
216  }
217  nverts /= 2;
218  ++level;
219  base_src += nverts * src_offset;
220  base_tgt += nverts * tgt_offset;
221  }
222  write_edge(result,
223  scramble(base_src, lgN, val0, val1),
224  scramble(base_tgt, lgN, val0, val1));
225  }
226 
227  static inline mrg_state MakeScrambleValues(uint64_t & val0, uint64_t & val1, const uint_fast32_t seed[])
228  {
229  mrg_state state;
230  mrg_seed(&state, seed);
231  mrg_state new_state = state;
232  mrg_skip(&new_state, 50, 7, 0);
233  val0 = mrg_get_uint_orig(&new_state);
234  val0 *= UINT64_C(0xFFFFFFFF);
235  val0 += mrg_get_uint_orig(&new_state);
236  val1 = mrg_get_uint_orig(&new_state);
237  val1 *= UINT64_C(0xFFFFFFFF);
238  val1 += mrg_get_uint_orig(&new_state);
239  return state;
240  }
241 
242  /* Generate a range of edges (from start_edge to end_edge of the total graph),
243  * writing into elements [0, end_edge - start_edge) of the edges array. This
244  * code is parallel on OpenMP, it must be used with separately-implemented SPMD parallelism for MPI.
245  */
246  static void generate_kronecker_range( const uint_fast32_t seed[5] /* All values in [0, 2^31 - 1), not all zero */,
247  int logN /* In base 2 */, int64_t start_edge, int64_t end_edge, packed_edge* edges)
248  {
249  int64_t nverts = (int64_t)1 << logN;
250  uint64_t val0, val1; /* Values for scrambling */
251  mrg_state state = MakeScrambleValues(val0, val1, seed);
252 
253  #ifdef _OPENMP
254  #pragma omp parallel for
255  #endif
256  for (int64_t ei = start_edge; ei < end_edge; ++ei)
257  {
258  mrg_state new_state = state;
259  mrg_skip(&new_state, 0, ei, 0);
260  make_one_edge(nverts, 0, logN, &new_state, edges + (ei - start_edge), val0, val1);
261  }
262  }
263  static inline void compute_edge_range(int rank, int size, int64_t M, int64_t* start_idx, int64_t* end_idx)
264  {
265  int64_t rankc = (int64_t)(rank);
266  int64_t sizec = (int64_t)(size);
267  *start_idx = rankc * (M / sizec) + (rankc < (M % sizec) ? rankc : (M % sizec));
268  *end_idx = (rankc + 1) * (M / sizec) + (rankc + 1 < (M % sizec) ? rankc + 1 : (M % sizec));
269  }
270 
271  static inline void make_graph(int log_numverts, int64_t M, int64_t* nedges_ptr, packed_edge** result_ptr, MPI_Comm & world)
272  {
273  int rank, size;
274 #ifdef DETERMINISTIC
275  uint64_t userseed1 = 0;
276 #else
277  uint64_t userseed1 = (uint64_t) init_random();
278 #endif
279 
280  /* Spread the two 64-bit numbers into five nonzero values in the correct range. */
281  uint_fast32_t seed[5];
282  make_mrg_seed(userseed1, userseed1, seed);
283 
284  MPI_Comm_rank(world, &rank);
285  MPI_Comm_size(world, &size);
286 
287  int64_t start_idx, end_idx;
288  compute_edge_range(rank, size, M, &start_idx, &end_idx);
289  int64_t nedges = end_idx - start_idx;
290 
291  packed_edge* local_edges = new packed_edge[nedges];
292 
293  double start = MPI_Wtime();
294  generate_kronecker_range(seed, log_numverts, start_idx, end_idx, local_edges);
295  double gen_time = MPI_Wtime() - start;
296 
297  *result_ptr = local_edges;
298  *nedges_ptr = nedges;
299 
300  if (rank == 0)
301  {
302  fprintf(stdout, "graph_generation: %f s\n", gen_time);
303  }
304  }
305 
306  static inline long init_random ()
307  {
308  long seed = -1;
309  if (getenv ("SEED"))
310  {
311  errno = 0;
312  seed = strtol (getenv ("SEED"), NULL, 10);
313  if (errno) seed = -1;
314  }
315 
316  if (seed < 0) seed = 0xDECAFBAD;
317  return seed;
318  }
319 };
320 
321 }
322 
323 #endif
static int generate_4way_bernoulli(mrg_state *st, int level, int nlevels)
Definition: RefGen21.h:102
static uint64_t bitreverse(uint64_t x)
Definition: RefGen21.h:136
void mrg_skip(mrg_state *state, uint_least64_t exponent_high, uint_least64_t exponent_middle, uint_least64_t exponent_low)
#define INITIATOR_A_NUMERATOR
Definition: RefGen21.h:73
static void make_graph(int log_numverts, int64_t M, int64_t *nedges_ptr, packed_edge **result_ptr, MPI_Comm &world)
Definition: RefGen21.h:271
void mrg_seed(mrg_state *st, const uint_fast32_t seed[5])
int size
static int64_t scramble(int64_t v0, int lgN, uint64_t val0, uint64_t val1)
Definition: RefGen21.h:185
uint_fast32_t mrg_get_uint_orig(mrg_state *state)
#define INITIATOR_DENOMINATOR
Definition: RefGen21.h:75
static void make_one_edge(int64_t nverts, int level, int lgN, mrg_state *st, packed_edge *result, uint64_t val0, uint64_t val1)
Definition: RefGen21.h:199
void make_mrg_seed(uint64_t userseed1, uint64_t userseed2, uint_fast32_t *seed)
static long init_random()
Definition: RefGen21.h:306
long int64_t
Definition: compat.h:21
#define SPK_NOISE_LEVEL
Definition: RefGen21.h:84
static void make_mrg_seed_short(uint64_t userseed, uint_fast32_t *seed)
Definition: RefGen21.h:93
Definition: CCGrid.h:4
static void generate_kronecker_range(const uint_fast32_t seed[5], int logN, int64_t start_edge, int64_t end_edge, packed_edge *edges)
Definition: RefGen21.h:246
static mrg_state MakeScrambleValues(uint64_t &val0, uint64_t &val1, const uint_fast32_t seed[])
Definition: RefGen21.h:227
static void compute_edge_range(int rank, int size, int64_t M, int64_t *start_idx, int64_t *end_idx)
Definition: RefGen21.h:263
int rank
#define INITIATOR_BC_NUMERATOR
Definition: RefGen21.h:74