COMBINATORIAL_BLAS  1.6
mod_arith_xmt.h
Go to the documentation of this file.
1 /* Copyright (C) 2010 The Trustees of Indiana University. */
2 /* */
3 /* Use, modification and distribution is subject to the Boost Software */
4 /* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */
5 /* http://www.boost.org/LICENSE_1_0.txt) */
6 /* */
7 /* Authors: Jeremiah Willcock */
8 /* Andrew Lumsdaine */
9 
10 #ifndef MOD_ARITH_XMT_H
11 #define MOD_ARITH_XMT_H
12 
13 #include <stdint.h>
14 #include <assert.h>
15 #include <machine/mtaops.h>
16 
17 /* Various modular arithmetic operations for modulus 2^31-1 (0x7FFFFFFF).
18  * These may need to be tweaked to get acceptable performance on some platforms
19  * (especially ones without conditional moves). */
20 
21 /* MTA code is based on description in
22  * http://www.hackersdelight.org/divcMore.pdf and experimentation based on
23  * that. */
24 
25 #pragma mta inline
26 static inline uint_fast32_t mod_add(uint_fast32_t a, uint_fast32_t b) {
27  uint_fast32_t x;
28  assert (a <= 0x7FFFFFFE);
29  assert (b <= 0x7FFFFFFE);
30  x = a + b; /* x <= 0xFFFFFFFC */
31  return (x + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, x)) & 0x7FFFFFFF;
32 }
33 
34 #pragma mta inline
35 static inline uint_fast32_t mod_mul(uint_fast32_t a, uint_fast32_t b) {
36  uint_fast64_t temp;
37  uint_fast32_t temp2;
38  assert (a <= 0x7FFFFFFE);
39  assert (b <= 0x7FFFFFFE);
40  temp = MTA_INT_ADD_MUL(0, a, b);
41  return (temp + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, temp)) & 0x7FFFFFFF;
42 }
43 
44 #pragma mta inline
45 static inline uint_fast32_t mod_mac(uint_fast32_t sum, uint_fast32_t a, uint_fast32_t b) {
46  uint_fast64_t temp;
47  uint_fast32_t temp2;
48  assert (sum <= 0x7FFFFFFE);
49  assert (a <= 0x7FFFFFFE);
50  assert (b <= 0x7FFFFFFE);
51  temp = MTA_INT_ADD_MUL(sum, a, b);
52  return (temp + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, temp)) & 0x7FFFFFFF;
53 }
54 
55 #pragma mta inline
56 static inline uint_fast32_t mod_mac2(uint_fast32_t sum, uint_fast32_t a, uint_fast32_t b, uint_fast32_t c, uint_fast32_t d) {
57  uint_fast64_t temp;
58  assert (sum <= 0x7FFFFFFE);
59  assert (a <= 0x7FFFFFFE);
60  assert (b <= 0x7FFFFFFE);
61  assert (c <= 0x7FFFFFFE);
62  assert (d <= 0x7FFFFFFE);
63  temp = MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(sum, a, b), c, d);
64  return (temp + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, temp)) & 0x7FFFFFFF;
65 }
66 
67 #pragma mta inline
68 static inline uint_fast32_t mod_mac3(uint_fast32_t sum, uint_fast32_t a, uint_fast32_t b, uint_fast32_t c, uint_fast32_t d, uint_fast32_t e, uint_fast32_t f) {
69  uint_fast64_t temp;
70  assert (sum <= 0x7FFFFFFE);
71  assert (a <= 0x7FFFFFFE);
72  assert (b <= 0x7FFFFFFE);
73  assert (c <= 0x7FFFFFFE);
74  assert (d <= 0x7FFFFFFE);
75  assert (e <= 0x7FFFFFFE);
76  assert (f <= 0x7FFFFFFE);
77  temp = MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(sum, a, b), c, d), e, f);
78  return (temp + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, temp)) & 0x7FFFFFFF;
79 }
80 
81 #pragma mta inline
82 static inline uint_fast32_t mod_mac4(uint_fast32_t sum, uint_fast32_t a, uint_fast32_t b, uint_fast32_t c, uint_fast32_t d, uint_fast32_t e, uint_fast32_t f, uint_fast32_t g, uint_fast32_t h) {
83  uint_fast64_t temp;
84  assert (sum <= 0x7FFFFFFE);
85  assert (a <= 0x7FFFFFFE);
86  assert (b <= 0x7FFFFFFE);
87  assert (c <= 0x7FFFFFFE);
88  assert (d <= 0x7FFFFFFE);
89  assert (e <= 0x7FFFFFFE);
90  assert (f <= 0x7FFFFFFE);
91  assert (g <= 0x7FFFFFFE);
92  assert (h <= 0x7FFFFFFE);
93  temp = MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(MTA_INT_ADD_MUL(sum, a, b), c, d), e, f), g, h);
94  return (temp + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, temp)) & 0x7FFFFFFF;
95 }
96 
97 static inline uint_fast64_t mod_down(uint_fast64_t x);
98 
99 #pragma mta inline
100 static inline uint_fast64_t mod_down_fast(uint_fast64_t x) {
101  return (x >> 31) + (x & 0x7FFFFFFF);
102 }
103 
104 #pragma mta inline
105 static inline uint_fast64_t mod_down(uint_fast64_t x) {
106  return ((x + MTA_UNS_ADD_MUL_UPPER(0x0000000200000003, 0x0000000200000004, x)) & 0x7FFFFFFF);
107  // x = mod_down_fast(mod_down_fast(x));
108  // x = (x + (x >= 0x7FFFFFFF)) & 0x7FFFFFFF;
109  // return x;
110 }
111 
112 /* The two constants x and y are special cases because they are easier to
113  * multiply by on 32-bit systems. They are used as multipliers in the random
114  * number generator. The techniques for fast multiplication by these
115  * particular values are in L'Ecuyer's papers; we don't use them yet. */
116 
117 #pragma mta inline
118 static inline uint_fast32_t mod_mul_x(uint_fast32_t a) {
119  return mod_mul(a, 107374182);
120 }
121 
122 #pragma mta inline
123 static inline uint_fast32_t mod_mul_y(uint_fast32_t a) {
124  return mod_mul(a, 104480);
125 }
126 
127 #pragma mta inline
128 static inline uint_fast32_t mod_mac_y(uint_fast32_t sum, uint_fast32_t a) {
129  return mod_mac(sum, a, 104480);
130 }
131 
132 #endif /* MOD_ARITH_XMT_H */