/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mrg31k3p.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mrg31k3p.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_mrg31k3p.h Source File
rocrand_mrg31k3p.h
1// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_MRG31K3P_H_
22#define ROCRAND_MRG31K3P_H_
23
24#include "rocrand/rocrand_common.h"
25#include "rocrand/rocrand_mrg31k3p_precomputed.h"
26
27#include <hip/hip_runtime.h>
28
// Modulus used for the first component (x1) of the generator.
#define ROCRAND_MRG31K3P_M1 2147483647U // 2 ^ 31 - 1
// Modulus used for the second component (x2) of the generator.
#define ROCRAND_MRG31K3P_M2 2147462579U // 2 ^ 31 - 21069
// Bit masks applied to the state words while computing the next state.
#define ROCRAND_MRG31K3P_MASK12 511U // 2 ^ 9 - 1
#define ROCRAND_MRG31K3P_MASK13 16777215U // 2 ^ 24 - 1
#define ROCRAND_MRG31K3P_MASK21 65535U // 2 ^ 16 - 1
// Scale factor: 1 / ROCRAND_MRG31K3P_M1
#define ROCRAND_MRG31K3P_NORM_DOUBLE (4.656612875245796923e-10)
// Scale factor: UINT32_MAX / (ROCRAND_MRG31K3P_M1 - 1)
#define ROCRAND_MRG31K3P_UINT32_NORM (2.000000001396983862)

// Default seed for MRG31K3P PRNG.
#define ROCRAND_MRG31K3P_DEFAULT_SEED 12345ULL
// end of group rocranddevice
48
49namespace rocrand_device
50{
51
52class mrg31k3p_engine
53{
54public:
55 struct mrg31k3p_state
56 {
57 unsigned int x1[3];
58 unsigned int x2[3];
59
60#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
61 // The Box–Muller transform requires two inputs to convert uniformly
62 // distributed real values [0; 1] to normally distributed real values
63 // (with mean = 0, and stddev = 1). Often user wants only one
64 // normally distributed number, to save performance and random
65 // numbers the 2nd value is saved for future requests.
66 unsigned int boxmuller_float_state; // is there a float in boxmuller_float
67 unsigned int boxmuller_double_state; // is there a double in boxmuller_double
68 float boxmuller_float; // normally distributed float
69 double boxmuller_double; // normally distributed double
70#endif
71 };
72
73 __forceinline__ __device__ __host__ mrg31k3p_engine()
74 {
75 this->seed(ROCRAND_MRG31K3P_DEFAULT_SEED, 0, 0);
76 }
77
86 __forceinline__ __device__ __host__ mrg31k3p_engine(const unsigned long long seed,
87 const unsigned long long subsequence,
88 const unsigned long long offset)
89 {
90 this->seed(seed, subsequence, offset);
91 }
92
101 __forceinline__ __device__ __host__ void seed(unsigned long long seed_value,
102 const unsigned long long subsequence,
103 const unsigned long long offset)
104 {
105 if(seed_value == 0)
106 {
108 }
109 unsigned int x = static_cast<unsigned int>(seed_value ^ 0x55555555U);
110 unsigned int y = static_cast<unsigned int>((seed_value >> 32) ^ 0xAAAAAAAAU);
111 m_state.x1[0] = mod_mul_m1(x, seed_value);
112 m_state.x1[1] = mod_mul_m1(y, seed_value);
113 m_state.x1[2] = mod_mul_m1(x, seed_value);
114 m_state.x2[0] = mod_mul_m2(y, seed_value);
115 m_state.x2[1] = mod_mul_m2(x, seed_value);
116 m_state.x2[2] = mod_mul_m2(y, seed_value);
117 this->restart(subsequence, offset);
118 }
119
121 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
122 {
123 this->discard_impl(offset);
124 }
125
128 __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
129 {
130 this->discard_subsequence_impl(subsequence);
131 }
132
135 __forceinline__ __device__ __host__ void discard_sequence(unsigned long long sequence)
136 {
137 this->discard_sequence_impl(sequence);
138 }
139
140 __forceinline__ __device__ __host__ void restart(const unsigned long long subsequence,
141 const unsigned long long offset)
142 {
143#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
144 m_state.boxmuller_float_state = 0;
145 m_state.boxmuller_double_state = 0;
146#endif
147 this->discard_subsequence_impl(subsequence);
148 this->discard_impl(offset);
149 }
150
151 __forceinline__ __device__ __host__ unsigned int operator()()
152 {
153 return this->next();
154 }
155
156 // Returned value is in range [1, ROCRAND_MRG31K3P_M1].
157 __forceinline__ __device__ __host__ unsigned int next()
158 {
159 // First component
160 unsigned int tmp
161 = (((m_state.x1[1] & ROCRAND_MRG31K3P_MASK12) << 22) + (m_state.x1[1] >> 9))
162 + (((m_state.x1[2] & ROCRAND_MRG31K3P_MASK13) << 7) + (m_state.x1[2] >> 24));
163 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
164 tmp += m_state.x1[2];
165 tmp -= (tmp >= ROCRAND_MRG31K3P_M1) ? ROCRAND_MRG31K3P_M1 : 0;
166 m_state.x1[2] = m_state.x1[1];
167 m_state.x1[1] = m_state.x1[0];
168 m_state.x1[0] = tmp;
169
170 // Second component
171 tmp = (((m_state.x2[0] & ROCRAND_MRG31K3P_MASK21) << 15) + 21069 * (m_state.x2[0] >> 16));
172 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
173 tmp += ((m_state.x2[2] & ROCRAND_MRG31K3P_MASK21) << 15);
174 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
175 tmp += 21069 * (m_state.x2[2] >> 16);
176 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
177 tmp += m_state.x2[2];
178 tmp -= (tmp >= ROCRAND_MRG31K3P_M2) ? ROCRAND_MRG31K3P_M2 : 0;
179 m_state.x2[2] = m_state.x2[1];
180 m_state.x2[1] = m_state.x2[0];
181 m_state.x2[0] = tmp;
182
183 // Combination
184 return m_state.x1[0] - m_state.x2[0]
185 + (m_state.x1[0] <= m_state.x2[0] ? ROCRAND_MRG31K3P_M1 : 0);
186 }
187
188protected:
189 // Advances the internal state to skip \p offset numbers.
190 __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
191 {
192 discard_state(offset);
193 }
194
195 // Advances the internal state to skip \p subsequence subsequences.
196 __forceinline__ __device__ __host__ void
197 discard_subsequence_impl(unsigned long long subsequence)
198 {
199 int i = 0;
200
201 while(subsequence > 0)
202 {
203 if(subsequence & 1)
204 {
205#if defined(__HIP_DEVICE_COMPILE__)
206 mod_mat_vec_m1(d_mrg31k3p_A1P72 + i, m_state.x1);
207 mod_mat_vec_m2(d_mrg31k3p_A2P72 + i, m_state.x2);
208#else
209 mod_mat_vec_m1(h_mrg31k3p_A1P72 + i, m_state.x1);
210 mod_mat_vec_m2(h_mrg31k3p_A2P72 + i, m_state.x2);
211#endif
212 }
213 subsequence >>= 1;
214 i += 9;
215 }
216 }
217
218 // Advances the internal state to skip \p sequences.
219 __forceinline__ __device__ __host__ void discard_sequence_impl(unsigned long long sequence)
220 {
221 int i = 0;
222
223 while(sequence > 0)
224 {
225 if(sequence & 1)
226 {
227#if defined(__HIP_DEVICE_COMPILE__)
228 mod_mat_vec_m1(d_mrg31k3p_A1P134 + i, m_state.x1);
229 mod_mat_vec_m2(d_mrg31k3p_A2P134 + i, m_state.x2);
230#else
231 mod_mat_vec_m1(h_mrg31k3p_A1P134 + i, m_state.x1);
232 mod_mat_vec_m2(h_mrg31k3p_A2P134 + i, m_state.x2);
233#endif
234 }
235 sequence >>= 1;
236 i += 9;
237 }
238 }
239
240 // Advances the internal state to skip \p offset numbers.
241 __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
242 {
243 int i = 0;
244
245 while(offset > 0)
246 {
247 if(offset & 1)
248 {
249#if defined(__HIP_DEVICE_COMPILE__)
250 mod_mat_vec_m1(d_mrg31k3p_A1 + i, m_state.x1);
251 mod_mat_vec_m2(d_mrg31k3p_A2 + i, m_state.x2);
252#else
253 mod_mat_vec_m1(h_mrg31k3p_A1 + i, m_state.x1);
254 mod_mat_vec_m2(h_mrg31k3p_A2 + i, m_state.x2);
255#endif
256 }
257 offset >>= 1;
258 i += 9;
259 }
260 }
261
262 // Advances the internal state to the next state.
263 __forceinline__ __device__ __host__ void discard_state()
264 {
265 discard_state(1);
266 }
267
268private:
269 __forceinline__ __device__ __host__ static void mod_mat_vec_m1(const unsigned int* A,
270 unsigned int* s)
271 {
272 unsigned long long x[3] = {s[0], s[1], s[2]};
273
274 s[0] = mod_m1(mod_m1(A[0] * x[0]) + mod_m1(A[1] * x[1]) + mod_m1(A[2] * x[2]));
275
276 s[1] = mod_m1(mod_m1(A[3] * x[0]) + mod_m1(A[4] * x[1]) + mod_m1(A[5] * x[2]));
277
278 s[2] = mod_m1(mod_m1(A[6] * x[0]) + mod_m1(A[7] * x[1]) + mod_m1(A[8] * x[2]));
279 }
280
281 __forceinline__ __device__ __host__ static void mod_mat_vec_m2(const unsigned int* A,
282 unsigned int* s)
283 {
284 unsigned long long x[3] = {s[0], s[1], s[2]};
285
286 s[0] = mod_m2(mod_m2(A[0] * x[0]) + mod_m2(A[1] * x[1]) + mod_m2(A[2] * x[2]));
287
288 s[1] = mod_m2(mod_m2(A[3] * x[0]) + mod_m2(A[4] * x[1]) + mod_m2(A[5] * x[2]));
289
290 s[2] = mod_m2(mod_m2(A[6] * x[0]) + mod_m2(A[7] * x[1]) + mod_m2(A[8] * x[2]));
291 }
292
293 __forceinline__ __device__ __host__ static unsigned long long mod_mul_m1(unsigned int i,
294 unsigned long long j)
295 {
296 return mod_m1(i * j);
297 }
298
299 __forceinline__ __device__ __host__ static unsigned long long mod_m1(unsigned long long p)
300 {
301 return p % ROCRAND_MRG31K3P_M1;
302 }
303
304 __forceinline__ __device__ __host__ static unsigned long long mod_mul_m2(unsigned int i,
305 unsigned long long j)
306 {
307 return mod_m2(i * j);
308 }
309
310 __forceinline__ __device__ __host__ static unsigned long long mod_m2(unsigned long long p)
311 {
312 return p % ROCRAND_MRG31K3P_M2;
313 }
314
315protected:
316 // State
317 mrg31k3p_state m_state;
318
319#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
320 friend struct detail::engine_boxmuller_helper<mrg31k3p_engine>;
321#endif
322}; // mrg31k3p_engine class
323
324} // end namespace rocrand_device
325
330
332typedef rocrand_device::mrg31k3p_engine rocrand_state_mrg31k3p;
334
346__forceinline__ __device__ __host__
347void rocrand_init(const unsigned long long seed,
348 const unsigned long long subsequence,
349 const unsigned long long offset,
350 rocrand_state_mrg31k3p* state)
351{
352 *state = rocrand_state_mrg31k3p(seed, subsequence, offset);
353}
354
367__forceinline__ __device__ __host__
368unsigned int rocrand(rocrand_state_mrg31k3p* state)
369{
370 // next() in [1, ROCRAND_MRG31K3P_M1]
371 return static_cast<unsigned int>((state->next() - 1) * ROCRAND_MRG31K3P_UINT32_NORM);
372}
373
382__forceinline__ __device__ __host__
383void skipahead(unsigned long long offset, rocrand_state_mrg31k3p* state)
384{
385 return state->discard(offset);
386}
387
397__forceinline__ __device__ __host__
398void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p* state)
399{
400 return state->discard_subsequence(subsequence);
401}
402
412__forceinline__ __device__ __host__
413void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p* state)
414{
415 return state->discard_sequence(sequence);
416}
417 // end of group rocranddevice
419
420#endif // ROCRAND_MRG31K3P_H_
// Index of documented symbols (reference summary, not part of the header code):
//
// #define ROCRAND_MRG31K3P_DEFAULT_SEED
//     Default seed for MRG31K3P PRNG.
//     Definition: rocrand_mrg31k3p.h:46
//
// __forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_mrg31k3p *state)
//     Returns uniformly distributed random unsigned int value from [0; 2^32 - 1] range.
//     Definition: rocrand_mrg31k3p.h:368
//
// __forceinline__ __device__ __host__ void skipahead_subsequence(unsigned long long subsequence, rocrand_state_mrg31k3p *state)
//     Updates MRG31K3P state to skip ahead by subsequence subsequences.
//     Definition: rocrand_mrg31k3p.h:398
//
// __forceinline__ __device__ __host__ void skipahead(unsigned long long offset, rocrand_state_mrg31k3p *state)
//     Updates MRG31K3P state to skip ahead by offset elements.
//     Definition: rocrand_mrg31k3p.h:383
//
// __forceinline__ __device__ __host__ void skipahead_sequence(unsigned long long sequence, rocrand_state_mrg31k3p *state)
//     Updates MRG31K3P state to skip ahead by sequence sequences.
//     Definition: rocrand_mrg31k3p.h:413
//
// __forceinline__ __device__ __host__ void rocrand_init(const unsigned long long seed, const unsigned long long subsequence, const unsigned long long offset, rocrand_state_mrg31k3p *state)
//     Initializes MRG31K3P state.
//     Definition: rocrand_mrg31k3p.h:347