/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry4_impl.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry4_impl.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry4_impl.h Source File
rocrand_threefry4_impl.h
1// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21/*
22Copyright 2010-2011, D. E. Shaw Research.
23All rights reserved.
24
25Redistribution and use in source and binary forms, with or without
26modification, are permitted provided that the following conditions are
27met:
28
29* Redistributions of source code must retain the above copyright
30 notice, this list of conditions, and the following disclaimer.
31
32* Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions, and the following disclaimer in the
34 documentation and/or other materials provided with the distribution.
35
36* Neither the name of D. E. Shaw Research nor the names of its
37 contributors may be used to endorse or promote products derived from
38 this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51*/
52
53#ifndef ROCRAND_THREEFRY4_IMPL_H_
54#define ROCRAND_THREEFRY4_IMPL_H_
55
56#include "rocrand/rocrand_common.h"
57#include "rocrand/rocrand_threefry_common.h"
58
59#include <hip/hip_runtime.h>
60
// Fallback for THREEFRY4x32_DEFAULT_ROUNDS: used only when nothing has defined
// the macro before this point, so a build can override it up front.
#if !defined(THREEFRY4x32_DEFAULT_ROUNDS)
    #define THREEFRY4x32_DEFAULT_ROUNDS 20
#endif

// Fallback for THREEFRY4x64_DEFAULT_ROUNDS, with the same override rule.
#if !defined(THREEFRY4x64_DEFAULT_ROUNDS)
    #define THREEFRY4x64_DEFAULT_ROUNDS 20
#endif
68
69namespace rocrand_device
70{
71
72template<class value>
73__forceinline__ __device__ __host__ int threefry_rotation_array(int indexX, int indexY) = delete;
74
75template<>
76__forceinline__ __device__ __host__ int threefry_rotation_array<unsigned int>(int indexX,
77 int indexY)
78{
79 // Output from skein_rot_search: (srs-B128-X5000.out)
80 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
81 // Start: Mon Aug 24 22:41:36 2009
82 // ...
83 // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format
84 static constexpr int THREEFRY_ROTATION_32_4[8][2] = {
85 {10, 26},
86 {11, 21},
87 {13, 27},
88 {23, 5},
89 { 6, 20},
90 {17, 11},
91 {25, 10},
92 {18, 20}
93 };
94 return THREEFRY_ROTATION_32_4[indexX][indexY];
95}
96
97template<>
98__forceinline__ __device__ __host__ int threefry_rotation_array<unsigned long long>(int indexX,
99 int indexY)
100{
101 // These are the R_256 constants from the Threefish reference sources
102 // with names changed to R_64x4... */
103 static constexpr int THREEFRY_ROTATION_64_4[8][2] = {
104 {14, 16},
105 {52, 57},
106 {23, 40},
107 { 5, 37},
108 {25, 33},
109 {46, 12},
110 {58, 22},
111 {32, 32}
112 };
113 return THREEFRY_ROTATION_64_4[indexX][indexY];
114}
115
116template<typename state_value, typename value, unsigned int Nrounds>
117class threefry_engine4_base
118{
119public:
120 struct threefry_state_4
121 {
122 state_value counter;
123 state_value key;
124 state_value result;
125 unsigned int substate;
126 };
127 using state_type = threefry_state_4;
128 using state_vector_type = state_value;
129
131 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
132 {
133 this->discard_impl(offset);
134 this->m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
135 }
136
142 __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
143 {
144 this->discard_subsequence_impl(subsequence);
145 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
146 }
147
148 __forceinline__ __device__ __host__ value operator()()
149 {
150 return this->next();
151 }
152
153 __forceinline__ __device__ __host__ value next()
154 {
155#if defined(__HIP_PLATFORM_AMD__)
156 value ret = ROCRAND_HIPVEC_ACCESS(m_state.result)[m_state.substate];
157#else
158 value ret = (&m_state.result.x)[m_state.substate];
159#endif
160 m_state.substate++;
161 if(m_state.substate == 4)
162 {
163 m_state.substate = 0;
164 m_state.counter = this->bump_counter(m_state.counter);
165 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
166 }
167 return ret;
168 }
169
170 __forceinline__ __device__ __host__ state_value next4()
171 {
172 state_value ret = m_state.result;
173 m_state.counter = this->bump_counter(m_state.counter);
174 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
175
176 return this->interleave(ret, m_state.result);
177 }
178
179protected:
180 __forceinline__ __device__ __host__ static state_value threefry_rounds(state_value counter,
181 state_value key)
182 {
183 state_value X;
184 value ks[4 + 1];
185
186 static_assert(Nrounds <= 72, "72 or less only supported in threefry rounds");
187
188 ks[4] = skein_ks_parity<value>();
189
190 ks[0] = key.x;
191 ks[1] = key.y;
192 ks[2] = key.z;
193 ks[3] = key.w;
194
195 X.x = counter.x;
196 X.y = counter.y;
197 X.z = counter.z;
198 X.w = counter.w;
199
200 ks[4] ^= key.x;
201 ks[4] ^= key.y;
202 ks[4] ^= key.z;
203 ks[4] ^= key.w;
204
205 /* Insert initial key before round 0 */
206 X.x += ks[0];
207 X.y += ks[1];
208 X.z += ks[2];
209 X.w += ks[3];
210
211 for(unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
212 {
213 int rot_0 = threefry_rotation_array<value>(round_idx & 7u, 0);
214 int rot_1 = threefry_rotation_array<value>(round_idx & 7u, 1);
215 if((round_idx & 2u) == 0)
216 {
217 X.x += X.y;
218 X.y = rotl<value>(X.y, rot_0);
219 X.y ^= X.x;
220 X.z += X.w;
221 X.w = rotl<value>(X.w, rot_1);
222 X.w ^= X.z;
223 }
224 else
225 {
226 X.x += X.w;
227 X.w = rotl<value>(X.w, rot_0);
228 X.w ^= X.x;
229 X.z += X.y;
230 X.y = rotl<value>(X.y, rot_1);
231 X.y ^= X.z;
232 }
233
234 if((round_idx & 3u) == 3)
235 {
236 unsigned int inject_idx = round_idx / 4;
237 // InjectKey(r = 1 + inject_idx)
238 X.x += ks[(1 + inject_idx) % 5];
239 X.y += ks[(2 + inject_idx) % 5];
240 X.z += ks[(3 + inject_idx) % 5];
241 X.w += ks[(4 + inject_idx) % 5];
242 X.w += 1 + inject_idx;
243 }
244 }
245
246 return X;
247 }
248
251 __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
252 {
253 // Adjust offset for subset
254 m_state.substate += offset & 3;
255 unsigned long long counter_offset = offset / 4;
256 counter_offset += m_state.substate < 4 ? 0 : 1;
257 m_state.substate += m_state.substate < 4 ? 0 : -4;
258 // Discard states
259 this->discard_state(counter_offset);
260 }
261
263 __forceinline__ __device__ __host__ void
264 discard_subsequence_impl(unsigned long long subsequence)
265 {
266 value lo, hi;
267 ::rocrand_device::detail::split_ull(lo, hi, subsequence);
268
269 value old_counter = m_state.counter.z;
270 m_state.counter.z += lo;
271 m_state.counter.w += hi + (m_state.counter.z < old_counter ? 1 : 0);
272 }
273
276 __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
277 {
278 value lo, hi;
279 ::rocrand_device::detail::split_ull(lo, hi, offset);
280
281 state_value old_counter = m_state.counter;
282 m_state.counter.x += lo;
283 m_state.counter.y += hi + (m_state.counter.x < old_counter.x ? 1 : 0);
284 m_state.counter.z += (m_state.counter.y < old_counter.y ? 1 : 0);
285 m_state.counter.w += (m_state.counter.z < old_counter.z ? 1 : 0);
286 }
287
288 __forceinline__ __device__ __host__ static state_value bump_counter(state_value counter)
289 {
290 counter.x++;
291 value add = counter.x == 0 ? 1 : 0;
292 counter.y += add;
293 add = counter.y == 0 ? add : 0;
294 counter.z += add;
295 add = counter.z == 0 ? add : 0;
296 counter.w += add;
297 return counter;
298 }
299
300 __forceinline__ __device__ __host__ state_value interleave(const state_value prev,
301 const state_value next) const
302 {
303 switch(m_state.substate)
304 {
305 case 0: return prev;
306 case 1: return state_value{prev.y, prev.z, prev.w, next.x};
307 case 2: return state_value{prev.z, prev.w, next.x, next.y};
308 case 3: return state_value{prev.w, next.x, next.y, next.z};
309 }
310 __builtin_unreachable();
311 }
312
313protected:
314 threefry_state_4 m_state;
315}; // threefry_engine4_base class
316
317} // end namespace rocrand_device
318
319#endif // ROCRAND_THREEFRY4_IMPL_H_