/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry2_impl.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry2_impl.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_threefry2_impl.h Source File
rocrand_threefry2_impl.h
1// Copyright (c) 2022-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21/*
22Copyright 2010-2011, D. E. Shaw Research.
23All rights reserved.
24
25Redistribution and use in source and binary forms, with or without
26modification, are permitted provided that the following conditions are
27met:
28
29* Redistributions of source code must retain the above copyright
30 notice, this list of conditions, and the following disclaimer.
31
32* Redistributions in binary form must reproduce the above copyright
33 notice, this list of conditions, and the following disclaimer in the
34 documentation and/or other materials provided with the distribution.
35
36* Neither the name of D. E. Shaw Research nor the names of its
37 contributors may be used to endorse or promote products derived from
38 this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
41"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
42LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
43A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
44OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
45SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
46LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
47DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
48THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
49(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
50OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
51*/
52
53#ifndef ROCRAND_THREEFRY2_IMPL_H_
54#define ROCRAND_THREEFRY2_IMPL_H_
55
56#include "rocrand/rocrand_common.h"
57#include "rocrand/rocrand_threefry_common.h"
58
59#include <hip/hip_runtime.h>
60
// Fallback round counts: each macro is only defined here when nothing earlier
// (a previous header or a compiler -D flag) has already supplied a value.
#if !defined(THREEFRY2x32_DEFAULT_ROUNDS)
    #define THREEFRY2x32_DEFAULT_ROUNDS 20
#endif

#if !defined(THREEFRY2x64_DEFAULT_ROUNDS)
    #define THREEFRY2x64_DEFAULT_ROUNDS 20
#endif
68
69namespace rocrand_device
70{
71
72template<class value>
73__forceinline__ __device__ __host__ int threefry_rotation_array(int index) = delete;
74
75template<>
76__forceinline__ __device__ __host__ int threefry_rotation_array<unsigned int>(int index)
77{
78 // Output from skein_rot_search (srs32x2-X5000.out)
79 // Random seed = 1. BlockSize = 64 bits. sampleCnt = 1024. rounds = 8, minHW_or=28
80 // Start: Tue Jul 12 11:11:33 2011
81 // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize= 64].format
82 static constexpr int THREEFRY_ROTATION_32_2[8] = {13, 15, 26, 6, 17, 29, 16, 24};
83 return THREEFRY_ROTATION_32_2[index];
84}
85
86template<>
87__forceinline__ __device__ __host__ int threefry_rotation_array<unsigned long long>(int index)
88{
89 // Output from skein_rot_search: (srs64_B64-X1000)
90 // Random seed = 1. BlockSize = 128 bits. sampleCnt = 1024. rounds = 8, minHW_or=57
91 // Start: Tue Mar 1 10:07:48 2011
92 // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format
93 static constexpr int THREEFRY_ROTATION_64_2[8] = {16, 42, 12, 31, 16, 32, 24, 21};
94 return THREEFRY_ROTATION_64_2[index];
95}
96
97template<typename state_value, typename value, unsigned int Nrounds>
98class threefry_engine2_base
99{
100public:
101 struct threefry_state_2
102 {
103 state_value counter;
104 state_value key;
105 state_value result;
106 unsigned int substate;
107 };
108 using state_type = threefry_state_2;
109 using state_vector_type = state_value;
110
111 __forceinline__ __device__ __host__ void discard(unsigned long long offset)
112 {
113 this->discard_impl(offset);
114 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
115 }
116
117 __forceinline__ __device__ __host__ void discard()
118 {
119 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
120 }
121
127 __forceinline__ __device__ __host__ void discard_subsequence(unsigned long long subsequence)
128 {
129 this->discard_subsequence_impl(subsequence);
130 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
131 }
132
133 __forceinline__ __device__ __host__ value operator()()
134 {
135 return this->next();
136 }
137
138 __forceinline__ __device__ __host__
139 value next()
140 {
141#if defined(__HIP_PLATFORM_AMD__)
142 value ret = ROCRAND_HIPVEC_ACCESS(m_state.result)[m_state.substate];
143#else
144 value ret = (&m_state.result.x)[m_state.substate];
145#endif
146 m_state.substate++;
147 if(m_state.substate == 2)
148 {
149 m_state.substate = 0;
150 m_state.counter = this->bump_counter(m_state.counter);
151 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
152 }
153 return ret;
154 }
155
156 __forceinline__ __device__ __host__ state_value next2()
157 {
158 state_value ret = m_state.result;
159 m_state.counter = this->bump_counter(m_state.counter);
160 m_state.result = this->threefry_rounds(m_state.counter, m_state.key);
161
162 return this->interleave(ret, m_state.result);
163 }
164
165protected:
166 __forceinline__ __device__ __host__ static state_value threefry_rounds(state_value counter,
167 state_value key)
168 {
169 state_value X;
170 value ks[2 + 1];
171
172 static_assert(Nrounds <= 32, "32 or less only supported in threefry rounds");
173
174 ks[2] = skein_ks_parity<value>();
175
176 ks[0] = key.x;
177 ks[1] = key.y;
178
179 X.x = counter.x;
180 X.y = counter.y;
181
182 ks[2] ^= key.x;
183 ks[2] ^= key.y;
184
185 /* Insert initial key before round 0 */
186 X.x += ks[0];
187 X.y += ks[1];
188
189 for(unsigned int round_idx = 0; round_idx < Nrounds; round_idx++)
190 {
191 X.x += X.y;
192 X.y = rotl<value>(X.y, threefry_rotation_array<value>(round_idx & 7u));
193 X.y ^= X.x;
194
195 if((round_idx & 3u) == 3)
196 {
197 unsigned int inject_idx = round_idx / 4;
198 // InjectKey(r = 1 + inject_idx)
199 X.x += ks[(1 + inject_idx) % 3];
200 X.y += ks[(2 + inject_idx) % 3];
201 X.y += 1 + inject_idx;
202 }
203 }
204
205 return X;
206 }
207
210 __forceinline__ __device__ __host__ void discard_impl(unsigned long long offset)
211 {
212 // Adjust offset for subset
213 m_state.substate += offset & 1;
214 unsigned long long counter_offset = offset / 2;
215 counter_offset += m_state.substate < 2 ? 0 : 1;
216 m_state.substate += m_state.substate < 2 ? 0 : -2;
217 // Discard states
218 this->discard_state(counter_offset);
219 }
220
222 __forceinline__ __device__ __host__ void
223 discard_subsequence_impl(unsigned long long subsequence)
224 {
225 m_state.counter.y += subsequence;
226 }
227
230 __forceinline__ __device__ __host__ void discard_state(unsigned long long offset)
231 {
232 value lo, hi;
233 ::rocrand_device::detail::split_ull(lo, hi, offset);
234
235 value old_counter = m_state.counter.x;
236 m_state.counter.x += lo;
237 m_state.counter.y += hi + (m_state.counter.x < old_counter ? 1 : 0);
238 }
239
240 __forceinline__ __device__ __host__ static state_value bump_counter(state_value counter)
241 {
242 counter.x++;
243 value add = counter.x == 0 ? 1 : 0;
244 counter.y += add;
245 return counter;
246 }
247
248 __forceinline__ __device__ __host__ state_value interleave(const state_value prev,
249 const state_value next) const
250 {
251 switch(m_state.substate)
252 {
253 case 0: return prev;
254 case 1: return state_value{prev.y, next.x};
255 }
256 __builtin_unreachable();
257 }
258
259protected:
260 threefry_state_2 m_state;
261}; // threefry_engine2_base class
262
263} // end namespace rocrand_device
264
265#endif // ROCRAND_THREEFRY2_IMPL_H_