/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_poisson.h Source File

/build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_poisson.h Source File#

API library: /build/rocrand-7S8maf/rocrand-7.1.1/library/include/rocrand/rocrand_poisson.h Source File
rocrand_poisson.h
1// Copyright (c) 2017-2025 Advanced Micro Devices, Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21#ifndef ROCRAND_POISSON_H_
22#define ROCRAND_POISSON_H_
23
28
29#include <math.h>
30
31#include "rocrand/rocrand_lfsr113.h"
32#include "rocrand/rocrand_mrg31k3p.h"
33#include "rocrand/rocrand_mrg32k3a.h"
34#include "rocrand/rocrand_mtgp32.h"
35#include "rocrand/rocrand_philox4x32_10.h"
36#include "rocrand/rocrand_scrambled_sobol32.h"
37#include "rocrand/rocrand_scrambled_sobol64.h"
38#include "rocrand/rocrand_sobol32.h"
39#include "rocrand/rocrand_sobol64.h"
40#include "rocrand/rocrand_threefry2x32_20.h"
41#include "rocrand/rocrand_threefry2x64_20.h"
42#include "rocrand/rocrand_threefry4x32_20.h"
43#include "rocrand/rocrand_threefry4x64_20.h"
44#include "rocrand/rocrand_xorwow.h"
45
46#include "rocrand/rocrand_normal.h"
47#include "rocrand/rocrand_uniform.h"
48
49#include <hip/hip_runtime.h>
50
51namespace rocrand_device {
52namespace detail {
53
54constexpr double lambda_threshold_small = 64.0;
55constexpr double lambda_threshold_huge = 4000.0;
56
57template<class State, typename Result_Type = unsigned int>
58__forceinline__ __device__ __host__ Result_Type poisson_distribution_small(State& state,
59 double lambda)
60{
61 // Knuth's method
62
63 const double limit = exp(-lambda);
64 Result_Type k = 0;
65 double product = 1.0;
66
67 do
68 {
69 k++;
70 product *= rocrand_uniform_double(state);
71 }
72 while (product > limit);
73
74 return k - 1;
75}
76
77__forceinline__ __device__ __host__ double lgamma_approx(const double x)
78{
79 // Lanczos approximation (g = 7, n = 9)
80
81 const double z = x - 1.0;
82
83 const int g = 7;
84 const int n = 9;
85 const double coefs[n] = {
86 0.99999999999980993227684700473478,
87 676.520368121885098567009190444019,
88 -1259.13921672240287047156078755283,
89 771.3234287776530788486528258894,
90 -176.61502916214059906584551354,
91 12.507343278686904814458936853,
92 -0.13857109526572011689554707,
93 9.984369578019570859563e-6,
94 1.50563273514931155834e-7
95 };
96 double sum = 0.0;
97 #pragma unroll
98 for (int i = n - 1; i > 0; i--)
99 {
100 sum += coefs[i] / (z + i);
101 }
102 sum += coefs[0];
103
104 const double log_sqrt_2_pi = 0.9189385332046727418;
105 const double e = 2.718281828459045090796;
106 return (log_sqrt_2_pi + log(sum) - g) + (z + 0.5) * log((z + g + 0.5) / e);
107}
108
109template<class State, typename Result_Type = unsigned int>
110__forceinline__ __device__ __host__ Result_Type poisson_distribution_large(State& state,
111 double lambda)
112{
113 // Rejection method PA, A. C. Atkinson
114
115 const double c = 0.767 - 3.36 / lambda;
116 const double beta = ROCRAND_PI_DOUBLE / sqrt(3.0 * lambda);
117 const double alpha = beta * lambda;
118 const double k = log(c) - lambda - log(beta);
119 const double log_lambda = log(lambda);
120
121 while (true)
122 {
123 const double u = rocrand_uniform_double(state);
124 const double x = (alpha - log((1.0 - u) / u)) / beta;
125 const double n = floor(x + 0.5);
126 if (n < 0)
127 {
128 continue;
129 }
130 const double v = rocrand_uniform_double(state);
131 const double y = alpha - beta * x;
132 const double t = 1.0 + exp(y);
133 const double lhs = y + log(v / (t * t));
134 const double rhs = k + n * log_lambda - lgamma_approx(n + 1.0);
135 if (lhs <= rhs)
136 {
137 return static_cast<Result_Type>(n);
138 }
139 }
140}
141
142template<class State, typename Result_Type = unsigned int>
143__forceinline__ __device__ __host__ Result_Type poisson_distribution_huge(State& state,
144 double lambda)
145{
146 // Approximate Poisson distribution with normal distribution
147
148 const double n = rocrand_normal_double(state);
149 return static_cast<Result_Type>(round(sqrt(lambda) * n + lambda));
150}
151
152template<class State, typename Result_Type = unsigned int>
153__forceinline__ __device__ __host__ Result_Type poisson_distribution(State& state, double lambda)
154{
155 if (lambda < lambda_threshold_small)
156 {
157 return poisson_distribution_small<State, Result_Type>(state, lambda);
158 }
159 else if (lambda <= lambda_threshold_huge)
160 {
161 return poisson_distribution_large<State, Result_Type>(state, lambda);
162 }
163 else
164 {
165 return poisson_distribution_huge<State, Result_Type>(state, lambda);
166 }
167}
168
169template<class State, typename Result_Type = unsigned int>
170__forceinline__ __device__ __host__ Result_Type poisson_distribution_itr(State& state,
171 double lambda)
172{
173 // Algorithm ITR
174 // George S. Fishman
175 // Discrete-Event Simulation: Modeling, Programming, and Analysis
176 // p. 333
177 double L;
178 double x = 1.0;
179 double y = 1.0;
180 Result_Type k = 0;
181 int pow = 0;
182 // Algorithm ITR uses u from (0, 1) and uniform_double returns (0, 1]
183 // Change u to ensure that 1 is never generated,
184 // otherwise the inner loop never ends.
185 double u = rocrand_uniform_double(state) - ROCRAND_2POW32_INV_DOUBLE / 2.0;
186 double upow = pow + 500.0;
187 double ex = exp(-500.0);
188 do{
189 if (lambda > upow)
190 L = ex;
191 else
192 L = exp((double)(pow - lambda));
193
194 x *= L;
195 y *= L;
196 pow += 500;
197 while (u > y)
198 {
199 k++;
200 x *= ((double)lambda / (double) k);
201 y += x;
202 }
203 } while((double)pow < lambda);
204 return k;
205}
206
207template<class State, typename Result_Type = unsigned int>
208__forceinline__ __device__ __host__ Result_Type poisson_distribution_inv(State& state,
209 double lambda)
210{
211 if (lambda < 1000.0)
212 {
213 return poisson_distribution_itr<State, Result_Type>(state, lambda);
214 }
215 else
216 {
217 return poisson_distribution_huge<State, Result_Type>(state, lambda);
218 }
219}
220
221} // end namespace detail
222} // end namespace rocrand_device
223
235#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
236__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_philox4x32_10* state,
237 double lambda)
238{
239 return rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
240 state,
241 lambda);
242}
243
255__forceinline__ __device__ __host__
256uint4 rocrand_poisson4(rocrand_state_philox4x32_10* state, double lambda)
257{
258 return uint4{
259 rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
260 state,
261 lambda),
262 rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
263 state,
264 lambda),
265 rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
266 state,
267 lambda),
268 rocrand_device::detail::poisson_distribution<rocrand_state_philox4x32_10*, unsigned int>(
269 state,
270 lambda)};
271}
272#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
273
285#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
286__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg31k3p* state,
287 double lambda)
288{
289 return rocrand_device::detail::poisson_distribution<rocrand_state_mrg31k3p*, unsigned int>(
290 state,
291 lambda);
292}
293#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
294
306#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
307__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_mrg32k3a* state,
308 double lambda)
309{
310 return rocrand_device::detail::poisson_distribution<rocrand_state_mrg32k3a*, unsigned int>(
311 state,
312 lambda);
313}
314#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
315
327#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
328__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_xorwow* state,
329 double lambda)
330{
331 return rocrand_device::detail::poisson_distribution<rocrand_state_xorwow*, unsigned int>(
332 state,
333 lambda);
334}
335#endif // ROCRAND_DETAIL_BM_NOT_IN_STATE
336
348__forceinline__ __device__ __host__
349unsigned int rocrand_poisson(rocrand_state_mtgp32* state, double lambda)
350{
351 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_mtgp32*, unsigned int>(
352 state,
353 lambda);
354}
355
367__forceinline__ __device__ __host__
368unsigned int rocrand_poisson(rocrand_state_sobol32* state, double lambda)
369{
370 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_sobol32*, unsigned int>(
371 state,
372 lambda);
373}
374
386__forceinline__ __device__ __host__
387unsigned int rocrand_poisson(rocrand_state_scrambled_sobol32* state, double lambda)
388{
389 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_scrambled_sobol32*,
390 unsigned int>(state, lambda);
391}
392
404__forceinline__ __device__ __host__
405unsigned int rocrand_poisson(rocrand_state_sobol64* state, double lambda)
406{
407 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_sobol64*, unsigned int>(
408 state,
409 lambda);
410}
411
423__forceinline__ __device__ __host__
424unsigned int rocrand_poisson(rocrand_state_scrambled_sobol64* state, double lambda)
425{
426 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_scrambled_sobol64*,
427 unsigned int>(state, lambda);
428}
429
441__forceinline__ __device__ __host__
442unsigned int rocrand_poisson(rocrand_state_lfsr113* state, double lambda)
443{
444 return rocrand_device::detail::poisson_distribution_inv<rocrand_state_lfsr113*, unsigned int>(
445 state,
446 lambda);
447}
448
460__forceinline__ __device__ __host__
461unsigned int rocrand_poisson(rocrand_state_threefry2x32_20* state, double lambda)
462{
463 return rocrand_device::detail::poisson_distribution_inv(state, lambda);
464}
465
477__forceinline__ __device__ __host__
478unsigned int rocrand_poisson(rocrand_state_threefry2x64_20* state, double lambda)
479{
480 return rocrand_device::detail::poisson_distribution_inv(state, lambda);
481}
482
494__forceinline__ __device__ __host__
495unsigned int rocrand_poisson(rocrand_state_threefry4x32_20* state, double lambda)
496{
497 return rocrand_device::detail::poisson_distribution_inv(state, lambda);
498}
499
511__forceinline__ __device__ __host__
512unsigned int rocrand_poisson(rocrand_state_threefry4x64_20* state, double lambda)
513{
514 return rocrand_device::detail::poisson_distribution_inv(state, lambda);
515}
516 // end of group rocranddevice
518
519#endif // ROCRAND_POISSON_H_
__forceinline__ __device__ __host__ unsigned int rocrand_poisson(rocrand_state_philox4x32_10 *state, double lambda)
Returns a Poisson-distributed unsigned int using Philox generator.
Definition rocrand_poisson.h:236
__forceinline__ __device__ __host__ uint4 rocrand_poisson4(rocrand_state_philox4x32_10 *state, double lambda)
Returns four Poisson-distributed unsigned int values using Philox generator.
Definition rocrand_poisson.h:256
__forceinline__ __device__ __host__ double rocrand_uniform_double(rocrand_state_philox4x32_10 *state)
Returns a uniformly distributed random double value from (0; 1] range.
Definition rocrand_uniform.h:299
__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition rocrand_normal.h:442