21#ifndef ROCRAND_NORMAL_H_
22#define ROCRAND_NORMAL_H_
29#include "rocrand/rocrand_lfsr113.h"
30#include "rocrand/rocrand_mrg31k3p.h"
31#include "rocrand/rocrand_mrg32k3a.h"
32#include "rocrand/rocrand_mtgp32.h"
33#include "rocrand/rocrand_philox4x32_10.h"
34#include "rocrand/rocrand_scrambled_sobol32.h"
35#include "rocrand/rocrand_scrambled_sobol64.h"
36#include "rocrand/rocrand_sobol32.h"
37#include "rocrand/rocrand_sobol64.h"
38#include "rocrand/rocrand_threefry2x32_20.h"
39#include "rocrand/rocrand_threefry2x64_20.h"
40#include "rocrand/rocrand_threefry4x32_20.h"
41#include "rocrand/rocrand_threefry4x64_20.h"
42#include "rocrand/rocrand_xorwow.h"
44#include "rocrand/rocrand_uniform.h"
46#include <hip/hip_runtime.h>
50namespace rocrand_device {
// Box-Muller transform (float): combines two 32-bit uniform integers into a
// pair built from a radius s = sqrtf(-2 * logf(u)) and an angle v.
// NOTE(review): this listing omits some original lines (braces, the
// declaration of `result`, #else/#endif, the return) - check the full header
// before changing this function.
53__forceinline__ __device__ __host__ float2 box_muller(
unsigned int x,
unsigned int y)
// u is x scaled by ROCRAND_2POW32_INV plus one extra step, so x == 0 gives
// u == ROCRAND_2POW32_INV rather than 0 before logf is applied.
56 float u = ROCRAND_2POW32_INV + (x * ROCRAND_2POW32_INV);
// v is the angle: y scaled by ROCRAND_2POW32_INV_2PI with the same one-step offset.
57 float v = ROCRAND_2POW32_INV_2PI + (y * ROCRAND_2POW32_INV_2PI);
58 float s = sqrtf(-2.0f * logf(u));
59 #ifdef __HIP_DEVICE_COMPILE__
// Device path: one call produces both sine (result.x) and cosine (result.y).
// NOTE(review): the scaling by s for this path is not visible in this listing.
60 __sincosf(v, &result.x, &result.y);
// Host path: separate sinf/cosf calls, each scaled by the radius s.
64 result.x = sinf(v) * s;
65 result.y = cosf(v) * s;
70__forceinline__ __device__ __host__ float2 box_muller(
unsigned long long v)
72 unsigned int x =
static_cast<unsigned int>(v);
73 unsigned int y =
static_cast<unsigned int>(v >> 32);
75 return box_muller(x, y);
// Box-Muller transform (double): consumes four 32-bit words and produces a
// pair built from a radius s = sqrt(-2 * log(u)) and an angle w * pi.
// NOTE(review): this listing omits some original lines (braces, the
// declaration of `result`, #else/#endif, the return) - check the full header.
78__forceinline__ __device__ __host__ double2 box_muller_double(uint4 v)
// v1 mixes v.x with v.y shifted left by 21 bits (53 - 32); the combined value
// therefore fits in 53 bits.
81 unsigned long long int v1 = (
unsigned long long int)v.x ^
82 ((
unsigned long long int)v.y << (53 - 32));
// u equals ROCRAND_2POW53_INV_DOUBLE when v1 == 0, so log never receives 0.
83 double u = ROCRAND_2POW53_INV_DOUBLE + (v1 * ROCRAND_2POW53_INV_DOUBLE);
// v2 is built from v.z and v.w the same way v1 is built from v.x and v.y.
84 unsigned long long int v2 = (
unsigned long long int)v.z ^
85 ((
unsigned long long int)v.w << (53 - 32));
// w is the angle expressed in units of pi (it is passed to sincospi on the
// device path and multiplied by ROCRAND_PI_DOUBLE on the host path).
86 double w = (ROCRAND_2POW53_INV_DOUBLE * 2.0) +
87 (v2 * (ROCRAND_2POW53_INV_DOUBLE * 2.0));
88 double s = sqrt(-2.0 * log(u));
89 #ifdef __HIP_DEVICE_COMPILE__
// Device path: sine goes to result.x, cosine to result.y.
// NOTE(review): the scaling by s for this path is not visible in this listing.
90 sincospi(w, &result.x, &result.y);
// Host path: explicit sin/cos of w * pi, each scaled by the radius s.
94 result.x = sin(w * ROCRAND_PI_DOUBLE) * s;
95 result.y = cos(w * ROCRAND_PI_DOUBLE) * s;
100__forceinline__ __device__ __host__ double2 box_muller_double(ulonglong2 v)
102 unsigned int x =
static_cast<unsigned int>(v.x);
103 unsigned int y =
static_cast<unsigned int>(v.x >> 32);
104 unsigned int z =
static_cast<unsigned int>(v.y);
105 unsigned int w =
static_cast<unsigned int>(v.y >> 32);
107 return box_muller_double(make_uint4(x, y, z, w));
// Box-Muller transform for half precision: takes two 16-bit uniform integers
// and returns a __half2.
// NOTE(review): large parts of this function are missing from this listing
// (braces, sine/cosine and packing of the half-math path, #else/#endif, the
// declaration of `r`, the return) - check the full header.
110__forceinline__ __device__ __host__ __half2 box_muller_half(
unsigned short x,
unsigned short y)
// Path 1: native half arithmetic (hsqrt / hlog / __hmul) when
// ROCRAND_HALF_MATH_SUPPORTED is defined.
112 #if defined(ROCRAND_HALF_MATH_SUPPORTED)
113 __half u = __float2half(ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV));
114 __half v = __float2half(ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI));
115 __half s = hsqrt(__hmul(__float2half(-2.0f), hlog(u)));
// Path 2 (presumably the #else branch - the directive itself is not visible):
// the same formulas evaluated in float.
122 float u = ROCRAND_2POW16_INV + (x * ROCRAND_2POW16_INV);
123 float v = ROCRAND_2POW16_INV_2PI + (y * ROCRAND_2POW16_INV_2PI);
124 float s = sqrtf(-2.0f * logf(u));
125 #ifdef __HIP_DEVICE_COMPILE__
// Device path: sine goes to r.x, cosine to r.y.
126 __sincosf(v, &r.x, &r.y);
// Box-Muller transform (float) for MRG-style generators.
// state_type selects the mrg_uniform_distribution specialization used to map
// the raw integers x and y to floats.
// NOTE(review): this listing omits some original lines (braces, the
// declaration of `result`, #else/#endif, the return) - check the full header.
140template<
typename state_type>
141__forceinline__ __device__ __host__ float2 mrg_box_muller(
unsigned int x,
unsigned int y)
// u feeds the radius, v (scaled by ROCRAND_2PI) is the angle.
144 float u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
145 float v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * ROCRAND_2PI;
146 float s = sqrtf(-2.0f * logf(u));
147 #ifdef __HIP_DEVICE_COMPILE__
// Device path: sine goes to result.x, cosine to result.y.
// NOTE(review): the scaling by s for this path is not visible in this listing.
148 __sincosf(v, &result.x, &result.y);
// Host path: separate sinf/cosf calls, each scaled by the radius s.
152 result.x = sinf(v) * s;
153 result.y = cosf(v) * s;
// Box-Muller transform (double) for MRG-style generators.
// state_type selects the mrg_uniform_distribution specialization used to map
// the raw integers x and y.
// NOTE(review): this listing omits some original lines (braces, the
// declaration of `result`, #else/#endif, the return) - check the full header.
158template<
typename state_type>
159__forceinline__ __device__ __host__ double2 mrg_box_muller_double(
unsigned int x,
unsigned int y)
// u feeds the radius; v (scaled by 2.0) is the angle in units of pi.
162 double u = rocrand_device::detail::mrg_uniform_distribution<state_type>(x);
163 double v = rocrand_device::detail::mrg_uniform_distribution<state_type>(y) * 2.0;
164 double s = sqrt(-2.0 * log(u));
165 #ifdef __HIP_DEVICE_COMPILE__
// Device path: sine goes to result.x, cosine to result.y.
// NOTE(review): the scaling by s for this path is not visible in this listing.
166 sincospi(v, &result.x, &result.y);
// Host path: explicit sin/cos of v * pi, each scaled by the radius s.
170 result.x = sin(v * ROCRAND_PI_DOUBLE) * s;
171 result.y = cos(v * ROCRAND_PI_DOUBLE) * s;
// Closed-form float approximation used as an inverse error function.
// NOTE(review): the assignment to lnx and the bodies of the special-case
// branches are not visible in this listing; the comments below describe only
// the visible statements.
176__forceinline__ __device__ __host__
float roc_f_erfinv(
float x)
178 float tt1, tt2, lnx, sgn;
// Remember the sign of the input; the final result is multiplied by it.
179 sgn = (x < 0.0f) ? -1.0f : 1.0f;
// Replace x by (1 - x)(1 + x) = 1 - x^2.
181 x = (1.0f - x) * (1.0f + x);
184 #ifdef __HIP_DEVICE_COMPILE__
190 #ifdef __HIP_DEVICE_COMPILE__
// Special case for an infinite lnx (the branch body is not visible here).
193 else if (std::isinf(lnx))
// With a = 0.147: tt1 = 2 / (pi * a) + lnx / 2 and tt2 = lnx / a.
// presumably lnx = log(1 - x^2), which would make this the well-known
// a = 0.147 erfinv approximation - TODO confirm against the full header.
197 tt1 = 2.0f / (ROCRAND_PI * 0.147f) + 0.5f * lnx;
198 tt2 = 1.0f / (0.147f) * lnx;
200 return(sgn * sqrtf(-tt1 + sqrtf(tt1 * tt1 - tt2)));
// Closed-form double approximation used as an inverse error function.
// NOTE(review): the assignment to lnx and the bodies of the special-case
// branches are not visible in this listing; the comments below describe only
// the visible statements.
203__forceinline__ __device__ __host__
double roc_d_erfinv(
double x)
205 double tt1, tt2, lnx, sgn;
// Remember the sign of the input; the final result is multiplied by it.
206 sgn = (x < 0.0) ? -1.0 : 1.0;
// Replace x by (1 - x)(1 + x) = 1 - x^2.
208 x = (1.0 - x) * (1.0 + x);
211 #ifdef __HIP_DEVICE_COMPILE__
217 #ifdef __HIP_DEVICE_COMPILE__
// Special case for an infinite lnx (the branch body is not visible here).
220 else if (std::isinf(lnx))
// With a = 0.147: tt1 = 2 / (pi * a) + lnx / 2 and tt2 = lnx / a.
// presumably lnx = log(1 - x^2) - TODO confirm against the full header.
224 tt1 = 2.0 / (ROCRAND_PI_DOUBLE * 0.147) + 0.5 * lnx;
225 tt2 = 1.0 / (0.147) * lnx;
227 return(sgn * sqrt(-tt1 + sqrt(tt1 * tt1 - tt2)));
// Maps one 32-bit uniform integer to a float through the inverse error
// function: v = ROCRAND_SQRT2 * roc_f_erfinv(2p - 1) with p = uniform_distribution(x).
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably v is returned - confirm against the full header.
230__forceinline__ __device__ __host__
float normal_distribution(
unsigned int x)
232 float p = ::rocrand_device::detail::uniform_distribution(x);
// 2p - 1 rescales p before it is passed to roc_f_erfinv.
233 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
// 64-bit-input overload: maps one uniform 64-bit integer to a float through
// the inverse error function: v = ROCRAND_SQRT2 * roc_f_erfinv(2p - 1) with
// p = uniform_distribution(x).
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably v is returned - confirm against the full header.
237__forceinline__ __device__ __host__
float normal_distribution(
unsigned long long int x)
239 float p = ::rocrand_device::detail::uniform_distribution(x);
// 2p - 1 rescales p before it is passed to roc_f_erfinv.
240 float v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_f_erfinv(2.0f * p - 1.0f);
244__forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned int v1,
unsigned int v2)
246 return ::rocrand_device::detail::box_muller(v1, v2);
249__forceinline__ __device__ __host__ float2 normal_distribution2(uint2 v)
251 return ::rocrand_device::detail::box_muller(v.x, v.y);
254__forceinline__ __device__ __host__ float2 normal_distribution2(
unsigned long long v)
256 return ::rocrand_device::detail::box_muller(v);
// Runs detail::box_muller twice on a uint4: once on (v.x, v.y) and once on
// (v.z, v.w).
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably the two pairs are packed into the float4 result -
// confirm against the full header.
259__forceinline__ __device__ __host__ float4 normal_distribution4(uint4 v)
261 float2 r1 = ::rocrand_device::detail::box_muller(v.x, v.y);
262 float2 r2 = ::rocrand_device::detail::box_muller(v.z, v.w);
271__forceinline__ __device__ __host__ float4 normal_distribution4(longlong2 v)
273 float2 r1 = ::rocrand_device::detail::box_muller(v.x);
274 float2 r2 = ::rocrand_device::detail::box_muller(v.y);
275 return float4{r1.x, r1.y, r2.x, r2.y};
278__forceinline__ __device__ __host__ float4 normal_distribution4(
unsigned long long v1,
279 unsigned long long v2)
281 float2 r1 = ::rocrand_device::detail::box_muller(v1);
282 float2 r2 = ::rocrand_device::detail::box_muller(v2);
283 return float4{r1.x, r1.y, r2.x, r2.y};
// Maps one 32-bit uniform integer to a double through the inverse error
// function: v = ROCRAND_SQRT2 * roc_d_erfinv(2p - 1) with
// p = uniform_distribution_double(x).
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably v is returned - confirm against the full header.
286__forceinline__ __device__ __host__
double normal_distribution_double(
unsigned int x)
288 double p = ::rocrand_device::detail::uniform_distribution_double(x);
// NOTE(review): ROCRAND_SQRT2 is the same macro the float overloads use; if
// it is a float literal, this factor carries only float precision - confirm
// the macro definition.
289 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
// 64-bit-input overload: maps one uniform 64-bit integer to a double through
// the inverse error function: v = ROCRAND_SQRT2 * roc_d_erfinv(2p - 1) with
// p = uniform_distribution_double(x).
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably v is returned - confirm against the full header.
293__forceinline__ __device__ __host__
double normal_distribution_double(
unsigned long long int x)
295 double p = ::rocrand_device::detail::uniform_distribution_double(x);
// NOTE(review): ROCRAND_SQRT2 is the same macro the float overloads use; if
// it is a float literal, this factor carries only float precision - confirm
// the macro definition.
296 double v = ROCRAND_SQRT2 * ::rocrand_device::detail::roc_d_erfinv(2.0 * p - 1.0);
300__forceinline__ __device__ __host__ double2 normal_distribution_double2(uint4 v)
302 return ::rocrand_device::detail::box_muller_double(v);
305__forceinline__ __device__ __host__ double2 normal_distribution_double2(ulonglong2 v)
307 return ::rocrand_device::detail::box_muller_double(v);
310__forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned int v)
312 return ::rocrand_device::detail::box_muller_half(
313 static_cast<unsigned short>(v),
314 static_cast<unsigned short>(v >> 16)
318__forceinline__ __device__ __host__ __half2 normal_distribution_half2(
unsigned long long v)
320 return ::rocrand_device::detail::box_muller_half(
static_cast<unsigned short>(v),
321 static_cast<unsigned short>(v >> 32));
// Produces two floats for MRG-style generators by forwarding (v1, v2) to
// detail::mrg_box_muller<state_type>.
// NOTE(review): the line declaring the second parameter (v2) and the braces
// are not visible in this listing - check the full header.
324template<
typename state_type>
325__forceinline__ __device__ __host__ float2 mrg_normal_distribution2(
unsigned int v1,
328 return ::rocrand_device::detail::mrg_box_muller<state_type>(v1, v2);
// Produces two doubles for MRG-style generators by forwarding (v1, v2) to
// detail::mrg_box_muller_double<state_type>.
// NOTE(review): the line declaring the second parameter (v2) and the braces
// are not visible in this listing - check the full header.
331template<
typename state_type>
332__forceinline__ __device__ __host__ double2 mrg_normal_distribution_double2(
unsigned int v1,
335 return ::rocrand_device::detail::mrg_box_muller_double<state_type>(v1, v2);
// Produces a __half2 for MRG-style generators from one raw integer.
// NOTE(review): the braces and the closing of the box_muller_half call are
// not visible in this listing - check the full header.
338template<
typename state_type>
339__forceinline__ __device__ __host__ __half2 mrg_normal_distribution_half2(
unsigned int v)
// Remap the raw generator output through mrg_uniform_distribution_uint
// before splitting it into two 16-bit inputs.
341 v = rocrand_device::detail::mrg_uniform_distribution_uint<state_type>(v);
// Bits 0-15 are the first input, bits 16-31 the second.
342 return ::rocrand_device::detail::box_muller_half(
343 static_cast<unsigned short>(v),
344 static_cast<unsigned short>(v >> 16)
365#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
// Returns a normally distributed float value using a Philox4x32-10 state.
// Two values come out of each normal_distribution2 call; the second one
// (r.y) is stored in the state through the helper and handed out by the next
// call via has_float/get_float.
// NOTE(review): the braces, the definitions of state1/state2 and the final
// return are not visible in this listing - check the full header.
366__forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_philox4x32_10* state)
368 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
// Fast path: a value saved by an earlier call is returned directly.
370 if(bm_helper::has_float(state))
372 return bm_helper::get_float(state);
378 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
// Keep the second value for the next call.
379 bm_helper::save_float(state, r.y);
398__forceinline__ __device__ __host__
404 return rocrand_device::detail::normal_distribution2(state1, state2);
421__forceinline__ __device__ __host__
424 return rocrand_device::detail::normal_distribution4(
rocrand4(state));
441#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
444 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_philox4x32_10> bm_helper;
446 if(bm_helper::has_double(state))
448 return bm_helper::get_double(state);
450 double2 r = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
451 bm_helper::save_double(state, r.y);
470__forceinline__ __device__ __host__
473 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
490__forceinline__ __device__ __host__
494 r1 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
495 r2 = rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
497 r1.x, r1.y, r2.x, r2.y
515#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
// rocrand_normal overload for an MRG31k3p state; returns a float.
// Two values come out of each mrg_normal_distribution2 call; the second one
// (r.y) is stored in the state through the helper and handed out by the next
// call via has_float/get_float.
// NOTE(review): the braces, the declaration of `r` and the final return are
// not visible in this listing - check the full header.
516__forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_mrg31k3p* state)
518 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
// Fast path: a value saved by an earlier call is returned directly.
520 if(bm_helper::has_float(state))
522 return bm_helper::get_float(state);
// Draw two raw outputs from the generator; each next() call is made on the
// same state object.
525 auto state1 = state->next();
526 auto state2 = state->next();
529 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
// Keep the second value for the next call.
530 bm_helper::save_float(state, r.y);
549__forceinline__ __device__ __host__
552 auto state1 = state->next();
553 auto state2 = state->next();
555 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg31k3p>(state1, state2);
572#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
575 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg31k3p> bm_helper;
577 if(bm_helper::has_double(state))
579 return bm_helper::get_double(state);
582 auto state1 = state->next();
583 auto state2 = state->next();
586 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
588 bm_helper::save_double(state, r.y);
607__forceinline__ __device__ __host__
610 auto state1 = state->next();
611 auto state2 = state->next();
613 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg31k3p>(state1,
631#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
// rocrand_normal overload for an MRG32k3a state; returns a float.
// Two values come out of each mrg_normal_distribution2 call; the second one
// (r.y) is stored in the state through the helper and handed out by the next
// call via has_float/get_float.
// NOTE(review): the braces, the declaration of `r` and the final return are
// not visible in this listing - check the full header.
632__forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_mrg32k3a* state)
634 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
// Fast path: a value saved by an earlier call is returned directly.
636 if(bm_helper::has_float(state))
638 return bm_helper::get_float(state);
// Draw two raw outputs from the generator; each next() call is made on the
// same state object.
641 auto state1 = state->next();
642 auto state2 = state->next();
645 = rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
// Keep the second value for the next call.
646 bm_helper::save_float(state, r.y);
665__forceinline__ __device__ __host__
668 auto state1 = state->next();
669 auto state2 = state->next();
671 return rocrand_device::detail::mrg_normal_distribution2<rocrand_state_mrg32k3a>(state1, state2);
688#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
691 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_mrg32k3a> bm_helper;
693 if(bm_helper::has_double(state))
695 return bm_helper::get_double(state);
698 auto state1 = state->next();
699 auto state2 = state->next();
702 = rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
704 bm_helper::save_double(state, r.y);
723__forceinline__ __device__ __host__
726 auto state1 = state->next();
727 auto state2 = state->next();
729 return rocrand_device::detail::mrg_normal_distribution_double2<rocrand_state_mrg32k3a>(state1,
747#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
// rocrand_normal overload for an XORWOW state; returns a float.
// Two values come out of each normal_distribution2 call; the second one
// (r.y) is stored in the state through the helper and handed out by the next
// call via has_float/get_float.
// NOTE(review): the braces, the definitions of state1/state2 and the final
// return are not visible in this listing - check the full header.
748__forceinline__ __device__ __host__
float rocrand_normal(rocrand_state_xorwow* state)
750 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
// Fast path: a value saved by an earlier call is returned directly.
752 if(bm_helper::has_float(state))
754 return bm_helper::get_float(state);
758 float2 r = rocrand_device::detail::normal_distribution2(state1, state2);
// Keep the second value for the next call.
759 bm_helper::save_float(state, r.y);
778__forceinline__ __device__ __host__
783 return rocrand_device::detail::normal_distribution2(state1, state2);
800#ifndef ROCRAND_DETAIL_BM_NOT_IN_STATE
803 typedef rocrand_device::detail::engine_boxmuller_helper<rocrand_state_xorwow> bm_helper;
805 if(bm_helper::has_double(state))
807 return bm_helper::get_double(state);
815 double2 r = rocrand_device::detail::normal_distribution_double2(
816 uint4 { state1, state2, state3, state4 }
818 bm_helper::save_double(state, r.y);
837__forceinline__ __device__ __host__
845 return rocrand_device::detail::normal_distribution_double2(
846 uint4 { state1, state2, state3, state4 }
862__forceinline__ __device__
865 return rocrand_device::detail::normal_distribution(
rocrand(state));
882__forceinline__ __device__
887 return rocrand_device::detail::normal_distribution2(state1, state2);
902__forceinline__ __device__
905 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
922__forceinline__ __device__
930 return rocrand_device::detail::normal_distribution_double2(
931 uint4{state1, state2, state3, state4});
946__forceinline__ __device__ __host__
949 return rocrand_device::detail::normal_distribution(
rocrand(state));
964__forceinline__ __device__ __host__
967 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
982__forceinline__ __device__ __host__
985 return rocrand_device::detail::normal_distribution(
rocrand(state));
1000__forceinline__ __device__ __host__
1003 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1018__forceinline__ __device__ __host__
1021 return rocrand_device::detail::normal_distribution(
rocrand(state));
1036__forceinline__ __device__ __host__
1039 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1054__forceinline__ __device__ __host__
1057 return rocrand_device::detail::normal_distribution(
rocrand(state));
1072__forceinline__ __device__ __host__
1075 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1090__forceinline__ __device__ __host__
1093 return rocrand_device::detail::normal_distribution(
rocrand(state));
1110__forceinline__ __device__ __host__
1116 return rocrand_device::detail::normal_distribution2(state1, state2);
1131__forceinline__ __device__ __host__
1134 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1151__forceinline__ __device__ __host__
1159 return rocrand_device::detail::normal_distribution_double2(
1160 uint4{state1, state2, state3, state4});
1175__forceinline__ __device__ __host__
1178 return rocrand_device::detail::normal_distribution(
rocrand(state));
1195__forceinline__ __device__ __host__
1198 return rocrand_device::detail::normal_distribution2(rocrand2(state));
1213__forceinline__ __device__ __host__
1216 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1233__forceinline__ __device__ __host__
1236 auto state1 = rocrand2(state);
1237 auto state2 = rocrand2(state);
1239 return rocrand_device::detail::normal_distribution_double2(
1240 uint4{state1.x, state1.y, state2.x, state2.y});
1255__forceinline__ __device__ __host__
1258 return rocrand_device::detail::normal_distribution(
rocrand(state));
1275__forceinline__ __device__ __host__
1278 return rocrand_device::detail::normal_distribution2(
rocrand(state));
1293__forceinline__ __device__ __host__
1296 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1313__forceinline__ __device__ __host__
1316 return rocrand_device::detail::normal_distribution_double2(rocrand2(state));
1331__forceinline__ __device__ __host__
1334 return rocrand_device::detail::normal_distribution(
rocrand(state));
1351__forceinline__ __device__ __host__
1357 return rocrand_device::detail::normal_distribution2(state1, state2);
1372__forceinline__ __device__ __host__
1375 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1392__forceinline__ __device__ __host__
1395 return rocrand_device::detail::normal_distribution_double2(
rocrand4(state));
1410__forceinline__ __device__ __host__
1413 return rocrand_device::detail::normal_distribution(
rocrand(state));
1430__forceinline__ __device__ __host__
1436 return rocrand_device::detail::normal_distribution2(state1, state2);
1451__forceinline__ __device__ __host__
1454 return rocrand_device::detail::normal_distribution_double(
rocrand(state));
1471__forceinline__ __device__ __host__
1477 return rocrand_device::detail::normal_distribution_double2(ulonglong2{state1, state2});
__forceinline__ __device__ __host__ double4 rocrand_normal_double4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed double values.
Definition rocrand_normal.h:491
__forceinline__ __device__ __host__ double2 rocrand_normal_double2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed double values.
Definition rocrand_normal.h:471
__forceinline__ __device__ __host__ float rocrand_normal(rocrand_state_philox4x32_10 *state)
Returns a normally distributed float value.
Definition rocrand_normal.h:366
__forceinline__ __device__ __host__ uint4 rocrand4(rocrand_state_philox4x32_10 *state)
Returns four uniformly distributed random unsigned int values from the [0; 2^32 - 1] range.
Definition rocrand_philox4x32_10.h:379
__forceinline__ __device__ __host__ double rocrand_normal_double(rocrand_state_philox4x32_10 *state)
Returns a normally distributed double value.
Definition rocrand_normal.h:442
__forceinline__ __device__ __host__ float4 rocrand_normal4(rocrand_state_philox4x32_10 *state)
Returns four normally distributed float values.
Definition rocrand_normal.h:422
__forceinline__ __device__ __host__ unsigned int rocrand(rocrand_state_lfsr113 *state)
Returns a uniformly distributed random unsigned int value from the [0; 2^32 - 1] range.
Definition rocrand_lfsr113.h:277
__forceinline__ __device__ __host__ float2 rocrand_normal2(rocrand_state_philox4x32_10 *state)
Returns two normally distributed float values.
Definition rocrand_normal.h:399