From 4098612a87d140a1d3daa69f007f2a059acc7285 Mon Sep 17 00:00:00 2001 From: John <50754967+j-bm@users.noreply.github.com> Date: Thu, 19 Jan 2023 09:21:47 -0400 Subject: [PATCH] rand: add full precision f32 and f64 random functions; fix f32/f64 multipliers (#16875) --- vlib/math/bits/unsafe_bits.js.v | 59 ++++++++++++++++ vlib/math/bits/unsafe_bits.v | 42 ++++++++++++ vlib/rand/README.md | 27 ++++++-- vlib/rand/constants/constants.v | 19 ++++-- vlib/rand/fp_test.v | 116 ++++++++++++++++++++++++++++++++ vlib/rand/rand.v | 89 ++++++++++++++++++++++-- 6 files changed, 337 insertions(+), 15 deletions(-) create mode 100644 vlib/math/bits/unsafe_bits.js.v create mode 100644 vlib/math/bits/unsafe_bits.v create mode 100644 vlib/rand/fp_test.v diff --git a/vlib/math/bits/unsafe_bits.js.v b/vlib/math/bits/unsafe_bits.js.v new file mode 100644 index 000000000..b4f18d826 --- /dev/null +++ b/vlib/math/bits/unsafe_bits.js.v @@ -0,0 +1,59 @@ +module bits + +// f32_bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position. +// f32_bits(f32_from_bits(x)) == x. +pub fn f32_bits(f f32) u32 { + p := u32(0) + #let buffer = new ArrayBuffer(4) + #let floatArr = new Float32Array(buffer) + #floatArr[0] = f.val + #let uintArr = new Uint32Array(buffer) + #p.val = uintArr[0] + + return p +} + +// f32_from_bits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// f32_from_bits(f32_bits(x)) == x. +pub fn f32_from_bits(b u32) f32 { + p := f32(0.0) + #let buffer = new ArrayBuffer(4) + #let floatArr = new Float32Array(buffer) + #let uintArr = new Uint32Array(buffer) + #uintArr[0] = Number(b.val) + #p.val = floatArr[0] + + return p +} + +// f64_bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position, +// and f64_bits(f64_from_bits(x)) == x. +pub fn f64_bits(f f64) u64 { + p := u64(0) + #let buffer = new ArrayBuffer(8) + #let floatArr = new Float64Array(buffer) + #floatArr[0] = f.val + #let uintArr = new BigUint64Array(buffer) + #p.val = uintArr[0] + + return p +} + +// f64_from_bits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// f64_from_bits(f64_bits(x)) == x. +pub fn f64_from_bits(b u64) f64 { + p := 0.0 + #let buffer = new ArrayBuffer(8) + #let floatArr = new Float64Array(buffer) + #let uintArr = new BigUint64Array(buffer) + #uintArr[0] = b.val + #p.val = floatArr[0] + + return p +} diff --git a/vlib/math/bits/unsafe_bits.v b/vlib/math/bits/unsafe_bits.v new file mode 100644 index 000000000..bc77d6616 --- /dev/null +++ b/vlib/math/bits/unsafe_bits.v @@ -0,0 +1,42 @@ +// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved. +// Use of this source code is governed by an MIT license +// that can be found in the LICENSE file. +module bits + +// f32_bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position. +// f32_bits(f32_from_bits(x)) == x. +[inline] +pub fn f32_bits(f f32) u32 { + p := *unsafe { &u32(&f) } + return p +} + +// f32_from_bits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// f32_from_bits(f32_bits(x)) == x. +[inline] +pub fn f32_from_bits(b u32) f32 { + p := *unsafe { &f32(&b) } + return p +} + +// f64_bits returns the IEEE 754 binary representation of f, +// with the sign bit of f and the result in the same bit position, +// and f64_bits(f64_from_bits(x)) == x. +[inline] +pub fn f64_bits(f f64) u64 { + p := *unsafe { &u64(&f) } + return p +} + +// f64_from_bits returns the floating-point number corresponding +// to the IEEE 754 binary representation b, with the sign bit of b +// and the result in the same bit position. +// f64_from_bits(f64_bits(x)) == x. +[inline] +pub fn f64_from_bits(b u64) f64 { + p := *unsafe { &f64(&b) } + return p +} diff --git a/vlib/rand/README.md b/vlib/rand/README.md index 33443a850..305c8f8c9 100644 --- a/vlib/rand/README.md +++ b/vlib/rand/README.md @@ -11,7 +11,7 @@ import rand ... // Optionally seed the default generator -rand.seed([u32(3110), 50714]) +rand.seed([u32(3223878742), 1732001562]) ... @@ -61,21 +61,40 @@ Otherwise, there is feature parity between the generator functions and the top-l A PRNG is a Pseudo Random Number Generator. Computers cannot generate truly random numbers without an external source of noise or entropy. We can use algorithms to generate sequences of seemingly random numbers, -but their outputs will always be deterministic. -This is often useful for simulations that need the same starting seed. +but their outputs will always be deterministic, according to the seed values. + +This is often useful for simulations that need the same starting seeds. +You may be debugging a program and want to restart it with the same +seeds, or you want to verify a working program is still +operating identically after compiler or operating system updates. If you need truly random numbers that are going to be used for cryptography, use the `crypto.rand` module. # Seeding Functions -All the generators are time-seeded. +All the generators are initialized with time-based seeds. The helper functions publicly available in `rand.seed` module are: 1. `time_seed_array()` - returns a `[]u32` that can be directly plugged into the `seed()` functions. 2. `time_seed_32()` and `time_seed_64()` - 32-bit and 64-bit values respectively that are generated from the current time. +When composing your own seeds, use "typical" u32 numbers, not small numbers. This +is especially important for PRNGs with large state, such as `mt19937`. You can create +random unsigned integers with openssl `rand` or with `v repl` as follows: + +``` +$ openssl rand -hex 4 +e3655862 +$ openssl rand -hex 4 +97c4b1db +$ v repl +>>> import rand +>>> [rand.u32(),rand.u32()] +[2132382944, 2443871665] +``` + # Caveats Note that the `sys.SysRNG` struct (in the C backend) uses `C.srand()` which sets the seed globally. diff --git a/vlib/rand/constants/constants.v b/vlib/rand/constants/constants.v index 371d54d73..db3ee3049 100644 --- a/vlib/rand/constants/constants.v +++ b/vlib/rand/constants/constants.v @@ -2,11 +2,16 @@ module constants // Commonly used constants across RNGs - some taken from "Numerical Recipes". pub const ( - lower_mask = u64(0x00000000FFFFFFFF) - max_u32 = u32(0xFFFFFFFF) - max_u64 = u64(0xFFFFFFFFFFFFFFFF) - max_u32_as_f32 = f32(max_u32) + 1 - max_u64_as_f64 = f64(max_u64) + 1 - u31_mask = u32(0x7FFFFFFF) - u63_mask = u64(0x7FFFFFFFFFFFFFFF) + lower_mask = u64(0x00000000FFFFFFFF) + max_u32 = u32(0xFFFFFFFF) + max_u64 = u64(0xFFFFFFFFFFFFFFFF) + u31_mask = u32(0x7FFFFFFF) + u63_mask = u64(0x7FFFFFFFFFFFFFFF) + // 23 bits for f32 + ieee754_mantissa_f32_mask = (u32(1) << 23) - 1 + // 52 bits for f64 + ieee754_mantissa_f64_mask = (u64(1) << 52) - 1 + // smallest mantissa with exponent 0 (un normalized) + reciprocal_2_23rd = 1.0 / f64(u32(1) << 23) + reciprocal_2_52nd = 1.0 / f64(u64(1) << 52) ) diff --git a/vlib/rand/fp_test.v b/vlib/rand/fp_test.v new file mode 100644 index 000000000..e1edebb09 --- /dev/null +++ b/vlib/rand/fp_test.v @@ -0,0 +1,116 @@ +import rand + +struct Histo { +mut: + lo f64 + ct int +} + +const ( + // The sample size to be used; keep cpu time less than 5 seconds + count = 9100100 + // Two sets of seeds + seeds = [[u32(2742798260), 2159764996], [u32(2135051596), 958016781]] +) + +fn test_f32() { + mut histo := []Histo{} + mut candidate := f32(0.0) + + histo << Histo{1.0e-9, 0} + histo << Histo{1.0e-6, 0} + histo << Histo{1.0e-4, 0} + histo << Histo{1.0e-2, 0} + histo << Histo{1.0e00, 0} + + for seed in seeds { + rand.seed(seed) + for _ in 0 .. count { + candidate = rand.f32() + for mut p in histo { + if candidate < p.lo { + p.ct += 1 + } + } + } + } + println(' f32 ') + println(histo) + assert histo[0].ct == 1 + assert histo[1].ct == 16 + assert histo[2].ct == 1802 + assert histo[3].ct == 181963 + assert histo[4].ct == 18200200 + for mut p in histo { + p.ct = 0 + } + for seed in seeds { + rand.seed(seed) + for _ in 0 .. count { + candidate = rand.f32cp() + for mut p in histo { + if candidate < p.lo { + p.ct += 1 + } + } + } + } + println(' f32cp') + println(histo) + assert histo[0].ct == 0 + assert histo[1].ct == 16 + assert histo[2].ct == 1863 + assert histo[3].ct == 142044 + assert histo[4].ct == 18200200 +} + +fn test_f64() { + mut histo := []Histo{} + mut candidate := f64(0.0) + + histo << Histo{1.0e-9, 0} + histo << Histo{1.0e-6, 0} + histo << Histo{1.0e-4, 0} + histo << Histo{1.0e-2, 0} + histo << Histo{1.0e00, 0} + + for seed in seeds { + rand.seed(seed) + for _ in 0 .. count { + candidate = rand.f64() + for mut p in histo { + if candidate < p.lo { + p.ct += 1 + } + } + } + } + println(' f64 ') + println(histo) + assert histo[0].ct == 0 + assert histo[1].ct == 23 + assert histo[2].ct == 1756 + assert histo[3].ct == 182209 + assert histo[4].ct == 18200200 + for mut p in histo { + p.ct = 0 + } + for seed in seeds { + rand.seed(seed) + for _ in 0 .. count { + candidate = rand.f64cp() + for mut p in histo { + if candidate < p.lo { + p.ct += 1 + } + } + } + } + println(' f64cp') + println(histo) + assert histo[0].ct == 0 + assert histo[1].ct == 17 + assert histo[2].ct == 1878 + assert histo[3].ct == 181754 + assert histo[4].ct == 18200200 +} diff --git a/vlib/rand/rand.v b/vlib/rand/rand.v index 04604d4f3..4fdc03de5 100644 --- a/vlib/rand/rand.v +++ b/vlib/rand/rand.v @@ -191,16 +191,85 @@ pub fn (mut rng PRNG) i64_in_range(min i64, max i64) !i64 { return min + rng.i64n(max - min)! } -// f32 returns a pseudorandom `f32` value in range `[0, 1)`. +// f32 returns a pseudorandom `f32` value in range `[0, 1)` +// using rng.u32() multiplied by an f64 constant. [inline] pub fn (mut rng PRNG) f32() f32 { - return f32(rng.u32()) / constants.max_u32_as_f32 + return f32((rng.u32() >> 9) * constants.reciprocal_2_23rd) } -// f64 returns a pseudorandom `f64` value in range `[0, 1)`. +// f32cp returns a pseudorandom `f32` value in range `[0, 1)` +// with full precision (mantissa random between 0 and 1 +// and the exponent varies as well.) +// See https://allendowney.com/research/rand/ for background on the method. +[inline] +pub fn (mut rng PRNG) f32cp() f32 { + mut x := rng.u32() + mut exp := u32(126) + mut mask := u32(1) << 31 + + // check if prng returns 0; rare but keep looking for precision + if _unlikely_(x == 0) { + x = rng.u32() + exp -= 31 + } + // count leading one bits and scale exponent accordingly + for { + if x & mask != 0 { + mask >>= 1 + exp -= 1 + } else { + break + } + } + // if we used any high-order mantissa bits; replace x + if exp < (126 - 8) { + x = rng.u32() + } + + // Assumes little-endian IEEE floating point. + x = (exp << 23) | (x >> 8) & constants.ieee754_mantissa_f32_mask + return bits.f32_from_bits(x) +} + +// f64 returns a pseudorandom `f64` value in range `[0, 1)` +// using rng.u64() multiplied by a constant. [inline] pub fn (mut rng PRNG) f64() f64 { - return f64(rng.u64()) / constants.max_u64_as_f64 + return f64((rng.u64() >> 12) * constants.reciprocal_2_52nd) +} + +// f64cp returns a pseudorandom `f64` value in range `[0, 1)` +// with full precision (mantissa random between 0 and 1 +// and the exponent varies as well.) +// See https://allendowney.com/research/rand/ for background on the method. +[inline] +pub fn (mut rng PRNG) f64cp() f64 { + mut x := rng.u64() + mut exp := u64(1022) + mut mask := u64(1) << 63 + mut bitcount := u32(0) + + // check if prng returns 0; unlikely. + if _unlikely_(x == 0) { + x = rng.u64() + exp -= 31 + } + // count leading one bits and scale exponent accordingly + for { + if x & mask != 0 { + mask >>= 1 + bitcount += 1 + } else { + break + } + } + exp -= bitcount + if bitcount > 11 { + x = rng.u64() + } + x = (exp << 52) | (x & constants.ieee754_mantissa_f64_mask) + return bits.f64_from_bits(x) } // f32n returns a pseudorandom `f32` value in range `[0, max]`. @@ -520,11 +589,23 @@ pub fn f32() f32 { return default_rng.f32() } +// f32cp returns a uniformly distributed 32-bit floating point in range `[0, 1)` +// with full precision mantissa. +pub fn f32cp() f32 { + return default_rng.f32cp() +} + // f64 returns a uniformly distributed 64-bit floating point in range `[0, 1)`. pub fn f64() f64 { return default_rng.f64() } +// f64 returns a uniformly distributed 64-bit floating point in range `[0, 1)` +// with full precision mantissa. +pub fn f64cp() f64 { + return default_rng.f64cp() +} + // f32n returns a uniformly distributed 32-bit floating point in range `[0, max)`. pub fn f32n(max f32) !f32 { return default_rng.f32n(max) -- 2.30.2