From 4098612a87d140a1d3daa69f007f2a059acc7285 Mon Sep 17 00:00:00 2001
From: John <50754967+j-bm@users.noreply.github.com>
Date: Thu, 19 Jan 2023 09:21:47 -0400
Subject: [PATCH] rand: add full precision f32 and f64 random functions; fix
 f32/f64 multipliers (#16875)

---
 vlib/math/bits/unsafe_bits.js.v |  59 ++++++++++++++++
 vlib/math/bits/unsafe_bits.v    |  42 ++++++++++++
 vlib/rand/README.md             |  27 ++++++--
 vlib/rand/constants/constants.v |  19 ++++--
 vlib/rand/fp_test.v             | 116 ++++++++++++++++++++++++++++++++
 vlib/rand/rand.v                |  89 ++++++++++++++++++++++--
 6 files changed, 337 insertions(+), 15 deletions(-)
 create mode 100644 vlib/math/bits/unsafe_bits.js.v
 create mode 100644 vlib/math/bits/unsafe_bits.v
 create mode 100644 vlib/rand/fp_test.v

diff --git a/vlib/math/bits/unsafe_bits.js.v b/vlib/math/bits/unsafe_bits.js.v
new file mode 100644
index 000000000..b4f18d826
--- /dev/null
+++ b/vlib/math/bits/unsafe_bits.js.v
@@ -0,0 +1,59 @@
+module bits
+
+// f32_bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position.
+// f32_bits(f32_from_bits(x)) == x.
+pub fn f32_bits(f f32) u32 {
+	p := u32(0)
+	#let buffer = new ArrayBuffer(4)
+	#let floatArr = new Float32Array(buffer)
+	#floatArr[0] = f.val
+	#let uintArr = new Uint32Array(buffer)
+	#p.val = uintArr[0]
+
+	return p
+}
+
+// f32_from_bits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// f32_from_bits(f32_bits(x)) == x.
+pub fn f32_from_bits(b u32) f32 {
+	p := f32(0.0)
+	#let buffer = new ArrayBuffer(4)
+	#let floatArr = new Float32Array(buffer)
+	#let uintArr = new Uint32Array(buffer)
+	#uintArr[0] = Number(b.val)
+	#p.val = floatArr[0]
+
+	return p
+}
+
+// f64_bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position,
+// and f64_bits(f64_from_bits(x)) == x.
+pub fn f64_bits(f f64) u64 {
+	p := u64(0)
+	#let buffer = new ArrayBuffer(8)
+	#let floatArr = new Float64Array(buffer)
+	#floatArr[0] = f.val
+	#let uintArr = new BigUint64Array(buffer)
+	#p.val = uintArr[0]
+
+	return p
+}
+
+// f64_from_bits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// f64_from_bits(f64_bits(x)) == x.
+pub fn f64_from_bits(b u64) f64 {
+	p := 0.0
+	#let buffer = new ArrayBuffer(8)
+	#let floatArr = new Float64Array(buffer)
+	#let uintArr = new BigUint64Array(buffer)
+	#uintArr[0] = b.val
+	#p.val = floatArr[0]
+
+	return p
+}
diff --git a/vlib/math/bits/unsafe_bits.v b/vlib/math/bits/unsafe_bits.v
new file mode 100644
index 000000000..bc77d6616
--- /dev/null
+++ b/vlib/math/bits/unsafe_bits.v
@@ -0,0 +1,42 @@
+// Copyright (c) 2019-2022 Alexander Medvednikov. All rights reserved.
+// Use of this source code is governed by an MIT license
+// that can be found in the LICENSE file.
+module bits
+
+// f32_bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position.
+// f32_bits(f32_from_bits(x)) == x.
+[inline]
+pub fn f32_bits(f f32) u32 {
+	p := *unsafe { &u32(&f) }
+	return p
+}
+
+// f32_from_bits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// f32_from_bits(f32_bits(x)) == x.
+[inline]
+pub fn f32_from_bits(b u32) f32 {
+	p := *unsafe { &f32(&b) }
+	return p
+}
+
+// f64_bits returns the IEEE 754 binary representation of f,
+// with the sign bit of f and the result in the same bit position,
+// and f64_bits(f64_from_bits(x)) == x.
+[inline]
+pub fn f64_bits(f f64) u64 {
+	p := *unsafe { &u64(&f) }
+	return p
+}
+
+// f64_from_bits returns the floating-point number corresponding
+// to the IEEE 754 binary representation b, with the sign bit of b
+// and the result in the same bit position.
+// f64_from_bits(f64_bits(x)) == x.
+[inline]
+pub fn f64_from_bits(b u64) f64 {
+	p := *unsafe { &f64(&b) }
+	return p
+}
diff --git a/vlib/rand/README.md b/vlib/rand/README.md
index 33443a850..305c8f8c9 100644
--- a/vlib/rand/README.md
+++ b/vlib/rand/README.md
@@ -11,7 +11,7 @@ import rand
 ...
 
 // Optionally seed the default generator
-rand.seed([u32(3110), 50714])
+rand.seed([u32(3223878742), 1732001562])
 
 ...
 
@@ -61,21 +61,40 @@ Otherwise, there is feature parity between the generator functions and the top-l
 A PRNG is a Pseudo Random Number Generator.
 Computers cannot generate truly random numbers without an external source of noise or entropy.
 We can use algorithms to generate sequences of seemingly random numbers,
-but their outputs will always be deterministic.
-This is often useful for simulations that need the same starting seed.
+but their outputs will always be deterministic, according to the seed values.
+
+This is often useful for simulations that need the same starting seeds.
+You may be debugging a program and want to restart it with the same
+seeds, or you want to verify a working program is still
+operating identically after compiler or operating system updates.
 
 If you need truly random numbers that are going to be used for cryptography,
 use the `crypto.rand` module.
 
 # Seeding Functions
 
-All the generators are time-seeded.
+All the generators are initialized with time-based seeds.
 The helper functions publicly available in `rand.seed` module are:
 
 1. `time_seed_array()` - returns a `[]u32` that can be directly plugged into the `seed()` functions.
 2. `time_seed_32()` and `time_seed_64()` - 32-bit and 64-bit values respectively
    that are generated from the current time.
 
+When composing your own seeds, use "typical" u32 numbers, not small numbers.  This
+is especially important for PRNGs with large state, such as `mt19937`.  You can create
+random unsigned integers with openssl `rand` or with `v repl` as follows:
+
+```
+$ openssl rand -hex 4
+e3655862
+$ openssl rand -hex 4
+97c4b1db
+$ v repl
+>>> import rand
+>>> [rand.u32(),rand.u32()]
+[2132382944, 2443871665]
+```
+
 # Caveats
 
 Note that the `sys.SysRNG` struct (in the C backend) uses `C.srand()` which sets the seed globally.
diff --git a/vlib/rand/constants/constants.v b/vlib/rand/constants/constants.v
index 371d54d73..db3ee3049 100644
--- a/vlib/rand/constants/constants.v
+++ b/vlib/rand/constants/constants.v
@@ -2,11 +2,16 @@ module constants
 
 // Commonly used constants across RNGs - some taken from "Numerical Recipes".
 pub const (
-	lower_mask     = u64(0x00000000FFFFFFFF)
-	max_u32        = u32(0xFFFFFFFF)
-	max_u64        = u64(0xFFFFFFFFFFFFFFFF)
-	max_u32_as_f32 = f32(max_u32) + 1
-	max_u64_as_f64 = f64(max_u64) + 1
-	u31_mask       = u32(0x7FFFFFFF)
-	u63_mask       = u64(0x7FFFFFFFFFFFFFFF)
+	lower_mask                = u64(0x00000000FFFFFFFF)
+	max_u32                   = u32(0xFFFFFFFF)
+	max_u64                   = u64(0xFFFFFFFFFFFFFFFF)
+	u31_mask                  = u32(0x7FFFFFFF)
+	u63_mask                  = u64(0x7FFFFFFFFFFFFFFF)
+	// 23 bits for f32
+	ieee754_mantissa_f32_mask = (u32(1) << 23) - 1
+	// 52 bits for f64
+	ieee754_mantissa_f64_mask = (u64(1) << 52) - 1
+	// smallest mantissa with exponent 0 (un normalized)
+	reciprocal_2_23rd         = 1.0 / f64(u32(1) << 23)
+	reciprocal_2_52nd         = 1.0 / f64(u64(1) << 52)
 )
diff --git a/vlib/rand/fp_test.v b/vlib/rand/fp_test.v
new file mode 100644
index 000000000..e1edebb09
--- /dev/null
+++ b/vlib/rand/fp_test.v
@@ -0,0 +1,116 @@
+import rand
+
+struct Histo {
+mut:
+	lo f64
+	ct int
+}
+
+const (
+	// The sample size to be used; keep cpu time less than 5 seconds
+	count = 9100100
+	// Two sets of seeds
+	seeds = [[u32(2742798260), 2159764996], [u32(2135051596), 958016781]]
+)
+
+fn test_f32() {
+	mut histo := []Histo{}
+	mut candidate := f32(0.0)
+
+	histo << Histo{1.0e-9, 0}
+	histo << Histo{1.0e-6, 0}
+	histo << Histo{1.0e-4, 0}
+	histo << Histo{1.0e-2, 0}
+	histo << Histo{1.0e00, 0}
+
+	for seed in seeds {
+		rand.seed(seed)
+		for _ in 0 .. count {
+			candidate = rand.f32()
+			for mut p in histo {
+				if candidate < p.lo {
+					p.ct += 1
+				}
+			}
+		}
+	}
+	println(' f32  ')
+	println(histo)
+	assert histo[0].ct == 1
+	assert histo[1].ct == 16
+	assert histo[2].ct == 1802
+	assert histo[3].ct == 181963
+	assert histo[4].ct == 18200200
+	for mut p in histo {
+		p.ct = 0
+	}
+	for seed in seeds {
+		rand.seed(seed)
+		for _ in 0 .. count {
+			candidate = rand.f32cp()
+			for mut p in histo {
+				if candidate < p.lo {
+					p.ct += 1
+				}
+			}
+		}
+	}
+	println(' f32cp')
+	println(histo)
+	assert histo[0].ct == 0
+	assert histo[1].ct == 16
+	assert histo[2].ct == 1863
+	assert histo[3].ct == 142044
+	assert histo[4].ct == 18200200
+}
+
+fn test_f64() {
+	mut histo := []Histo{}
+	mut candidate := f64(0.0)
+
+	histo << Histo{1.0e-9, 0}
+	histo << Histo{1.0e-6, 0}
+	histo << Histo{1.0e-4, 0}
+	histo << Histo{1.0e-2, 0}
+	histo << Histo{1.0e00, 0}
+
+	for seed in seeds {
+		rand.seed(seed)
+		for _ in 0 .. count {
+			candidate = rand.f64()
+			for mut p in histo {
+				if candidate < p.lo {
+					p.ct += 1
+				}
+			}
+		}
+	}
+	println(' f64  ')
+	println(histo)
+	assert histo[0].ct == 0
+	assert histo[1].ct == 23
+	assert histo[2].ct == 1756
+	assert histo[3].ct == 182209
+	assert histo[4].ct == 18200200
+	for mut p in histo {
+		p.ct = 0
+	}
+	for seed in seeds {
+		rand.seed(seed)
+		for _ in 0 .. count {
+			candidate = rand.f64cp()
+			for mut p in histo {
+				if candidate < p.lo {
+					p.ct += 1
+				}
+			}
+		}
+	}
+	println(' f64cp')
+	println(histo)
+	assert histo[0].ct == 0
+	assert histo[1].ct == 17
+	assert histo[2].ct == 1878
+	assert histo[3].ct == 181754
+	assert histo[4].ct == 18200200
+}
diff --git a/vlib/rand/rand.v b/vlib/rand/rand.v
index 04604d4f3..4fdc03de5 100644
--- a/vlib/rand/rand.v
+++ b/vlib/rand/rand.v
@@ -191,16 +191,85 @@ pub fn (mut rng PRNG) i64_in_range(min i64, max i64) !i64 {
 	return min + rng.i64n(max - min)!
 }
 
-// f32 returns a pseudorandom `f32` value in range `[0, 1)`.
+// f32 returns a pseudorandom `f32` value in range `[0, 1)`
+// using rng.u32() multiplied by an f64 constant.
 [inline]
 pub fn (mut rng PRNG) f32() f32 {
-	return f32(rng.u32()) / constants.max_u32_as_f32
+	return f32((rng.u32() >> 9) * constants.reciprocal_2_23rd)
 }
 
-// f64 returns a pseudorandom `f64` value in range `[0, 1)`.
+// f32cp returns a pseudorandom `f32` value in range `[0, 1)`
+// with full precision (mantissa random between 0 and 1
+// and the exponent varies as well.)
+// See https://allendowney.com/research/rand/ for background on the method.
+[inline]
+pub fn (mut rng PRNG) f32cp() f32 {
+	mut x := rng.u32()
+	mut exp := u32(126)
+	mut mask := u32(1) << 31
+
+	// check if prng returns 0; rare but keep looking for precision
+	if _unlikely_(x == 0) {
+		x = rng.u32()
+		exp -= 31
+	}
+	// count leading one bits and scale exponent accordingly
+	for {
+		if x & mask != 0 {
+			mask >>= 1
+			exp -= 1
+		} else {
+			break
+		}
+	}
+	// if we used any high-order mantissa bits; replace x
+	if exp < (126 - 8) {
+		x = rng.u32()
+	}
+
+	// Assumes little-endian IEEE floating point.
+	x = (exp << 23) | (x >> 8) & constants.ieee754_mantissa_f32_mask
+	return bits.f32_from_bits(x)
+}
+
+// f64 returns a pseudorandom `f64` value in range `[0, 1)`
+// using rng.u64() multiplied by a constant.
 [inline]
 pub fn (mut rng PRNG) f64() f64 {
-	return f64(rng.u64()) / constants.max_u64_as_f64
+	return f64((rng.u64() >> 12) * constants.reciprocal_2_52nd)
+}
+
+// f64cp returns a pseudorandom `f64` value in range `[0, 1)`
+// with full precision (mantissa random between 0 and 1
+// and the exponent varies as well.)
+// See https://allendowney.com/research/rand/ for background on the method.
+[inline]
+pub fn (mut rng PRNG) f64cp() f64 {
+	mut x := rng.u64()
+	mut exp := u64(1022)
+	mut mask := u64(1) << 63
+	mut bitcount := u32(0)
+
+	// check if prng returns 0; unlikely.
+	if _unlikely_(x == 0) {
+		x = rng.u64()
+		exp -= 31
+	}
+	// count leading one bits and scale exponent accordingly
+	for {
+		if x & mask != 0 {
+			mask >>= 1
+			bitcount += 1
+		} else {
+			break
+		}
+	}
+	exp -= bitcount
+	if bitcount > 11 {
+		x = rng.u64()
+	}
+	x = (exp << 52) | (x & constants.ieee754_mantissa_f64_mask)
+	return bits.f64_from_bits(x)
 }
 
 // f32n returns a pseudorandom `f32` value in range `[0, max]`.
@@ -520,11 +589,23 @@ pub fn f32() f32 {
 	return default_rng.f32()
 }
 
+// f32cp returns a uniformly distributed 32-bit floating point in range `[0, 1)`
+//  with full precision mantissa.
+pub fn f32cp() f32 {
+	return default_rng.f32cp()
+}
+
 // f64 returns a uniformly distributed 64-bit floating point in range `[0, 1)`.
 pub fn f64() f64 {
 	return default_rng.f64()
 }
 
+// f64 returns a uniformly distributed 64-bit floating point in range `[0, 1)`
+//  with full precision mantissa.
+pub fn f64cp() f64 {
+	return default_rng.f64cp()
+}
+
 // f32n returns a uniformly distributed 32-bit floating point in range `[0, max)`.
 pub fn f32n(max f32) !f32 {
 	return default_rng.f32n(max)
-- 
2.30.2