From a8a1e9381fc7ff3d55d0947d0ae67115f94b8511 Mon Sep 17 00:00:00 2001
From: playX
Date: Mon, 15 Nov 2021 15:13:44 +0300
Subject: [PATCH] strconv,js: f64_to_str works on JS backend now; Fix BigInt usage in infix expressions (#12464)

---
 vlib/strconv/f32_str.js.v    |   1 +
 vlib/strconv/f64_str.c.v     |  70 ++------
 vlib/strconv/f64_str.js.v    | 339 +++++++++++++++++++++++++++++++++++
 vlib/strconv/f64_str.v       |  37 ++++
 vlib/strconv/format_mem.js.v |   2 +-
 vlib/v/gen/js/infix.v        |   4 +-
 vlib/v/gen/js/js.v           |  12 +-
 7 files changed, 399 insertions(+), 66 deletions(-)
 create mode 100644 vlib/strconv/f32_str.js.v
 create mode 100644 vlib/strconv/f64_str.js.v
 create mode 100644 vlib/strconv/f64_str.v

diff --git a/vlib/strconv/f32_str.js.v b/vlib/strconv/f32_str.js.v
new file mode 100644
index 000000000..2a67c559f
--- /dev/null
+++ b/vlib/strconv/f32_str.js.v
@@ -0,0 +1 @@
+module strconv
diff --git a/vlib/strconv/f64_str.c.v b/vlib/strconv/f64_str.c.v
index 6be43546f..69ffd439f 100644
--- a/vlib/strconv/f64_str.c.v
+++ b/vlib/strconv/f64_str.c.v
@@ -20,42 +20,6 @@ https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea
 
 =============================================================================*/
 
-// pow of ten table used by n_digit reduction
-const (
-	ten_pow_table_64 = [
-		u64(1),
-		u64(10),
-		u64(100),
-		u64(1000),
-		u64(10000),
-		u64(100000),
-		u64(1000000),
-		u64(10000000),
-		u64(100000000),
-		u64(1000000000),
-		u64(10000000000),
-		u64(100000000000),
-		u64(1000000000000),
-		u64(10000000000000),
-		u64(100000000000000),
-		u64(1000000000000000),
-		u64(10000000000000000),
-		u64(100000000000000000),
-		u64(1000000000000000000),
-		u64(10000000000000000000),
-	]
-)
-
-//=============================================================================
-// Conversion Functions
-//=============================================================================
-const (
-	mantbits64 = u32(52)
-	expbits64  = u32(11)
-	bias64     = 1023 // f64 exponent bias
-	maxexp64   = 2047
-)
-
 [direct_array_access]
 fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
 	mut n_digit := i_n_digit + 1
@@ -87,10 +51,10 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
 	// rounding last used digit
 	if n_digit < out_len {
 		// println("out:[$out]")
-		out += strconv.ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
-		out /= strconv.ten_pow_table_64[out_len - n_digit]
+		out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
+		out /= ten_pow_table_64[out_len - n_digit]
 		// println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
-		if d.m / strconv.ten_pow_table_64[out_len - n_digit] < out {
+		if d.m / ten_pow_table_64[out_len - n_digit] < out {
 			d_exp++
 			n_digit++
 		}
@@ -170,11 +134,11 @@ fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
 
 fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
 	mut d := Dec64{}
-	e := exp - strconv.bias64
-	if e > strconv.mantbits64 {
+	e := exp - bias64
+	if e > mantbits64 {
 		return d, false
 	}
-	shift := strconv.mantbits64 - e
+	shift := mantbits64 - e
 	mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
 	// mant := i_mant | (1 << mantbits64) // implicit 1
 	d.m = mant >> shift
@@ -195,11 +159,11 @@ fn f64_to_decimal(mant u64, exp u64) Dec64 {
 	if exp == 0 {
 		// We subtract 2 so that the bounds computation has
 		// 2 additional bits.
-		e2 = 1 - strconv.bias64 - int(strconv.mantbits64) - 2
+		e2 = 1 - bias64 - int(mantbits64) - 2
 		m2 = mant
 	} else {
-		e2 = int(exp) - strconv.bias64 - int(strconv.mantbits64) - 2
-		m2 = (u64(1) << strconv.mantbits64) | mant
+		e2 = int(exp) - bias64 - int(mantbits64) - 2
+		m2 = (u64(1) << mantbits64) | mant
 	}
 	even := (m2 & 1) == 0
 	accept_bounds := even
@@ -373,13 +337,13 @@ pub fn f64_to_str(f f64, n_digit int) string {
 	u1.f = f
 	u := unsafe { u1.u }
 
-	neg := (u >> (strconv.mantbits64 + strconv.expbits64)) != 0
-	mant := u & ((u64(1) << strconv.mantbits64) - u64(1))
-	exp := (u >> strconv.mantbits64) & ((u64(1) << strconv.expbits64) - u64(1))
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
 	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
 
 	// Exit early for easy cases.
-	if (exp == strconv.maxexp64) || (exp == 0 && mant == 0) {
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
 		return get_string_special(neg, exp == 0, mant == 0)
 	}
 
@@ -398,13 +362,13 @@ pub fn f64_to_str_pad(f f64, n_digit int) string {
 	u1.f = f
 	u := unsafe { u1.u }
 
-	neg := (u >> (strconv.mantbits64 + strconv.expbits64)) != 0
-	mant := u & ((u64(1) << strconv.mantbits64) - u64(1))
-	exp := (u >> strconv.mantbits64) & ((u64(1) << strconv.expbits64) - u64(1))
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
 	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
 
 	// Exit early for easy cases.
-	if (exp == strconv.maxexp64) || (exp == 0 && mant == 0) {
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
 		return get_string_special(neg, exp == 0, mant == 0)
 	}
 
diff --git a/vlib/strconv/f64_str.js.v b/vlib/strconv/f64_str.js.v
new file mode 100644
index 000000000..4bc62ef88
--- /dev/null
+++ b/vlib/strconv/f64_str.js.v
@@ -0,0 +1,339 @@
+module strconv
+
+import math
+
+fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
+	mut n_digit := i_n_digit + 1
+	pad_digit := i_pad_digit + 1
+	mut out := d.m
+	mut d_exp := d.e
+	// mut out_len := decimal_len_64(out)
+	mut out_len := dec_digits(out)
+	out_len_original := out_len
+
+	mut fw_zeros := 0
+	if pad_digit > out_len {
+		fw_zeros = pad_digit - out_len
+	}
+
+	mut buf := []byte{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . + e + e_sign + exp_len(2) + \0}
+	mut i := 0
+
+	if neg {
+		#buf.arr.arr[i.val].val = '-'.charCodeAt()
+		i++
+	}
+
+	mut disp := 0
+	if out_len <= 1 {
+		disp = 1
+	}
+
+	// rounding last used digit
+	if n_digit < out_len {
+		// println("out:[$out]")
+		out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
+		out /= ten_pow_table_64[out_len - n_digit]
+		// println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
+		if d.m / ten_pow_table_64[out_len - n_digit] < out {
+			d_exp++
+			n_digit++
+		}
+
+		// println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}")
+
+		out_len = n_digit
+		// println("orig: ${out_len_original} new len: ${out_len} out:[$out]")
+	}
+
+	y := i + out_len
+	mut x := 0
+	for x < (out_len - disp - 1) {
+		#buf.arr.arr[y.val - x.val].val = '0'.charCodeAt() + Number(out.valueOf() % 10n)
+
+		out /= 10
+		i++
+		x++
+	}
+
+	// no decimal digits needed, end here
+	if i_n_digit == 0 {
+		res := ''
+		#buf.arr.arr.forEach((it) => it.val == 0 ? res.str : res.str += String.fromCharCode(it.val))
+
+		return res
+	}
+
+	if out_len >= 1 {
+		buf[y - x] = `.`
+		x++
+		i++
+	}
+
+	if y - x >= 0 {
+		#buf.arr.arr[y.val - x.val].val = '0'.charCodeAt() + Number(out.valueOf() % 10n)
+		i++
+	}
+
+	for fw_zeros > 0 {
+		#buf.arr.arr[i.val].val = '0'.charCodeAt()
+		i++
+		fw_zeros--
+	}
+
+	#buf.arr.arr[i.val].val = 'e'.charCodeAt()
+	i++
+
+	mut exp := d_exp + out_len_original - 1
+	if exp < 0 {
+		#buf.arr.arr[i.val].val = '-'.charCodeAt()
+		i++
+		exp = -exp
+	} else {
+		#buf.arr.arr[i.val].val = '+'.charCodeAt()
+		i++
+	}
+
+	// Always print at least two digits to match strconv's formatting.
+	d2 := exp % 10
+	exp /= 10
+	d1 := exp % 10
+	_ := d1
+	_ := d2
+	d0 := exp / 10
+	if d0 > 0 {
+		#buf.arr.arr[i.val].val = '0'.charCodeAt() + d0.val
+		i++
+	}
+	#buf.arr.arr[i.val].val = '0'.charCodeAt() + d1.val
+	i++
+	#buf.arr.arr[i.val].val = '0'.charCodeAt() + d2.val
+	i++
+	#buf.arr.arr[i.val].val = 0
+
+	res := ''
+	#buf.arr.arr.forEach((it) => it.val == 0 ? res.str : res.str += String.fromCharCode(it.val))
+
+	return res
+}
+
+fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
+	mut d := Dec64{}
+	e := exp - bias64
+	if e > mantbits64 {
+		return d, false
+	}
+	shift := mantbits64 - e
+	mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
+	// mant := i_mant | (1 << mantbits64) // implicit 1
+	d.m = mant >> shift
+	if (d.m << shift) != mant {
+		return d, false
+	}
+
+	for (d.m % 10) == 0 {
+		d.m /= 10
+		d.e++
+	}
+	return d, true
+}
+
+fn f64_to_decimal(mant u64, exp u64) Dec64 {
+	mut e2 := 0
+	mut m2 := u64(0)
+	if exp == 0 {
+		// We subtract 2 so that the bounds computation has
+		// 2 additional bits.
+		e2 = 1 - bias64 - int(mantbits64) - 2
+		m2 = mant
+	} else {
+		e2 = int(exp) - bias64 - int(mantbits64) - 2
+		m2 = (u64(1) << mantbits64) | mant
+	}
+	even := (m2 & 1) == 0
+	accept_bounds := even
+
+	// Step 2: Determine the interval of valid decimal representations.
+	mv := u64(4 * m2)
+	mm_shift := bool_to_u64(mant != 0 || exp <= 1)
+
+	// Step 3: Convert to a decimal power base using 128-bit arithmetic.
+	mut vr := u64(0)
+	mut vp := u64(0)
+	mut vm := u64(0)
+	mut e10 := 0
+	mut vm_is_trailing_zeros := false
+	mut vr_is_trailing_zeros := false
+
+	if e2 >= 0 {
+		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
+		q := log10_pow2(e2) - bool_to_u32(e2 > 3)
+		e10 = int(q)
+		k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1
+		i := -e2 + int(q) + k
+
+		mul := pow5_inv_split_64[q]
+		vr = mul_shift_64(u64(4) * m2, mul, i)
+		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i)
+		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i)
+		if q <= 21 {
+			// This should use q <= 22, but I think 21 is also safe.
+			// Smaller values may still be safe, but it's more
+			// difficult to reason about them. Only one of mp, mv,
+			// and mm can be a multiple of 5, if any.
+			if mv % 5 == 0 {
+				vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q)
+			} else if accept_bounds {
+				// Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
+				// <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
+				// <=> true && pow5Factor64(mm) >= q, since e2 >= q.
+				vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift,
+					q)
+			} else if multiple_of_power_of_five_64(mv + 2, q) {
+				vp--
+			}
+		}
+	} else {
+		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
+		q := log10_pow5(-e2) - bool_to_u32(-e2 > 1)
+		e10 = int(q) + e2
+		i := -e2 - int(q)
+		k := pow5_bits(i) - pow5_num_bits_64
+		j := int(q) - k
+		mul := pow5_split_64[i]
+		vr = mul_shift_64(u64(4) * m2, mul, j)
+		vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j)
+		vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j)
+		if q <= 1 {
+			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
+			// mv = 4 * m2, so it always has at least two trailing 0 bits.
+			vr_is_trailing_zeros = true
+			if accept_bounds {
+				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
+				vm_is_trailing_zeros = (mm_shift == 1)
+			} else {
+				// mp = mv + 2, so it always has at least one trailing 0 bit.
+				vp--
+			}
+		} else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
+			// We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
+			// <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
+			// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
+			// <=> (mv & ((1 << (q - 1)) - 1)) == 0
+			// We also need to make sure that the left shift does not overflow.
+			vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1)
+		}
+	}
+
+	// Step 4: Find the shortest decimal representation
+	// in the interval of valid representations.
+	mut removed := 0
+	mut last_removed_digit := byte(0)
+	mut out := u64(0)
+	// On average, we remove ~2 digits.
+	if vm_is_trailing_zeros || vr_is_trailing_zeros {
+		// General case, which happens rarely (~0.7%).
+		for {
+			vp_div_10 := vp / 10
+			vm_div_10 := vm / 10
+			if vp_div_10 <= vm_div_10 {
+				break
+			}
+			vm_mod_10 := vm % 10
+			vr_div_10 := vr / 10
+			vr_mod_10 := vr % 10
+			vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0
+			vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
+			last_removed_digit = byte(vr_mod_10)
+			vr = vr_div_10
+			vp = vp_div_10
+			vm = vm_div_10
+			removed++
+		}
+		if vm_is_trailing_zeros {
+			for {
+				vm_div_10 := vm / 10
+				vm_mod_10 := vm % 10
+				if vm_mod_10 != 0 {
+					break
+				}
+				vp_div_10 := vp / 10
+				vr_div_10 := vr / 10
+				vr_mod_10 := vr % 10
+				vr_is_trailing_zeros = vr_is_trailing_zeros && (last_removed_digit == 0)
+				last_removed_digit = byte(vr_mod_10)
+				vr = vr_div_10
+				vp = vp_div_10
+				vm = vm_div_10
+				removed++
+			}
+		}
+		if vr_is_trailing_zeros && (last_removed_digit == 5) && (vr % 2) == 0 {
+			// Round even if the exact number is .....50..0.
+			last_removed_digit = 4
+		}
+		out = vr
+		// We need to take vr + 1 if vr is outside bounds
+		// or we need to round up.
+		if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 {
+			out++
+		}
+	} else {
+		// Specialized for the common case (~99.3%).
+		// Percentages below are relative to this.
+		mut round_up := false
+		for vp / 100 > vm / 100 {
+			// Optimization: remove two digits at a time (~86.2%).
+			round_up = (vr % 100) >= 50
+			vr /= 100
+			vp /= 100
+			vm /= 100
+			removed += 2
+		}
+		// Loop iterations below (approximately), without optimization above:
+		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
+		// Loop iterations below (approximately), with optimization above:
+		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
+		for vp / 10 > vm / 10 {
+			round_up = (vr % 10) >= 5
+			vr /= 10
+			vp /= 10
+			vm /= 10
+			removed++
+		}
+		// We need to take vr + 1 if vr is outside bounds
+		// or we need to round up.
+		out = vr + bool_to_u64(vr == vm || round_up)
+	}
+
+	return Dec64{
+		m: out
+		e: e10 + removed
+	}
+}
+
+//=============================================================================
+// String Functions
+//=============================================================================
+
+// f64_to_str returns a string in scientific notation with max n_digit after the dot
+pub fn f64_to_str(f f64, n_digit int) string {
+	u := math.f64_bits(f)
+	neg := (u >> (mantbits64 + expbits64)) != 0
+	mant := u & ((u64(1) << mantbits64) - u64(1))
+	exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
+	// println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
+
+	// Exit early for easy cases.
+	if (exp == maxexp64) || (exp == 0 && mant == 0) {
+		return get_string_special(neg, exp == 0, mant == 0)
+	}
+
+	mut d, ok := f64_to_decimal_exact_int(mant, exp)
+	if !ok {
+		// println("to_decimal")
+		d = f64_to_decimal(mant, exp)
+	}
+	// println("${d.m} ${d.e}")
+	return d.get_string_64(neg, n_digit, 0)
+}
diff --git a/vlib/strconv/f64_str.v b/vlib/strconv/f64_str.v
new file mode 100644
index 000000000..00b0a8123
--- /dev/null
+++ b/vlib/strconv/f64_str.v
@@ -0,0 +1,37 @@
+module strconv
+
+// pow of ten table used by n_digit reduction
+const (
+	ten_pow_table_64 = [
+		u64(1),
+		u64(10),
+		u64(100),
+		u64(1000),
+		u64(10000),
+		u64(100000),
+		u64(1000000),
+		u64(10000000),
+		u64(100000000),
+		u64(1000000000),
+		u64(10000000000),
+		u64(100000000000),
+		u64(1000000000000),
+		u64(10000000000000),
+		u64(100000000000000),
+		u64(1000000000000000),
+		u64(10000000000000000),
+		u64(100000000000000000),
+		u64(1000000000000000000),
+		u64(10000000000000000000),
+	]
+)
+
+//=============================================================================
+// Conversion Functions
+//=============================================================================
+const (
+	mantbits64 = u32(52)
+	expbits64  = u32(11)
+	bias64     = 1023 // f64 exponent bias
+	maxexp64   = 2047
+)
diff --git a/vlib/strconv/format_mem.js.v b/vlib/strconv/format_mem.js.v
index 658fe6617..b082eb41d 100644
--- a/vlib/strconv/format_mem.js.v
+++ b/vlib/strconv/format_mem.js.v
@@ -86,7 +86,7 @@ pub fn format_dec_sb(d u64, p BF_param, mut res strings.Builder) {
 		i--
 	}
 
-	for j in 0 .. n_char {
+	for _ in 0 .. n_char {
 		i++
 		res.write_b(buf[i])
 	}
diff --git a/vlib/v/gen/js/infix.v b/vlib/v/gen/js/infix.v
index 0d34e69d4..9dc4dd35f 100644
--- a/vlib/v/gen/js/infix.v
+++ b/vlib/v/gen/js/infix.v
@@ -15,12 +15,12 @@ fn (mut g JsGen) gen_plain_infix_expr(node ast.InfixExpr) {
 		g.write('BigInt(')
 		g.expr(node.left)
 		g.gen_deref_ptr(node.left_type)
-		g.write('.val)')
+		g.write('.valueOf())')
 		g.write(' $node.op.str() ')
 		g.write('BigInt(')
 		g.expr(node.right)
 		g.gen_deref_ptr(node.left_type)
-		g.write('.val)')
+		g.write('.valueOf())')
 	} else {
 		g.expr(node.left)
 		g.gen_deref_ptr(node.left_type)
diff --git a/vlib/v/gen/js/js.v b/vlib/v/gen/js/js.v
index 962739d15..2131ef9fd 100644
--- a/vlib/v/gen/js/js.v
+++ b/vlib/v/gen/js/js.v
@@ -979,16 +979,8 @@ fn (mut g JsGen) expr(node ast.Expr) {
 				}
 			} else {
 				g.write(node.op.str())
-
-				if node.op in [.inc, .dec] {
-					g.expr(node.right)
-					g.write('.val ')
-				} else {
-					g.write('(')
-					g.expr(node.right)
-					g.write('.valueOf()')
-					g.write(')')
-				}
+				g.expr(node.right)
+				g.write('.val ')
 			}
 		}
 		ast.RangeExpr {
-- 
2.30.2