1 | module strconv |
2 | |
3 | /*============================================================================= |
4 | |
5 | f64 to string |
6 | |
7 | Copyright (c) 2019-2023 Dario Deledda. All rights reserved. |
8 | Use of this source code is governed by an MIT license |
9 | that can be found in the LICENSE file. |
10 | |
11 | This file contains the f64 to string functions |
12 | |
13 | These functions are based on the work of: |
14 | Publication:PLDI 2018: Proceedings of the 39th ACM SIGPLAN |
15 | Conference on Programming Language Design and ImplementationJune 2018 |
16 | Pages 270–282 https://doi.org/10.1145/3192366.3192369 |
17 | |
18 | inspired by the Go version here: |
19 | https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea |
20 | |
21 | =============================================================================*/ |
22 | |
23 | [direct_array_access] |
24 | fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string { |
25 | mut n_digit := i_n_digit + 1 |
26 | pad_digit := i_pad_digit + 1 |
27 | mut out := d.m |
28 | mut d_exp := d.e |
29 | // mut out_len := decimal_len_64(out) |
30 | mut out_len := dec_digits(out) |
31 | out_len_original := out_len |
32 | |
33 | mut fw_zeros := 0 |
34 | if pad_digit > out_len { |
35 | fw_zeros = pad_digit - out_len |
36 | } |
37 | |
38 | mut buf := []u8{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . + e + e_sign + exp_len(2) + \0} |
39 | mut i := 0 |
40 | |
41 | if neg { |
42 | buf[i] = `-` |
43 | i++ |
44 | } |
45 | |
46 | mut disp := 0 |
47 | if out_len <= 1 { |
48 | disp = 1 |
49 | } |
50 | |
51 | // rounding last used digit |
52 | if n_digit < out_len { |
53 | // println("out:[$out]") |
54 | out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up |
55 | out /= ten_pow_table_64[out_len - n_digit] |
56 | // println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}") |
57 | if d.m / ten_pow_table_64[out_len - n_digit] < out { |
58 | d_exp++ |
59 | n_digit++ |
60 | } |
61 | |
62 | // println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}") |
63 | |
64 | out_len = n_digit |
65 | // println("orig: ${out_len_original} new len: ${out_len} out:[$out]") |
66 | } |
67 | |
68 | y := i + out_len |
69 | mut x := 0 |
70 | for x < (out_len - disp - 1) { |
71 | buf[y - x] = `0` + u8(out % 10) |
72 | out /= 10 |
73 | i++ |
74 | x++ |
75 | } |
76 | |
77 | // no decimal digits needed, end here |
78 | if i_n_digit == 0 { |
79 | unsafe { |
80 | buf[i] = 0 |
81 | return tos(&u8(&buf[0]), i) |
82 | } |
83 | } |
84 | |
85 | if out_len >= 1 { |
86 | buf[y - x] = `.` |
87 | x++ |
88 | i++ |
89 | } |
90 | |
91 | if y - x >= 0 { |
92 | buf[y - x] = `0` + u8(out % 10) |
93 | i++ |
94 | } |
95 | |
96 | for fw_zeros > 0 { |
97 | buf[i] = `0` |
98 | i++ |
99 | fw_zeros-- |
100 | } |
101 | |
102 | buf[i] = `e` |
103 | i++ |
104 | |
105 | mut exp := d_exp + out_len_original - 1 |
106 | if exp < 0 { |
107 | buf[i] = `-` |
108 | i++ |
109 | exp = -exp |
110 | } else { |
111 | buf[i] = `+` |
112 | i++ |
113 | } |
114 | |
115 | // Always print at least two digits to match strconv's formatting. |
116 | d2 := exp % 10 |
117 | exp /= 10 |
118 | d1 := exp % 10 |
119 | d0 := exp / 10 |
120 | if d0 > 0 { |
121 | buf[i] = `0` + u8(d0) |
122 | i++ |
123 | } |
124 | buf[i] = `0` + u8(d1) |
125 | i++ |
126 | buf[i] = `0` + u8(d2) |
127 | i++ |
128 | buf[i] = 0 |
129 | |
130 | return unsafe { |
131 | tos(&u8(&buf[0]), i) |
132 | } |
133 | } |
134 | |
135 | fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) { |
136 | mut d := Dec64{} |
137 | e := exp - bias64 |
138 | if e > mantbits64 { |
139 | return d, false |
140 | } |
141 | shift := mantbits64 - e |
142 | mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1 |
143 | // mant := i_mant | (1 << mantbits64) // implicit 1 |
144 | d.m = mant >> shift |
145 | if (d.m << shift) != mant { |
146 | return d, false |
147 | } |
148 | |
149 | for (d.m % 10) == 0 { |
150 | d.m /= 10 |
151 | d.e++ |
152 | } |
153 | return d, true |
154 | } |
155 | |
156 | fn f64_to_decimal(mant u64, exp u64) Dec64 { |
157 | mut e2 := 0 |
158 | mut m2 := u64(0) |
159 | if exp == 0 { |
160 | // We subtract 2 so that the bounds computation has |
161 | // 2 additional bits. |
162 | e2 = 1 - bias64 - int(mantbits64) - 2 |
163 | m2 = mant |
164 | } else { |
165 | e2 = int(exp) - bias64 - int(mantbits64) - 2 |
166 | m2 = (u64(1) << mantbits64) | mant |
167 | } |
168 | even := (m2 & 1) == 0 |
169 | accept_bounds := even |
170 | |
171 | // Step 2: Determine the interval of valid decimal representations. |
172 | mv := u64(4 * m2) |
173 | mm_shift := bool_to_u64(mant != 0 || exp <= 1) |
174 | |
175 | // Step 3: Convert to a decimal power base uing 128-bit arithmetic. |
176 | mut vr := u64(0) |
177 | mut vp := u64(0) |
178 | mut vm := u64(0) |
179 | mut e10 := 0 |
180 | mut vm_is_trailing_zeros := false |
181 | mut vr_is_trailing_zeros := false |
182 | |
183 | if e2 >= 0 { |
184 | // This expression is slightly faster than max(0, log10Pow2(e2) - 1). |
185 | q := log10_pow2(e2) - bool_to_u32(e2 > 3) |
186 | e10 = int(q) |
187 | k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1 |
188 | i := -e2 + int(q) + k |
189 | |
190 | mul := *(&Uint128(&pow5_inv_split_64_x[q * 2])) |
191 | vr = mul_shift_64(u64(4) * m2, mul, i) |
192 | vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i) |
193 | vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i) |
194 | if q <= 21 { |
195 | // This should use q <= 22, but I think 21 is also safe. |
196 | // Smaller values may still be safe, but it's more |
197 | // difficult to reason about them. Only one of mp, mv, |
198 | // and mm can be a multiple of 5, if any. |
199 | if mv % 5 == 0 { |
200 | vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q) |
201 | } else if accept_bounds { |
202 | // Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q |
203 | // <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q |
204 | // <=> true && pow5Factor64(mm) >= q, since e2 >= q. |
205 | vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift, |
206 | q) |
207 | } else if multiple_of_power_of_five_64(mv + 2, q) { |
208 | vp-- |
209 | } |
210 | } |
211 | } else { |
212 | // This expression is slightly faster than max(0, log10Pow5(-e2) - 1). |
213 | q := log10_pow5(-e2) - bool_to_u32(-e2 > 1) |
214 | e10 = int(q) + e2 |
215 | i := -e2 - int(q) |
216 | k := pow5_bits(i) - pow5_num_bits_64 |
217 | j := int(q) - k |
218 | mul := *(&Uint128(&pow5_split_64_x[i * 2])) |
219 | vr = mul_shift_64(u64(4) * m2, mul, j) |
220 | vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j) |
221 | vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j) |
222 | if q <= 1 { |
223 | // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits. |
224 | // mv = 4 * m2, so it always has at least two trailing 0 bits. |
225 | vr_is_trailing_zeros = true |
226 | if accept_bounds { |
227 | // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1. |
228 | vm_is_trailing_zeros = (mm_shift == 1) |
229 | } else { |
230 | // mp = mv + 2, so it always has at least one trailing 0 bit. |
231 | vp-- |
232 | } |
233 | } else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here. |
234 | // We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1 |
235 | // <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1 |
236 | // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q) |
237 | // <=> (mv & ((1 << (q - 1)) - 1)) == 0 |
238 | // We also need to make sure that the left shift does not overflow. |
239 | vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1) |
240 | } |
241 | } |
242 | |
243 | // Step 4: Find the shortest decimal representation |
244 | // in the interval of valid representations. |
245 | mut removed := 0 |
246 | mut last_removed_digit := u8(0) |
247 | mut out := u64(0) |
248 | // On average, we remove ~2 digits. |
249 | if vm_is_trailing_zeros || vr_is_trailing_zeros { |
250 | // General case, which happens rarely (~0.7%). |
251 | for { |
252 | vp_div_10 := vp / 10 |
253 | vm_div_10 := vm / 10 |
254 | if vp_div_10 <= vm_div_10 { |
255 | break |
256 | } |
257 | vm_mod_10 := vm % 10 |
258 | vr_div_10 := vr / 10 |
259 | vr_mod_10 := vr % 10 |
260 | vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0 |
261 | vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0 |
262 | last_removed_digit = u8(vr_mod_10) |
263 | vr = vr_div_10 |
264 | vp = vp_div_10 |
265 | vm = vm_div_10 |
266 | removed++ |
267 | } |
268 | if vm_is_trailing_zeros { |
269 | for { |
270 | vm_div_10 := vm / 10 |
271 | vm_mod_10 := vm % 10 |
272 | if vm_mod_10 != 0 { |
273 | break |
274 | } |
275 | vp_div_10 := vp / 10 |
276 | vr_div_10 := vr / 10 |
277 | vr_mod_10 := vr % 10 |
278 | vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0 |
279 | last_removed_digit = u8(vr_mod_10) |
280 | vr = vr_div_10 |
281 | vp = vp_div_10 |
282 | vm = vm_div_10 |
283 | removed++ |
284 | } |
285 | } |
286 | if vr_is_trailing_zeros && last_removed_digit == 5 && (vr % 2) == 0 { |
287 | // Round even if the exact number is .....50..0. |
288 | last_removed_digit = 4 |
289 | } |
290 | out = vr |
291 | // We need to take vr + 1 if vr is outside bounds |
292 | // or we need to round up. |
293 | if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 { |
294 | out++ |
295 | } |
296 | } else { |
297 | // Specialized for the common case (~99.3%). |
298 | // Percentages below are relative to this. |
299 | mut round_up := false |
300 | for vp / 100 > vm / 100 { |
301 | // Optimization: remove two digits at a time (~86.2%). |
302 | round_up = (vr % 100) >= 50 |
303 | vr /= 100 |
304 | vp /= 100 |
305 | vm /= 100 |
306 | removed += 2 |
307 | } |
308 | // Loop iterations below (approximately), without optimization above: |
309 | // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02% |
310 | // Loop iterations below (approximately), with optimization above: |
311 | // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02% |
312 | for vp / 10 > vm / 10 { |
313 | round_up = (vr % 10) >= 5 |
314 | vr /= 10 |
315 | vp /= 10 |
316 | vm /= 10 |
317 | removed++ |
318 | } |
319 | // We need to take vr + 1 if vr is outside bounds |
320 | // or we need to round up. |
321 | out = vr + bool_to_u64(vr == vm || round_up) |
322 | } |
323 | |
324 | return Dec64{ |
325 | m: out |
326 | e: e10 + removed |
327 | } |
328 | } |
329 | |
330 | //============================================================================= |
331 | // String Functions |
332 | //============================================================================= |
333 | |
334 | // f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot. |
335 | pub fn f64_to_str(f f64, n_digit int) string { |
336 | mut u1 := Uf64{} |
337 | u1.f = f |
338 | u := unsafe { u1.u } |
339 | |
340 | neg := (u >> (mantbits64 + expbits64)) != 0 |
341 | mant := u & ((u64(1) << mantbits64) - u64(1)) |
342 | exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1)) |
343 | // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}") |
344 | |
345 | // Exit early for easy cases. |
346 | if exp == maxexp64 || (exp == 0 && mant == 0) { |
347 | return get_string_special(neg, exp == 0, mant == 0) |
348 | } |
349 | |
350 | mut d, ok := f64_to_decimal_exact_int(mant, exp) |
351 | if !ok { |
352 | // println("to_decimal") |
353 | d = f64_to_decimal(mant, exp) |
354 | } |
355 | // println("${d.m} ${d.e}") |
356 | return d.get_string_64(neg, n_digit, 0) |
357 | } |
358 | |
359 | // f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot. |
360 | pub fn f64_to_str_pad(f f64, n_digit int) string { |
361 | mut u1 := Uf64{} |
362 | u1.f = f |
363 | u := unsafe { u1.u } |
364 | |
365 | neg := (u >> (mantbits64 + expbits64)) != 0 |
366 | mant := u & ((u64(1) << mantbits64) - u64(1)) |
367 | exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1)) |
368 | // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}") |
369 | |
370 | // Exit early for easy cases. |
371 | if exp == maxexp64 || (exp == 0 && mant == 0) { |
372 | return get_string_special(neg, exp == 0, mant == 0) |
373 | } |
374 | |
375 | mut d, ok := f64_to_decimal_exact_int(mant, exp) |
376 | if !ok { |
377 | // println("to_decimal") |
378 | d = f64_to_decimal(mant, exp) |
379 | } |
380 | // println("DEBUG: ${d.m} ${d.e}") |
381 | return d.get_string_64(neg, n_digit, n_digit) |
382 | } |