v / vlib / strconv
Raw file | 382 loc (340 sloc) | 10.05 KB | Latest commit hash 1c48a8d76
1module strconv
2
3/*=============================================================================
4
5f64 to string
6
7Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
8Use of this source code is governed by an MIT license
9that can be found in the LICENSE file.
10
11This file contains the f64 to string functions
12
13These functions are based on the work of:
14Publication:PLDI 2018: Proceedings of the 39th ACM SIGPLAN
15Conference on Programming Language Design and ImplementationJune 2018
16Pages 270–282 https://doi.org/10.1145/3192366.3192369
17
18inspired by the Go version here:
19https://github.com/cespare/ryu/tree/ba56a33f39e3bbbfa409095d0f9ae168a595feea
20
21=============================================================================*/
22
23[direct_array_access]
24fn (d Dec64) get_string_64(neg bool, i_n_digit int, i_pad_digit int) string {
25 mut n_digit := i_n_digit + 1
26 pad_digit := i_pad_digit + 1
27 mut out := d.m
28 mut d_exp := d.e
29 // mut out_len := decimal_len_64(out)
30 mut out_len := dec_digits(out)
31 out_len_original := out_len
32
33 mut fw_zeros := 0
34 if pad_digit > out_len {
35 fw_zeros = pad_digit - out_len
36 }
37
38 mut buf := []u8{len: (out_len + 6 + 1 + 1 + fw_zeros)} // sign + mant_len + . + e + e_sign + exp_len(2) + \0}
39 mut i := 0
40
41 if neg {
42 buf[i] = `-`
43 i++
44 }
45
46 mut disp := 0
47 if out_len <= 1 {
48 disp = 1
49 }
50
51 // rounding last used digit
52 if n_digit < out_len {
53 // println("out:[$out]")
54 out += ten_pow_table_64[out_len - n_digit - 1] * 5 // round to up
55 out /= ten_pow_table_64[out_len - n_digit]
56 // println("out1:[$out] ${d.m / ten_pow_table_64[out_len - n_digit ]}")
57 if d.m / ten_pow_table_64[out_len - n_digit] < out {
58 d_exp++
59 n_digit++
60 }
61
62 // println("cmp: ${d.m/ten_pow_table_64[out_len - n_digit ]} ${out/ten_pow_table_64[out_len - n_digit ]}")
63
64 out_len = n_digit
65 // println("orig: ${out_len_original} new len: ${out_len} out:[$out]")
66 }
67
68 y := i + out_len
69 mut x := 0
70 for x < (out_len - disp - 1) {
71 buf[y - x] = `0` + u8(out % 10)
72 out /= 10
73 i++
74 x++
75 }
76
77 // no decimal digits needed, end here
78 if i_n_digit == 0 {
79 unsafe {
80 buf[i] = 0
81 return tos(&u8(&buf[0]), i)
82 }
83 }
84
85 if out_len >= 1 {
86 buf[y - x] = `.`
87 x++
88 i++
89 }
90
91 if y - x >= 0 {
92 buf[y - x] = `0` + u8(out % 10)
93 i++
94 }
95
96 for fw_zeros > 0 {
97 buf[i] = `0`
98 i++
99 fw_zeros--
100 }
101
102 buf[i] = `e`
103 i++
104
105 mut exp := d_exp + out_len_original - 1
106 if exp < 0 {
107 buf[i] = `-`
108 i++
109 exp = -exp
110 } else {
111 buf[i] = `+`
112 i++
113 }
114
115 // Always print at least two digits to match strconv's formatting.
116 d2 := exp % 10
117 exp /= 10
118 d1 := exp % 10
119 d0 := exp / 10
120 if d0 > 0 {
121 buf[i] = `0` + u8(d0)
122 i++
123 }
124 buf[i] = `0` + u8(d1)
125 i++
126 buf[i] = `0` + u8(d2)
127 i++
128 buf[i] = 0
129
130 return unsafe {
131 tos(&u8(&buf[0]), i)
132 }
133}
134
135fn f64_to_decimal_exact_int(i_mant u64, exp u64) (Dec64, bool) {
136 mut d := Dec64{}
137 e := exp - bias64
138 if e > mantbits64 {
139 return d, false
140 }
141 shift := mantbits64 - e
142 mant := i_mant | u64(0x0010_0000_0000_0000) // implicit 1
143 // mant := i_mant | (1 << mantbits64) // implicit 1
144 d.m = mant >> shift
145 if (d.m << shift) != mant {
146 return d, false
147 }
148
149 for (d.m % 10) == 0 {
150 d.m /= 10
151 d.e++
152 }
153 return d, true
154}
155
156fn f64_to_decimal(mant u64, exp u64) Dec64 {
157 mut e2 := 0
158 mut m2 := u64(0)
159 if exp == 0 {
160 // We subtract 2 so that the bounds computation has
161 // 2 additional bits.
162 e2 = 1 - bias64 - int(mantbits64) - 2
163 m2 = mant
164 } else {
165 e2 = int(exp) - bias64 - int(mantbits64) - 2
166 m2 = (u64(1) << mantbits64) | mant
167 }
168 even := (m2 & 1) == 0
169 accept_bounds := even
170
171 // Step 2: Determine the interval of valid decimal representations.
172 mv := u64(4 * m2)
173 mm_shift := bool_to_u64(mant != 0 || exp <= 1)
174
175 // Step 3: Convert to a decimal power base uing 128-bit arithmetic.
176 mut vr := u64(0)
177 mut vp := u64(0)
178 mut vm := u64(0)
179 mut e10 := 0
180 mut vm_is_trailing_zeros := false
181 mut vr_is_trailing_zeros := false
182
183 if e2 >= 0 {
184 // This expression is slightly faster than max(0, log10Pow2(e2) - 1).
185 q := log10_pow2(e2) - bool_to_u32(e2 > 3)
186 e10 = int(q)
187 k := pow5_inv_num_bits_64 + pow5_bits(int(q)) - 1
188 i := -e2 + int(q) + k
189
190 mul := *(&Uint128(&pow5_inv_split_64_x[q * 2]))
191 vr = mul_shift_64(u64(4) * m2, mul, i)
192 vp = mul_shift_64(u64(4) * m2 + u64(2), mul, i)
193 vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, i)
194 if q <= 21 {
195 // This should use q <= 22, but I think 21 is also safe.
196 // Smaller values may still be safe, but it's more
197 // difficult to reason about them. Only one of mp, mv,
198 // and mm can be a multiple of 5, if any.
199 if mv % 5 == 0 {
200 vr_is_trailing_zeros = multiple_of_power_of_five_64(mv, q)
201 } else if accept_bounds {
202 // Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
203 // <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
204 // <=> true && pow5Factor64(mm) >= q, since e2 >= q.
205 vm_is_trailing_zeros = multiple_of_power_of_five_64(mv - 1 - mm_shift,
206 q)
207 } else if multiple_of_power_of_five_64(mv + 2, q) {
208 vp--
209 }
210 }
211 } else {
212 // This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
213 q := log10_pow5(-e2) - bool_to_u32(-e2 > 1)
214 e10 = int(q) + e2
215 i := -e2 - int(q)
216 k := pow5_bits(i) - pow5_num_bits_64
217 j := int(q) - k
218 mul := *(&Uint128(&pow5_split_64_x[i * 2]))
219 vr = mul_shift_64(u64(4) * m2, mul, j)
220 vp = mul_shift_64(u64(4) * m2 + u64(2), mul, j)
221 vm = mul_shift_64(u64(4) * m2 - u64(1) - mm_shift, mul, j)
222 if q <= 1 {
223 // {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
224 // mv = 4 * m2, so it always has at least two trailing 0 bits.
225 vr_is_trailing_zeros = true
226 if accept_bounds {
227 // mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
228 vm_is_trailing_zeros = (mm_shift == 1)
229 } else {
230 // mp = mv + 2, so it always has at least one trailing 0 bit.
231 vp--
232 }
233 } else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
234 // We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
235 // <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
236 // <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
237 // <=> (mv & ((1 << (q - 1)) - 1)) == 0
238 // We also need to make sure that the left shift does not overflow.
239 vr_is_trailing_zeros = multiple_of_power_of_two_64(mv, q - 1)
240 }
241 }
242
243 // Step 4: Find the shortest decimal representation
244 // in the interval of valid representations.
245 mut removed := 0
246 mut last_removed_digit := u8(0)
247 mut out := u64(0)
248 // On average, we remove ~2 digits.
249 if vm_is_trailing_zeros || vr_is_trailing_zeros {
250 // General case, which happens rarely (~0.7%).
251 for {
252 vp_div_10 := vp / 10
253 vm_div_10 := vm / 10
254 if vp_div_10 <= vm_div_10 {
255 break
256 }
257 vm_mod_10 := vm % 10
258 vr_div_10 := vr / 10
259 vr_mod_10 := vr % 10
260 vm_is_trailing_zeros = vm_is_trailing_zeros && vm_mod_10 == 0
261 vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0
262 last_removed_digit = u8(vr_mod_10)
263 vr = vr_div_10
264 vp = vp_div_10
265 vm = vm_div_10
266 removed++
267 }
268 if vm_is_trailing_zeros {
269 for {
270 vm_div_10 := vm / 10
271 vm_mod_10 := vm % 10
272 if vm_mod_10 != 0 {
273 break
274 }
275 vp_div_10 := vp / 10
276 vr_div_10 := vr / 10
277 vr_mod_10 := vr % 10
278 vr_is_trailing_zeros = vr_is_trailing_zeros && last_removed_digit == 0
279 last_removed_digit = u8(vr_mod_10)
280 vr = vr_div_10
281 vp = vp_div_10
282 vm = vm_div_10
283 removed++
284 }
285 }
286 if vr_is_trailing_zeros && last_removed_digit == 5 && (vr % 2) == 0 {
287 // Round even if the exact number is .....50..0.
288 last_removed_digit = 4
289 }
290 out = vr
291 // We need to take vr + 1 if vr is outside bounds
292 // or we need to round up.
293 if (vr == vm && (!accept_bounds || !vm_is_trailing_zeros)) || last_removed_digit >= 5 {
294 out++
295 }
296 } else {
297 // Specialized for the common case (~99.3%).
298 // Percentages below are relative to this.
299 mut round_up := false
300 for vp / 100 > vm / 100 {
301 // Optimization: remove two digits at a time (~86.2%).
302 round_up = (vr % 100) >= 50
303 vr /= 100
304 vp /= 100
305 vm /= 100
306 removed += 2
307 }
308 // Loop iterations below (approximately), without optimization above:
309 // 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
310 // Loop iterations below (approximately), with optimization above:
311 // 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
312 for vp / 10 > vm / 10 {
313 round_up = (vr % 10) >= 5
314 vr /= 10
315 vp /= 10
316 vm /= 10
317 removed++
318 }
319 // We need to take vr + 1 if vr is outside bounds
320 // or we need to round up.
321 out = vr + bool_to_u64(vr == vm || round_up)
322 }
323
324 return Dec64{
325 m: out
326 e: e10 + removed
327 }
328}
329
330//=============================================================================
331// String Functions
332//=============================================================================
333
334// f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot.
335pub fn f64_to_str(f f64, n_digit int) string {
336 mut u1 := Uf64{}
337 u1.f = f
338 u := unsafe { u1.u }
339
340 neg := (u >> (mantbits64 + expbits64)) != 0
341 mant := u & ((u64(1) << mantbits64) - u64(1))
342 exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
343 // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
344
345 // Exit early for easy cases.
346 if exp == maxexp64 || (exp == 0 && mant == 0) {
347 return get_string_special(neg, exp == 0, mant == 0)
348 }
349
350 mut d, ok := f64_to_decimal_exact_int(mant, exp)
351 if !ok {
352 // println("to_decimal")
353 d = f64_to_decimal(mant, exp)
354 }
355 // println("${d.m} ${d.e}")
356 return d.get_string_64(neg, n_digit, 0)
357}
358
359// f64_to_str returns `f` as a `string` in scientific notation with max `n_digit` digits after the dot.
360pub fn f64_to_str_pad(f f64, n_digit int) string {
361 mut u1 := Uf64{}
362 u1.f = f
363 u := unsafe { u1.u }
364
365 neg := (u >> (mantbits64 + expbits64)) != 0
366 mant := u & ((u64(1) << mantbits64) - u64(1))
367 exp := (u >> mantbits64) & ((u64(1) << expbits64) - u64(1))
368 // println("s:${neg} mant:${mant} exp:${exp} float:${f} byte:${u1.u:016lx}")
369
370 // Exit early for easy cases.
371 if exp == maxexp64 || (exp == 0 && mant == 0) {
372 return get_string_special(neg, exp == 0, mant == 0)
373 }
374
375 mut d, ok := f64_to_decimal_exact_int(mant, exp)
376 if !ok {
377 // println("to_decimal")
378 d = f64_to_decimal(mant, exp)
379 }
380 // println("DEBUG: ${d.m} ${d.e}")
381 return d.get_string_64(neg, n_digit, n_digit)
382}