1 | module strconv |
2 | |
3 | // Copyright (c) 2019-2023 Dario Deledda. All rights reserved. |
4 | // Use of this source code is governed by an MIT license |
5 | // that can be found in the LICENSE file. |
6 | // |
7 | // This file contains utilities for converting a string to a f64 variable. |
8 | // IEEE 754 standard is used. |
9 | // Know limitation: limited to 18 significant digits |
10 | // |
11 | // The code is inspired by: |
12 | // Grzegorz Kraszewski [email protected] |
13 | // URL: http://krashan.ppa.pl/articles/stringtofloat/ |
14 | // Original license: MIT |
15 | // 96 bit operation utilities |
16 | // |
17 | // Note: when u128 will be available, these function can be refactored. |
18 | |
19 | // f32 constants |
20 | pub const ( |
21 | single_plus_zero = u32(0x0000_0000) |
22 | single_minus_zero = u32(0x8000_0000) |
23 | single_plus_infinity = u32(0x7F80_0000) |
24 | single_minus_infinity = u32(0xFF80_0000) |
25 | ) |
26 | |
27 | // f64 constants |
28 | pub const ( |
29 | digits = 18 |
30 | double_plus_zero = u64(0x0000000000000000) |
31 | double_minus_zero = u64(0x8000000000000000) |
32 | double_plus_infinity = u64(0x7FF0000000000000) |
33 | double_minus_infinity = u64(0xFFF0000000000000) |
34 | ) |
35 | |
36 | // char constants |
37 | pub const ( |
38 | c_dpoint = `.` |
39 | c_plus = `+` |
40 | c_minus = `-` |
41 | c_zero = `0` |
42 | c_nine = `9` |
43 | c_ten = u32(10) |
44 | ) |
45 | |
46 | // right logical shift 96 bit |
47 | fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) { |
48 | mut r0 := u32(0) |
49 | mut r1 := u32(0) |
50 | mut r2 := u32(0) |
51 | r0 = (s0 >> 1) | ((s1 & u32(1)) << 31) |
52 | r1 = (s1 >> 1) | ((s2 & u32(1)) << 31) |
53 | r2 = s2 >> 1 |
54 | return r2, r1, r0 |
55 | } |
56 | |
57 | // left logical shift 96 bit |
58 | fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) { |
59 | mut r0 := u32(0) |
60 | mut r1 := u32(0) |
61 | mut r2 := u32(0) |
62 | r2 = (s2 << 1) | ((s1 & (u32(1) << 31)) >> 31) |
63 | r1 = (s1 << 1) | ((s0 & (u32(1) << 31)) >> 31) |
64 | r0 = s0 << 1 |
65 | return r2, r1, r0 |
66 | } |
67 | |
68 | // sum on 96 bit |
69 | fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) { |
70 | mut w := u64(0) |
71 | mut r0 := u32(0) |
72 | mut r1 := u32(0) |
73 | mut r2 := u32(0) |
74 | w = u64(s0) + u64(d0) |
75 | r0 = u32(w) |
76 | w >>= 32 |
77 | w += u64(s1) + u64(d1) |
78 | r1 = u32(w) |
79 | w >>= 32 |
80 | w += u64(s2) + u64(d2) |
81 | r2 = u32(w) |
82 | return r2, r1, r0 |
83 | } |
84 | |
85 | // subtraction on 96 bit |
86 | fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) { |
87 | mut w := u64(0) |
88 | mut r0 := u32(0) |
89 | mut r1 := u32(0) |
90 | mut r2 := u32(0) |
91 | w = u64(s0) - u64(d0) |
92 | r0 = u32(w) |
93 | w >>= 32 |
94 | w += u64(s1) - u64(d1) |
95 | r1 = u32(w) |
96 | w >>= 32 |
97 | w += u64(s2) - u64(d2) |
98 | r2 = u32(w) |
99 | return r2, r1, r0 |
100 | } |
101 | |
102 | // Utility functions |
103 | fn is_digit(x u8) bool { |
104 | return x >= strconv.c_zero && x <= strconv.c_nine |
105 | } |
106 | |
107 | fn is_space(x u8) bool { |
108 | return x == `\t` || x == `\n` || x == `\v` || x == `\f` || x == `\r` || x == ` ` |
109 | } |
110 | |
111 | fn is_exp(x u8) bool { |
112 | return x == `E` || x == `e` |
113 | } |
114 | |
115 | // Possible parser return values. |
116 | enum ParserState { |
117 | ok // parser finished OK |
118 | pzero // no digits or number is smaller than +-2^-1022 |
119 | mzero // number is negative, module smaller |
120 | pinf // number is higher than +HUGE_VAL |
121 | minf // number is lower than -HUGE_VAL |
122 | invalid_number // invalid number, used for '#@%^' for example |
123 | } |
124 | |
125 | // parser tries to parse the given string into a number |
126 | // NOTE: #TOFIX need one char after the last char of the number |
127 | [direct_array_access] |
128 | fn parser(s string) (ParserState, PrepNumber) { |
129 | mut digx := 0 |
130 | mut result := ParserState.ok |
131 | mut expneg := false |
132 | mut expexp := 0 |
133 | mut i := 0 |
134 | mut pn := PrepNumber{} |
135 | |
136 | // skip spaces |
137 | for i < s.len && s[i].is_space() { |
138 | i++ |
139 | } |
140 | |
141 | // check negatives |
142 | if s[i] == `-` { |
143 | pn.negative = true |
144 | i++ |
145 | } |
146 | |
147 | // positive sign ignore it |
148 | if s[i] == `+` { |
149 | i++ |
150 | } |
151 | |
152 | // read mantissa |
153 | for i < s.len && s[i].is_digit() { |
154 | // println("$i => ${s[i]}") |
155 | if digx < strconv.digits { |
156 | pn.mantissa *= 10 |
157 | pn.mantissa += u64(s[i] - strconv.c_zero) |
158 | digx++ |
159 | } else if pn.exponent < 2147483647 { |
160 | pn.exponent++ |
161 | } |
162 | i++ |
163 | } |
164 | |
165 | // read mantissa decimals |
166 | if i < s.len && s[i] == `.` { |
167 | i++ |
168 | for i < s.len && s[i].is_digit() { |
169 | if digx < strconv.digits { |
170 | pn.mantissa *= 10 |
171 | pn.mantissa += u64(s[i] - strconv.c_zero) |
172 | pn.exponent-- |
173 | digx++ |
174 | } |
175 | i++ |
176 | } |
177 | } |
178 | |
179 | // read exponent |
180 | if i < s.len && (s[i] == `e` || s[i] == `E`) { |
181 | i++ |
182 | if i < s.len { |
183 | // esponent sign |
184 | if s[i] == strconv.c_plus { |
185 | i++ |
186 | } else if s[i] == strconv.c_minus { |
187 | expneg = true |
188 | i++ |
189 | } |
190 | |
191 | for i < s.len && s[i].is_digit() { |
192 | if expexp < 214748364 { |
193 | expexp *= 10 |
194 | expexp += int(s[i] - strconv.c_zero) |
195 | } |
196 | i++ |
197 | } |
198 | } |
199 | } |
200 | |
201 | if expneg { |
202 | expexp = -expexp |
203 | } |
204 | pn.exponent += expexp |
205 | if pn.mantissa == 0 { |
206 | if pn.negative { |
207 | result = .mzero |
208 | } else { |
209 | result = .pzero |
210 | } |
211 | } else if pn.exponent > 309 { |
212 | if pn.negative { |
213 | result = .minf |
214 | } else { |
215 | result = .pinf |
216 | } |
217 | } else if pn.exponent < -328 { |
218 | if pn.negative { |
219 | result = .mzero |
220 | } else { |
221 | result = .pzero |
222 | } |
223 | } |
224 | if i == 0 && s.len > 0 { |
225 | return ParserState.invalid_number, pn |
226 | } |
227 | return result, pn |
228 | } |
229 | |
230 | // converter returns a u64 with the bit image of the f64 number |
231 | fn converter(mut pn PrepNumber) u64 { |
232 | mut binexp := 92 |
233 | // s0,s1,s2 are the parts of a 96-bit precision integer |
234 | mut s2 := u32(0) |
235 | mut s1 := u32(0) |
236 | mut s0 := u32(0) |
237 | // q0,q1,q2 are the parts of a 96-bit precision integer |
238 | mut q2 := u32(0) |
239 | mut q1 := u32(0) |
240 | mut q0 := u32(0) |
241 | // r0,r1,r2 are the parts of a 96-bit precision integer |
242 | mut r2 := u32(0) |
243 | mut r1 := u32(0) |
244 | mut r0 := u32(0) |
245 | // |
246 | mask28 := u32(u64(0xF) << 28) |
247 | mut result := u64(0) |
248 | // working on 3 u32 to have 96 bit precision |
249 | s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF)) |
250 | s1 = u32(pn.mantissa >> 32) |
251 | s2 = u32(0) |
252 | // so we take the decimal exponent off |
253 | for pn.exponent > 0 { |
254 | q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2 |
255 | r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2 |
256 | s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2 |
257 | s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10 |
258 | pn.exponent-- |
259 | for (s2 & mask28) != 0 { |
260 | q2, q1, q0 = lsr96(s2, s1, s0) |
261 | binexp++ |
262 | s2 = q2 |
263 | s1 = q1 |
264 | s0 = q0 |
265 | } |
266 | } |
267 | for pn.exponent < 0 { |
268 | for !((s2 & (u32(1) << 31)) != 0) { |
269 | q2, q1, q0 = lsl96(s2, s1, s0) |
270 | binexp-- |
271 | s2 = q2 |
272 | s1 = q1 |
273 | s0 = q0 |
274 | } |
275 | q2 = s2 / strconv.c_ten |
276 | r1 = s2 % strconv.c_ten |
277 | r2 = (s1 >> 8) | (r1 << 24) |
278 | q1 = r2 / strconv.c_ten |
279 | r1 = r2 % strconv.c_ten |
280 | r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24) |
281 | r0 = r2 / strconv.c_ten |
282 | r1 = r2 % strconv.c_ten |
283 | q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16) |
284 | q0 = r0 << 16 |
285 | r2 = (s0 & u32(0xFFFF)) | (r1 << 16) |
286 | q0 |= r2 / strconv.c_ten |
287 | s2 = q2 |
288 | s1 = q1 |
289 | s0 = q0 |
290 | pn.exponent++ |
291 | } |
292 | // C.printf(c"mantissa before normalization: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp) |
293 | // normalization, the 28 bit in s2 must the leftest one in the variable |
294 | if s2 != 0 || s1 != 0 || s0 != 0 { |
295 | for (s2 & mask28) == 0 { |
296 | q2, q1, q0 = lsl96(s2, s1, s0) |
297 | binexp-- |
298 | s2 = q2 |
299 | s1 = q1 |
300 | s0 = q0 |
301 | } |
302 | } |
303 | // rounding if needed |
304 | /* |
305 | * "round half to even" algorithm |
306 | * Example for f32, just a reminder |
307 | * |
308 | * If bit 54 is 0, round down |
309 | * If bit 54 is 1 |
310 | * If any bit beyond bit 54 is 1, round up |
311 | * If all bits beyond bit 54 are 0 (meaning the number is halfway between two floating-point numbers) |
312 | * If bit 53 is 0, round down |
313 | * If bit 53 is 1, round up |
314 | */ |
315 | /* |
316 | test case 1 complete |
317 | s2=0x1FFFFFFF |
318 | s1=0xFFFFFF80 |
319 | s0=0x0 |
320 | */ |
321 | |
322 | /* |
323 | test case 1 check_round_bit |
324 | s2=0x18888888 |
325 | s1=0x88888880 |
326 | s0=0x0 |
327 | */ |
328 | |
329 | /* |
330 | test case check_round_bit + normalization |
331 | s2=0x18888888 |
332 | s1=0x88888F80 |
333 | s0=0x0 |
334 | */ |
335 | |
336 | // C.printf(c"mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp) |
337 | // s1 => 0xFFFFFFxx only F are rapresented |
338 | nbit := 7 |
339 | check_round_bit := u32(1) << u32(nbit) |
340 | check_round_mask := u32(0xFFFFFFFF) << u32(nbit) |
341 | if (s1 & check_round_bit) != 0 { |
342 | // C.printf(c"need round!! check mask: %08x\n", s1 & ~check_round_mask ) |
343 | if (s1 & ~check_round_mask) != 0 { |
344 | // C.printf(c"Add 1!\n") |
345 | s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0) |
346 | } else { |
347 | // C.printf(c"All 0!\n") |
348 | if (s1 & (check_round_bit << u32(1))) != 0 { |
349 | // C.printf(c"Add 1 form -1 bit control!\n") |
350 | s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0) |
351 | } |
352 | } |
353 | s1 = s1 & check_round_mask |
354 | s0 = u32(0) |
355 | // recheck normalization |
356 | if s2 & (mask28 << u32(1)) != 0 { |
357 | // C.printf(c"Renormalize!!\n") |
358 | q2, q1, q0 = lsr96(s2, s1, s0) |
359 | binexp++ |
360 | // dump(binexp) |
361 | s2 = q2 |
362 | s1 = q1 |
363 | s0 = q0 |
364 | } |
365 | } |
366 | // tmp := ( u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) |
367 | // C.printf(c"mantissa after rounding : %08x %08x %08x binexp: %d \n", s2,s1,s0,binexp) |
368 | // C.printf(c"Tmp result: %016x\n",tmp) |
369 | // end rounding |
370 | // offset the binary exponent IEEE 754 |
371 | binexp += 1023 |
372 | if binexp > 2046 { |
373 | if pn.negative { |
374 | result = strconv.double_minus_infinity |
375 | } else { |
376 | result = strconv.double_plus_infinity |
377 | } |
378 | } else if binexp < 1 { |
379 | if pn.negative { |
380 | result = strconv.double_minus_zero |
381 | } else { |
382 | result = strconv.double_plus_zero |
383 | } |
384 | } else if s2 != 0 { |
385 | mut q := u64(0) |
386 | binexs2 := u64(binexp) << 52 |
387 | q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2 |
388 | if pn.negative { |
389 | q |= (u64(1) << 63) |
390 | } |
391 | result = q |
392 | } |
393 | return result |
394 | } |
395 | |
396 | // atof64 parses the string `s`, and if possible, converts it into a f64 number |
397 | pub fn atof64(s string) !f64 { |
398 | if s.len == 0 { |
399 | return error('expected a number found an empty string') |
400 | } |
401 | mut res := Float64u{} |
402 | mut res_parsing, mut pn := parser(s) |
403 | match res_parsing { |
404 | .ok { |
405 | res.u = converter(mut pn) |
406 | } |
407 | .pzero { |
408 | res.u = strconv.double_plus_zero |
409 | } |
410 | .mzero { |
411 | res.u = strconv.double_minus_zero |
412 | } |
413 | .pinf { |
414 | res.u = strconv.double_plus_infinity |
415 | } |
416 | .minf { |
417 | res.u = strconv.double_minus_infinity |
418 | } |
419 | .invalid_number { |
420 | return error('not a number') |
421 | } |
422 | } |
423 | return unsafe { res.f } |
424 | } |