v / vlib / strconv
Raw file | 424 loc (398 sloc) | 9.39 KB | Latest commit hash 6cd1723de
1module strconv
2
3// Copyright (c) 2019-2023 Dario Deledda. All rights reserved.
4// Use of this source code is governed by an MIT license
5// that can be found in the LICENSE file.
6//
7// This file contains utilities for converting a string to a f64 variable.
8// IEEE 754 standard is used.
9// Know limitation: limited to 18 significant digits
10//
11// The code is inspired by:
12// Grzegorz Kraszewski [email protected]
13// URL: http://krashan.ppa.pl/articles/stringtofloat/
14// Original license: MIT
15// 96 bit operation utilities
16//
17// Note: when u128 will be available, these function can be refactored.
18
19// f32 constants
20pub const (
21 single_plus_zero = u32(0x0000_0000)
22 single_minus_zero = u32(0x8000_0000)
23 single_plus_infinity = u32(0x7F80_0000)
24 single_minus_infinity = u32(0xFF80_0000)
25)
26
27// f64 constants
28pub const (
29 digits = 18
30 double_plus_zero = u64(0x0000000000000000)
31 double_minus_zero = u64(0x8000000000000000)
32 double_plus_infinity = u64(0x7FF0000000000000)
33 double_minus_infinity = u64(0xFFF0000000000000)
34)
35
36// char constants
37pub const (
38 c_dpoint = `.`
39 c_plus = `+`
40 c_minus = `-`
41 c_zero = `0`
42 c_nine = `9`
43 c_ten = u32(10)
44)
45
46// right logical shift 96 bit
47fn lsr96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
48 mut r0 := u32(0)
49 mut r1 := u32(0)
50 mut r2 := u32(0)
51 r0 = (s0 >> 1) | ((s1 & u32(1)) << 31)
52 r1 = (s1 >> 1) | ((s2 & u32(1)) << 31)
53 r2 = s2 >> 1
54 return r2, r1, r0
55}
56
57// left logical shift 96 bit
58fn lsl96(s2 u32, s1 u32, s0 u32) (u32, u32, u32) {
59 mut r0 := u32(0)
60 mut r1 := u32(0)
61 mut r2 := u32(0)
62 r2 = (s2 << 1) | ((s1 & (u32(1) << 31)) >> 31)
63 r1 = (s1 << 1) | ((s0 & (u32(1) << 31)) >> 31)
64 r0 = s0 << 1
65 return r2, r1, r0
66}
67
68// sum on 96 bit
69fn add96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
70 mut w := u64(0)
71 mut r0 := u32(0)
72 mut r1 := u32(0)
73 mut r2 := u32(0)
74 w = u64(s0) + u64(d0)
75 r0 = u32(w)
76 w >>= 32
77 w += u64(s1) + u64(d1)
78 r1 = u32(w)
79 w >>= 32
80 w += u64(s2) + u64(d2)
81 r2 = u32(w)
82 return r2, r1, r0
83}
84
85// subtraction on 96 bit
86fn sub96(s2 u32, s1 u32, s0 u32, d2 u32, d1 u32, d0 u32) (u32, u32, u32) {
87 mut w := u64(0)
88 mut r0 := u32(0)
89 mut r1 := u32(0)
90 mut r2 := u32(0)
91 w = u64(s0) - u64(d0)
92 r0 = u32(w)
93 w >>= 32
94 w += u64(s1) - u64(d1)
95 r1 = u32(w)
96 w >>= 32
97 w += u64(s2) - u64(d2)
98 r2 = u32(w)
99 return r2, r1, r0
100}
101
102// Utility functions
103fn is_digit(x u8) bool {
104 return x >= strconv.c_zero && x <= strconv.c_nine
105}
106
107fn is_space(x u8) bool {
108 return x == `\t` || x == `\n` || x == `\v` || x == `\f` || x == `\r` || x == ` `
109}
110
111fn is_exp(x u8) bool {
112 return x == `E` || x == `e`
113}
114
115// Possible parser return values.
116enum ParserState {
117 ok // parser finished OK
118 pzero // no digits or number is smaller than +-2^-1022
119 mzero // number is negative, module smaller
120 pinf // number is higher than +HUGE_VAL
121 minf // number is lower than -HUGE_VAL
122 invalid_number // invalid number, used for '#@%^' for example
123}
124
125// parser tries to parse the given string into a number
126// NOTE: #TOFIX need one char after the last char of the number
127[direct_array_access]
128fn parser(s string) (ParserState, PrepNumber) {
129 mut digx := 0
130 mut result := ParserState.ok
131 mut expneg := false
132 mut expexp := 0
133 mut i := 0
134 mut pn := PrepNumber{}
135
136 // skip spaces
137 for i < s.len && s[i].is_space() {
138 i++
139 }
140
141 // check negatives
142 if s[i] == `-` {
143 pn.negative = true
144 i++
145 }
146
147 // positive sign ignore it
148 if s[i] == `+` {
149 i++
150 }
151
152 // read mantissa
153 for i < s.len && s[i].is_digit() {
154 // println("$i => ${s[i]}")
155 if digx < strconv.digits {
156 pn.mantissa *= 10
157 pn.mantissa += u64(s[i] - strconv.c_zero)
158 digx++
159 } else if pn.exponent < 2147483647 {
160 pn.exponent++
161 }
162 i++
163 }
164
165 // read mantissa decimals
166 if i < s.len && s[i] == `.` {
167 i++
168 for i < s.len && s[i].is_digit() {
169 if digx < strconv.digits {
170 pn.mantissa *= 10
171 pn.mantissa += u64(s[i] - strconv.c_zero)
172 pn.exponent--
173 digx++
174 }
175 i++
176 }
177 }
178
179 // read exponent
180 if i < s.len && (s[i] == `e` || s[i] == `E`) {
181 i++
182 if i < s.len {
183 // esponent sign
184 if s[i] == strconv.c_plus {
185 i++
186 } else if s[i] == strconv.c_minus {
187 expneg = true
188 i++
189 }
190
191 for i < s.len && s[i].is_digit() {
192 if expexp < 214748364 {
193 expexp *= 10
194 expexp += int(s[i] - strconv.c_zero)
195 }
196 i++
197 }
198 }
199 }
200
201 if expneg {
202 expexp = -expexp
203 }
204 pn.exponent += expexp
205 if pn.mantissa == 0 {
206 if pn.negative {
207 result = .mzero
208 } else {
209 result = .pzero
210 }
211 } else if pn.exponent > 309 {
212 if pn.negative {
213 result = .minf
214 } else {
215 result = .pinf
216 }
217 } else if pn.exponent < -328 {
218 if pn.negative {
219 result = .mzero
220 } else {
221 result = .pzero
222 }
223 }
224 if i == 0 && s.len > 0 {
225 return ParserState.invalid_number, pn
226 }
227 return result, pn
228}
229
230// converter returns a u64 with the bit image of the f64 number
231fn converter(mut pn PrepNumber) u64 {
232 mut binexp := 92
233 // s0,s1,s2 are the parts of a 96-bit precision integer
234 mut s2 := u32(0)
235 mut s1 := u32(0)
236 mut s0 := u32(0)
237 // q0,q1,q2 are the parts of a 96-bit precision integer
238 mut q2 := u32(0)
239 mut q1 := u32(0)
240 mut q0 := u32(0)
241 // r0,r1,r2 are the parts of a 96-bit precision integer
242 mut r2 := u32(0)
243 mut r1 := u32(0)
244 mut r0 := u32(0)
245 //
246 mask28 := u32(u64(0xF) << 28)
247 mut result := u64(0)
248 // working on 3 u32 to have 96 bit precision
249 s0 = u32(pn.mantissa & u64(0x00000000FFFFFFFF))
250 s1 = u32(pn.mantissa >> 32)
251 s2 = u32(0)
252 // so we take the decimal exponent off
253 for pn.exponent > 0 {
254 q2, q1, q0 = lsl96(s2, s1, s0) // q = s * 2
255 r2, r1, r0 = lsl96(q2, q1, q0) // r = s * 4 <=> q * 2
256 s2, s1, s0 = lsl96(r2, r1, r0) // s = s * 8 <=> r * 2
257 s2, s1, s0 = add96(s2, s1, s0, q2, q1, q0) // s = (s * 8) + (s * 2) <=> s*10
258 pn.exponent--
259 for (s2 & mask28) != 0 {
260 q2, q1, q0 = lsr96(s2, s1, s0)
261 binexp++
262 s2 = q2
263 s1 = q1
264 s0 = q0
265 }
266 }
267 for pn.exponent < 0 {
268 for !((s2 & (u32(1) << 31)) != 0) {
269 q2, q1, q0 = lsl96(s2, s1, s0)
270 binexp--
271 s2 = q2
272 s1 = q1
273 s0 = q0
274 }
275 q2 = s2 / strconv.c_ten
276 r1 = s2 % strconv.c_ten
277 r2 = (s1 >> 8) | (r1 << 24)
278 q1 = r2 / strconv.c_ten
279 r1 = r2 % strconv.c_ten
280 r2 = ((s1 & u32(0xFF)) << 16) | (s0 >> 16) | (r1 << 24)
281 r0 = r2 / strconv.c_ten
282 r1 = r2 % strconv.c_ten
283 q1 = (q1 << 8) | ((r0 & u32(0x00FF0000)) >> 16)
284 q0 = r0 << 16
285 r2 = (s0 & u32(0xFFFF)) | (r1 << 16)
286 q0 |= r2 / strconv.c_ten
287 s2 = q2
288 s1 = q1
289 s0 = q0
290 pn.exponent++
291 }
292 // C.printf(c"mantissa before normalization: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
293 // normalization, the 28 bit in s2 must the leftest one in the variable
294 if s2 != 0 || s1 != 0 || s0 != 0 {
295 for (s2 & mask28) == 0 {
296 q2, q1, q0 = lsl96(s2, s1, s0)
297 binexp--
298 s2 = q2
299 s1 = q1
300 s0 = q0
301 }
302 }
303 // rounding if needed
304 /*
305 * "round half to even" algorithm
306 * Example for f32, just a reminder
307 *
308 * If bit 54 is 0, round down
309 * If bit 54 is 1
310 * If any bit beyond bit 54 is 1, round up
311 * If all bits beyond bit 54 are 0 (meaning the number is halfway between two floating-point numbers)
312 * If bit 53 is 0, round down
313 * If bit 53 is 1, round up
314 */
315 /*
316 test case 1 complete
317 s2=0x1FFFFFFF
318 s1=0xFFFFFF80
319 s0=0x0
320 */
321
322 /*
323 test case 1 check_round_bit
324 s2=0x18888888
325 s1=0x88888880
326 s0=0x0
327 */
328
329 /*
330 test case check_round_bit + normalization
331 s2=0x18888888
332 s1=0x88888F80
333 s0=0x0
334 */
335
336 // C.printf(c"mantissa before rounding: %08x%08x%08x binexp: %d \n", s2,s1,s0,binexp)
337 // s1 => 0xFFFFFFxx only F are rapresented
338 nbit := 7
339 check_round_bit := u32(1) << u32(nbit)
340 check_round_mask := u32(0xFFFFFFFF) << u32(nbit)
341 if (s1 & check_round_bit) != 0 {
342 // C.printf(c"need round!! check mask: %08x\n", s1 & ~check_round_mask )
343 if (s1 & ~check_round_mask) != 0 {
344 // C.printf(c"Add 1!\n")
345 s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
346 } else {
347 // C.printf(c"All 0!\n")
348 if (s1 & (check_round_bit << u32(1))) != 0 {
349 // C.printf(c"Add 1 form -1 bit control!\n")
350 s2, s1, s0 = add96(s2, s1, s0, 0, check_round_bit, 0)
351 }
352 }
353 s1 = s1 & check_round_mask
354 s0 = u32(0)
355 // recheck normalization
356 if s2 & (mask28 << u32(1)) != 0 {
357 // C.printf(c"Renormalize!!\n")
358 q2, q1, q0 = lsr96(s2, s1, s0)
359 binexp++
360 // dump(binexp)
361 s2 = q2
362 s1 = q1
363 s0 = q0
364 }
365 }
366 // tmp := ( u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8)
367 // C.printf(c"mantissa after rounding : %08x %08x %08x binexp: %d \n", s2,s1,s0,binexp)
368 // C.printf(c"Tmp result: %016x\n",tmp)
369 // end rounding
370 // offset the binary exponent IEEE 754
371 binexp += 1023
372 if binexp > 2046 {
373 if pn.negative {
374 result = strconv.double_minus_infinity
375 } else {
376 result = strconv.double_plus_infinity
377 }
378 } else if binexp < 1 {
379 if pn.negative {
380 result = strconv.double_minus_zero
381 } else {
382 result = strconv.double_plus_zero
383 }
384 } else if s2 != 0 {
385 mut q := u64(0)
386 binexs2 := u64(binexp) << 52
387 q = (u64(s2 & ~mask28) << 24) | ((u64(s1) + u64(128)) >> 8) | binexs2
388 if pn.negative {
389 q |= (u64(1) << 63)
390 }
391 result = q
392 }
393 return result
394}
395
396// atof64 parses the string `s`, and if possible, converts it into a f64 number
397pub fn atof64(s string) !f64 {
398 if s.len == 0 {
399 return error('expected a number found an empty string')
400 }
401 mut res := Float64u{}
402 mut res_parsing, mut pn := parser(s)
403 match res_parsing {
404 .ok {
405 res.u = converter(mut pn)
406 }
407 .pzero {
408 res.u = strconv.double_plus_zero
409 }
410 .mzero {
411 res.u = strconv.double_minus_zero
412 }
413 .pinf {
414 res.u = strconv.double_plus_infinity
415 }
416 .minf {
417 res.u = strconv.double_minus_infinity
418 }
419 .invalid_number {
420 return error('not a number')
421 }
422 }
423 return unsafe { res.f }
424}