v / vlib / strconv
Raw file | 253 loc (231 sloc) | 7.81 KB | Latest commit hash 90941b3b1
1module strconv
2
3// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
4// Use of this source code is governed by an MIT license
5// that can be found in the LICENSE file.
6// TODO: use options, or some way to return default with error.
// int_size is the size in bits assumed for an `int` value; it is the
// bit size used by the parsers when the caller passes a bit size of 0.
const int_size = 32

// max_u64 is the largest value a u64 can hold (2^64 - 1), written out as a
// decimal literal.
const max_u64 = u64(18446744073709551615)
14
// byte_to_lower sets bit 5 (value 32) of `c`.
// For the ASCII letters `A`..`Z` this yields `a`..`z`, and lower case letters
// are returned unchanged.
// NOTE: no check is made that `c` is a letter, so any other byte with bit 5
// cleared is altered as well (for example `@` becomes a backtick).
[inline]
pub fn byte_to_lower(c u8) u8 {
	ascii_case_bit := u8(0x20)
	return c | ascii_case_bit
}
19
// common_parse_uint runs common_parse_uint2 on `s` and converts its numeric
// error code into a V error.
// An error is returned only when parsing failed AND at least one of
// `error_on_non_digit` / `error_on_high_digit` is true (the two flags are not
// distinguished from each other). In every other case the value computed by
// common_parse_uint2 is returned as is, even if parsing stopped early.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, code := common_parse_uint2(s, _base, _bit_size)
	must_report := error_on_non_digit || error_on_high_digit
	if code == 0 || !must_report {
		return value
	}
	if code == -1 {
		return error('common_parse_uint: wrong base ${_base} for ${s}')
	}
	if code == -2 {
		return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
	}
	if code == -3 {
		return error('common_parse_uint: integer overflow ${s}')
	}
	// every remaining code is the position of an unparseable character
	return error('common_parse_uint: syntax error ${s}')
}
35
// common_parse_uint2 parses `s` as an unsigned integer written in base
// `_base` that has to fit into `_bit_size` bits.
//
// `_base` is 0 or in the range 2..36. With 0 the base is chosen from the
// start of the string: `0b`, `0o` and `0x` (either letter case) select
// 2, 8 and 16, otherwise base 10 is used (see the inline comments for the
// handling of very short strings that start with `0`).
// `_bit_size` is in the range 0..64, where 0 means int_size.
// A single `_` is accepted between two digits and is skipped.
//
// The first returned value contains the parsed value,
// the second returned value contains the error code:
//    0 = OK
//   >0 = syntax error: index + 1 of the first non-parseable character, the
//        value holds what was parsed up to that point. An empty string or a
//        misplaced underscore yields the pair (0, 1).
//   -1 = wrong base
//   -2 = wrong bit size
//   -3 = overflow, the value is the maximum for the requested bit size
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s.len < 1 {
		return u64(0), 1
	}

	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0

	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			if s.len >= 3 {
				// s[1] is only read after the length check: bounds checks are
				// disabled in this function, so reading it for the input "0"
				// would access memory past the end of the string.
				ch := s[1] | 32
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}

				// check for underscore after the base prefix
				if s[start_index] == `_` {
					start_index++
				}
			} else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				// manage leading zeros in decimal base's numbers
				base = 10
				start_index++
			} else {
				// otherwise it is an octal for C compatibility
				// TODO: Check if this behaviour is logically right
				base = 8
				start_index++
			}
		}
	}

	// Digits are 0-9 followed by the 26 letters, so 36 is the largest base
	// that can be written. Report anything else with the documented code
	// instead of computing with a meaningless base.
	if base < 2 || base > 36 {
		return u64(0), -1
	}

	if bit_size == 0 {
		bit_size = strconv.int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}
	// Cutoff is the smallest number such that cutoff*base > maxUint64.
	cutoff := strconv.max_u64 / u64(base) + u64(1)
	// largest value that fits into bit_size bits
	max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	basem1 := base - 1

	mut n := u64(0)
	for i in start_index .. s.len {
		mut c := s[i]

		// manage underscore inside the number
		if c == `_` {
			// not allowed as first or last character of the digits
			if i == start_index || i >= (s.len - 1) {
				return u64(0), 1
			}
			// not allowed next to another underscore
			if s[i - 1] == `_` || s[i + 1] == `_` {
				return u64(0), 1
			}

			continue
		}

		// counts how many of the two letter adjustments below were applied,
		// 0 means that `c` must be a decimal digit
		mut sub_count := 0

		// get the 0-9 digit; `c` is a u8, so bytes below `0` wrap around to
		// large values that are rejected by the base check
		c -= 48 // subtract the rune `0`

		// check if we are in the superior base rune interval [A..Z]
		if c >= 17 { // (65 - 48)
			sub_count++
			c -= 7 // subtract the `A` - `0` rune to obtain the value of the digit

			// check if we are in the superior base rune interval [a..z]
			if c >= 42 { // (97 - 7 - 48)
				sub_count++
				c -= 32 // subtract the `a` - `0` rune to obtain the value of the digit
			}
		}

		// check for digit over base; the second test rejects the bytes
		// between `9` and `A`, which map to 10..16 without being letters
		if c > basem1 || (sub_count == 0 && c > 9) {
			return n, i + 1
		}

		// check if we are in the cutoff zone
		if n >= cutoff {
			// n*base overflows
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(c)
		if n1 < n || n1 > max_val {
			// n+v overflows
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
151
// parse_uint is the unsigned counterpart of parse_int: it parses `s` in base
// `_base` into a value of at most `_bit_size` bits.
// It calls common_parse_uint with both error flags enabled, so every failure
// reported by the parser is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	value := common_parse_uint(s, _base, _bit_size, true, true)!
	return value
}
156
// common_parse_int parses the signed integer in `_s`: an optional leading
// `+` or `-` followed by digits that are handed to common_parse_uint together
// with `base` and the two error flags.
// A `_bit_size` of 0 means int_size.
// An empty string is not an error, it yields 0.
// A magnitude that does not fit into a signed value of the given bit size is
// not an error either: the result is clamped to the largest positive or the
// lowest negative value of that size.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len < 1 {
		return i64(0)
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

	// Pick off the leading sign.
	first := _s[0]
	neg := first == `-`
	mut digits := _s
	if neg || first == `+` {
		// same effect as `_s[1..]`: continue with the bytes after the sign
		unsafe {
			digits = tos(_s.str + 1, _s.len - 1)
		}
	}

	// Convert the unsigned part, then check the signed range.
	un := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if un == 0 {
		return i64(0)
	}
	// cutoff is 2^(bit_size-1): the magnitude of the lowest negative value and
	// one more than the largest positive value.
	// TODO: check should u64(bit_size-1) be size of int (32)?
	cutoff := u64(1) << u64(bit_size - 1)
	if neg {
		if un > cutoff {
			return -i64(cutoff)
		}
		return -i64(un)
	}
	if un >= cutoff {
		return i64(cutoff - u64(1))
	}
	return i64(un)
}
205
// parse_int converts the string `_s`, written in base `base`, to an i64 that
// fits into `_bit_size` bits. It calls common_parse_int with both error
// flags enabled.
//
// base: 0 lets the string select the base through a `0b`, `0o` or `0x`
// prefix (base 10 when there is none); the prefix handling itself lives in
// common_parse_uint2.
//
// _bit_size: the size of the integer type the result has to fit into,
// 0 means int_size. A size below 0 or above 64 is reported as an error when
// the digits are parsed.
//
// An optional leading `+` or `-` is accepted, and a single `_` may separate
// two digits.
// An empty string returns 0 without an error. A value whose magnitude still
// fits into `_bit_size` unsigned bits, but not into the signed range, is
// clamped to the nearest limit of the signed range instead of being reported.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	value := common_parse_int(_s, base, _bit_size, true, true)!
	return value
}
221
// atoi parses the base 10 string `s` and returns it as an int.
// An optional leading `+` or `-` is accepted.
// An error is returned when `s` is empty, when it consists of a sign only,
// when it contains a character that cannot be parsed, or when the value does
// not fit into an int.
[direct_array_access]
pub fn atoi(s string) !int {
	if s == '' {
		return error('strconv.atoi: parsing "": invalid syntax')
	}
	// s is known to be non-empty here, only the upper length limit matters
	if (strconv.int_size == 32 && s.len < 10) || (strconv.int_size == 64 && s.len < 19) {
		// Fast path for small integers that fit int type: with so few
		// characters the accumulation below cannot overflow.
		mut start_idx := 0
		if s[0] == `-` || s[0] == `+` {
			start_idx++
			if s.len - start_idx < 1 {
				// a sign without any digit
				return error('strconv.atoi: parsing "${s}": invalid syntax')
			}
		}
		mut n := 0
		for i in start_idx .. s.len {
			// u8 arithmetic: bytes below `0` wrap around and fail the check too
			ch := s[i] - `0`
			if ch > 9 {
				return error('strconv.atoi: parsing "${s}": invalid syntax')
			}
			n = n * 10 + int(ch)
		}
		return if s[0] == `-` { -n } else { n }
	}
	// Slow path for invalid, big, or underscored integers.
	// parse_int clamps a value that exceeds the signed range of the requested
	// bit size instead of failing, so parsing with the bit size of int would
	// silently turn '2147483648' into 2147483647. Parse with 64 bits and do
	// the range check here.
	wide := parse_int(s, 10, 64)!
	if strconv.int_size < 64 {
		limit := i64(1) << u64(strconv.int_size - 1)
		if wide >= limit || wide < -limit {
			return error('strconv.atoi: parsing "${s}": integer overflow')
		}
	}
	return int(wide)
}