1 | module strconv |
2 | |
3 | // Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved. |
4 | // Use of this source code is governed by an MIT license |
5 | // that can be found in the LICENSE file. |
6 | // TODO: use options, or some way to return default with error. |
const (
	// int_size is the width, in bits, that this module uses whenever a caller
	// passes a bit size of 0. It is fixed at 32 here.
	int_size = 32
	// max_u64 is the largest value representable in a u64 (2^64 - 1).
	max_u64  = u64(0xFFFF_FFFF_FFFF_FFFF)
)
14 | |
// byte_to_lower sets bit 5 (0x20) of `c` and returns the result.
// For the ASCII letters `A`..`Z` this yields the matching lowercase letter;
// bytes that already have that bit set (e.g. `a`..`z`, `0`..`9`) are returned
// unchanged. Other bytes are NOT left intact: the bit is set unconditionally.
[inline]
pub fn byte_to_lower(c u8) u8 {
	return c | 0x20
}
19 | |
// common_parse_uint wraps common_parse_uint2 and converts its numeric status
// code into a V error.
//
// When at least one of `error_on_non_digit` / `error_on_high_digit` is true,
// any nonzero status becomes an error (wrong base, wrong bit size, overflow,
// or a generic syntax error). When both flags are false, whatever value
// common_parse_uint2 produced is returned as is, regardless of the status.
// NOTE: the two flags are not distinguished from each other here.
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
	value, code := common_parse_uint2(s, _base, _bit_size)
	strict := error_on_non_digit || error_on_high_digit
	// Success, or the caller asked for lenient parsing: hand back the value.
	if code == 0 || !strict {
		return value
	}
	if code == -1 {
		return error('common_parse_uint: wrong base ${_base} for ${s}')
	}
	if code == -2 {
		return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
	}
	if code == -3 {
		return error('common_parse_uint: integer overflow ${s}')
	}
	// Every other status is a positive "bad character" position.
	return error('common_parse_uint: syntax error ${s}')
}
35 | |
// common_parse_uint2 parses `s` as an unsigned integer and reports problems
// through a status code instead of an error value.
//
// Parameters:
// - `s`: the text to parse (no sign is accepted here).
// - `_base`: 2..36, or 0 to pick the base from the text. With base 0 and a
//   leading `0`, a string of at least 3 bytes is checked for a `0b`, `0o` or
//   `0x` prefix (case-insensitive, optionally followed by one `_`) and falls
//   back to base 10 when none matches; shorter strings skip the leading `0`
//   and use base 10 when the second byte is a decimal digit, base 8 otherwise.
// - `_bit_size`: 0..64; 0 means `int_size`. The result must fit in that many bits.
//
// Returns `(value, status)`, where status is:
// -  0: success, `value` is the parsed number.
// - >0: syntax error. For an invalid digit it is the index of the offending
//       byte + 1 and `value` is what was parsed so far. An empty string or a
//       misplaced `_` (leading, trailing, or doubled) yields `(0, 1)`.
// - -1: base outside 2..36.
// - -2: bit size outside 0..64.
// - -3: overflow; `value` is the maximum for the requested bit size.
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
	if s.len < 1 {
		return u64(0), 1
	}

	mut bit_size := _bit_size
	mut base := _base
	mut start_index := 0

	if base == 0 {
		// Look for octal, binary and hex prefix.
		base = 10
		if s[0] == `0` {
			if s.len >= 3 {
				// Only read s[1] once the length is known to cover it:
				// [direct_array_access] disables bounds checking.
				ch := s[1] | 32 // lowercase the prefix letter
				if ch == `b` {
					base = 2
					start_index += 2
				} else if ch == `o` {
					base = 8
					start_index += 2
				} else if ch == `x` {
					base = 16
					start_index += 2
				}

				// check for underscore after the base prefix
				if s[start_index] == `_` {
					start_index++
				}
			}
			// manage leading zeros in decimal base's numbers
			// otherwise it is an octal for C compatibility
			// TODO: Check if this behaviour is logically right
			else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
				base = 10
				start_index++
			} else {
				base = 8
				start_index++
			}
		}
	}

	// Reject bases the digit alphabet (0-9, a-z) cannot represent. Without
	// this check, status -1 is never produced and bases above 36 would let
	// non-alphanumeric bytes through as digits.
	if base < 2 || base > 36 {
		return u64(0), -1
	}

	if bit_size == 0 {
		bit_size = strconv.int_size
	} else if bit_size < 0 || bit_size > 64 {
		return u64(0), -2
	}
	// cutoff is the smallest number such that cutoff*base > max_u64.
	cutoff := strconv.max_u64 / u64(base) + u64(1)
	// Largest value representable in `bit_size` bits.
	max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
	basem1 := base - 1

	mut n := u64(0)
	for i in start_index .. s.len {
		mut c := s[i]

		// manage underscore inside the number
		if c == `_` {
			// an underscore may not be the first or the last character
			if i == start_index || i >= (s.len - 1) {
				return u64(0), 1
			}
			// two underscores in a row are not allowed
			if s[i - 1] == `_` || s[i + 1] == `_` {
				return u64(0), 1
			}

			continue
		}

		// sub_count stays 0 only for bytes below `A`; it guards against the
		// punctuation between `9` and `A` being taken for digits 10..16.
		mut sub_count := 0

		// get the 0-9 digit; bytes below `0` wrap around to large u8 values
		// and are rejected by the range check further down
		c -= 48 // subtract the rune `0`

		// check if we are in the superior base rune interval [A..Z]
		if c >= 17 { // (65 - 48)
			sub_count++
			c -= 7 // (`A` - `0` - 10), so that `A` maps to the digit value 10

			// check if we are in the superior base rune interval [a..z]
			if c >= 42 { // (97 - 7 - 48)
				sub_count++
				c -= 32 // (`a` - `A`), so that `a` maps to the digit value 10
			}
		}

		// check for digit over base
		if c > basem1 || (sub_count == 0 && c > 9) {
			return n, i + 1
		}

		// check if we are in the cutoff zone
		if n >= cutoff {
			// n*base overflows
			return max_val, -3
		}
		n *= u64(base)
		n1 := n + u64(c)
		if n1 < n || n1 > max_val {
			// n+c overflows u64 or exceeds the requested bit size
			return max_val, -3
		}
		n = n1
	}
	return n, 0
}
151 | |
// parse_uint parses `s` as an unsigned integer in the given base and bit size.
// It forwards to common_parse_uint with both error flags enabled, so every
// nonzero parse status is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
	parsed := common_parse_uint(s, _base, _bit_size, true, true)!
	return parsed
}
156 | |
// common_parse_int parses a signed integer: one optional leading `+` or `-`
// followed by a magnitude that is parsed by common_parse_uint.
//
// - An empty `_s` yields 0 without an error.
// - A `_bit_size` of 0 is replaced by `int_size`.
// - Errors from common_parse_uint are propagated unchanged; the two flags are
//   simply passed through to it.
// - A magnitude that does not fit the signed range of `bit_size` bits is
//   clamped to the nearest bound (2^(bit_size-1) - 1, or -2^(bit_size-1))
//   instead of producing an error.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
	if _s.len == 0 {
		return i64(0)
	}
	bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

	// Peel off a single leading sign, if present.
	first := _s[0]
	neg := first == `-`
	mut digits := _s
	if neg || first == `+` {
		// _s.len >= 1 here, so skipping one byte stays inside the buffer;
		// tos builds a view over the remaining bytes without copying them.
		unsafe {
			digits = tos(_s.str + 1, _s.len - 1)
		}
	}

	// Parse the magnitude as unsigned, then range-check it against the sign.
	magnitude := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
	if magnitude == 0 {
		return i64(0)
	}
	// TODO: check should u64(bit_size-1) be size of int (32)?
	limit := u64(1) << u64(bit_size - 1)
	if neg {
		if magnitude > limit {
			// below the minimum: clamp to -2^(bit_size-1)
			return -i64(limit)
		}
		return -i64(magnitude)
	}
	if magnitude >= limit {
		// above the maximum: clamp to 2^(bit_size-1) - 1
		return i64(limit - u64(1))
	}
	return i64(magnitude)
}
205 | |
// parse_int interprets the string `_s` as a signed integer in the given
// `base` and `_bit_size` and returns the corresponding i64 value.
// It forwards to common_parse_int with both error flags enabled.
//
// Base: when `base` is 0, the base is derived from the text. For strings of
// at least 3 bytes starting with `0`, the prefixes "0b", "0o" and "0x" select
// base 2, 8 and 16; other inputs are read as decimal (a lone "0" parses to 0).
//
// Underscores: a single `_` is accepted between digits; a leading, trailing
// or doubled underscore is a syntax error.
//
// Bit size: `_bit_size` names the integer width the result must fit into;
// 0 stands for `int_size`. A non-empty input with a bit size below 0 or
// above 64 is an error.
//
// Range: a magnitude that needs more than `_bit_size` bits is an overflow
// error; a magnitude that fits in `_bit_size` bits but not in the signed
// range is clamped to the nearest signed bound instead.
//
// An empty string yields 0 without an error.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
	value := common_parse_int(_s, base, _bit_size, true, true)!
	return value
}
221 | |
// atoi converts the decimal string `s` to an int.
//
// An empty string is an error. Inputs short enough to be guaranteed to fit
// (fewer than 10 bytes for a 32-bit `int_size`, fewer than 19 for 64-bit) are
// converted by a direct digit loop that accepts one optional leading sign
// followed by the digits `0`..`9` only. Longer inputs are handed to
// parse_int(s, 10, 0) and the result is converted to int.
[direct_array_access]
pub fn atoi(s string) !int {
	if s.len == 0 {
		return error('strconv.atoi: parsing "": invalid syntax')
	}

	fits_32 := strconv.int_size == 32 && s.len < 10
	fits_64 := strconv.int_size == 64 && s.len < 19
	if !(fits_32 || fits_64) {
		// Slow path for big inputs.
		wide := parse_int(s, 10, 0)!
		return int(wide)
	}

	// Fast path: the digit count is too small to overflow an int.
	negative := s[0] == `-`
	first_digit := if negative || s[0] == `+` { 1 } else { 0 }
	if first_digit >= s.len {
		// a sign with nothing after it
		return error('strconv.atoi: parsing "${s}": invalid syntax')
	}
	mut acc := 0
	for i := first_digit; i < s.len; i++ {
		// bytes below `0` wrap around and therefore also fail the `> 9` test
		digit := s[i] - `0`
		if digit > 9 {
			return error('strconv.atoi: parsing "${s}": invalid syntax')
		}
		acc = acc * 10 + int(digit)
	}
	if negative {
		return -acc
	}
	return acc
}