Gitly

v / vlib / strconv

Raw file | 253 loc (231 sloc) | 7.81 KB | Latest commit hash 90941b3b1


1 module strconv
2 
3 // Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
4 // Use of this source code is governed by an MIT license
5 // that can be found in the LICENSE file.
6 // TODO: use options, or some way to return default with error.
7 const (
8     // int_size is the size in bits of an int or uint value (fixed at 32 here).
9     // Disabled alternative: int_size = 32 << (~u32(0) >> 63)
10     // Disabled alternative (note: (1 << 63) - 1 is the i64 maximum, not the u64 one): max_u64 = u64(u64(1 << 63) - 1)
11     int_size = 32
12     max_u64  = u64(18446744073709551615) // 2^64 - 1, the largest u64 value, written as a literal ("use this until we add support")
13 )
14 
// byte_to_lower folds an ASCII byte towards lower case by setting bit 5 (value 32).
// The upper-case letters `A`..`Z` become `a`..`z`; lower-case letters and
// decimal digits already have that bit set and come back unchanged.
// No range check is done, so any other byte with bit 5 clear is altered too
// (for example `@`, 64, becomes 96).
[inline]
pub fn byte_to_lower(c u8) u8 {
    ascii_case_bit := u8(32)
    return c | ascii_case_bit
}
19 
// common_parse_uint wraps common_parse_uint2 and turns its numeric status
// code into an error value.
//
// `s`, `_base` and `_bit_size` are forwarded unchanged. When the status code
// is non-zero and at least one of `error_on_non_digit` / `error_on_high_digit`
// is true, an error describing the failure (wrong base, wrong bit size,
// integer overflow, or syntax error for any other code) is returned.
// When both flags are false the value produced by common_parse_uint2 is
// returned as-is, even if parsing stopped early or overflowed.
// TODO: error_on_non_digit and error_on_high_digit have no difference
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
    value, code := common_parse_uint2(s, _base, _bit_size)
    report_errors := error_on_non_digit || error_on_high_digit
    if code == 0 || !report_errors {
        return value
    }
    if code == -1 {
        return error('common_parse_uint: wrong base ${_base} for ${s}')
    }
    if code == -2 {
        return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}')
    }
    if code == -3 {
        return error('common_parse_uint: integer overflow ${s}')
    }
    // every remaining (positive) code marks a character that could not be parsed
    return error('common_parse_uint: syntax error ${s}')
}
35 
// common_parse_uint2 parses `s` as an unsigned integer and reports the outcome
// as a numeric code instead of an error value.
//
// `_base` is the radix, 2 to 36. When it is 0 the radix is inferred from the
// text: a string of at least 3 bytes starting with "0b", "0o" or "0x" (either
// case) is binary, octal or hexadecimal, and may carry one `_` right after
// the prefix; a 2-byte string made of `0` and a decimal digit is decimal; any
// other 1- or 2-byte string starting with `0` is octal; everything else is
// decimal.
// `_bit_size` is the width the value must fit in, 1 to 64; 0 selects
// strconv.int_size.
// A single `_` placed between two other characters of the number is skipped.
//
// The first returned value contains the parsed value, the second one the
// status code:
//   0   = OK
//   > 0 = syntax problem: for an invalid digit, its index + 1 together with
//         the value parsed up to that point; for empty input or a misplaced
//         `_`, the code is 1 and the value 0
//   -1  = wrong base (outside 2..36), value 0
//   -2  = wrong bit size (outside 0..64), value 0
//   -3  = overflow, the value is the largest one that fits in the bit size
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
    if s.len < 1 {
        return u64(0), 1
    }

    mut bit_size := _bit_size
    mut base := _base
    mut start_index := 0

    if base == 0 {
        // Look for octal, binary and hex prefix.
        base = 10
        if s[0] == `0` {
            if s.len >= 3 {
                // s[1] is read only after the length check: this function runs
                // with direct array access, so reading it for the 1-byte
                // string "0" would touch memory past the end of `s`.
                ch := s[1] | 32 // fold the prefix letter to lower case
                if ch == `b` {
                    base = 2
                    start_index += 2
                } else if ch == `o` {
                    base = 8
                    start_index += 2
                } else if ch == `x` {
                    base = 16
                    start_index += 2
                }

                // check for underscore after the base prefix
                if s[start_index] == `_` {
                    start_index++
                }
            } else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
                // manage leading zeros in decimal base's numbers
                base = 10
                start_index++
            } else {
                // otherwise it is an octal for C compatibility
                // TODO: Check if this behaviour is logically right
                base = 8
                start_index++
            }
        }
    }

    // Only radixes 2..36 can be expressed with the digits 0-9 and a-z decoded
    // below; anything else is reported with the documented "wrong base" code
    // instead of flowing into the cutoff arithmetic.
    if base < 2 || base > 36 {
        return u64(0), -1
    }

    if bit_size == 0 {
        bit_size = strconv.int_size
    } else if bit_size < 0 || bit_size > 64 {
        return u64(0), -2
    }
    // Cutoff is the smallest number such that cutoff*base > maxUint64.
    cutoff := strconv.max_u64 / u64(base) + u64(1)
    // Largest value representable in bit_size bits.
    max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
    basem1 := base - 1

    mut n := u64(0)
    for i in start_index .. s.len {
        mut c := s[i]

        // manage underscore inside the number
        if c == `_` {
            // not allowed as the first or the last character of the digits
            if i == start_index || i >= (s.len - 1) {
                return u64(0), 1
            }
            // not allowed next to another underscore
            if s[i - 1] == `_` || s[i + 1] == `_` {
                return u64(0), 1
            }

            continue
        }

        // counts how many letter ranges (upper case, lower case) were applied
        mut sub_count := 0

        // get the 0-9 digit
        c -= 48 // subtract the rune `0`

        // check if we are in the superior base rune interval [A..Z]
        if c >= 17 { // (65 - 48)
            sub_count++
            c -= 7 // maps `A` to 10

            // check if we are in the superior base rune interval [a..z]
            if c >= 42 { // (97 - 7 - 48)
                sub_count++
                c -= 32 // maps `a` to 10
            }
        }

        // check for digit over base; with sub_count == 0 the byte was not a
        // letter, so values above 9 are the characters between `9` and `A`
        if c > basem1 || (sub_count == 0 && c > 9) {
            return n, i + 1
        }

        // check if we are in the cutoff zone
        if n >= cutoff {
            // n*base overflows
            return max_val, -3
        }
        n *= u64(base)
        n1 := n + u64(c)
        if n1 < n || n1 > max_val {
            // n+v overflows
            return max_val, -3
        }
        n = n1
    }
    return n, 0
}
151 
// parse_uint is like parse_int but for unsigned numbers: no sign is accepted.
// It calls common_parse_uint with both error flags enabled, so every
// non-zero status code from the parser is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
    parsed := common_parse_uint(s, _base, _bit_size, true, true)!
    return parsed
}
156 
// common_parse_int is called by parse_int. It strips one optional leading
// `+` or `-`, parses the rest with common_parse_uint (passing the two error
// flags through) and applies the sign.
//
// An empty string yields 0 without an error. A bit size of 0 selects
// strconv.int_size. Errors from common_parse_uint are propagated.
// A magnitude that parsed correctly but does not fit the signed range of
// `_bit_size` bits is clamped: to 2^(bit_size-1) - 1 for positive input and
// to -2^(bit_size-1) for negative input.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
    if _s.len < 1 {
        return i64(0)
    }
    bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

    // Pick off leading sign.
    first := _s[0]
    neg := first == `-`
    mut digits := _s
    if neg || first == `+` {
        // view of the same bytes without the sign character (no copy is made)
        unsafe {
            digits = tos(_s.str + 1, _s.len - 1)
        }
    }

    // Convert unsigned and check range.
    un := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
    if un == 0 {
        return i64(0)
    }
    // TODO: check should u64(bit_size-1) be size of int (32)?
    // cutoff is 2^(bit_size-1): the magnitude of the most negative value
    cutoff := u64(1) << u64(bit_size - 1)
    if neg {
        if un > cutoff {
            return -i64(cutoff)
        }
        return -i64(un)
    }
    if un >= cutoff {
        return i64(cutoff - u64(1))
    }
    return i64(un)
}
205 
// parse_int interprets the string `_s` in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding signed value.
//
// One optional leading `+` or `-` is accepted. If the base argument is 0,
// the true base is implied by the prefix of the digits: 2 for "0b", 8 for
// "0o", 16 for "0x", and 10 otherwise (very short strings starting with `0`
// follow the special rules of common_parse_uint2).
// Single underscores between digits are skipped.
//
// A bit size of 0 means strconv.int_size. A bit size below 0 or above 64 is
// an error, as are invalid digits and magnitudes that do not fit in
// `_bit_size` unsigned bits. A magnitude that fits unsigned but not signed
// is clamped to the nearest representable value, and an empty string yields
// 0 without an error.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
    parsed := common_parse_int(_s, base, _bit_size, true, true)!
    return parsed
}
221 
// atoi converts the decimal string `s` to an int.
//
// Short inputs (fewer than 10 bytes when strconv.int_size is 32, fewer than
// 19 when it is 64) are converted directly: an optional `+` or `-` followed
// by decimal digits only; any other byte, or a sign with nothing after it,
// is an error. Longer inputs are handed to parse_int(s, 10, 0) and the
// result is converted to int. An empty string is an error.
[direct_array_access]
pub fn atoi(s string) !int {
    if s == '' {
        return error('strconv.atoi: parsing "": invalid syntax')
    }
    fits_32 := strconv.int_size == 32 && (0 < s.len && s.len < 10)
    fits_64 := strconv.int_size == 64 && (0 < s.len && s.len < 19)
    if !(fits_32 || fits_64) {
        // Slow path for invalid, big, or underscored integers.
        wide := parse_int(s, 10, 0)!
        return int(wide)
    }

    // Fast path for small integers that fit int type.
    negative := s[0] == `-`
    mut first_digit := 0
    if negative || s[0] == `+` {
        first_digit = 1
        if s.len - first_digit < 1 {
            // a sign with no digits after it
            return error('strconv.atoi: parsing "${s}": invalid syntax')
        }
    }
    mut n := 0
    for i in first_digit .. s.len {
        // u8 subtraction wraps, so bytes below `0` also end up above 9
        digit := s[i] - `0`
        if digit > 9 {
            return error('strconv.atoi: parsing "${s}": invalid syntax')
        }
        n = n * 10 + int(digit)
    }
    if negative {
        return -n
    }
    return n
}

1	module strconv
2
3	// Copyright (c) 2019-2023 Alexander Medvednikov. All rights reserved.
4	// Use of this source code is governed by an MIT license
5	// that can be found in the LICENSE file.
6	// TODO: use options, or some way to return default with error.
7	const (
8	// int_size is the size in bits of an int or uint value (fixed at 32 here).
9	// Disabled alternative: int_size = 32 << (~u32(0) >> 63)
10	// Disabled alternative (note: (1 << 63) - 1 is the i64 maximum, not the u64 one): max_u64 = u64(u64(1 << 63) - 1)
11	int_size = 32
12	max_u64 = u64(18446744073709551615) // 2^64 - 1, the largest u64 value, written as a literal ("use this until we add support")
13	)
14
// byte_to_lower folds an ASCII byte towards lower case by setting bit 5 (value 32).
// The upper-case letters `A`..`Z` become `a`..`z`; lower-case letters and
// decimal digits already have that bit set and come back unchanged.
// No range check is done, so any other byte with bit 5 clear is altered too
// (for example `@`, 64, becomes 96).
[inline]
pub fn byte_to_lower(c u8) u8 {
    // plain bitwise OR; the listing showed it as `\|`, which is not a V operator
    return c | 32
}
19
// common_parse_uint wraps common_parse_uint2 and turns its numeric status
// code into an error value.
//
// `s`, `_base` and `_bit_size` are forwarded unchanged. When the status code
// is non-zero and at least one of `error_on_non_digit` / `error_on_high_digit`
// is true, an error describing the failure (wrong base, wrong bit size,
// integer overflow, or syntax error for any other code) is returned.
// When both flags are false the value produced by common_parse_uint2 is
// returned as-is, even if parsing stopped early or overflowed.
// TODO: error_on_non_digit and error_on_high_digit have no difference
pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !u64 {
    result, err := common_parse_uint2(s, _base, _bit_size)
    if err != 0 && (error_on_non_digit || error_on_high_digit) {
        match err {
            -1 { return error('common_parse_uint: wrong base ${_base} for ${s}') }
            -2 { return error('common_parse_uint: wrong bit size ${_bit_size} for ${s}') }
            -3 { return error('common_parse_uint: integer overflow ${s}') }
            // every remaining (positive) code marks a character that could not be parsed
            else { return error('common_parse_uint: syntax error ${s}') }
        }
    }
    return result
}
35
// common_parse_uint2 parses `s` as an unsigned integer and reports the outcome
// as a numeric code instead of an error value.
//
// `_base` is the radix, 2 to 36. When it is 0 the radix is inferred from the
// text: a string of at least 3 bytes starting with "0b", "0o" or "0x" (either
// case) is binary, octal or hexadecimal, and may carry one `_` right after
// the prefix; a 2-byte string made of `0` and a decimal digit is decimal; any
// other 1- or 2-byte string starting with `0` is octal; everything else is
// decimal.
// `_bit_size` is the width the value must fit in, 1 to 64; 0 selects
// strconv.int_size.
// A single `_` placed between two other characters of the number is skipped.
//
// The first returned value contains the parsed value, the second one the
// status code:
//   0   = OK
//   > 0 = syntax problem: for an invalid digit, its index + 1 together with
//         the value parsed up to that point; for empty input or a misplaced
//         `_`, the code is 1 and the value 0
//   -1  = wrong base (outside 2..36), value 0
//   -2  = wrong bit size (outside 0..64), value 0
//   -3  = overflow, the value is the largest one that fits in the bit size
[direct_array_access]
pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) {
    if s.len < 1 {
        return u64(0), 1
    }

    mut bit_size := _bit_size
    mut base := _base
    mut start_index := 0

    if base == 0 {
        // Look for octal, binary and hex prefix.
        base = 10
        if s[0] == `0` {
            if s.len >= 3 {
                // s[1] is read only after the length check: this function runs
                // with direct array access, so reading it for the 1-byte
                // string "0" would touch memory past the end of `s`.
                ch := s[1] | 32 // fold the prefix letter to lower case
                if ch == `b` {
                    base = 2
                    start_index += 2
                } else if ch == `o` {
                    base = 8
                    start_index += 2
                } else if ch == `x` {
                    base = 16
                    start_index += 2
                }

                // check for underscore after the base prefix
                if s[start_index] == `_` {
                    start_index++
                }
            } else if s.len >= 2 && (s[1] >= `0` && s[1] <= `9`) {
                // manage leading zeros in decimal base's numbers
                base = 10
                start_index++
            } else {
                // otherwise it is an octal for C compatibility
                // TODO: Check if this behaviour is logically right
                base = 8
                start_index++
            }
        }
    }

    // Only radixes 2..36 can be expressed with the digits 0-9 and a-z decoded
    // below; anything else is reported with the documented "wrong base" code
    // instead of flowing into the cutoff arithmetic.
    if base < 2 || base > 36 {
        return u64(0), -1
    }

    if bit_size == 0 {
        bit_size = strconv.int_size
    } else if bit_size < 0 || bit_size > 64 {
        return u64(0), -2
    }
    // Cutoff is the smallest number such that cutoff*base > maxUint64.
    cutoff := strconv.max_u64 / u64(base) + u64(1)
    // Largest value representable in bit_size bits.
    max_val := if bit_size == 64 { strconv.max_u64 } else { (u64(1) << u64(bit_size)) - u64(1) }
    basem1 := base - 1

    mut n := u64(0)
    for i in start_index .. s.len {
        mut c := s[i]

        // manage underscore inside the number
        if c == `_` {
            // not allowed as the first or the last character of the digits
            if i == start_index || i >= (s.len - 1) {
                return u64(0), 1
            }
            // not allowed next to another underscore
            if s[i - 1] == `_` || s[i + 1] == `_` {
                return u64(0), 1
            }

            continue
        }

        // counts how many letter ranges (upper case, lower case) were applied
        mut sub_count := 0

        // get the 0-9 digit
        c -= 48 // subtract the rune `0`

        // check if we are in the superior base rune interval [A..Z]
        if c >= 17 { // (65 - 48)
            sub_count++
            c -= 7 // maps `A` to 10

            // check if we are in the superior base rune interval [a..z]
            if c >= 42 { // (97 - 7 - 48)
                sub_count++
                c -= 32 // maps `a` to 10
            }
        }

        // check for digit over base; with sub_count == 0 the byte was not a
        // letter, so values above 9 are the characters between `9` and `A`
        if c > basem1 || (sub_count == 0 && c > 9) {
            return n, i + 1
        }

        // check if we are in the cutoff zone
        if n >= cutoff {
            // n*base overflows
            return max_val, -3
        }
        n *= u64(base)
        n1 := n + u64(c)
        if n1 < n || n1 > max_val {
            // n+v overflows
            return max_val, -3
        }
        n = n1
    }
    return n, 0
}
151
// parse_uint is like parse_int but for unsigned numbers: no sign is accepted.
// It calls common_parse_uint with both error flags enabled, so every
// non-zero status code from the parser is returned as an error.
pub fn parse_uint(s string, _base int, _bit_size int) !u64 {
    parsed := common_parse_uint(s, _base, _bit_size, true, true)!
    return parsed
}
156
// common_parse_int is called by parse_int. It strips one optional leading
// `+` or `-`, parses the rest with common_parse_uint (passing the two error
// flags through) and applies the sign.
//
// An empty string yields 0 without an error. A bit size of 0 selects
// strconv.int_size. Errors from common_parse_uint are propagated.
// A magnitude that parsed correctly but does not fit the signed range of
// `_bit_size` bits is clamped: to 2^(bit_size-1) - 1 for positive input and
// to -2^(bit_size-1) for negative input.
[direct_array_access]
pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) !i64 {
    if _s.len < 1 {
        return i64(0)
    }
    bit_size := if _bit_size == 0 { strconv.int_size } else { _bit_size }

    // Pick off leading sign.
    first := _s[0]
    neg := first == `-`
    mut digits := _s
    if neg || first == `+` {
        // view of the same bytes without the sign character (no copy is made)
        unsafe {
            digits = tos(_s.str + 1, _s.len - 1)
        }
    }

    // Convert unsigned and check range.
    un := common_parse_uint(digits, base, bit_size, error_on_non_digit, error_on_high_digit)!
    if un == 0 {
        return i64(0)
    }
    // TODO: check should u64(bit_size-1) be size of int (32)?
    // cutoff is 2^(bit_size-1): the magnitude of the most negative value
    cutoff := u64(1) << u64(bit_size - 1)
    if neg {
        if un > cutoff {
            return -i64(cutoff)
        }
        return -i64(un)
    }
    if un >= cutoff {
        return i64(cutoff - u64(1))
    }
    return i64(un)
}
205
// parse_int interprets the string `_s` in the given base (0, 2 to 36) and
// bit size (0 to 64) and returns the corresponding signed value.
//
// One optional leading `+` or `-` is accepted. If the base argument is 0,
// the true base is implied by the prefix of the digits: 2 for "0b", 8 for
// "0o", 16 for "0x", and 10 otherwise (very short strings starting with `0`
// follow the special rules of common_parse_uint2).
// Single underscores between digits are skipped.
//
// A bit size of 0 means strconv.int_size. A bit size below 0 or above 64 is
// an error, as are invalid digits and magnitudes that do not fit in
// `_bit_size` unsigned bits. A magnitude that fits unsigned but not signed
// is clamped to the nearest representable value, and an empty string yields
// 0 without an error.
pub fn parse_int(_s string, base int, _bit_size int) !i64 {
    parsed := common_parse_int(_s, base, _bit_size, true, true)!
    return parsed
}
221
// atoi converts the decimal string `s` to an int.
//
// Short inputs (fewer than 10 bytes when strconv.int_size is 32, fewer than
// 19 when it is 64) are converted directly: an optional `+` or `-` followed
// by decimal digits only; any other byte, or a sign with nothing after it,
// is an error. Longer inputs are handed to parse_int(s, 10, 0) and the
// result is converted to int. An empty string is an error.
[direct_array_access]
pub fn atoi(s string) !int {
    if s == '' {
        return error('strconv.atoi: parsing "": invalid syntax')
    }
    if (strconv.int_size == 32 && (0 < s.len && s.len < 10))
        || (strconv.int_size == 64 && (0 < s.len && s.len < 19)) {
        // Fast path for small integers that fit int type.
        mut start_idx := 0
        if s[0] == `-` || s[0] == `+` {
            start_idx++
            if s.len - start_idx < 1 {
                // a sign with no digits after it
                return error('strconv.atoi: parsing "${s}": invalid syntax')
            }
        }
        mut n := 0
        for i in start_idx .. s.len {
            // u8 subtraction wraps, so bytes below `0` also end up above 9
            ch := s[i] - `0`
            if ch > 9 {
                return error('strconv.atoi: parsing "${s}": invalid syntax')
            }
            n = n * 10 + int(ch)
        }
        return if s[0] == `-` { -n } else { n }
    }
    // Slow path for invalid, big, or underscored integers.
    int64 := parse_int(s, 10, 0)!
    return int(int64)
}