From 14862585914b9fec24940df00e42b89178fdc5a7 Mon Sep 17 00:00:00 2001 From: Flinner <85732279+Flinner@users.noreply.github.com> Date: Fri, 2 Jul 2021 10:39:57 +0300 Subject: [PATCH] strconv: fix `atoi` returning 0 on large strings (#10635) --- vlib/builtin/string.v | 14 +++---- vlib/strconv/atoi.v | 28 +++++++------ vlib/strconv/atoi_test.v | 63 +++++++++++++++++------------- vlib/strconv/number_to_base_test.v | 1 - vlib/v/scanner/scanner.v | 2 +- vlib/x/json2/scanner.v | 2 +- 6 files changed, 60 insertions(+), 50 deletions(-) diff --git a/vlib/builtin/string.v b/vlib/builtin/string.v index 73f81644c..85434f1bb 100644 --- a/vlib/builtin/string.v +++ b/vlib/builtin/string.v @@ -465,22 +465,22 @@ pub fn (s string) bool() bool { // int returns the value of the string as an integer `'1'.int() == 1`. pub fn (s string) int() int { - return int(strconv.common_parse_int(s, 0, 32, false, false)) + return int(strconv.common_parse_int(s, 0, 32, false, false) or { 0 }) } // i64 returns the value of the string as i64 `'1'.i64() == i64(1)`. pub fn (s string) i64() i64 { - return strconv.common_parse_int(s, 0, 64, false, false) + return strconv.common_parse_int(s, 0, 64, false, false) or { 0 } } // i8 returns the value of the string as i8 `'1'.i8() == i8(1)`. pub fn (s string) i8() i8 { - return i8(strconv.common_parse_int(s, 0, 8, false, false)) + return i8(strconv.common_parse_int(s, 0, 8, false, false) or { 0 }) } // i16 returns the value of the string as i16 `'1'.i16() == i16(1)`. pub fn (s string) i16() i16 { - return i16(strconv.common_parse_int(s, 0, 16, false, false)) + return i16(strconv.common_parse_int(s, 0, 16, false, false) or { 0 }) } // f32 returns the value of the string as f32 `'1.0'.f32() == f32(1)`. @@ -497,17 +497,17 @@ pub fn (s string) f64() f64 { // u16 returns the value of the string as u16 `'1'.u16() == u16(1)`. pub fn (s string) u16() u16 { - return u16(strconv.common_parse_uint(s, 0, 16, false, false)) + return u16(strconv.common_parse_uint(s, 0, 16, false, false) or { 0 }) } // u32 returns the value of the string as u32 `'1'.u32() == u32(1)`. pub fn (s string) u32() u32 { - return u32(strconv.common_parse_uint(s, 0, 32, false, false)) + return u32(strconv.common_parse_uint(s, 0, 32, false, false) or { 0 }) } // u64 returns the value of the string as u64 `'1'.u64() == u64(1)`. pub fn (s string) u64() u64 { - return strconv.common_parse_uint(s, 0, 64, false, false) + return strconv.common_parse_uint(s, 0, 64, false, false) or { 0 } } [direct_array_access] diff --git a/vlib/strconv/atoi.v b/vlib/strconv/atoi.v index 4cfe6635f..333491967 100644 --- a/vlib/strconv/atoi.v +++ b/vlib/strconv/atoi.v @@ -17,12 +17,16 @@ pub fn byte_to_lower(c byte) byte { } // common_parse_uint is called by parse_uint and allows the parsing -// to stop on non or invalid digit characters and return the result so far -pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) u64 { - result, error := common_parse_uint2(s, _base, _bit_size) - if error != 0 { - if error > 0 && (error_on_non_digit || error_on_high_digit) { - return u64(0) +// to stop on non or invalid digit characters and return with an error +pub fn common_parse_uint(s string, _base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?u64 { + result, err := common_parse_uint2(s, _base, _bit_size) + // TODO: error_on_non_digit and error_on_high_digit have no difference + if err != 0 && (error_on_non_digit || error_on_high_digit) { + match err { + -1 { return error('common_parse_uint: wrong base $_base for $s') } + -2 { return error('common_parse_uint: wrong bit size $_bit_size for $s') } + -3 { return error('common_parse_uint: integer overflow $s') } + else { return error('common_parse_uint: syntax error $s') } } } return result @@ -114,13 +118,13 @@ pub fn common_parse_uint2(s string, _base int, _bit_size int) (u64, int) { } // parse_uint is like parse_int but for unsigned numbers. -pub fn parse_uint(s string, _base int, _bit_size int) u64 { +pub fn parse_uint(s string, _base int, _bit_size int) ?u64 { return common_parse_uint(s, _base, _bit_size, true, true) } // common_parse_int is called by parse int and allows the parsing -// to stop on non or invalid digit characters and return the result so far -pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) i64 { +// to stop on non or invalid digit characters and return with an error +pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit bool, error_on_high_digit bool) ?i64 { mut s := _s mut bit_size := _bit_size if s.len < 1 { @@ -139,7 +143,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b // un := parse_uint(s, base, bit_size) or { // return i64(0) // } - un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit) + un := common_parse_uint(s, base, bit_size, error_on_non_digit, error_on_high_digit) ? if un == 0 { return i64(0) } @@ -171,7 +175,7 @@ pub fn common_parse_int(_s string, base int, _bit_size int, error_on_non_digit b // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 // correspond to int, int8, int16, int32, and int64. // If bitSize is below 0 or above 64, an error is returned. -pub fn parse_int(_s string, base int, _bit_size int) i64 { +pub fn parse_int(_s string, base int, _bit_size int) ?i64 { return common_parse_int(_s, base, _bit_size, true, true) } @@ -203,7 +207,7 @@ pub fn atoi(s string) ?int { return if s[0] == `-` { -n } else { n } } // Slow path for invalid, big, or underscored integers. - int64 := parse_int(s, 10, 0) + int64 := parse_int(s, 10, 0) ? return int(int64) } diff --git a/vlib/strconv/atoi_test.v b/vlib/strconv/atoi_test.v index 318934c57..b356db6a7 100644 --- a/vlib/strconv/atoi_test.v +++ b/vlib/strconv/atoi_test.v @@ -1,22 +1,18 @@ import strconv -fn test_atoi() { - if x := strconv.atoi('16') { - assert x == 16 - } else { - assert false - } - if x := strconv.atoi('+16') { - assert x == 16 - } else { +fn test_atoi() ? { + assert strconv.atoi('16') ? == 16 + assert strconv.atoi('+16') ? == 16 + assert strconv.atoi('-16') ? == -16 + + // invalid strings + if x := strconv.atoi('str') { + println(x) assert false - } - if x := strconv.atoi('-16') { - assert x == -16 } else { - assert false + assert true } - if x := strconv.atoi('str') { + if x := strconv.atoi('string_longer_than_10_chars') { println(x) assert false } else { @@ -30,23 +26,34 @@ fn test_atoi() { } } -fn test_parse_int() { +fn test_parse_int() ? { // Different bases - assert strconv.parse_int('16', 16, 0) == 0x16 - assert strconv.parse_int('16', 8, 0) == 0o16 - assert strconv.parse_int('11', 2, 0) == 3 + assert strconv.parse_int('16', 16, 0) ? == 0x16 + assert strconv.parse_int('16', 8, 0) ? == 0o16 + assert strconv.parse_int('11', 2, 0) ? == 3 // Different bit sizes - assert strconv.parse_int('127', 10, 8) == 127 - assert strconv.parse_int('128', 10, 8) == 127 - assert strconv.parse_int('32767', 10, 16) == 32767 - assert strconv.parse_int('32768', 10, 16) == 32767 - assert strconv.parse_int('2147483647', 10, 32) == 2147483647 - assert strconv.parse_int('2147483648', 10, 32) == 2147483647 - assert strconv.parse_int('9223372036854775807', 10, 64) == 9223372036854775807 - assert strconv.parse_int('9223372036854775808', 10, 64) == 9223372036854775807 + assert strconv.parse_int('127', 10, 8) ? == 127 + assert strconv.parse_int('128', 10, 8) ? == 127 + assert strconv.parse_int('32767', 10, 16) ? == 32767 + assert strconv.parse_int('32768', 10, 16) ? == 32767 + assert strconv.parse_int('2147483647', 10, 32) ? == 2147483647 + assert strconv.parse_int('2147483648', 10, 32) ? == 2147483647 + assert strconv.parse_int('9223372036854775807', 10, 64) ? == 9223372036854775807 + assert strconv.parse_int('9223372036854775808', 10, 64) ? == 9223372036854775807 + assert strconv.parse_int('baobab', 36, 64) ? == 683058467 // Invalid bit sizes - assert strconv.parse_int('123', 10, 65) == 0 - assert strconv.parse_int('123', 10, -1) == 0 + if x := strconv.parse_int('123', 10, -1) { + println(x) + assert false + } else { + assert true + } + if x := strconv.parse_int('123', 10, 65) { + println(x) + assert false + } else { + assert true + } } fn test_common_parse_uint2() { diff --git a/vlib/strconv/number_to_base_test.v b/vlib/strconv/number_to_base_test.v index cd793180c..b313b0fff 100644 --- a/vlib/strconv/number_to_base_test.v +++ b/vlib/strconv/number_to_base_test.v @@ -34,6 +34,5 @@ fn test_format_uint() { assert strconv.format_int(255, 16) == 'ff' assert strconv.format_uint(18446744073709551615, 2) == '1111111111111111111111111111111111111111111111111111111111111111' assert strconv.format_uint(18446744073709551615, 16) == 'ffffffffffffffff' - assert strconv.parse_int('baobab', 36, 64) == 683058467 assert strconv.format_uint(683058467, 36) == 'baobab' } diff --git a/vlib/v/scanner/scanner.v b/vlib/v/scanner/scanner.v index 1314adb90..d4ac45abf 100644 --- a/vlib/v/scanner/scanner.v +++ b/vlib/v/scanner/scanner.v @@ -1221,7 +1221,7 @@ fn decode_u_escapes(s string, start int, escapes_pos []int) string { for i, pos in escapes_pos { idx := pos - start end_idx := idx + 6 // "\uXXXX".len == 6 - ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32))) + ss << utf32_to_str(u32(strconv.parse_uint(s[idx + 2..end_idx], 16, 32) or { 0 })) if i + 1 < escapes_pos.len { ss << s[end_idx..escapes_pos[i + 1] - start] } else { diff --git a/vlib/x/json2/scanner.v b/vlib/x/json2/scanner.v index c77f4373b..473a83b18 100644 --- a/vlib/x/json2/scanner.v +++ b/vlib/x/json2/scanner.v @@ -159,7 +159,7 @@ fn (mut s Scanner) text_scan() Token { if codepoint.len != 4 { return s.error('unicode escape must have 4 hex digits') } - val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32)) + val := u32(strconv.parse_uint(codepoint.bytestr(), 16, 32) or { 0 }) converted := utf32_to_str(val) converted_bytes := converted.bytes() chrs << converted_bytes -- 2.30.2