From 437fa02f278c51c8ce0aab231dba57f98394af40 Mon Sep 17 00:00:00 2001 From: Ned <7358345+nedpals@users.noreply.github.com> Date: Fri, 4 Mar 2022 19:39:23 +0800 Subject: [PATCH] x.json2: add customized JSON output capability via Encoder (#13654) --- .../tests/alexcrichton.toml-rs-tests_test.v | 16 +- vlib/toml/tests/burntsushi.toml-test_test.v | 18 +- vlib/toml/tests/iarna.toml-spec-tests_test.v | 27 +- vlib/toml/to/to.v | 13 +- vlib/x/json2/encoder.v | 291 +++++++++++------- vlib/x/json2/encoder_test.v | 56 +++- 6 files changed, 264 insertions(+), 157 deletions(-) diff --git a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v index 9a8142d2a..f43d79073 100644 --- a/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v +++ b/vlib/toml/tests/alexcrichton.toml-rs-tests_test.v @@ -236,13 +236,13 @@ fn to_alexcrichton(value ast.Value, array_type int) string { match value { ast.Quoted { json_text := json2.Any(value.text).json_str() - return '{ "type": "string", "value": "$json_text" }' + return '{ "type": "string", "value": $json_text }' } ast.DateTime { // Normalization for json mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T') - typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-') + typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-') || json_text.all_after('T').contains('+') { 'datetime' } else { @@ -252,16 +252,16 @@ fn to_alexcrichton(value ast.Value, array_type int) string { // It seems it's implementation specific how time and // date-time values are represented in detail. For now we follow the BurntSushi format // that expands to 6 digits which is also a valid RFC 3339 representation. - json_text = to_alexcrichton_time(json_text) + json_text = to_alexcrichton_time(json_text[1..json_text.len - 1]) return '{ "type": "$typ", "value": "$json_text" }' } ast.Date { json_text := json2.Any(value.text).json_str() - return '{ "type": "date", "value": "$json_text" }' + return '{ "type": "date", "value": $json_text }' } ast.Time { mut json_text := json2.Any(value.text).json_str() - json_text = to_alexcrichton_time(json_text) + json_text = to_alexcrichton_time(json_text[1..json_text.len - 1]) return '{ "type": "time", "value": "$json_text" }' } ast.Bool { @@ -270,12 +270,12 @@ fn to_alexcrichton(value ast.Value, array_type int) string { } ast.Null { json_text := json2.Any(value.text).json_str() - return '{ "type": "null", "value": "$json_text" }' + return '{ "type": "null", "value": $json_text }' } ast.Number { text := value.text if text.contains('inf') || text.contains('nan') { - return '{ "type": "float", "value": "$value.text" }' + return '{ "type": "float", "value": $value.text }' } if !text.starts_with('0x') && (text.contains('.') || text.to_lower().contains('e')) { mut val := '' @@ -297,7 +297,7 @@ fn to_alexcrichton(value ast.Value, array_type int) string { mut str := '{ ' for key, val in value { json_key := json2.Any(key).json_str() - str += ' "$json_key": ${to_alexcrichton(val, array_type)},' + str += ' $json_key: ${to_alexcrichton(val, array_type)},' } str = str.trim_right(',') str += ' }' diff --git a/vlib/toml/tests/burntsushi.toml-test_test.v b/vlib/toml/tests/burntsushi.toml-test_test.v index 418d6ea47..7dd6be207 100644 --- a/vlib/toml/tests/burntsushi.toml-test_test.v +++ b/vlib/toml/tests/burntsushi.toml-test_test.v @@ -199,26 +199,30 @@ fn to_burntsushi(value ast.Value) string { match value { ast.Quoted { json_text := json2.Any(value.text).json_str() - return 
'{ "type": "string", "value": "$json_text" }' + return '{ "type": "string", "value": $json_text }' } ast.DateTime { // Normalization for json json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T') - typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-') + + // NB: Since encoding strings in JSON now automatically includes quotes, + // I added a somewhat a workaround by adding an ending quote in order to + // recognize properly the date time type. - Ned + typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-') || json_text.all_after('T').contains('+') { 'datetime' } else { 'datetime-local' } - return '{ "type": "$typ", "value": "$json_text" }' + return '{ "type": "$typ", "value": $json_text }' } ast.Date { json_text := json2.Any(value.text).json_str() - return '{ "type": "date-local", "value": "$json_text" }' + return '{ "type": "date-local", "value": $json_text }' } ast.Time { json_text := json2.Any(value.text).json_str() - return '{ "type": "time-local", "value": "$json_text" }' + return '{ "type": "time-local", "value": $json_text }' } ast.Bool { json_text := json2.Any(value.text.bool()).json_str() @@ -226,7 +230,7 @@ fn to_burntsushi(value ast.Value) string { } ast.Null { json_text := json2.Any(value.text).json_str() - return '{ "type": "null", "value": "$json_text" }' + return '{ "type": "null", "value": $json_text }' } ast.Number { if value.text.contains('inf') || value.text.contains('nan') { @@ -251,7 +255,7 @@ fn to_burntsushi(value ast.Value) string { mut str := '{ ' for key, val in value { json_key := json2.Any(key).json_str() - str += ' "$json_key": ${to_burntsushi(val)},' + str += ' $json_key: ${to_burntsushi(val)},' } str = str.trim_right(',') str += ' }' diff --git a/vlib/toml/tests/iarna.toml-spec-tests_test.v b/vlib/toml/tests/iarna.toml-spec-tests_test.v index 278642f8a..9f8476284 100644 --- a/vlib/toml/tests/iarna.toml-spec-tests_test.v +++ b/vlib/toml/tests/iarna.toml-spec-tests_test.v @@ -288,15 +288,15 @@ fn to_iarna(value ast.Value, skip_value_map bool) string { ast.Quoted { json_text := json2.Any(value.text).json_str() if skip_value_map { - return '"$json_text"' + return json_text } - return '{ "type": "string", "value": "$json_text" }' + return '{ "type": "string", "value": $json_text }' } ast.DateTime { // Normalization for json mut json_text := json2.Any(value.text).json_str().to_upper().replace(' ', 'T') - typ := if json_text.ends_with('Z') || json_text.all_after('T').contains('-') + typ := if json_text.ends_with('Z"') || json_text.all_after('T').contains('-') || json_text.all_after('T').contains('+') { 'datetime' } else { @@ -306,40 +306,41 @@ fn to_iarna(value ast.Value, skip_value_map bool) string { // It seems it's implementation specific how time and // date-time values are represented in detail. For now we follow the BurntSushi format // that expands to 6 digits which is also a valid RFC 3339 representation. 
- json_text = to_iarna_time(json_text) + json_text = to_iarna_time(json_text[1..json_text.len - 1]) if skip_value_map { - return '"$json_text"' + return json_text } return '{ "type": "$typ", "value": "$json_text" }' } ast.Date { json_text := json2.Any(value.text).json_str() if skip_value_map { - return '"$json_text"' + return json_text } - return '{ "type": "date", "value": "$json_text" }' + return '{ "type": "date", "value": $json_text }' } ast.Time { mut json_text := json2.Any(value.text).json_str() - json_text = to_iarna_time(json_text) + // NB: Removes the quotes of the encoded JSON string - Ned + json_text = to_iarna_time(json_text[1..json_text.len - 1]) if skip_value_map { - return '"$json_text"' + return json_text } return '{ "type": "time", "value": "$json_text" }' } ast.Bool { json_text := json2.Any(value.text.bool()).json_str() if skip_value_map { - return '$json_text' + return json_text } return '{ "type": "bool", "value": "$json_text" }' } ast.Null { json_text := json2.Any(value.text).json_str() if skip_value_map { - return '$json_text' + return json_text } - return '{ "type": "null", "value": "$json_text" }' + return '{ "type": "null", "value": $json_text }' } ast.Number { if value.text.contains('inf') { @@ -384,7 +385,7 @@ fn to_iarna(value ast.Value, skip_value_map bool) string { mut str := '{ ' for key, val in value { json_key := json2.Any(key).json_str() - str += ' "$json_key": ${to_iarna(val, skip_value_map)},' + str += ' $json_key: ${to_iarna(val, skip_value_map)},' } str = str.trim_right(',') str += ' }' diff --git a/vlib/toml/to/to.v b/vlib/toml/to/to.v index 502cfdfa9..c06f6a397 100644 --- a/vlib/toml/to/to.v +++ b/vlib/toml/to/to.v @@ -27,19 +27,16 @@ fn any_to_json(a toml.Any) string { return 'null' } toml.DateTime { - json_text := json2.Any(a.str()) - return '"$json_text.json_str()"' + return json2.Any(a.str()).json_str() } toml.Date { - json_text := json2.Any(a.str()) - return '"$json_text.json_str()"' + return json2.Any(a.str()).json_str() } toml.Time { - json_text := json2.Any(a.str()) - return '"$json_text.json_str()"' + return json2.Any(a.str()).json_str() } string { - return '"' + json2.Any(a.str()).json_str() + '"' + return json2.Any(a.str()).json_str() } bool { return json2.Any(bool(a)).json_str() @@ -63,7 +60,7 @@ fn any_to_json(a toml.Any) string { mut str := '{' for key, val in a { json_key := json2.Any(key) - str += ' "$json_key.json_str()": ${any_to_json(val)},' + str += ' $json_key.json_str(): ${any_to_json(val)},' } str = str.trim_right(',') str += ' }' diff --git a/vlib/x/json2/encoder.v b/vlib/x/json2/encoder.v index d91247e51..20ae67ea6 100644 --- a/vlib/x/json2/encoder.v +++ b/vlib/x/json2/encoder.v @@ -3,174 +3,234 @@ // that can be found in the LICENSE file. module json2 +import io import strings -fn write_value(v Any, i int, len int, mut wr strings.Builder) { - str := v.json_str() - if v is string { - wr.write_string('"$str"') - } else { - wr.write_string(str) - } - if i >= len - 1 { - return - } - wr.write_byte(`,`) +// Encoder encodes the an `Any` type into JSON representation. +// It provides parameters in order to change the end result. +pub struct Encoder { + newline byte + newline_spaces_count int + escape_unicode bool = true } -// str returns the string representation of the `map[string]Any`. 
-[manualfree] -pub fn (flds map[string]Any) str() string { - mut wr := strings.new_builder(200) - wr.write_byte(`{`) - mut i := 0 - for k, v in flds { - wr.write_string('"$k":') - write_value(v, i, flds.len, mut wr) - i++ - } - wr.write_byte(`}`) - defer { - unsafe { wr.free() } - } - res := wr.str() - return res -} +// byte array versions of the most common tokens/chars +// to avoid reallocations +const null_in_bytes = 'null'.bytes() -// str returns the string representation of the `[]Any`. -[manualfree] -pub fn (flds []Any) str() string { - mut wr := strings.new_builder(200) - wr.write_byte(`[`) - for i, v in flds { - write_value(v, i, flds.len, mut wr) - } - wr.write_byte(`]`) - defer { - unsafe { wr.free() } - } - res := wr.str() - return res +const true_in_bytes = 'true'.bytes() + +const false_in_bytes = 'false'.bytes() + +const zero_in_bytes = [byte(`0`)] + +const comma_bytes = [byte(`,`)] + +const colon_bytes = [byte(`:`)] + +const space_bytes = [byte(` `)] + +const unicode_escape_chars = [byte(`\\`), `u`] + +const quote_bytes = [byte(`"`)] + +const escaped_chars = [(r'\b').bytes(), (r'\f').bytes(), (r'\n').bytes(), + (r'\r').bytes(), (r'\t').bytes()] + +// encode_value encodes an `Any` value to the specific writer. +pub fn (e &Encoder) encode_value(f Any, mut wr io.Writer) ? { + e.encode_value_with_level(f, 1, mut wr) ? } -// str returns the string representation of the `Any` type. Use the `json_str` method -// if you want to use the escaped str() version of the `Any` type. -pub fn (f Any) str() string { - if f is string { - return f - } else { - return f.json_str() +fn (e &Encoder) encode_newline(level int, mut wr io.Writer) ? { + if e.newline != 0 { + wr.write([e.newline]) ? + for j := 0; j < level * e.newline_spaces_count; j++ { + wr.write(json2.space_bytes) ? + } } } -// json_str returns the JSON string representation of the `Any` type. -pub fn (f Any) json_str() string { +fn (e &Encoder) encode_value_with_level(f Any, level int, mut wr io.Writer) ? { match f { string { - return json_string(f) - } - bool, int, u64, i64 { - return f.str() + e.encode_string(f, mut wr) ? } - f32 { - $if !nofloat ? { - str_f32 := f.str() - if str_f32.ends_with('.') { - return '${str_f32}0' - } - return str_f32 + bool { + if f == true { + wr.write(json2.true_in_bytes) ? + } else { + wr.write(json2.false_in_bytes) ? } - - return '0' } - f64 { + int, u64, i64 { + wr.write(f.str().bytes()) ? + } + f32, f64 { $if !nofloat ? { - str_f64 := f.str() - if str_f64.ends_with('.') { - return '${str_f64}0' + str_float := f.str().bytes() + wr.write(str_float) ? + if str_float[str_float.len - 1] == `.` { + wr.write(json2.zero_in_bytes) ? } - return str_f64 + return } - return '0' + wr.write(json2.zero_in_bytes) ? } map[string]Any { - return f.str() + wr.write([byte(`{`)]) ? + mut i := 0 + for k, v in f { + e.encode_newline(level, mut wr) ? + e.encode_string(k, mut wr) ? + wr.write(json2.colon_bytes) ? + if e.newline != 0 { + wr.write(json2.space_bytes) ? + } + e.encode_value_with_level(v, level + 1, mut wr) ? + if i < f.len - 1 { + wr.write(json2.comma_bytes) ? + } + i++ + } + e.encode_newline(level - 1, mut wr) ? + wr.write([byte(`}`)]) ? } []Any { - return f.str() + wr.write([byte(`[`)]) ? + for i, v in f { + e.encode_newline(level, mut wr) ? + e.encode_value_with_level(v, level + 1, mut wr) ? + if i < f.len - 1 { + wr.write(json2.comma_bytes) ? + } + } + e.encode_newline(level - 1, mut wr) ? + wr.write([byte(`]`)]) ? } Null { - return 'null' + wr.write(json2.null_in_bytes) ? 
} } } -// char_len_list is a modified version of builtin.utf8_str_len -// that returns an array of character lengths. (e.g "tβœ”" => [1,2]) -fn char_len_list(s string) []int { - mut l := 1 - mut ls := []int{} - for i := 0; i < s.len; i++ { - c := s[i] - if (c & (1 << 7)) != 0 { - for t := byte(1 << 6); (c & t) != 0; t >>= 1 { - l++ - i++ - } - } - ls << l - l = 1 +// str returns the JSON string representation of the `map[string]Any` type. +pub fn (f map[string]Any) str() string { + return Any(f).json_str() +} + +// str returns the JSON string representation of the `[]Any` type. +pub fn (f []Any) str() string { + return Any(f).json_str() +} + +// str returns the string representation of the `Any` type. Use the `json_str` method +// if you want to use the escaped str() version of the `Any` type. +pub fn (f Any) str() string { + if f is string { + return f + } else { + return f.json_str() } - return ls } -const escaped_chars = [r'\b', r'\f', r'\n', r'\r', r'\t'] +// json_str returns the JSON string representation of the `Any` type. +[manualfree] +pub fn (f Any) json_str() string { + mut sb := strings.new_builder(4096) + defer { + unsafe { sb.free() } + } + mut enc := Encoder{} + enc.encode_value(f, mut sb) or { return '' } + return sb.str() +} -// json_string returns the JSON spec-compliant version of the string. +// prettify_json_str returns the pretty-formatted JSON string representation of the `Any` type. [manualfree] -fn json_string(s string) string { - // not the best implementation but will revisit it soon - char_lens := char_len_list(s) - mut sb := strings.new_builder(s.len) - mut i := 0 +pub fn (f Any) prettify_json_str() string { + mut sb := strings.new_builder(4096) + defer { + unsafe { sb.free() } + } + mut enc := Encoder{ + newline: `\n` + newline_spaces_count: 4 + } + enc.encode_value(f, mut sb) or { return '' } + return sb.str() +} + +// CharLengthIterator is an iterator that generates a char +// length value of every iteration based on the given text. +// (e.g.: "tβœ”" => [t => 1, βœ” => 2]) +struct CharLengthIterator { + text string +mut: + idx int +} + +fn (mut iter CharLengthIterator) next() ?int { + if iter.idx >= iter.text.len { + return none + } defer { - unsafe { - char_lens.free() - // freeing string builder on defer after - // returning .str() still isn't working :( - // sb.free() + iter.idx++ + } + mut len := 1 + c := iter.text[iter.idx] + if (c & (1 << 7)) != 0 { + for t := byte(1 << 6); (c & t) != 0; t >>= 1 { + len++ + iter.idx++ } } + return len +} + +// encode_string returns the JSON spec-compliant version of the string. +[manualfree] +fn (e &Encoder) encode_string(s string, mut wr io.Writer) ? { + mut char_lens := CharLengthIterator{ + text: s + } + mut i := 0 + wr.write(json2.quote_bytes) ? for char_len in char_lens { if char_len == 1 { chr := s[i] if chr in important_escapable_chars { for j := 0; j < important_escapable_chars.len; j++ { if chr == important_escapable_chars[j] { - sb.write_string(json2.escaped_chars[j]) + wr.write(json2.escaped_chars[j]) ? break } } } else if chr == `"` || chr == `/` || chr == `\\` { - sb.write_string('\\' + chr.ascii_str()) + wr.write([byte(`\\`), chr]) ? } else if int(chr) < 0x20 { - hex_code := chr.hex() - sb.write_string('\\u00$hex_code') + hex_code := chr.hex().bytes() + wr.write(json2.unicode_escape_chars) ? // \u + wr.write(json2.zero_in_bytes) ? // \u0 + wr.write(json2.zero_in_bytes) ? // \u00 + wr.write(hex_code) ? // \u00xxxx } else { - sb.write_byte(chr) + wr.write([byte(chr)]) ? 
} } else { slice := s[i..i + char_len] - hex_code := slice.utf32_code().hex() - if hex_code.len < 4 { - // an utf8 codepoint - sb.write_string(slice) + hex_code := slice.utf32_code().hex().bytes() + if !e.escape_unicode || hex_code.len < 4 { + // unescaped non-ASCII char + wr.write(slice.bytes()) ? } else if hex_code.len == 4 { - sb.write_string('\\u$hex_code') + // a unicode endpoint + wr.write(json2.unicode_escape_chars) ? + wr.write(hex_code) ? } else { // TODO: still figuring out what // to do with more than 4 chars - sb.write_byte(` `) + wr.write(json2.space_bytes) ? } unsafe { slice.free() @@ -179,7 +239,6 @@ fn json_string(s string) string { } i += char_len } - str := sb.str() - unsafe { sb.free() } - return str + + wr.write(json2.quote_bytes) ? } diff --git a/vlib/x/json2/encoder_test.v b/vlib/x/json2/encoder_test.v index 02ba71ef0..07ce6f75f 100644 --- a/vlib/x/json2/encoder_test.v +++ b/vlib/x/json2/encoder_test.v @@ -1,20 +1,21 @@ import x.json2 +import strings fn test_json_string_characters() { text := json2.raw_decode(r'"\n\r\b\f\t\\\"\/"') or { '' } - assert text.json_str() == '\\n\\r\\b\\f\\t\\\\\\"\\/' + assert text.json_str() == '"\\n\\r\\b\\f\\t\\\\\\"\\/"' } fn test_json_escape_low_chars() { esc := '\u001b' assert esc.len == 1 text := json2.Any(esc) - assert text.json_str() == r'\u001b' + assert text.json_str() == r'"\u001b"' } fn test_json_string() { text := json2.Any('teβœ”st') - assert text.json_str() == r'te\u2714st' + assert text.json_str() == r'"te\u2714st"' boolean := json2.Any(true) assert boolean.json_str() == 'true' integer := json2.Any(int(-5)) @@ -27,12 +28,12 @@ fn test_json_string() { fn test_json_string_emoji() { text := json2.Any('🐈') - assert text.json_str() == r' ' + assert text.json_str() == r'" "' } fn test_json_string_non_ascii() { text := json2.Any('γ²γ‚‰γŒγͺ') - assert text.json_str() == r'\u3072\u3089\u304c\u306a' + assert text.json_str() == r'"\u3072\u3089\u304c\u306a"' } fn test_utf8_strings_are_not_modified() ? { @@ -42,3 +43,48 @@ fn test_utf8_strings_are_not_modified() ? { // dump(deresult) assert deresult.str() == original } + +fn test_encoder_unescaped_utf32() ? { + jap_text := json2.Any('γ²γ‚‰γŒγͺ') + enc := json2.Encoder{ + escape_unicode: false + } + + mut sb := strings.new_builder(20) + enc.encode_value(jap_text, mut sb) ? + + assert sb.str() == '"$jap_text"' + sb.go_back_to(0) + + emoji_text := json2.Any('🐈') + enc.encode_value(emoji_text, mut sb) ? + assert sb.str() == '"$emoji_text"' +} + +fn test_encoder_prettify() ? { + obj := { + 'hello': json2.Any('world') + 'arr': [json2.Any('im a string'), [json2.Any('3rd level')]] + 'obj': { + 'map': json2.Any('map inside a map') + } + } + enc := json2.Encoder{ + newline: `\n` + newline_spaces_count: 2 + } + mut sb := strings.new_builder(20) + enc.encode_value(obj, mut sb) ? + assert sb.str() == '{ + "hello": "world", + "arr": [ + "im a string", + [ + "3rd level" + ] + ], + "obj": { + "map": "map inside a map" + } +}' +} -- 2.30.2
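
For reference, a minimal usage sketch of the Encoder API this patch adds (the map contents, builder size, and printed values below are illustrative examples, not part of the patch itself):

import x.json2
import strings

fn main() {
	obj := {
		'name': json2.Any('V')
		'tags': [json2.Any('fast'), json2.Any('simple')]
	}
	// Default compact output via json_str()/str(), e.g. {"name":"V","tags":["fast","simple"]}
	println(obj.str())
	// Customized output via the new Encoder: pretty-printed with 2-space
	// indentation and non-ASCII strings left unescaped.
	enc := json2.Encoder{
		newline: `\n`
		newline_spaces_count: 2
		escape_unicode: false
	}
	mut sb := strings.new_builder(64)
	enc.encode_value(obj, mut sb) or { panic(err) }
	println(sb.str())
	// prettify_json_str() is the built-in shortcut for a 4-space-indented Encoder.
	println(json2.Any(obj).prettify_json_str())
}

Because encode_value writes to any io.Writer, the same Encoder can stream JSON to a file or socket instead of a strings.Builder.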