diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index 17250e10fd77d0..f8d1670c9c8125 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -576,6 +576,7 @@ struct ConvertImplGenericFromString { const bool is_complex = is_complex_type(data_type_to); DataTypeSerDe::FormatOptions format_options; format_options.converted_from_string = true; + format_options.escape_char = '\\'; for (size_t i = 0; i < size; ++i) { const auto& val = col_from_string->get_data_at(i); diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv new file mode 100644 index 00000000000000..e4f859e7511b1b --- /dev/null +++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv @@ -0,0 +1,5 @@ +1 \N +2 ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}'] +3 ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar'] +4 ['\/some\/cool\/url', '/some/cool/url', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e'] +5 ["\"双引号\"", "反斜\\线"] \ No newline at end of file diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.json b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json new file mode 100644 index 00000000000000..de718c8efdea5a --- /dev/null +++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.json @@ -0,0 +1,5 @@ +{"id":1,"a":null} +{"id":2,"a":['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', \'{"x" : 1}']} +{"id":3,"a":['foo\'bar', 'foo\"bar', 'foo\\\'bar', 'foo\'\'bar']} +{"id":4,"a":['\/some\/cool\/url', '/some/cool/url', 'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']} +{"id":5,"a":["\"双引号\"", "反斜\\线"]} \ No newline at end of file diff --git a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out new file mode 100644 index 00000000000000..99fb23ef9eed17 --- /dev/null +++ b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out @@ -0,0 +1,15 @@ +-- This 
file is automatically generated. You should know what you did if you want to edit this +-- !select_csv -- +1 \N +2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"] +3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"] +4 ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"] +5 [""双引号"", "反斜\\线"] + +-- !select_json -- +1 \N +2 ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "'{"x" : 1}'"] +3 ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"] +4 ["/some/cool/url", "/some/cool/url", "a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"] +5 [""双引号"", "反斜\\线"] + diff --git a/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy new file mode 100644 index 00000000000000..b728c46cb20c93 --- /dev/null +++ b/regression-test/suites/jsonb_p0/test_jsonb_with_unescaped_string.groovy @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods

// Regression suite: loads the same five rows of array data, whose string
// elements are full of quotes and backslashes, first from a CSV file and then
// from a line-delimited JSON file. After each load the table contents are
// queried in id order and compared with the recorded select_csv / select_json
// results.
suite("test_jsonb_with_unescaped_string", "p0") {

    // Define a table with an array column whose values contain escape
    // characters; these values should also be castable to JSON.
    def testTable = "tbl_unescaped_jsonb"
    def dataFile = "test_jsonb_unescaped.csv"
    def dataFileJson = "test_jsonb_unescaped.json"

    sql """ set experimental_enable_nereids_planner = true """
    sql """ set enable_fallback_to_original_planner = true """

    sql "DROP TABLE IF EXISTS ${testTable}"

    // NOTE(review): the array column had no element type in the text this was
    // reconstructed from (bare `ARRAY`), which is not a valid column type.
    // STRING is assumed because every element in both data files is a string;
    // confirm against the upstream test.
    sql """
        CREATE TABLE IF NOT EXISTS ${testTable} (
            id INT,
            a ARRAY<STRING>,
        )
        DUPLICATE KEY(id)
        DISTRIBUTED BY HASH(id) BUCKETS 3
        PROPERTIES("replication_num" = "1");
        """

    // Load the data from the CSV file.
    streamLoad {
        table testTable

        file dataFile // import csv file
        time 10000 // limit inflight 10s
        set 'strict_mode', 'true'

        // Declaring a check callback replaces the default result checks,
        // so every condition has to be verified here.
        check { result, exception, startTime, endTime ->
            if (exception != null) {
                throw exception
            }
            log.info("Stream load result: ${result}".toString())
            def json = parseJson(result)
            // All five rows of the file must be read and loaded.
            assertEquals(5, json.NumberTotalRows)
            assertEquals(5, json.NumberLoadedRows)
            assertTrue(json.LoadBytes > 0)
        }
    }

    sql """ sync; """

    // Check the result of the CSV load.
    qt_select_csv "SELECT * FROM ${testTable} ORDER BY id"

    // Empty the table so the JSON load is checked on its own.
    sql "truncate table ${testTable}"

    // Load the same data from the JSON file.
    streamLoad {
        table testTable

        file dataFileJson // import json file
        time 10000 // limit inflight 10s
        set 'format', 'json' // import format
        set 'read_json_by_line', 'true' // read json by line
        set 'strict_mode', 'true'

        // Declaring a check callback replaces the default result checks,
        // so every condition has to be verified here.
        check { result, exception, startTime, endTime ->
            if (exception != null) {
                throw exception
            }
            log.info("Stream load result: ${result}".toString())
            def json = parseJson(result)
            // All five rows of the file must be read and loaded.
            assertEquals(5, json.NumberTotalRows)
            assertEquals(5, json.NumberLoadedRows)
            assertTrue(json.LoadBytes > 0)
        }
    }

    sql """ sync; """

    // Check the result of the JSON load.
    qt_select_json "SELECT * FROM ${testTable} ORDER BY id"
}