Skip to content

Commit

Permalink
[fix](function) fix error result in replace_empty with utf8 chars (apache#40869)
Browse files Browse the repository at this point in the history

## Proposed changes

```
-- Before this fix: each byte of a multi-byte UTF-8 character was treated as a
-- separate character, so the output contained invalid byte sequences.
mysql [(none)]>select  replace_empty('你a好b世c界','','b');
+-------------------------------------------+
| replace_empty('你a好b世c界', '', 'b')     |
+-------------------------------------------+
| b�b�b�bab�b�b�bbb�b�b�bcb�b�b�b                       |
+-------------------------------------------+


-- After this fix: the replacement is inserted between whole UTF-8 characters.
mysql [(none)]>select  replace_empty('你a好b世c界','','b');
+-------------------------------------------+
| replace_empty('你a好b世c界', '', 'b')     |
+-------------------------------------------+
| b你bab好bbb世bcb界b                       |
+-------------------------------------------+
```

<!--Describe your changes.-->
  • Loading branch information
Mryange authored Sep 19, 2024
1 parent 722fabe commit 2385734
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 25 deletions.
76 changes: 51 additions & 25 deletions be/src/vec/functions/function_string.h
Original file line number Diff line number Diff line change
Expand Up @@ -3292,26 +3292,39 @@ class FunctionReplace : public IFunction {
size_t result, size_t input_rows_count) const override {
// We need a local variable to hold a reference to the converted column.
// So that the converted column will not be released before we use it.
auto col_origin =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto col_origin_str = assert_cast<const ColumnString*>(col_origin.get());
auto col_old =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
auto col_old_str = assert_cast<const ColumnString*>(col_old.get());
auto col_new =
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
auto col_new_str = assert_cast<const ColumnString*>(col_new.get());
ColumnPtr col[3];
bool col_const[3];
for (size_t i = 0; i < 3; ++i) {
std::tie(col[i], col_const[i]) =
unpack_if_const(block.get_by_position(arguments[i]).column);
}

const auto* col_origin_str = assert_cast<const ColumnString*>(col[0].get());
const auto* col_old_str = assert_cast<const ColumnString*>(col[1].get());
const auto* col_new_str = assert_cast<const ColumnString*>(col[2].get());

ColumnString::MutablePtr col_res = ColumnString::create();
for (int i = 0; i < input_rows_count; ++i) {
StringRef origin_str = col_origin_str->get_data_at(i);
StringRef old_str = col_old_str->get_data_at(i);
StringRef new_str = col_new_str->get_data_at(i);

std::string result = replace(origin_str.to_string(), old_str.to_string_view(),
new_str.to_string_view());
col_res->insert_data(result.data(), result.length());
}
std::visit(
[&](auto origin_str_const, auto old_str_const, auto new_str_const) {
for (int i = 0; i < input_rows_count; ++i) {
StringRef origin_str =
col_origin_str->get_data_at(index_check_const<origin_str_const>(i));
StringRef old_str =
col_old_str->get_data_at(index_check_const<old_str_const>(i));
StringRef new_str =
col_new_str->get_data_at(index_check_const<new_str_const>(i));

std::string result =
replace(origin_str.to_string(), old_str.to_string_view(),
new_str.to_string_view());

col_res->insert_data(result.data(), result.length());
}
},
vectorized::make_bool_variant(col_const[0]),
vectorized::make_bool_variant(col_const[1]),
vectorized::make_bool_variant(col_const[2]));

block.replace_by_position(result, std::move(col_res));
return Status::OK();
Expand All @@ -3328,16 +3341,29 @@ class FunctionReplace : public IFunction {
if (new_str.empty()) {
return str;
}
std::string result;
ColumnString::check_chars_length(
str.length() * (new_str.length() + 1) + new_str.length(), 0);
result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
for (char c : str) {
if (simd::VStringFunctions::is_ascii({str.data(), str.size()})) {
std::string result;
ColumnString::check_chars_length(
str.length() * (new_str.length() + 1) + new_str.length(), 0);
result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
for (char c : str) {
result += new_str;
result += c;
}
result += new_str;
return result;
} else {
std::string result;
result.reserve(str.length() * (new_str.length() + 1) + new_str.length());
for (size_t i = 0, utf8_char_len = 0; i < str.size(); i += utf8_char_len) {
utf8_char_len = UTF8_BYTE_LENGTH[(unsigned char)str[i]];
result += new_str;
result.append(&str[i], utf8_char_len);
}
result += new_str;
result += c;
ColumnString::check_chars_length(result.size(), 0);
return result;
}
result += new_str;
return result;
}
} else {
std::string::size_type pos = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,12 @@ xyz
-- !sql_relace_empty08 --
abc

-- !sql_relace_empty09 --
b你bab好bbb世bcb界b

-- !sql_relace_empty10 --
你a好b世c界

-- !sql_right_Varchar_Integer --
\N
1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ suite("nereids_scalar_fn_R") {
qt_sql_relace_empty06 "select replace_empty('xyz', 'x', '');"
qt_sql_relace_empty07 "select replace_empty('xyz', '', '');"
qt_sql_relace_empty08 "select replace_empty('', '', 'abc');"
qt_sql_relace_empty09 "select replace_empty('你a好b世c界','','b');"
qt_sql_relace_empty10 "select replace_empty('你a好b世c界','','');"
qt_sql_right_Varchar_Integer "select right(kvchrs1, kint) from fn_test order by kvchrs1, kint"
qt_sql_right_Varchar_Integer_notnull "select right(kvchrs1, kint) from fn_test_not_nullable order by kvchrs1, kint"
qt_sql_right_String_Integer "select right(kstr, kint) from fn_test order by kstr, kint"
Expand Down

0 comments on commit 2385734

Please sign in to comment.