Skip to content

Commit

Permalink
[fix](delete) Fix unrecognized column name delete handler (apache#32429)
Browse files Browse the repository at this point in the history
  • Loading branch information
gavinchou authored Mar 26, 2024
1 parent e834b45 commit a7052ab
Show file tree
Hide file tree
Showing 5 changed files with 179 additions and 32 deletions.
73 changes: 44 additions & 29 deletions be/src/olap/delete_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,15 @@ Status DeleteHandler::generate_delete_predicate(const TabletSchema& schema,
<< "condition size=" << in_pred->values().size();
} else {
// write sub predicate v1 for compactbility
del_pred->add_sub_predicates(construct_sub_predicate(condition));
std::string condition_str = construct_sub_predicate(condition);
if (TCondition tmp; !DeleteHandler::parse_condition(condition_str, &tmp)) {
LOG(WARNING) << "failed to parse condition_str, condtion="
<< ThriftDebugString(condition);
return Status::Error<DELETE_INVALID_CONDITION>(
"failed to parse condition_str, condtion={}", ThriftDebugString(condition));
}
VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << condition_str;
del_pred->add_sub_predicates(condition_str);
DeleteSubPredicatePB* sub_predicate = del_pred->add_sub_predicates_v2();
if (condition.__isset.column_unique_id) {
sub_predicate->set_column_unique_id(condition.column_unique_id);
Expand Down Expand Up @@ -127,8 +135,9 @@ std::string DeleteHandler::construct_sub_predicate(const TCondition& condition)
}
string condition_str;
if ("IS" == op) {
condition_str = condition.column_name + " " + op + " " + condition.condition_values[0];
} else {
// ATTN: tricky! Surround IS with spaces to make it "special"
condition_str = condition.column_name + " IS " + condition.condition_values[0];
} else { // multi-elements IN expr has been processed with InPredicatePB
if (op == "*=") {
op = "=";
} else if (op == "!*=") {
Expand Down Expand Up @@ -286,44 +295,50 @@ Status DeleteHandler::parse_condition(const DeleteSubPredicatePB& sub_cond, TCon
return Status::OK();
}

// clang-format off
// Condition string format, the format is (column_name)(op)(value)
// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
// and compactible with any the lagacy
// operator: matches "="
// value: matches "1597751948193618247 and length(source)<1;\n;\n"
//
// For more info, see DeleteHandler::construct_sub_predicates
// FIXME(gavin): support unicode. And this is a tricky implementation, it should
// not be the final resolution, refactor it.
const char* const CONDITION_STR_PATTERN =
// .----------------- column-name ----------------. .----------------------- operator ------------------------. .------------ value ----------.
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?: IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
// '----------------- group 1 --------------------' '--------------------- group 2 ---------------------------' | '-- group 4--' |
// match any of: = != >> << >= <= *= " IS " '----------- group 3 ---------'
// match **ANY THING** without(4)
// or with(3) single quote
boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
// clang-format on

Status DeleteHandler::parse_condition(const std::string& condition_str, TCondition* condition) {
bool matched = true;
bool matched = false;
boost::smatch what;

try {
// Condition string format, the format is (column_name)(op)(value)
// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
// group1: (\w+) matches "c1"
// group2: ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) matches "="
// group3: ((?:[\s\S]+)?) matches "1597751948193618247 and length(source)<1;\n;\n"
const char* const CONDITION_STR_PATTERN =
R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
boost::regex ex(CONDITION_STR_PATTERN);
if (boost::regex_match(condition_str, what, ex)) {
if (condition_str.size() != what[0].str().size()) {
matched = false;
}
} else {
matched = false;
}
VLOG_NOTICE << "condition_str: " << condition_str;
matched = boost::regex_match(condition_str, what, DELETE_HANDLER_REGEX) &&
condition_str.size() == what[0].str().size(); // exact match
} catch (boost::regex_error& e) {
VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; error=" << e.what()
<< "]";
matched = false;
}

if (!matched) {
return Status::Error<DELETE_INVALID_PARAMETERS>("fail to sub condition. condition={}",
condition_str);
}
condition->column_name = what[1].str();
condition->condition_op = what[2].str();
if (what[4].matched) { // match string with single quotes, eg. a = 'b'
condition->condition_values.push_back(what[4].str());
} else { // match string without quote, compat with old conditions, eg. a = b
condition->condition_values.push_back(what[3].str());
}

condition->column_name = what[1].str();
condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
// match string with single quotes, a = b or a = 'b'
condition->condition_values.push_back(what[3 + !!what[4].matched].str());
VLOG_NOTICE << "parsed condition_str: col_name={" << condition->column_name << "} op={"
<< condition->condition_op << "} val={" << condition->condition_values.back()
<< "}";
return Status::OK();
}

Expand Down
10 changes: 9 additions & 1 deletion be/src/olap/delete_handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ class DeleteHandler {

static void convert_to_sub_pred_v2(DeletePredicatePB* delete_pred, TabletSchemaSPtr schema);

/**
* Use regular expression to extract 'column_name', 'op' and 'operands'
*
* @param condition_str input predicate string in form of `X OP Y`
* @param condition output param
* @return OK if matched and extracted correctly otherwise DELETE_INVALID_PARAMETERS
*/
static Status parse_condition(const std::string& condition_str, TCondition* condition);

private:
// Validate the condition on the schema.
static Status check_condition_valid(const TabletSchema& tablet_schema, const TCondition& cond);
Expand All @@ -87,7 +96,6 @@ class DeleteHandler {

// extract 'column_name', 'op' and 'operands' to condition
static Status parse_condition(const DeleteSubPredicatePB& sub_cond, TCondition* condition);
static Status parse_condition(const std::string& condition_str, TCondition* condition);

public:
DeleteHandler() = default;
Expand Down
40 changes: 40 additions & 0 deletions be/test/olap/delete_handler_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1190,4 +1190,44 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
EXPECT_EQ(Status::OK(), res);
}

// clang-format off
TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
auto test = [](const std::tuple<std::string, bool, TCondition>& in) {
auto& [cond_str, exp_succ, exp_cond] = in;
TCondition parsed_cond;
EXPECT_EQ(DeleteHandler::parse_condition(cond_str, &parsed_cond), exp_succ) << " unexpected result, cond_str: " << cond_str;
if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result, cond_str: " << cond_str;
};

auto gen_cond = [](const std::string& col, const std::string& op, const std::string& val) {
TCondition cond;
cond.__set_column_name(col);
cond.__set_condition_op(op);
cond.__set_condition_values(std::vector<std::string>{val});
return cond;
};

// <cond_str, parsed, expect_value>>
std::vector<std::tuple<std::string, bool, TCondition>> test_input {
{R"(abc=b)" , true, gen_cond(R"(abc)" , "=" , R"(b)" )}, // normal case
{R"(abc!=b)" , true, gen_cond(R"(abc)" , "!=", R"(b)" )}, // normal case
{R"(abc<=b)" , true, gen_cond(R"(abc)" , "<=", R"(b)" )}, // normal case
{R"(abc>=b)" , true, gen_cond(R"(abc)" , ">=", R"(b)" )}, // normal case
{R"(abc>>b)" , true, gen_cond(R"(abc)" , ">>", R"(b)" )}, // normal case
{R"(abc<<b)" , true, gen_cond(R"(abc)" , "<<", R"(b)" )}, // normal case
{R"(abc!='b')" , true, gen_cond(R"(abc)" , "!=", R"(b)" )}, // value surrounded by '
{R"(abc=)" , true, gen_cond(R"(abc)" , "=" , R"()" )}, // missing value, it means not to be parsed succefully, how every it's a ignorable bug
{R"(@a*<<10086)" , true, gen_cond(R"(@a*)" , "<<", R"(10086)" )}, // column ends with *
{R"(*a=b)" , false, gen_cond(R"(*a)" , "=" , R"(b)" )}, // starts with *
{R"(a*a>>WTF(10086))" , true, gen_cond(R"(a*a)" , ">>", R"(WTF(10086))")}, // function
{R"(a-b IS NULL)" , true, gen_cond(R"(a-b)" , "IS", R"(NULL)" )}, // - in col name and test IS NULL
{R"(@a*-b IS NOT NULL)" , true, gen_cond(R"(@a*-b)" , "IS", R"(NOT NULL)" )}, // test IS NOT NULL
{R"(a IS b IS NOT NULL)", true, gen_cond(R"(a IS b)", "IS", R"(NOT NULL)" )}, // test " IS " in column name
{R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true, gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, // hellbound column name
{R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name=long)", true, gen_cond(R"(this is a col very loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon colum name)", "=", R"(long)")}, // test " IS " in column name
};
for (auto& i : test_input) { test(i); }
}
// clang-format on

} // namespace doris
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ public class FeNameFormat {
private static final String UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
private static final String COLUMN_NAME_REGEX = "^[_a-zA-Z@0-9\\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*\"\\s,:]{0,255}$";
private static final String COLUMN_NAME_REGEX = "^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";

private static final String UNICODE_LABEL_REGEX = "^[-_A-Za-z0-9:\\p{L}]{1,128}$";
private static final String UNICODE_COMMON_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_UNDERSCORE_COMMON_NAME_REGEX = "^[_a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
private static final String UNICODE_TABLE_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
private static final String UNICODE_USER_NAME_REGEX = "^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
private static final String UNICODE_COLUMN_NAME_REGEX
= "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/><?@#$%^&*\\p{L}]{0,255}$";
= "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";

public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";

Expand Down
84 changes: 84 additions & 0 deletions regression-test/suites/delete_p0/test_delete_handler.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

/**
* Test delete with strange name
*/
suite("test_delete_handler") {
// test condition operator
sql """drop table if exists td1;"""
sql """
CREATE TABLE `td1` (
`id` int(11) NULL,
`name` varchar(255) NULL,
`score` int(11) NULL
) ENGINE=OLAP
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
"""

sql """insert into td1(id,name,`score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """insert into td1(id,name,`score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """insert into td1(id,name,`score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""

sql """delete from td1 where `score` = 4 and `score` = 3 and `score` = 1;"""
sql """delete from td1 where `score` is null;"""
sql """delete from td1 where `score` is not null;"""
sql """delete from td1 where `score` in (1,2);"""
sql """delete from td1 where `score` not in (3,4);"""
sql """select * from td1;"""
sql """insert into td1(id,name,`score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """select * from td1;"""


// test column name
sql """drop table if exists td2;"""
sql """
CREATE TABLE `td2` (
`id` int(11) NULL,
`name` varchar(255) NULL,
`@score` int(11) NULL,
`scoreIS` int(11) NULL,
`sc ore` int(11) NULL,
`score IS score` int(11) NULL,
`_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` int(11) NULL
) ENGINE=OLAP
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES ( "replication_num" = "1" );
"""

sql """insert into td2(id,name,`@score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """insert into td2(id,name,`@score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """insert into td2(id,name,`@score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""

sql """select * from td2;"""
sql """delete from td2 where `@score` = 88;"""
sql """delete from td2 where `scoreIS` is null;"""
sql """delete from td2 where `score IS score` is null;"""
sql """delete from td2 where `sc ore` is null;"""
sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is null;"""
sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is not null;"""
sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` in (1,2,3);"""
sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` not in (1,2,3);"""
sql """select * from td2;"""
sql """insert into td2(id,name,`@score`) values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
sql """select * from td2;"""

}

0 comments on commit a7052ab

Please sign in to comment.