diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index b8e08086d63301..d44e6198170b1a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; import org.apache.doris.nereids.trees.expressions.Like; +import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.Not; import org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; @@ -486,7 +487,8 @@ public Statistics visitNot(Not not, EstimationContext context) { child instanceof EqualPredicate || child instanceof InPredicate || child instanceof IsNull - || child instanceof Like, + || child instanceof Like + || child instanceof Match, "Not-predicate meet unexpected child: %s", child.toSql()); if (child instanceof Like) { rowCount = context.statistics.getRowCount() - childStats.getRowCount(); @@ -509,6 +511,9 @@ public Statistics visitNot(Not not, EstimationContext context) { .setMinExpr(originColStats.minExpr) .setMaxValue(originColStats.maxValue) .setMaxExpr(originColStats.maxExpr); + } else if (child instanceof Match) { + rowCount = context.statistics.getRowCount() - childStats.getRowCount(); + colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); } if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) { // only consider the single column numNull, otherwise, ignore diff --git a/regression-test/data/inverted_index_p0/test_or_not_match.out b/regression-test/data/inverted_index_p0/test_or_not_match.out new file mode 100644 index 00000000000000..22dde7a8bf7a00 --- /dev/null +++ 
b/regression-test/data/inverted_index_p0/test_or_not_match.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +GET / HTTP/1.0 +GET / HTTP/1.0 + diff --git a/regression-test/suites/inverted_index_p0/test_or_not_match.groovy b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy new file mode 100644 index 00000000000000..95af26480c9ea8 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+// Runs a `like ... or not ... MATCH` filter with Nereids forced on (no fallback) and compares it with the recorded output.
+suite("test_or_not_match", "p0") {
+    def testTable = "test_or_not_match"
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    sql """
+        CREATE TABLE ${testTable} (
+            `@timestamp` int(11) NULL COMMENT "",
+            `clientip` varchar(20) NULL COMMENT "",
+            `request` text NULL COMMENT "",
+            `status` int(11) NULL COMMENT "",
+            `size` int(11) NULL COMMENT "",
+            INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT ''
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`@timestamp`)
+        COMMENT "OLAP"
+        DISTRIBUTED BY RANDOM BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        );
+    """
+    // Fill the table from the JSON fixture through a stream load.
+    streamLoad {
+        table "${testTable}"
+
+        // HTTP request headers: JSON format, read line by line.
+        set 'read_json_by_line', 'true'
+        set 'format', 'json'
+        file 'documents-1000.json' // JSON file to import
+        time 10000 // limit inflight 10s
+
+        // Declaring a check callback replaces the default check condition,
+        // so every condition has to be verified here explicitly.
+        check { response, error, startTime, endTime ->
+            if (error != null) { throw error }
+            log.info("Stream load result: ${response}".toString())
+            def loadInfo = parseJson(response)
+            assertEquals("success", loadInfo.Status.toLowerCase())
+            assertTrue(loadInfo.NumberLoadedRows > 0 && loadInfo.LoadBytes > 0)
+        }
+    }
+    // Issue the filter query ten times before the planner settings below are applied.
+    10.times {
+        sql "select request from ${testTable} where request like '1.0' or not request MATCH 'GETA';"
+    }
+
+    sql "set enable_nereids_planner = true"
+    sql "set enable_fallback_to_original_planner = false"
+
+    qt_sql "select request from ${testTable} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;"
+}