Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement lucene pushdown on ST_DISTANCE for Equality #110348

Merged
merged 2 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ public class CsvTestsDataLoader {
"mapping-cartesian_multipolygons.json",
"cartesian_multipolygons.csv"
);
private static final TestsDataset DISTANCES = new TestsDataset("distances", "mapping-distances.json", "distances.csv");

private static final TestsDataset K8S = new TestsDataset("k8s", "k8s-mappings.json", "k8s.csv", "k8s-settings.json", true);

Expand All @@ -119,7 +120,8 @@ public class CsvTestsDataLoader {
Map.entry(COUNTRIES_BBOX_WEB.indexName, COUNTRIES_BBOX_WEB),
Map.entry(AIRPORT_CITY_BOUNDARIES.indexName, AIRPORT_CITY_BOUNDARIES),
Map.entry(CARTESIAN_MULTIPOLYGONS.indexName, CARTESIAN_MULTIPOLYGONS),
Map.entry(K8S.indexName, K8S)
Map.entry(K8S.indexName, K8S),
Map.entry(DISTANCES.indexName, DISTANCES)
);

private static final EnrichConfig LANGUAGES_ENRICH = new EnrichConfig("languages_policy", "enrich-policy-languages.json");
Expand Down
118 changes: 118 additions & 0 deletions x-pack/plugin/esql/qa/testFixtures/src/main/resources/distances.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
distance:double,location:geo_point
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.2848062860101461,POINT (5.867332220077515E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.30021218524180354,POINT (6.705522537231445E-7 2.514570951461792E-6)
0.26851794154977293,POINT (5.867332220077515E-7 2.4726614356040955E-6)
0.26851794154977293,POINT (5.867332220077515E-7 2.4726614356040955E-6)
0.26851794154977293,POINT (5.867332220077515E-7 2.4726614356040955E-6)
0.2848062860101461,POINT (6.705522537231445E-7 2.4726614356040955E-6)
0.2848062860101461,POINT (6.705522537231445E-7 2.4726614356040955E-6)
0.2848062860101461,POINT (6.705522537231445E-7 2.4726614356040955E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.2848062860101461,POINT (1.0058283805847168E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.30021218524180354,POINT (1.0896474123001099E-6 2.346932888031006E-6)
0.2685179415497728,POINT (1.0058283805847168E-6 2.3050233721733093E-6)
0.2685179415497728,POINT (1.0058283805847168E-6 2.3050233721733093E-6)
0.2685179415497728,POINT (1.0058283805847168E-6 2.3050233721733093E-6)
0.2848062860101459,POINT (1.0896474123001099E-6 2.3050233721733093E-6)
0.2848062860101459,POINT (1.0896474123001099E-6 2.3050233721733093E-6)
0.2848062860101459,POINT (1.0896474123001099E-6 2.3050233721733093E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.2848062860101459,POINT (1.341104507446289E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.30021218524180354,POINT (1.4249235391616821E-6 2.1792948246002197E-6)
0.2685179415497728,POINT (1.341104507446289E-6 2.137385308742523E-6)
0.2685179415497728,POINT (1.341104507446289E-6 2.137385308742523E-6)
0.2685179415497728,POINT (1.341104507446289E-6 2.137385308742523E-6)
0.2848062860101459,POINT (1.4249235391616821E-6 2.137385308742523E-6)
0.2848062860101459,POINT (1.4249235391616821E-6 2.137385308742523E-6)
0.2848062860101459,POINT (1.4249235391616821E-6 2.137385308742523E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.2848062860101459,POINT (1.5925616025924683E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.30021218524180354,POINT (1.6763806343078613E-6 2.0116567611694336E-6)
0.2685179415497728,POINT (1.5925616025924683E-6 1.969747245311737E-6)
0.2685179415497728,POINT (1.5925616025924683E-6 1.969747245311737E-6)
0.2685179415497728,POINT (1.5925616025924683E-6 1.969747245311737E-6)
0.2848062860101459,POINT (1.6763806343078613E-6 1.969747245311737E-6)
0.2848062860101459,POINT (1.6763806343078613E-6 1.969747245311737E-6)
0.2848062860101459,POINT (1.6763806343078613E-6 1.969747245311737E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.2848062860101459,POINT (1.7601996660232544E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.30021218524180354,POINT (1.8440186977386475E-6 1.8440186977386475E-6)
0.2685179415497728,POINT (1.7601996660232544E-6 1.802109181880951E-6)
0.2685179415497728,POINT (1.7601996660232544E-6 1.802109181880951E-6)
0.2685179415497728,POINT (1.7601996660232544E-6 1.802109181880951E-6)
0.2848062860101459,POINT (1.8440186977386475E-6 1.802109181880951E-6)
0.2848062860101459,POINT (1.8440186977386475E-6 1.802109181880951E-6)
0.2848062860101459,POINT (1.8440186977386475E-6 1.802109181880951E-6)
3.2597569375901188,POINT (2.3720785975456238E-5 -1.7224811017513275E-5)
3.2597569375901188,POINT (2.3720785975456238E-5 -1.7224811017513275E-5)
3.2597569375901188,POINT (2.3720785975456238E-5 -1.7224811017513275E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2625206150153967,POINT (2.3720785975456238E-5 -1.726672053337097E-5)
3.2597569375901188,POINT (2.346932888031006E-5 -1.7560087144374847E-5)
3.2597569375901188,POINT (2.346932888031006E-5 -1.7560087144374847E-5)
3.2597569375901188,POINT (2.346932888031006E-5 -1.7560087144374847E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.2625206150153967,POINT (2.346932888031006E-5 -1.7601996660232544E-5)
3.258374219844941,POINT (2.2547319531440735E-5 -1.873355358839035E-5)
3.258374219844941,POINT (2.2547319531440735E-5 -1.873355358839035E-5)
3.258374219844941,POINT (2.2547319531440735E-5 -1.873355358839035E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
3.262520615015394,POINT (2.2547319531440735E-5 -1.8775463104248047E-5)
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"properties": {
"distance": {
"type": "double"
},
"location": {
"type": "geo_point"
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -883,10 +883,10 @@ ROW wkt = ["POINT(1 1)", "POINT(-1 -1)", "POINT(-1 1)", "POINT(1 -1)"]
;

wkt:keyword | pt:geo_point | distance:double
"POINT(1 1)" | POINT(1 1) | 157249.59776811762
"POINT(-1 -1)" | POINT(-1 -1) | 157249.59776811762
"POINT(-1 1)" | POINT(-1 1) | 157249.59776811762
"POINT(1 -1)" | POINT(1 -1) | 157249.59776811762
"POINT(1 1)" | POINT(1 1) | 157249.5916907891
"POINT(-1 -1)" | POINT(-1 -1) | 157249.6015756357
"POINT(-1 1)" | POINT(-1 1) | 157249.5982806869
"POINT(1 -1)" | POINT(1 -1) | 157249.59498573805
;

airportCityLocationPointDistance
Expand All @@ -898,7 +898,7 @@ FROM airports
;

distance:double | count:long
15869.987675939537 | 891
15869.9876282387 | 891
;

airportDistanceToCityCopenhagen
Expand All @@ -914,7 +914,7 @@ FROM airports

// tag::st_distance-airports-result[]
abbrev:k | name:text | location:geo_point | city_location:geo_point | distance:d
CPH | Copenhagen | POINT(12.6493508684508 55.6285017221528) | POINT(12.5683 55.6761) | 7339.57266575626
CPH | Copenhagen | POINT(12.6493508684508 55.6285017221528) | POINT(12.5683 55.6761) | 7339.573896618216
// end::st_distance-airports-result[]
;

Expand Down Expand Up @@ -1054,11 +1054,69 @@ FROM airports
;

abbrev:k | name:text | location:geo_point | country:k | city:k | city_location:geo_point | distance:d
TRD | Trondheim Vaernes | POINT(10.9168095241445 63.472029381717) | Norway | Stjørdalshalsen | POINT(10.9189 63.4712) | 138.86728011324072
DHA | King Abdulaziz AB | POINT(50.1477245727844 26.2703680854768) | Saudi Arabia | Dhahran | POINT(50.15 26.2667) | 466.7314410344158
NDB | Nouadhibou Int'l | POINT(-17.0334398691538 20.9290523064387) | Mauritania | Nouadhibou | POINT(-17.0333 20.9333) | 472.545954400989
ESE | Ensenada | POINT(-116.595724400418 31.7977139760569) | Mexico | Rodolfo Sánchez Taboada | POINT(-116.5911 31.7958) | 486.1044856437723
INU | Nauru Int'l | POINT(166.91613965882 -0.545037226856384) | Nauru | Yaren | POINT(166.9209 -0.5477) | 606.4888777331985
TRD | Trondheim Vaernes | POINT(10.9168095241445 63.472029381717) | Norway | Stjørdalshalsen | POINT(10.9189 63.4712) | 138.86985803478004
DHA | King Abdulaziz AB | POINT(50.1477245727844 26.2703680854768) | Saudi Arabia | Dhahran | POINT(50.15 26.2667) | 466.7321285739462
NDB | Nouadhibou Int'l | POINT(-17.0334398691538 20.9290523064387) | Mauritania | Nouadhibou | POINT(-17.0333 20.9333) | 472.54642026512636
ESE | Ensenada | POINT(-116.595724400418 31.7977139760569) | Mexico | Rodolfo Sánchez Taboada | POINT(-116.5911 31.7958) | 486.1022373716486
INU | Nauru Int'l | POINT(166.91613965882 -0.545037226856384) | Nauru | Yaren | POINT(166.9209 -0.5477) | 606.4899254580574
;

distancesNearQuantizationBoundary
required_capability: st_distance

FROM distances
| EVAL d = ST_DISTANCE(location, TO_GEOPOINT("POINT(0 0)"))
| EVAL delta = ABS(distance - d)
| WHERE delta > 0
| KEEP distance, d, delta, location
;

distance:double | d:double | delta:double | location:geo_point
;

distancesNearQuantizationBoundaryStats
required_capability: st_distance

FROM distances
| EVAL d = ST_DISTANCE(location, TO_GEOPOINT("POINT(0 0)"))
| STATS count=COUNT(*) BY d
| SORT d ASC
;

count:long | d:double
12 | 0.2685179415497728
3 | 0.26851794154977293
30 | 0.2848062860101459
15 | 0.2848062860101461
30 | 0.30021218524180354
3 | 3.258374219844941
6 | 3.2597569375901188
6 | 3.262520615015394
12 | 3.2625206150153967
;

distancesNearQuantizationBoundaryFilterStatsA
required_capability: st_distance

FROM distances
| WHERE ST_DISTANCE(location, TO_GEOPOINT("POINT(0 0)")) == 0.2848062860101461
| STATS count=COUNT(*)
;

count:long
15
;

distancesNearQuantizationBoundaryFilterStatsB
required_capability: st_distance

FROM distances
| WHERE ST_DISTANCE(location, TO_GEOPOINT("POINT(0 0)")) == 3.2625206150153967
| STATS count=COUNT(*)
;

count:long
12
;

###############################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SloppyMath;
import org.elasticsearch.common.geo.GeoUtils;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.compute.ann.Evaluator;
Expand Down Expand Up @@ -60,8 +61,12 @@ protected GeoDistanceCalculator() {

@Override
protected double distance(Point left, Point right) {
// TODO: investigate if we need to use the more complex validation in Lucenes Circle2D::HaversinDistance class
return SloppyMath.haversinMeters(left.getY(), left.getX(), right.getY(), right.getX());
return SloppyMath.haversinMeters(
GeoUtils.quantizeLat(left.getY()),
GeoUtils.quantizeLon(left.getX()),
GeoUtils.quantizeLat(right.getY()),
GeoUtils.quantizeLon(right.getX())
);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -625,15 +625,12 @@ protected PhysicalPlan rule(FilterExec filterExec, LocalPhysicalOptimizerContext
}

private boolean rewriteComparison(List<Expression> rewritten, StDistance dist, Expression literal, ComparisonType comparisonType) {
// Currently we do not support Equals
if (comparisonType.lt || comparisonType.gt) {
Object value = literal.fold();
if (value instanceof Number number) {
if (dist.right().foldable()) {
return rewriteDistanceFilter(rewritten, dist.source(), dist.left(), dist.right(), number, comparisonType);
} else if (dist.left().foldable()) {
return rewriteDistanceFilter(rewritten, dist.source(), dist.right(), dist.left(), number, comparisonType);
}
Object value = literal.fold();
if (value instanceof Number number) {
if (dist.right().foldable()) {
return rewriteDistanceFilter(rewritten, dist.source(), dist.left(), dist.right(), number, comparisonType);
} else if (dist.left().foldable()) {
return rewriteDistanceFilter(rewritten, dist.source(), dist.right(), dist.left(), number, comparisonType);
}
}
return false;
Expand All @@ -642,30 +639,35 @@ private boolean rewriteComparison(List<Expression> rewritten, StDistance dist, E
private boolean rewriteDistanceFilter(
List<Expression> rewritten,
Source source,
Expression spatialExpression,
Expression literalExpression,
Expression spatialExp,
Expression literalExp,
Number number,
ComparisonType comparisonType
) {
Geometry geometry = SpatialRelatesUtils.makeGeometryFromLiteral(literalExpression);
Geometry geometry = SpatialRelatesUtils.makeGeometryFromLiteral(literalExp);
if (geometry instanceof Point point) {
double distance = number.doubleValue();
if (comparisonType.eq == false) {
distance = comparisonType.lt ? Math.nextDown(distance) : Math.nextUp(distance);
if (comparisonType.lt) {
distance = comparisonType.eq ? distance : Math.nextDown(distance);
rewritten.add(new SpatialIntersects(source, spatialExp, makeCircleLiteral(point, distance, literalExp)));
} else if (comparisonType.gt) {
distance = comparisonType.eq ? distance : Math.nextUp(distance);
rewritten.add(new SpatialDisjoint(source, spatialExp, makeCircleLiteral(point, distance, literalExp)));
} else if (comparisonType.eq) {
rewritten.add(new SpatialIntersects(source, spatialExp, makeCircleLiteral(point, distance, literalExp)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For equality, I was expecting this to be disjoint too?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Forget what I said; it is fine.

rewritten.add(new SpatialDisjoint(source, spatialExp, makeCircleLiteral(point, Math.nextDown(distance), literalExp)));
}
var circle = new Circle(point.getX(), point.getY(), distance);
var wkb = WellKnownBinary.toWKB(circle, ByteOrder.LITTLE_ENDIAN);
var cExp = new Literal(literalExpression.source(), new BytesRef(wkb), DataType.GEO_SHAPE);
rewritten.add(
comparisonType.lt
? new SpatialIntersects(source, spatialExpression, cExp)
: new SpatialDisjoint(source, spatialExpression, cExp)
);
return true;
}
return false;
}

/**
 * Wraps a circle in a {@link Literal} of type {@code GEO_SHAPE}.
 * <p>
 * The circle is constructed from the X and Y of {@code point} together with {@code distance}, serialized to
 * little-endian WKB, and stored in the literal as a {@link BytesRef}.
 *
 * @param point             point whose X and Y are passed to the {@code Circle} constructor
 * @param distance          value passed as the third {@code Circle} constructor argument
 *                          (presumably the radius; confirm units against {@code Circle})
 * @param literalExpression expression whose source is reused as the source of the new literal
 * @return a {@code GEO_SHAPE} literal holding the WKB-encoded circle
 */
private Literal makeCircleLiteral(Point point, double distance, Expression literalExpression) {
    var encodedCircle = WellKnownBinary.toWKB(new Circle(point.getX(), point.getY(), distance), ByteOrder.LITTLE_ENDIAN);
    return new Literal(literalExpression.source(), new BytesRef(encodedCircle), DataType.GEO_SHAPE);
}

/**
* This enum captures the key differences between various inequalities as perceived from the spatial distance function.
* In particular, we need to know which direction the inequality points, with lt=true meaning the left is expected to be smaller
Expand Down
Loading