From dab58b1532d37ec5b055c73b82d5fba56a71dfc6 Mon Sep 17 00:00:00 2001 From: Kyle Mumma Date: Mon, 13 May 2024 14:03:18 -0700 Subject: [PATCH] ref(api): parse pipeline tests, snql pipeline before treeify (#5886) --- snuba/query/dsl.py | 20 +- snuba/query/dsl_mapper.py | 7 +- .../pipeline/test_entity_processing_stage.py | 6 +- tests/query/parser/test_mql_query.py | 60 +- .../test_parse_snql_query_initial.py | 971 ++++++++++++++++++ 5 files changed, 1022 insertions(+), 42 deletions(-) create mode 100644 tests/query/parser/unit_tests/test_parse_snql_query_initial.py diff --git a/snuba/query/dsl.py b/snuba/query/dsl.py index af8bcd4e87..6df47fd4bc 100644 --- a/snuba/query/dsl.py +++ b/snuba/query/dsl.py @@ -151,24 +151,12 @@ def equals( return binary_condition("equals", left, right) -def and_cond(lhs: FunctionCall, rhs: FunctionCall, *args: FunctionCall) -> FunctionCall: - """ - if only lhs and rhs are given, return and(lhs, rhs) - otherwise (more than 2 conditions are given), returns and(lhs, and(rhs, and(...))) - """ - if len(args) == 0: - return binary_condition("and", lhs, rhs) - - sofar = args[len(args) - 1] - for i in range(len(args) - 2, -1, -1): - sofar = binary_condition("and", args[i], sofar) - sofar = binary_condition("and", rhs, sofar) - sofar = binary_condition("and", lhs, sofar) - return sofar +def and_cond(lhs: Expression, rhs: Expression, *args: Expression) -> FunctionCall: + return FunctionCall(None, "and", (lhs, rhs, *args)) -def or_cond(lhs: FunctionCall, rhs: FunctionCall) -> FunctionCall: - return binary_condition("or", lhs, rhs) +def or_cond(lhs: Expression, rhs: Expression, *args: Expression) -> FunctionCall: + return FunctionCall(None, "or", (lhs, rhs, *args)) def in_cond(lhs: Expression, rhs: Expression) -> FunctionCall: diff --git a/snuba/query/dsl_mapper.py b/snuba/query/dsl_mapper.py index 6968e8248f..87a4926785 100644 --- a/snuba/query/dsl_mapper.py +++ b/snuba/query/dsl_mapper.py @@ -2,7 +2,7 @@ from snuba.clickhouse.query import Query as ClickhouseQuery from snuba.query import LimitBy, OrderBy, SelectedExpression -from snuba.query.conditions import get_first_level_or_conditions +from snuba.query.composite import CompositeQuery from snuba.query.data_source.simple import Entity from snuba.query.expressions import ( Argument, @@ -42,8 +42,7 @@ def and_cond_repr(exp: Expression, visitor: ExpressionVisitor[str]) -> str: def or_cond_repr(exp: Expression, visitor: ExpressionVisitor[str]) -> str: assert isinstance(exp, FunctionCall) - conditions = get_first_level_or_conditions(exp) - parameters = ", ".join([arg.accept(visitor) for arg in conditions]) + parameters = ", ".join([arg.accept(visitor) for arg in exp.parameters]) return f"or_cond({parameters})" @@ -225,7 +224,7 @@ def ast_repr( return f"[{', '.join(strings)}]" -def query_repr(query: LogicalQuery | ClickhouseQuery) -> str: +def query_repr(query: LogicalQuery | ClickhouseQuery | CompositeQuery[Entity]) -> str: visitor = DSLMapperVisitor() selected = ast_repr(query.get_selected_columns(), visitor) arrayjoin = ast_repr(query.get_arrayjoin(), visitor) diff --git a/tests/pipeline/test_entity_processing_stage.py b/tests/pipeline/test_entity_processing_stage.py index 44f23b9c0c..b1171d7f47 100644 --- a/tests/pipeline/test_entity_processing_stage.py +++ b/tests/pipeline/test_entity_processing_stage.py @@ -76,8 +76,10 @@ def test_basic( selected_columns=[SelectedExpression("timestamp", column("timestamp"))], condition=and_cond( equals(literal(1), literal(1)), - equals(column("org_id"), literal(1)), - 
equals(column("project_id"), literal(1)), + and_cond( + equals(column("org_id"), literal(1)), + equals(column("project_id"), literal(1)), + ), ), limit=1000, ), diff --git a/tests/query/parser/test_mql_query.py b/tests/query/parser/test_mql_query.py index 011fede18f..da3cef89e6 100644 --- a/tests/query/parser/test_mql_query.py +++ b/tests/query/parser/test_mql_query.py @@ -127,27 +127,47 @@ f.equals( column("granularity", None, "_snuba_granularity"), literal(60) ), - in_cond( - column("project_id", None, "_snuba_project_id"), - f.tuple(literal(11)), - ), - in_cond(column("org_id", None, "_snuba_org_id"), f.tuple(literal(1))), - f.equals( - column("use_case_id", None, "_snuba_use_case_id"), - literal("transactions"), - ), - f.greaterOrEquals( - column("timestamp", None, "_snuba_timestamp"), - literal(datetime(2023, 11, 23, 18, 30)), - ), - f.less( - column("timestamp", None, "_snuba_timestamp"), - literal(datetime(2023, 11, 23, 22, 30)), - ), - f.equals( - column("metric_id", None, "_snuba_metric_id"), literal(123456) + and_cond( + in_cond( + column("project_id", None, "_snuba_project_id"), + f.tuple(literal(11)), + ), + and_cond( + in_cond( + column("org_id", None, "_snuba_org_id"), f.tuple(literal(1)) + ), + and_cond( + f.equals( + column("use_case_id", None, "_snuba_use_case_id"), + literal("transactions"), + ), + and_cond( + f.greaterOrEquals( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2023, 11, 23, 18, 30)), + ), + and_cond( + f.less( + column("timestamp", None, "_snuba_timestamp"), + literal(datetime(2023, 11, 23, 22, 30)), + ), + and_cond( + f.equals( + column( + "metric_id", None, "_snuba_metric_id" + ), + literal(123456), + ), + in_cond( + tags_raw["888"], + f.tuple(literal("dist1"), literal("dist2")), + ), + ), + ), + ), + ), + ), ), - in_cond(tags_raw["888"], f.tuple(literal("dist1"), literal("dist2"))), ), order_by=[ OrderBy( diff --git a/tests/query/parser/unit_tests/test_parse_snql_query_initial.py b/tests/query/parser/unit_tests/test_parse_snql_query_initial.py new file mode 100644 index 0000000000..c8ade5dcd3 --- /dev/null +++ b/tests/query/parser/unit_tests/test_parse_snql_query_initial.py @@ -0,0 +1,971 @@ +from typing import Type + +import pytest + +from snuba.datasets.entities.entity_key import EntityKey +from snuba.datasets.entities.factory import get_entity +from snuba.query import OrderBy, OrderByDirection, SelectedExpression +from snuba.query.composite import CompositeQuery +from snuba.query.data_source.simple import Entity +from snuba.query.dsl import Functions as f +from snuba.query.dsl import and_cond, column, in_cond, literal, or_cond +from snuba.query.logical import Query +from snuba.query.parser.exceptions import ParsingException +from snuba.query.snql.parser import parse_snql_query_initial + +test_cases = [ + pytest.param( + "MATCH (events) SELECT foo(1) AS `alias`, bar(2) AS `alias`", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("alias", f.foo(literal(1), alias="alias")), + SelectedExpression("alias", f.bar(literal(2), alias="alias")), + ], + array_join=None, + condition=None, + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT test_func(release) AS test_func_alias, event_id BY project_id, platform WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id 
= 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("project_id", column("project_id")), + SelectedExpression("platform", column("platform")), + SelectedExpression( + "test_func_alias", + f.test_func(column("release"), alias="test_func_alias"), + ), + SelectedExpression("event_id", column("event_id")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=[column("project_id"), column("platform")], + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count(platform) AS platforms, uniq(platform) AS uniq_platforms, testF(platform, release) AS top_platforms, f1(partition, offset) AS f1_alias, f2() AS f2_alias BY format_eventid(event_id) WHERE tags[sentry:dist] IN tuple('dist1', 'dist2') AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1 HAVING retention_days > 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression( + "format_eventid(event_id)", + f.format_eventid( + column("event_id"), alias="format_eventid(event_id)" + ), + ), + SelectedExpression( + "platforms", f.count(column("platform"), alias="platforms") + ), + SelectedExpression( + "uniq_platforms", f.uniq(column("platform"), alias="uniq_platforms") + ), + SelectedExpression( + "top_platforms", + f.testF( + column("platform"), column("release"), alias="top_platforms" + ), + ), + SelectedExpression( + "f1_alias", + f.f1(column("partition"), column("offset"), alias="f1_alias"), + ), + SelectedExpression("f2_alias", f.f2(alias="f2_alias")), + ], + array_join=None, + condition=and_cond( + in_cond( + column("tags[sentry:dist]"), + f.tuple(literal("dist1"), literal("dist2")), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=[ + f.format_eventid(column("event_id"), alias="format_eventid(event_id)") + ], + having=f.greater(column("retention_days"), literal(1)), + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT partition, offset WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1 ORDER BY partition ASC, offset DESC, func(retention_days) DESC", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("partition", column("partition")), + SelectedExpression("offset", column("offset")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=None, + having=None, + order_by=[ + OrderBy(OrderByDirection.ASC, 
column("partition")), + OrderBy(OrderByDirection.DESC, column("offset")), + OrderBy(OrderByDirection.DESC, f.func(column("retention_days"))), + ], + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT partition BY platform WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1 ORDER BY partition DESC", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("platform", column("platform")), + SelectedExpression("partition", column("partition")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=[column("platform")], + having=None, + order_by=[OrderBy(OrderByDirection.DESC, column("partition"))], + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT platform, tags[test] BY foo(tags[test2]) WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression( + "foo(tags[test2])", + f.foo(column("tags[test2]"), alias="foo(tags[test2])"), + ), + SelectedExpression("platform", column("platform")), + SelectedExpression("tags[test]", column("tags[test]")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=[f.foo(column("tags[test2]"), alias="foo(tags[test2])")], + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT group_id, goo(partition) AS issue_id, foo(zoo(offset)) AS offset WHERE foo(issue_id) AS group_id = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1 ORDER BY group_id ASC", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("group_id", column("group_id")), + SelectedExpression( + "issue_id", f.goo(column("partition"), alias="issue_id") + ), + SelectedExpression( + "offset", f.foo(f.zoo(column("offset")), alias="offset") + ), + ], + array_join=None, + condition=and_cond( + f.equals(f.foo(column("issue_id"), alias="group_id"), literal(1)), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=[OrderBy(OrderByDirection.ASC, column("group_id"))], + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT foo(partition) AS exp, foo(partition) AS exp WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND 
timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("exp", f.foo(column("partition"), alias="exp")), + SelectedExpression("exp", f.foo(column("partition"), alias="exp")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT foo(partition) AS exp, exp WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("exp", f.foo(column("partition"), alias="exp")), + SelectedExpression("exp", column("exp")), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type ARRAY JOIN exception_stacks.type WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=[column("exception_stacks.type")], + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type WHERE exception_stacks.type LIKE 'Arithmetic%' AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=None, + condition=and_cond( + f.like(column("exception_stacks.type"), literal("Arithmetic%")), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + 
offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type ARRAY JOIN exception_stacks.type WHERE exception_stacks.type LIKE 'Arithmetic%' AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=[column("exception_stacks.type")], + condition=and_cond( + f.like(column("exception_stacks.type"), literal("Arithmetic%")), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, arrayJoin(exception_stacks) WHERE exception_stacks.type LIKE 'Arithmetic%' AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "arrayJoin(exception_stacks)", + f.arrayJoin( + column("exception_stacks"), alias="arrayJoin(exception_stacks)" + ), + ), + ], + array_join=None, + condition=and_cond( + f.like(column("exception_stacks.type"), literal("Arithmetic%")), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type WHERE or(equals(exception_stacks.type, 'ArithmeticException'), equals(exception_stacks.type, 'RuntimeException')) = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=None, + condition=and_cond( + f.equals( + or_cond( + f.equals( + column("exception_stacks.type"), + literal("ArithmeticException"), + ), + f.equals( + column("exception_stacks.type"), literal("RuntimeException") + ), + ), + literal(1), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, 
arrayJoin(exception_stacks.type) WHERE or(equals(exception_stacks.type, 'ArithmeticException'), equals(exception_stacks.type, 'RuntimeException')) = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "arrayJoin(exception_stacks.type)", + f.arrayJoin( + column("exception_stacks.type"), + alias="arrayJoin(exception_stacks.type)", + ), + ), + ], + array_join=None, + condition=and_cond( + f.equals( + or_cond( + f.equals( + column("exception_stacks.type"), + literal("ArithmeticException"), + ), + f.equals( + column("exception_stacks.type"), literal("RuntimeException") + ), + ), + literal(1), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count BY tags_key WHERE or(equals(ifNull(tags[foo], ''), 'baz'), equals(ifNull(tags[foo.bar], ''), 'qux')) = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("tags_key", column("tags_key")), + SelectedExpression("count", f.count(alias="count")), + ], + array_join=None, + condition=and_cond( + f.equals( + or_cond( + f.equals( + f.ifNull(column("tags[foo]"), literal("")), literal("baz") + ), + f.equals( + f.ifNull(column("tags[foo.bar]"), literal("")), + literal("qux"), + ), + ), + literal(1), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=[column("tags_key")], + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type ARRAY JOIN exception_stacks WHERE or(equals(exception_stacks.type, 'ArithmeticException'), equals(exception_stacks.type, 'RuntimeException')) = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=[column("exception_stacks")], + condition=and_cond( + f.equals( + or_cond( + f.equals( + column("exception_stacks.type"), + literal("ArithmeticException"), + ), + f.equals( + column("exception_stacks.type"), literal("RuntimeException") + ), + ), + literal(1), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + 
f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT count() AS count, exception_stacks.type ARRAY JOIN exception_stacks, hierarchical_hashes WHERE or(equals(exception_stacks.type, 'ArithmeticException'), equals(exception_stacks.type, 'RuntimeException')) = 1 AND timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("count", f.count(alias="count")), + SelectedExpression( + "exception_stacks.type", column("exception_stacks.type") + ), + ], + array_join=[column("exception_stacks"), column("hierarchical_hashes")], + condition=and_cond( + f.equals( + or_cond( + f.equals( + column("exception_stacks.type"), + literal("ArithmeticException"), + ), + f.equals( + column("exception_stacks.type"), literal("RuntimeException") + ), + ), + literal(1), + ), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2021-01-01T00:00:00")), + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT group_id, count(), divide(uniq(tags[url]) AS a+*, 1) BY group_id WHERE timestamp >= toDateTime('2021-01-01T00:00:00') AND timestamp < toDateTime('2021-01-02T00:00:00') AND project_id = 1", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("group_id", column("group_id")), + SelectedExpression("group_id", column("group_id")), + SelectedExpression("count()", f.count(alias="count()")), + SelectedExpression( + "divide(uniq(tags[url]) AS a+*, 1)", + f.divide( + f.uniq(column("tags[url]"), alias="a+*"), + literal(1), + alias="divide(uniq(tags[url]) AS a+*, 1)", + ), + ), + ], + array_join=None, + condition=and_cond( + f.greaterOrEquals( + column("timestamp"), f.toDateTime(literal("2021-01-01T00:00:00")) + ), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2021-01-02T00:00:00")), + ), + f.equals(column("project_id"), literal(1)), + ), + ), + groupby=[column("group_id")], + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT f1(column1, column2) AS f1_alias, f2() AS f2_alias, testF(platform, field2) AS f1_alias WHERE project_id = 1 AND timestamp >= toDateTime('2020-01-01 12:00:00') AND timestamp < toDateTime('2020-01-02 12:00:00')", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression( + "f1_alias", + f.f1(column("column1"), column("column2"), alias="f1_alias"), + ), + SelectedExpression("f2_alias", f.f2(alias="f2_alias")), + SelectedExpression( + "f1_alias", + f.testF(column("platform"), column("field2"), alias="f1_alias"), + ), + ], + array_join=None, + condition=and_cond( + f.equals(column("project_id"), literal(1)), + and_cond( + f.greaterOrEquals( + column("timestamp"), + 
f.toDateTime(literal("2020-01-01 12:00:00")), + ), + f.less( + column("timestamp"), + f.toDateTime(literal("2020-01-02 12:00:00")), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT f1(column1, f2) AS f1, f2(f1) AS f2 WHERE project_id = 1 AND timestamp >= toDateTime('2020-01-01 12:00:00') AND timestamp < toDateTime('2020-01-02 12:00:00')", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression( + "f1", f.f1(column("column1"), column("f2"), alias="f1") + ), + SelectedExpression("f2", f.f2(column("f1"), alias="f2")), + ], + array_join=None, + condition=and_cond( + f.equals(column("project_id"), literal(1)), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2020-01-01 12:00:00")), + ), + f.less( + column("timestamp"), + f.toDateTime(literal("2020-01-02 12:00:00")), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (events) SELECT f1(f2(c) AS f2) AS c WHERE project_id = 1 AND timestamp >= toDateTime('2020-01-01 12:00:00') AND timestamp < toDateTime('2020-01-02 12:00:00')", + Query( + from_clause=Entity( + EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model() + ), + selected_columns=[ + SelectedExpression("c", f.f1(f.f2(column("c"), alias="f2"), alias="c")) + ], + array_join=None, + condition=and_cond( + f.equals(column("project_id"), literal(1)), + and_cond( + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2020-01-01 12:00:00")), + ), + f.less( + column("timestamp"), + f.toDateTime(literal("2020-01-02 12:00:00")), + ), + ), + ), + groupby=None, + having=None, + order_by=None, + limitby=None, + limit=1000, + offset=0, + totals=False, + granularity=None, + ), + ), + pytest.param( + "MATCH (replays) SELECT replay_id BY replay_id WHERE project_id IN array(4552673527463954) AND timestamp < toDateTime('2023-09-22T18:18:10.891157') AND timestamp >= toDateTime('2023-06-24T18:18:10.891157') HAVING or(1, 1, 1, 1) != 0 LIMIT 10", + Query( + from_clause=Entity( + EntityKey.REPLAYS, get_entity(EntityKey.REPLAYS).get_data_model() + ), + selected_columns=[ + SelectedExpression("replay_id", column("replay_id")), + SelectedExpression("replay_id", column("replay_id")), + ], + array_join=None, + condition=and_cond( + in_cond(column("project_id"), f.array(literal(4552673527463954))), + and_cond( + f.less( + column("timestamp"), + f.toDateTime(literal("2023-09-22T18:18:10.891157")), + ), + f.greaterOrEquals( + column("timestamp"), + f.toDateTime(literal("2023-06-24T18:18:10.891157")), + ), + ), + ), + groupby=[column("replay_id")], + having=f.notEquals( + or_cond(literal(1), literal(1), literal(1), literal(1)), literal(0) + ), + order_by=None, + limitby=None, + limit=10, + offset=0, + totals=False, + granularity=None, + ), + ), +] + + +@pytest.mark.parametrize("body, expected", test_cases) +def test_autogenerated(body: str, expected: Query | CompositeQuery[Entity]) -> None: + actual = parse_snql_query_initial(body) + eq, reason = actual.equals(expected) + assert eq, reason + + +failure_cases = [ + pytest.param( + "MATCH (events) SELECT f(i(am)bad((at(parentheses)+3() AS `alias`", + ParsingException, + ), +] + + +@pytest.mark.parametrize("body, expected_error", failure_cases) +def 
test_autogenerated_invalid(body: str, expected_error: Type[Exception]) -> None: + with pytest.raises(expected_error): + parse_snql_query_initial(body)
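-- 
Note (illustrative, not part of the diff): the dsl.py hunk above makes
and_cond and or_cond variadic, each building a single flat FunctionCall
instead of a right-nested chain of binary conditions (or_cond previously
accepted exactly two arguments). A minimal sketch of the new shape,
assuming the helpers exported by snuba/query/dsl.py:

    from snuba.query.dsl import and_cond, column, equals, literal

    # One flat call: FunctionCall(None, "and", (c1, c2, c3)).
    # The old helper returned the nested form and(c1, and(c2, c3)),
    # which is why the expected trees in the updated tests now spell
    # that nesting out explicitly as and_cond(c1, and_cond(c2, c3)).
    cond = and_cond(
        equals(literal(1), literal(1)),
        equals(column("org_id"), literal(1)),
        equals(column("project_id"), literal(1)),
    )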