-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 3 changed files with 110 additions and 60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,45 +1,67 @@ | ||
import pathlib | ||
import argparse | ||
|
||
from substrait import proto | ||
from .functions_catalog import FunctionsCatalog | ||
from .extended_expression import parse_sql_extended_expression | ||
|
||
catalog = FunctionsCatalog() | ||
catalog.load_standard_extensions( | ||
pathlib.Path(__file__).parent.parent.parent.parent / "third_party" / "substrait" / "extensions", | ||
) | ||
|
||
# TODO: Turn this into a command line tool to test more queries. | ||
# We can probably have a quick way to declare schema using command line args. | ||
# like first_name=String,surname=String,age=I32 etc... | ||
schema = proto.NamedStruct( | ||
names=["first_name", "surname", "age"], | ||
struct=proto.Type.Struct( | ||
types=[ | ||
proto.Type( | ||
string=proto.Type.String( | ||
nullability=proto.Type.Nullability.NULLABILITY_REQUIRED | ||
) | ||
), | ||
proto.Type( | ||
string=proto.Type.String( | ||
nullability=proto.Type.Nullability.NULLABILITY_REQUIRED | ||
) | ||
), | ||
|
||
def main():
    """Command-line entry point for trying out the SQL to ExtendedExpression parser.

    Expects two positional arguments: a schema declaration and a SQL SELECT
    statement. The standard extensions are loaded into a fresh functions
    catalog, the schema string is parsed, and the resulting projection and
    filter expressions are printed to stdout.

    Run as:
        python -m substrait.sql first_name=String,surname=String,age=I32 "SELECT surname, age + 1 as next_birthday, age + 2 WHERE age = 32"
    """
    cli = argparse.ArgumentParser(
        description="Convert a SQL SELECT statement to an ExtendedExpression"
    )
    for arg_name, arg_help in (
        ("schema", "Schema of the input data"),
        ("sql", "SQL SELECT statement"),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    # The extension definitions live four directory levels above this file.
    base_dir = pathlib.Path(__file__)
    for _ in range(4):
        base_dir = base_dir.parent
    extensions_dir = base_dir / "third_party" / "substrait" / "extensions"

    catalog = FunctionsCatalog()
    catalog.load_standard_extensions(extensions_dir)

    schema = parse_schema(options.schema)
    projection_expr, filter_expr = parse_sql_extended_expression(
        catalog, schema, options.sql
    )

    # Emit each section as a heading line followed by its content.
    report = (
        ("---- SQL INPUT ----", options.sql),
        ("---- PROJECTION ----", projection_expr),
        ("---- FILTER ----", filter_expr),
    )
    for heading, content in report:
        print(heading)
        print(content)
|
||
|
||
def parse_schema(schema_string):
    """Parse a schema from a comma separated string of fieldname=fieldtype pairs.

    For example: "first_name=String,surname=String,age=I32"

    Each field type is looked up by name as an attribute of ``proto.Type``
    and instantiated with ``NULLABILITY_REQUIRED``. Whitespace around field
    names and type names is ignored.

    Args:
        schema_string: Comma separated ``fieldname=fieldtype`` pairs.

    Returns:
        A ``proto.NamedStruct`` whose ``names`` and struct ``types`` follow
        the order of the fields in ``schema_string``.

    Raises:
        ValueError: If a pair is not of the form ``fieldname=fieldtype`` or
            the type name is not an attribute of ``proto.Type``.
    """
    names = []
    types = []

    for field in schema_string.split(","):
        # partition() never raises, so malformed pairs can be reported with
        # a message that names the offending field.
        fieldname, separator, fieldtype = field.partition("=")
        fieldname = fieldname.strip()
        fieldtype = fieldtype.strip()
        if not separator or not fieldname or not fieldtype:
            raise ValueError(
                f"Invalid schema field {field!r}: expected fieldname=fieldtype"
            )

        try:
            proto_type = getattr(proto.Type, fieldtype)
        except AttributeError as err:
            raise ValueError(
                f"Unknown type {fieldtype!r} for schema field {fieldname!r}"
            ) from err

        names.append(fieldname)
        types.append(
            proto.Type(
                **{
                    # NOTE(review): assumes the proto.Type keyword for a type
                    # is the lowercased type name (String -> string,
                    # I32 -> i32); this may not hold for every type - confirm.
                    fieldtype.lower(): proto_type(
                        nullability=proto.Type.Nullability.NULLABILITY_REQUIRED
                    )
                }
            )
        )
    return proto.NamedStruct(names=names, struct=proto.Type.Struct(types=types))
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters