From 306ea96a4552fcbc5064f14c3f03fff047e45774 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 31 Jan 2024 22:23:38 -0800 Subject: [PATCH 01/35] Starting. --- .gitmodules | 3 +++ README.md | 2 +- third_party/substrait-cpp | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 160000 third_party/substrait-cpp diff --git a/.gitmodules b/.gitmodules index d9705e1..b8c576e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "third_party/substrait"] path = third_party/substrait url = https://github.com/substrait-io/substrait +[submodule "third_party/substrait-cpp"] + path = third_party/substrait-cpp + url = git@github.com:substrait-io/substrait-cpp.git diff --git a/README.md b/README.md index 19d663e..9be0d98 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ This project is not an execution engine for Substrait Plans. This is an experimental package that is still under development. # Example -At the moment, this project contains only generated Python classes for the Substrait protobuf messages. Let's use an existing Substrait producer, [Ibis](https://ibis-project.org), to provide an example using Python Substrait as the consumer. +At the moment, this project contains generated Python classes for the Substrait protobuf messages and a library for loading and saving them in various formats. Let's use an existing Substrait producer, [Ibis](https://ibis-project.org), to provide an example using Python Substrait as the consumer. ## Produce a Substrait Plan with Ibis ``` In [1]: import ibis diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp new file mode 160000 index 0000000..66c8d97 --- /dev/null +++ b/third_party/substrait-cpp @@ -0,0 +1 @@ +Subproject commit 66c8d9716036301cf89e3a870837c93536b31d9d From 42cee0e62cb915146d4523d79e7b88eef82cdf74 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 16:39:03 -0800 Subject: [PATCH 02/35] A working version but for some reason tests aren't working properly. 
--- CONTRIBUTING.md | 12 + pyproject.toml | 1 + src/substrait/planloader/__init__.py | 0 src/substrait/planloader/planloader.py | 79 +++ tests/test_planloader.py | 12 + tests/tpch-plan01.json | 823 +++++++++++++++++++++++++ third_party/substrait-cpp | 2 +- 7 files changed, 928 insertions(+), 1 deletion(-) create mode 100644 src/substrait/planloader/__init__.py create mode 100644 src/substrait/planloader/planloader.py create mode 100644 tests/test_planloader.py create mode 100644 tests/tpch-plan01.json diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3421534..375b8f1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -50,6 +50,18 @@ Generate the protobuf files manually. Requires protobuf `v3.20.1`. # Build + +## Build and install the textplan loader dynamic library +```commandline +pushd third_party/substrait-cpp +mkdir build +cd build +cmake .. +cd export/planloader +make install +popd +``` + ## Python package Editable installation. ``` diff --git a/pyproject.toml b/pyproject.toml index bb937af..04364e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,7 @@ test = ["pytest >= 7.0.0"] [tool.pytest.ini_options] pythonpath = "src" +addopts = "--ignore=third_party" [build-system] requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2.0"] diff --git a/src/substrait/planloader/__init__.py b/src/substrait/planloader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py new file mode 100644 index 0000000..23c322f --- /dev/null +++ b/src/substrait/planloader/planloader.py @@ -0,0 +1,79 @@ +# SPDX-License-Identifier: Apache-2.0 + +import ctypes +import ctypes.util as ctutil +import enum +import substrait.gen.proto.plan_pb2 as plan_pb2 + + +class PlanFileFormat(enum.Enum): + BINARY = ctypes.c_int32(0) + JSON = ctypes.c_int32(1) + PROTOTEXT = ctypes.c_int32(2) + TEXT = ctypes.c_int32(3) + + +class PlanFileException(Exception): + pass + + +class 
SerializedPlan(ctypes.Structure): + pass + + +SerializedPlan._fields_ = [ + ("buffer", ctypes.POINTER(ctypes.c_byte)), + ("size", ctypes.c_uint32), + ("errorMessage", ctypes.c_char_p), +] + + +# Load the C++ library +# MEGAHACK -- Make this work on Windows, Linux, and MACOSX +#planloader_lib = ctypes.CDLL("/Users/davids/projects/substrait-cpp/cmake-build-release/export/planloader/libplanloader.dylib") +planloader_lib = ctypes.CDLL(ctutil.find_library("planloader")) + +# Declare the function signatures for the external functions. +external_load_substrait_plan = planloader_lib.load_substrait_plan +external_load_substrait_plan.argtypes = [ctypes.c_char_p] +external_load_substrait_plan.restype = ctypes.POINTER(SerializedPlan) + +external_free_substrait_plan = planloader_lib.free_substrait_plan +external_free_substrait_plan.argtypes = [ctypes.POINTER(SerializedPlan)] +external_free_substrait_plan.restype = None + +external_save_substrait_plan = planloader_lib.save_substrait_plan +external_save_substrait_plan.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_char_p, ctypes.c_uint32] +external_save_substrait_plan.restype = ctypes.c_char_p + + +def load_substrait_plan(filename: str) -> plan_pb2.Plan: + """ + Loads a Substrait plan (in any format) from disk. + + Returns: + A Plan protobuf object if successful. + Raises: + PlanFileException if an except occurs while converting or reading from disk. + """ + result = external_load_substrait_plan(filename.encode('UTF-8')) + if result.contents.errorMessage is not None: + raise PlanFileException(result.contents.errorMessage) + data = ctypes.string_at(result.contents.buffer, result.contents.size) + plan = plan_pb2.Plan() + plan.ParseFromString(data) + external_free_substrait_plan(result) + return plan + + +def save_substrait_plan(plan: plan_pb2.Plan, filename: str, file_format: PlanFileFormat): + """ + Saves the given plan to disk in the specified file format. 
+ + Raises: + PlanFileException if an except occurs while converting or writing to disk. + """ + data = plan.SerializeToString() + err = external_save_substrait_plan(data, len(data), filename.encode('UTF-8'), file_format.value) + if err: + raise PlanFileException(err) diff --git a/tests/test_planloader.py b/tests/test_planloader.py new file mode 100644 index 0000000..0d575e2 --- /dev/null +++ b/tests/test_planloader.py @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 + + +from substrait import planloader + + + +def test_main(): + print(planloader.__file__) + print(dir(planloader)) + testplan = planloader.load_substrait_plan('tpch-plan01.json') + planloader.save_substrait_plan(testplan, 'myoutfile.splan', planloader.PlanFileFormat.TEXT.value) diff --git a/tests/tpch-plan01.json b/tests/tpch-plan01.json new file mode 100644 index 0000000..877b4b7 --- /dev/null +++ b/tests/tpch-plan01.json @@ -0,0 +1,823 @@ +# select l_returnflag, l_linestatus, sum(l_quantity) as sum_qty, sum(l_extendedprice) as sum_base_price, sum(l_extendedprice smoke.sh tpch_smoke.sh (1 - l_discount)) as sum_disc_price, sum(l_extendedprice smoke.sh tpch_smoke.sh (1 - l_discount) smoke.sh tpch_smoke.sh (1 + l_tax)) as sum_charge, avg(l_quantity) as avg_qty, avg(l_extendedprice) as avg_price, avg(l_discount) as avg_disc, count(*) as count_order from lineitem where l_shipdate <= date '1998-12-01' - interval '120' day (3) group by l_returnflag, l_linestatus order by l_returnflag, l_linestatus +{ + "extensionUris": [{ + "extensionUriAnchor": 3, + "uri": "/functions_aggregate_generic.yaml" + }, { + "extensionUriAnchor": 2, + "uri": "/functions_arithmetic_decimal.yaml" + }, { + "extensionUriAnchor": 1, + "uri": "/functions_datetime.yaml" + }], + "extensions": [{ + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 0, + "name": "lte:date_date" + } + }, { + "extensionFunction": { + "extensionUriReference": 1, + "functionAnchor": 1, + "name": "subtract:date_day" + } + }, { + 
"extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 2, + "name": "multiply:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 3, + "name": "subtract:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 4, + "name": "add:dec_dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 5, + "name": "sum:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 2, + "functionAnchor": 6, + "name": "avg:dec" + } + }, { + "extensionFunction": { + "extensionUriReference": 3, + "functionAnchor": 7, + "name": "count:any" + } + }], + "relations": [{ + "root": { + "input": { + "sort": { + "common": { + "direct": { + } + }, + "input": { + "aggregate": { + "common": { + "direct": { + } + }, + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [16, 17, 18, 19, 20, 21, 22] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "struct": { + "types": [{ + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i32": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + 
"decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 1, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 25, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "fixedChar": { + "length": 10, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "varchar": { + "length": 44, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }], + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "namedTable": { + "names": ["LINEITEM"] + } + } + }, + "condition": { + "scalarFunction": { + "functionReference": 0, + "args": [], + "outputType": { + "bool": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 1, + "args": [], + "outputType": { + "date": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "literal": { + "date": 10561, + "nullable": false, + "typeVariationReference": 0 + } + } + }, { + "value": { + "literal": { + "intervalDayToSecond": { + "days": 120, + "seconds": 0, + 
"microseconds": 0 + }, + "nullable": false, + "typeVariationReference": 0 + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 8 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 9 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }, { + "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { 
+ "scalarFunction": { + "functionReference": 2, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 4, + "args": [], + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "cast": { + "type": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "input": { + "literal": { + "i32": 1, + "nullable": false, + "typeVariationReference": 0 + } + }, + "failureBehavior": "FAILURE_BEHAVIOR_UNSPECIFIED" + } + } + }, { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 7 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + } + }], + "options": [] + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + 
}] + } + }, + "groupings": [{ + "groupingExpressions": [{ + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }] + }], + "measures": [{ + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 4 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 5, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, 
+ "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 5 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 2 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 3 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 6, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "decimal": { + "scale": 0, + "precision": 19, + "typeVariationReference": 0, + "nullability": "NULLABILITY_NULLABLE" + } + }, + "invocation": "AGGREGATION_INVOCATION_ALL", + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 6 + } + }, + "rootReference": { + } + } + } + }], + "options": [] + } + }, { + "measure": { + "functionReference": 7, + "args": [], + "sorts": [], + "phase": "AGGREGATION_PHASE_INITIAL_TO_RESULT", + "outputType": { + "i64": { + "typeVariationReference": 0, + "nullability": "NULLABILITY_REQUIRED" + } + }, + "invocation": 
"AGGREGATION_INVOCATION_ALL", + "arguments": [], + "options": [] + } + }] + } + }, + "sorts": [{ + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 0 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }, { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + }, + "rootReference": { + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_LAST" + }] + } + }, + "names": ["L_RETURNFLAG", "L_LINESTATUS", "SUM_QTY", "SUM_BASE_PRICE", "SUM_DISC_PRICE", "SUM_CHARGE", "AVG_QTY", "AVG_PRICE", "AVG_DISC", "COUNT_ORDER"] + } + }], + "expectedTypeUrls": [] +} diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index 66c8d97..73bec1f 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit 66c8d9716036301cf89e3a870837c93536b31d9d +Subproject commit 73bec1f5f5c0f91a21cc9c4cf618fc427419d78b From 684d0ac5e1824c7ee86b28f616279a6f80f2e5df Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 19:03:20 -0800 Subject: [PATCH 03/35] Now passes with hand-built library. 
--- myoutfile.splan | 134 +++++++++++++++++++++++++ src/substrait/planloader/__init__.py | 0 src/substrait/planloader/planloader.py | 12 ++- tests/test_planloader.py | 5 +- 4 files changed, 145 insertions(+), 6 deletions(-) create mode 100644 myoutfile.splan delete mode 100644 src/substrait/planloader/__init__.py diff --git a/myoutfile.splan b/myoutfile.splan new file mode 100644 index 0000000..7de7a7e --- /dev/null +++ b/myoutfile.splan @@ -0,0 +1,134 @@ +pipelines { + read -> filter -> project -> aggregate -> sort -> root; +} + +read relation read { + source named; + base_schema schema; +} + +filter relation filter { + filter lte(schema.L_SHIPDATE, + subtract("1998-12-01"_date, {120_days, 0_seconds, 0_microseconds}_interval_day)->date)->bool?; +} + +project relation project { + expression schema.L_RETURNFLAG; + expression schema.L_LINESTATUS; + expression schema.L_QUANTITY; + expression schema.L_EXTENDEDPRICE; + expression multiply(schema.L_EXTENDEDPRICE, + subtract2(1_i32 AS decimal?<19,0>, schema.L_DISCOUNT)->decimal?<19,0>)->decimal?<19,0> NAMED intermediate; + expression multiply( + multiply(schema.L_EXTENDEDPRICE, + subtract2(1_i32 AS decimal?<19,0>, schema.L_DISCOUNT)->decimal?<19,0>)->decimal?<19,0>, + add(1_i32 AS decimal?<19,0>, schema.L_TAX)->decimal?<19,0>)->decimal?<19,0> NAMED intermediate2; + expression schema.L_DISCOUNT; + + emit schema.L_RETURNFLAG; + emit schema.L_LINESTATUS; + emit schema.L_QUANTITY; + emit schema.L_EXTENDEDPRICE; + emit intermediate; + emit intermediate2; + emit schema.L_DISCOUNT; +} + +aggregate relation aggregate { + grouping schema.L_RETURNFLAG; + grouping schema.L_LINESTATUS; + measure { + measure sum(schema.L_QUANTITY)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename; + invocation all; + } + measure { + measure sum(schema.L_EXTENDEDPRICE)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename2; + invocation all; + } + measure { + measure 
sum(intermediate)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename3; + invocation all; + } + measure { + measure sum(intermediate2)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename4; + invocation all; + } + measure { + measure avg(schema.L_QUANTITY)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename5; + invocation all; + } + measure { + measure avg(schema.L_EXTENDEDPRICE)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename6; + invocation all; + } + measure { + measure avg(schema.L_DISCOUNT)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename7; + invocation all; + } + measure { + measure count()->i64@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename8; + invocation all; + } +} + +sort relation sort { + sort schema.L_RETURNFLAG by ASC_NULLS_LAST; + sort schema.L_LINESTATUS by ASC_NULLS_LAST; +} + +root { + names = [ + L_RETURNFLAG, + L_LINESTATUS, + SUM_QTY, + SUM_BASE_PRICE, + SUM_DISC_PRICE, + SUM_CHARGE, + AVG_QTY, + AVG_PRICE, + AVG_DISC, + COUNT_ORDER + ] +} + +schema schema { + L_ORDERKEY i64; + L_PARTKEY i64; + L_SUPPKEY i64; + L_LINENUMBER i32?; + L_QUANTITY decimal?<19,0>; + L_EXTENDEDPRICE decimal?<19,0>; + L_DISCOUNT decimal?<19,0>; + L_TAX decimal?<19,0>; + L_RETURNFLAG fixedchar?<1>; + L_LINESTATUS fixedchar?<1>; + L_SHIPDATE date?; + L_COMMITDATE date?; + L_RECEIPTDATE date?; + L_SHIPINSTRUCT fixedchar?<25>; + L_SHIPMODE fixedchar?<10>; + L_COMMENT varchar?<44>; +} + +source named_table named { + names = [ + "LINEITEM", + ] +} + +extension_space /functions_aggregate_generic.yaml { + function count:any as count; +} + +extension_space /functions_arithmetic_decimal.yaml { + function add:dec_dec as add; + function avg:dec as avg; + function multiply:dec_dec as multiply; + function subtract:dec_dec as subtract2; + function sum:dec as sum; +} + +extension_space /functions_datetime.yaml { + function lte:date_date as lte; + function subtract:date_day as subtract; 
+} diff --git a/src/substrait/planloader/__init__.py b/src/substrait/planloader/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index 23c322f..0ef6cff 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -1,9 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 - +"""Routines for loading and saving Substrait plans.""" import ctypes import ctypes.util as ctutil import enum import substrait.gen.proto.plan_pb2 as plan_pb2 +import sys class PlanFileFormat(enum.Enum): @@ -30,8 +31,13 @@ class SerializedPlan(ctypes.Structure): # Load the C++ library # MEGAHACK -- Make this work on Windows, Linux, and MACOSX -#planloader_lib = ctypes.CDLL("/Users/davids/projects/substrait-cpp/cmake-build-release/export/planloader/libplanloader.dylib") -planloader_lib = ctypes.CDLL(ctutil.find_library("planloader")) +#planloader_path = "/Users/davids/projects/substrait-cpp/cmake-build-release/export/planloader/libplanloader.dylib" +planloader_path = ctutil.find_library("planloader") +sys.stderr.write(f'Path is %s.' % planloader_path) +planloader_lib = ctypes.CDLL(planloader_path) +if planloader_lib is None: + print('Failed to find planloader library') + sys.exit(1) # Declare the function signatures for the external functions. 
external_load_substrait_plan = planloader_lib.load_substrait_plan diff --git a/tests/test_planloader.py b/tests/test_planloader.py index 0d575e2..d1a91c7 100644 --- a/tests/test_planloader.py +++ b/tests/test_planloader.py @@ -1,12 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 -from substrait import planloader - +from substrait.planloader import planloader def test_main(): print(planloader.__file__) print(dir(planloader)) - testplan = planloader.load_substrait_plan('tpch-plan01.json') + testplan = planloader.load_substrait_plan('tests/tpch-plan01.json') planloader.save_substrait_plan(testplan, 'myoutfile.splan', planloader.PlanFileFormat.TEXT.value) From 7287773e33b6ca3be1293b634be5be183b9ef820 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 19:11:06 -0800 Subject: [PATCH 04/35] Corrected build instructions. --- CONTRIBUTING.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 375b8f1..7d42ff6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,10 +54,8 @@ Generate the protobuf files manually. Requires protobuf `v3.20.1`. ## Build and install the textplan loader dynamic library ```commandline pushd third_party/substrait-cpp -mkdir build -cd build -cmake .. -cd export/planloader +make releaase +cd build-Release/export/planloader make install popd ``` From 0f8074e884549074ada13d238f13baf812e3d92b Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 19:19:00 -0800 Subject: [PATCH 05/35] Remove some comments. 
--- src/substrait/planloader/planloader.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index 0ef6cff..43d3462 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -30,10 +30,7 @@ class SerializedPlan(ctypes.Structure): # Load the C++ library -# MEGAHACK -- Make this work on Windows, Linux, and MACOSX -#planloader_path = "/Users/davids/projects/substrait-cpp/cmake-build-release/export/planloader/libplanloader.dylib" planloader_path = ctutil.find_library("planloader") -sys.stderr.write(f'Path is %s.' % planloader_path) planloader_lib = ctypes.CDLL(planloader_path) if planloader_lib is None: print('Failed to find planloader library') From 18b0bd8d2fc7c31421f8a194e8bdfca7d76c1bea Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 19:43:19 -0800 Subject: [PATCH 06/35] Experiment adding the planloader to the build. --- .github/workflows/release.yml | 6 ++++++ CONTRIBUTING.md | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index fe0aef9..1a582c2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,6 +25,12 @@ jobs: - name: Install build dependencies run: | python -m pip install build --user + - name: Build Substrait planloader + run: | + cd third_party/substrait-cpp + make release + cd build-Release/export/planloader + make install - name: Build package run: | python -m build diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7d42ff6..3d932c8 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -54,7 +54,7 @@ Generate the protobuf files manually. Requires protobuf `v3.20.1`. 
## Build and install the textplan loader dynamic library ```commandline pushd third_party/substrait-cpp -make releaase +make release cd build-Release/export/planloader make install popd From 0f5922bdfe8f9b1f502b3c8933481c01f5d77ec9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 22:36:47 -0800 Subject: [PATCH 07/35] Now handles errors consistently. --- src/substrait/planloader/planloader.py | 2 +- third_party/substrait-cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index 43d3462..31e61a9 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -60,7 +60,7 @@ def load_substrait_plan(filename: str) -> plan_pb2.Plan: PlanFileException if an except occurs while converting or reading from disk. """ result = external_load_substrait_plan(filename.encode('UTF-8')) - if result.contents.errorMessage is not None: + if result.contents.errorMessage: raise PlanFileException(result.contents.errorMessage) data = ctypes.string_at(result.contents.buffer, result.contents.size) plan = plan_pb2.Plan() diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index 73bec1f..74bf673 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit 73bec1f5f5c0f91a21cc9c4cf618fc427419d78b +Subproject commit 74bf673bc9f1e7c603aa69d0035aa35fcde6ea37 From bba04a31dacc1a48133e6bea72ddf0bea83449b9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 5 Feb 2024 22:53:09 -0800 Subject: [PATCH 08/35] Remove accidentally added test output file. 
--- myoutfile.splan | 134 ------------------------------------------------ 1 file changed, 134 deletions(-) delete mode 100644 myoutfile.splan diff --git a/myoutfile.splan b/myoutfile.splan deleted file mode 100644 index 7de7a7e..0000000 --- a/myoutfile.splan +++ /dev/null @@ -1,134 +0,0 @@ -pipelines { - read -> filter -> project -> aggregate -> sort -> root; -} - -read relation read { - source named; - base_schema schema; -} - -filter relation filter { - filter lte(schema.L_SHIPDATE, - subtract("1998-12-01"_date, {120_days, 0_seconds, 0_microseconds}_interval_day)->date)->bool?; -} - -project relation project { - expression schema.L_RETURNFLAG; - expression schema.L_LINESTATUS; - expression schema.L_QUANTITY; - expression schema.L_EXTENDEDPRICE; - expression multiply(schema.L_EXTENDEDPRICE, - subtract2(1_i32 AS decimal?<19,0>, schema.L_DISCOUNT)->decimal?<19,0>)->decimal?<19,0> NAMED intermediate; - expression multiply( - multiply(schema.L_EXTENDEDPRICE, - subtract2(1_i32 AS decimal?<19,0>, schema.L_DISCOUNT)->decimal?<19,0>)->decimal?<19,0>, - add(1_i32 AS decimal?<19,0>, schema.L_TAX)->decimal?<19,0>)->decimal?<19,0> NAMED intermediate2; - expression schema.L_DISCOUNT; - - emit schema.L_RETURNFLAG; - emit schema.L_LINESTATUS; - emit schema.L_QUANTITY; - emit schema.L_EXTENDEDPRICE; - emit intermediate; - emit intermediate2; - emit schema.L_DISCOUNT; -} - -aggregate relation aggregate { - grouping schema.L_RETURNFLAG; - grouping schema.L_LINESTATUS; - measure { - measure sum(schema.L_QUANTITY)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename; - invocation all; - } - measure { - measure sum(schema.L_EXTENDEDPRICE)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename2; - invocation all; - } - measure { - measure sum(intermediate)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename3; - invocation all; - } - measure { - measure sum(intermediate2)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED 
measurename4; - invocation all; - } - measure { - measure avg(schema.L_QUANTITY)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename5; - invocation all; - } - measure { - measure avg(schema.L_EXTENDEDPRICE)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename6; - invocation all; - } - measure { - measure avg(schema.L_DISCOUNT)->decimal?<19,0>@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename7; - invocation all; - } - measure { - measure count()->i64@AGGREGATION_PHASE_INITIAL_TO_RESULT NAMED measurename8; - invocation all; - } -} - -sort relation sort { - sort schema.L_RETURNFLAG by ASC_NULLS_LAST; - sort schema.L_LINESTATUS by ASC_NULLS_LAST; -} - -root { - names = [ - L_RETURNFLAG, - L_LINESTATUS, - SUM_QTY, - SUM_BASE_PRICE, - SUM_DISC_PRICE, - SUM_CHARGE, - AVG_QTY, - AVG_PRICE, - AVG_DISC, - COUNT_ORDER - ] -} - -schema schema { - L_ORDERKEY i64; - L_PARTKEY i64; - L_SUPPKEY i64; - L_LINENUMBER i32?; - L_QUANTITY decimal?<19,0>; - L_EXTENDEDPRICE decimal?<19,0>; - L_DISCOUNT decimal?<19,0>; - L_TAX decimal?<19,0>; - L_RETURNFLAG fixedchar?<1>; - L_LINESTATUS fixedchar?<1>; - L_SHIPDATE date?; - L_COMMITDATE date?; - L_RECEIPTDATE date?; - L_SHIPINSTRUCT fixedchar?<25>; - L_SHIPMODE fixedchar?<10>; - L_COMMENT varchar?<44>; -} - -source named_table named { - names = [ - "LINEITEM", - ] -} - -extension_space /functions_aggregate_generic.yaml { - function count:any as count; -} - -extension_space /functions_arithmetic_decimal.yaml { - function add:dec_dec as add; - function avg:dec as avg; - function multiply:dec_dec as multiply; - function subtract:dec_dec as subtract2; - function sum:dec as sum; -} - -extension_space /functions_datetime.yaml { - function lte:date_date as lte; - function subtract:date_day as subtract; -} From b06464cfafa51df90c84f9b8dce45c65401271af Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:14:41 -0800 Subject: [PATCH 09/35] Updated based on review. 
--- tests/test_planloader.py | 2 -- third_party/substrait-cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/test_planloader.py b/tests/test_planloader.py index d1a91c7..1cdb82d 100644 --- a/tests/test_planloader.py +++ b/tests/test_planloader.py @@ -5,7 +5,5 @@ def test_main(): - print(planloader.__file__) - print(dir(planloader)) testplan = planloader.load_substrait_plan('tests/tpch-plan01.json') planloader.save_substrait_plan(testplan, 'myoutfile.splan', planloader.PlanFileFormat.TEXT.value) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index 74bf673..8fc79fb 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit 74bf673bc9f1e7c603aa69d0035aa35fcde6ea37 +Subproject commit 8fc79fb7b8f20aaf268dea66c81dce25961dd609 From e207987ac55ac24e12632774d0239046c78ea361 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:23:16 -0800 Subject: [PATCH 10/35] Updated workflow. --- .github/workflows/release.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 1a582c2..842ac0c 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -25,11 +25,13 @@ jobs: - name: Install build dependencies run: | python -m pip install build --user - - name: Build Substrait planloader + - name: Build Substrait planloader library run: | - cd third_party/substrait-cpp + cd ${{ github.workspace }}/third_party/substrait-cpp make release - cd build-Release/export/planloader + - name: Install Substrait planloader library + run: | + cd ${{ github.workspace }}/third_party/substrait-cpp/build-Release/export/planloader make install - name: Build package run: | From 50010a3fff65e5851d6f0507df6804b78a3d41b2 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:42:02 -0800 Subject: [PATCH 11/35] Added missing requirements to test workflow. 
--- .github/workflows/test.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index a770484..8fb8c66 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,6 +26,14 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} + - name: Build Substrait planloader library + run: | + cd ${{ github.workspace }}/third_party/substrait-cpp + make release + - name: Install Substrait planloader library + run: | + cd ${{ github.workspace }}/third_party/substrait-cpp/build-Release/export/planloader + make install - name: Install package and test dependencies run: | python -m pip install --upgrade pip From e91bb929baabbbf7550c0bb32f40c077e07d8b17 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:47:11 -0800 Subject: [PATCH 12/35] See if we can add a C++ library dependency. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 04364e2..3ef7cbc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dynamic = ["version"] write_to = "src/substrait/_version.py" [project.optional-dependencies] -gen_proto = ["protobuf == 3.20.1", "protoletariat >= 2.0.0"] +gen_proto = ["protobuf == 3.20.1", "protoletariat >= 2.0.0", "libprotobuf-dev == 3.20.1"] test = ["pytest >= 7.0.0"] [tool.pytest.ini_options] From 7b716f5ad649be2f4f1534fcb1d8c8d34c2aa469 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:48:14 -0800 Subject: [PATCH 13/35] Not that way. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3ef7cbc..04364e2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ dynamic = ["version"] write_to = "src/substrait/_version.py" [project.optional-dependencies] -gen_proto = ["protobuf == 3.20.1", "protoletariat >= 2.0.0", "libprotobuf-dev == 3.20.1"] +gen_proto = ["protobuf == 3.20.1", "protoletariat >= 2.0.0"] test = ["pytest >= 7.0.0"] [tool.pytest.ini_options] From b44d3114ce9e527552b028203ee5171aed04aadd Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:49:13 -0800 Subject: [PATCH 14/35] Another attempt at the C++ library dependency. --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index e0116bb..35bca1d 100644 --- a/environment.yml +++ b/environment.yml @@ -11,3 +11,4 @@ dependencies: - python >= 3.8.1 - setuptools >= 61.0.0 - setuptools_scm >= 6.2.0 + - libprotobuf = 3.20.1 # matches protobuf From e2618b4e4a12ed0a30937be4f8050820f6827e5e Mon Sep 17 00:00:00 2001 From: David Sisson Date: Thu, 8 Feb 2024 23:50:07 -0800 Subject: [PATCH 15/35] Not that way either. --- environment.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/environment.yml b/environment.yml index 35bca1d..e0116bb 100644 --- a/environment.yml +++ b/environment.yml @@ -11,4 +11,3 @@ dependencies: - python >= 3.8.1 - setuptools >= 61.0.0 - setuptools_scm >= 6.2.0 - - libprotobuf = 3.20.1 # matches protobuf From 09b2f30533173065f1f68f423e04602e5d68dfa0 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 9 Feb 2024 00:23:29 -0800 Subject: [PATCH 16/35] Correct updated size type. 
--- src/substrait/planloader/planloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index 31e61a9..82987f9 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -24,7 +24,7 @@ class SerializedPlan(ctypes.Structure): SerializedPlan._fields_ = [ ("buffer", ctypes.POINTER(ctypes.c_byte)), - ("size", ctypes.c_uint32), + ("size", ctypes.c_int32), ("errorMessage", ctypes.c_char_p), ] From 85e55a433d36ae1004bf95ac48b38cd409188675 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 9 Feb 2024 00:27:13 -0800 Subject: [PATCH 17/35] Always free the returned SerializedPlan. --- src/substrait/planloader/planloader.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index 82987f9..d7e0d59 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -60,12 +60,14 @@ def load_substrait_plan(filename: str) -> plan_pb2.Plan: PlanFileException if an except occurs while converting or reading from disk. """ result = external_load_substrait_plan(filename.encode('UTF-8')) - if result.contents.errorMessage: - raise PlanFileException(result.contents.errorMessage) - data = ctypes.string_at(result.contents.buffer, result.contents.size) - plan = plan_pb2.Plan() - plan.ParseFromString(data) - external_free_substrait_plan(result) + try: + if result.contents.errorMessage: + raise PlanFileException(result.contents.errorMessage) + data = ctypes.string_at(result.contents.buffer, result.contents.size) + plan = plan_pb2.Plan() + plan.ParseFromString(data) + finally: + external_free_substrait_plan(result) return plan From 006da91f460db131c037c37793e7e5bc5602fa48 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Fri, 9 Feb 2024 21:50:27 -0800 Subject: [PATCH 18/35] Be consistent with signed int32. 
--- src/substrait/planloader/planloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/substrait/planloader/planloader.py b/src/substrait/planloader/planloader.py index d7e0d59..090a3ed 100644 --- a/src/substrait/planloader/planloader.py +++ b/src/substrait/planloader/planloader.py @@ -46,7 +46,7 @@ class SerializedPlan(ctypes.Structure): external_free_substrait_plan.restype = None external_save_substrait_plan = planloader_lib.save_substrait_plan -external_save_substrait_plan.argtypes = [ctypes.c_void_p, ctypes.c_uint32, ctypes.c_char_p, ctypes.c_uint32] +external_save_substrait_plan.argtypes = [ctypes.c_void_p, ctypes.c_int32, ctypes.c_char_p, ctypes.c_int32] external_save_substrait_plan.restype = ctypes.c_char_p From 51da6b0024cf1ebd587e734017fc13c89b9af587 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 13 Feb 2024 13:13:18 -0800 Subject: [PATCH 19/35] Now point at latest head. --- third_party/substrait-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index 8fc79fb..c6caaab 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit 8fc79fb7b8f20aaf268dea66c81dce25961dd609 +Subproject commit c6caaab939f9525be80e1afc90940a40b3702439 From 154e5e39c2fbe6d7da26940407de6afa8fcc22ea Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 13 Feb 2024 15:13:11 -0800 Subject: [PATCH 20/35] Switch to turning submodules to true following documents to see if that works correctly. 
--- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 842ac0c..5b816f7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - submodules: recursive + submodules: true - name: Set up Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8fb8c66..86ed858 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,7 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - submodules: recursive + submodules: true - name: Set up Python uses: actions/setup-python@v5 with: From 75178aca992b69b102df87264da70438277df82e Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 13 Feb 2024 15:18:15 -0800 Subject: [PATCH 21/35] Another attempt at recursing into submodules. 
--- .github/workflows/release.yml | 6 +++++- .github/workflows/test.yml | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5b816f7..2088b0d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -17,7 +17,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - submodules: true + submodules: recursive + - name: Pull submodules + run: | + git submodule update --init --recursive + git submodule update --recursive - name: Set up Python uses: actions/setup-python@v5 with: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 86ed858..f6fbe48 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,7 +21,11 @@ jobs: - name: Checkout code uses: actions/checkout@v4 with: - submodules: true + submodules: recursive + - name: Pull submodules + run: | + git submodule update --init --recursive + git submodule update --recursive - name: Set up Python uses: actions/setup-python@v5 with: From 791fc14373023d591e6ffaa9c44973bf96bde2ed Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 14 Feb 2024 01:10:03 -0800 Subject: [PATCH 22/35] Use pending version of substrait-cpp which uses updated substrait repo. --- third_party/substrait-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index c6caaab..83e732e 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit c6caaab939f9525be80e1afc90940a40b3702439 +Subproject commit 83e732e19d55a1572c00ca27ecf15cfcbafe6f22 From 5c52ff8e06780edfdef3a0f98797cb222bf709fd Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 19 Feb 2024 23:41:16 -0800 Subject: [PATCH 23/35] Updated substrait-cpp dependency. 
--- third_party/substrait-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index 83e732e..b23ffd7 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit 83e732e19d55a1572c00ca27ecf15cfcbafe6f22 +Subproject commit b23ffd73e8b8b16dccc995d9ba99ead8c77c82d1 From 2b5a510c4e51d4166baad54d66a2cc1ef93d02c6 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Mon, 19 Feb 2024 23:45:36 -0800 Subject: [PATCH 24/35] Add cmake dependency to rules. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 04364e2..3d70ac9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ pythonpath = "src" addopts = "--ignore=third_party" [build-system] -requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2.0"] +requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2.0", "ninja", "cmake>=3.20"] build-backend = "setuptools.build_meta" [tool.ruff] From 595f7da1b2f1cdeedf5471f154194a1cba2ec133 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Feb 2024 11:52:58 -0800 Subject: [PATCH 25/35] Switch back to the version of substrait-cpp that matches the current substrait used by substrait-python. --- third_party/substrait-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index b23ffd7..ed37bf8 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit b23ffd73e8b8b16dccc995d9ba99ead8c77c82d1 +Subproject commit ed37bf8195aeccb505689ee464917410e44cd19b From e32a7824c27c5ed94378f9d36d16437e780c372f Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Feb 2024 12:11:24 -0800 Subject: [PATCH 26/35] See if we can add a protobuf-c dependency. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3d70ac9..40d17ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ authors = [{name = "Substrait contributors", email = "substrait@googlegroups.com license = {text = "Apache-2.0"} readme = "README.md" requires-python = ">=3.8.1" -dependencies = ["protobuf >= 3.20"] +dependencies = ["protobuf >= 3.20", "protobuf-c >= 3.20"] dynamic = ["version"] [tool.setuptools_scm] From 09bef75b903166a51b6a695a3d1051b8c34950ea Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Feb 2024 12:19:26 -0800 Subject: [PATCH 27/35] Try using an apt-get action. --- .github/workflows/test.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f6fbe48..8c3a573 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,6 +30,10 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} + - name: Install dependencies + uses: awalsh128/cache-apt-pkgs-action@latest + with: + packages: protobuf-c - name: Build Substrait planloader library run: | cd ${{ github.workspace }}/third_party/substrait-cpp From e89780c3e92863b9a438e011ec88c2dd271850b1 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Feb 2024 12:21:37 -0800 Subject: [PATCH 28/35] Change package name. 
--- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8c3a573..f2a8694 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -33,7 +33,7 @@ jobs: - name: Install dependencies uses: awalsh128/cache-apt-pkgs-action@latest with: - packages: protobuf-c + packages: libprotobuf-c-dev - name: Build Substrait planloader library run: | cd ${{ github.workspace }}/third_party/substrait-cpp From f0f21c0ba6f7c766a9b4a51a4f93d787aa2b0793 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 20 Feb 2024 15:09:09 -0800 Subject: [PATCH 29/35] Switch to using apt-get directly (Linux only). --- .github/workflows/test.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f2a8694..7ae15c5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -31,9 +31,8 @@ jobs: with: python-version: ${{ matrix.python }} - name: Install dependencies - uses: awalsh128/cache-apt-pkgs-action@latest - with: - packages: libprotobuf-c-dev + run: | + sudo apt-get install libprotobuf-c-dev - name: Build Substrait planloader library run: | cd ${{ github.workspace }}/third_party/substrait-cpp From 82340d12dde827fcefc4f44b1f1a8d717103619a Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 17:53:54 -0800 Subject: [PATCH 30/35] Update substrait-cpp (provides an external protobuf dependency).
--- third_party/substrait-cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/substrait-cpp b/third_party/substrait-cpp index ed37bf8..e26585f 160000 --- a/third_party/substrait-cpp +++ b/third_party/substrait-cpp @@ -1 +1 @@ -Subproject commit ed37bf8195aeccb505689ee464917410e44cd19b +Subproject commit e26585f45cdfd0ed3bf03f700c354f04685398c6 From 9ff593bb8813db348bf54f9621e32ebbebe86342 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 17:55:19 -0800 Subject: [PATCH 31/35] Remove apt-get call. --- .github/workflows/test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7ae15c5..f6fbe48 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -30,9 +30,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} - - name: Install dependencies - run: | - sudo apt-get install libprotobuf-c-dev - name: Build Substrait planloader library run: | cd ${{ github.workspace }}/third_party/substrait-cpp From 7f5c02364b4352bf0597664207f6290091d81afd Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 17:57:23 -0800 Subject: [PATCH 32/35] See if we can add curl to the environment. --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index e0116bb..16ecc59 100644 --- a/environment.yml +++ b/environment.yml @@ -11,3 +11,4 @@ dependencies: - python >= 3.8.1 - setuptools >= 61.0.0 - setuptools_scm >= 6.2.0 + - curl From 10a91dd206ecb54a7a6dd681b7844c22f6bd30f9 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 17:58:58 -0800 Subject: [PATCH 33/35] See if we can add curl to the environment. 
--- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 16ecc59..5f9ff27 100644 --- a/environment.yml +++ b/environment.yml @@ -11,4 +11,4 @@ dependencies: - python >= 3.8.1 - setuptools >= 61.0.0 - setuptools_scm >= 6.2.0 - - curl + - libcurl-dev From 3044b7c4d9131b81182c8ee86aaf246437dcd0ba Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 18:22:34 -0800 Subject: [PATCH 34/35] add more environment dependencies --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 5f9ff27..fc005cc 100644 --- a/environment.yml +++ b/environment.yml @@ -11,4 +11,4 @@ dependencies: - python >= 3.8.1 - setuptools >= 61.0.0 - setuptools_scm >= 6.2.0 - - libcurl-dev + - libcurl4-openssl-dev From b42276c70772750937a34575a00492a22b686a9b Mon Sep 17 00:00:00 2001 From: David Sisson Date: Wed, 21 Feb 2024 19:22:46 -0800 Subject: [PATCH 35/35] Adding curl somewhere else. --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 40d17ee..149a722 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ authors = [{name = "Substrait contributors", email = "substrait@googlegroups.com license = {text = "Apache-2.0"} readme = "README.md" requires-python = ">=3.8.1" -dependencies = ["protobuf >= 3.20", "protobuf-c >= 3.20"] +dependencies = ["protobuf >= 3.24", "protobuf-c >= 3.20"] dynamic = ["version"] [tool.setuptools_scm] @@ -20,7 +20,7 @@ pythonpath = "src" addopts = "--ignore=third_party" [build-system] -requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2.0", "ninja", "cmake>=3.20"] +requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2.0", "ninja", "cmake>=3.24", "libcurl4-openssl-dev", "scikit-build>=0.13"] build-backend = "setuptools.build_meta" [tool.ruff]