-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: python native JSON format parser (#53)
Add the `substrait.json.load_json` and `substrait.json.parse_json` functions, which load the JSON representation of a Substrait plan into a `substrait.proto.Plan` object. This commit also adds the `substrait-cpp` repository as a git submodule so that its test files can be reused. This is reasonable because we might end up using the cpp library in the future to create bindings to other features too, so it's helpful to already have it as a submodule. --------- Co-authored-by: Matthijs Brobbel <m1brobbel@gmail.com>
- Loading branch information
Showing
10 changed files
with
189 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,4 +32,4 @@ jobs: | |
python -m pip install ".[test]" | ||
- name: Run tests | ||
run: | | ||
python -m pytest | ||
python -m pytest tests |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
[submodule "third_party/substrait"] | ||
path = third_party/substrait | ||
url = https://github.com/substrait-io/substrait | ||
[submodule "third_party/substrait-cpp"] | ||
path = third_party/substrait-cpp | ||
url = https://github.com/substrait-io/substrait-cpp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
from google.protobuf import json_format | ||
|
||
from substrait.proto import Plan | ||
|
||
|
||
def load_json(filename):
    """Read ``filename`` as UTF-8 text and parse it into a Substrait Plan.

    The file content is handed to ``parse_json``; whatever that call
    returns is returned unchanged.
    """
    with open(filename, encoding="utf-8") as json_file:
        json_text = json_file.read()
    return parse_json(json_text)
|
||
|
||
def parse_json(text):
    """Build a Substrait Plan from the JSON document held in ``text``."""
    plan = Plan()
    # json_format.Parse fills the message in place and hands the same
    # object back, so returning ``plan`` is equivalent to returning its result.
    json_format.Parse(text=text, message=plan)
    return plan
|
||
|
||
def write_json(plan, filename):
    """Write a Substrait Plan to a json file.

    Args:
        plan: The plan to serialize; it is converted to text by ``dump_json``.
        filename: Path of the file to create or overwrite.
    """
    # Encode explicitly as UTF-8 so the output can always be read back by
    # ``load_json`` (which decodes as UTF-8), whatever the platform's default
    # locale encoding is. Plain "w" is enough: the file is never read here.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(dump_json(plan))
|
||
|
||
def dump_json(plan):
    """Serialize a Substrait Plan into its JSON text representation."""
    json_text = json_format.MessageToJson(plan)
    return json_text
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import os | ||
import pathlib | ||
import tempfile | ||
import json | ||
|
||
from substrait.proto import Plan | ||
from substrait.json import load_json, parse_json, dump_json, write_json | ||
|
||
import pytest | ||
|
||
|
||
# Directory with the JSON plans from the substrait-cpp checkout under
# third_party, located relative to this test module.
JSON_FIXTURES = pathlib.Path(os.path.dirname(__file__)).joinpath(
    "..",
    "third_party",
    "substrait-cpp",
    "src",
    "substrait",
    "textplan",
    "data",
)
# Sorted so the parametrized test order is stable from run to run.
JSON_TEST_FILE = list(JSON_FIXTURES.glob("*.json"))
JSON_TEST_FILE.sort()
# Bare file names, used as readable pytest ids.
JSON_TEST_FILENAMES = [fixture.name for fixture in JSON_TEST_FILE]
|
||
|
||
@pytest.mark.parametrize("jsonfile", JSON_TEST_FILE, ids=JSON_TEST_FILENAMES)
def test_json_load(jsonfile):
    """Check that ``load_json`` and ``parse_json`` agree on each fixture."""
    # load_json decodes files as UTF-8, so read and write with the same
    # explicit encoding instead of relying on the platform's locale default.
    with open(jsonfile, encoding="utf-8") as f:
        jsondata = _strip_json_comments(f)
    parsed_plan = parse_json(jsondata)

    # Save to a temporary file so we can test load_json
    # on content stripped of comments.
    with tempfile.TemporaryDirectory() as tmpdir:
        # We use a TemporaryDirectory as on Windows NamedTemporaryFile
        # doesn't allow for easy reopening of the file.
        stripped_path = pathlib.Path(tmpdir) / "jsonfile.json"
        with open(stripped_path, "w", encoding="utf-8") as stripped_file:
            stripped_file.write(jsondata)
        # Reload only once the file has been closed, so its content is
        # guaranteed to be flushed to disk.
        loaded_plan = load_json(stripped_path)

    # The Plan constructor itself will throw an exception
    # in case there is anything wrong in parsing the JSON
    # so we can take for granted that if the plan was created
    # it is a valid plan in terms of protobuf definition.
    assert isinstance(loaded_plan, Plan)

    # Ensure that when loading from file or from string
    # the outcome is the same
    assert parsed_plan == loaded_plan
|
||
|
||
@pytest.mark.parametrize("jsonfile", JSON_TEST_FILE, ids=JSON_TEST_FILENAMES)
def test_json_roundtrip(jsonfile):
    """Check that a plan survives dump/parse and write/load unchanged."""
    # Read the fixture with an explicit encoding rather than the platform's
    # locale default, matching the UTF-8 decoding done by load_json.
    with open(jsonfile, encoding="utf-8") as f:
        jsondata = _strip_json_comments(f)

    # In-memory round trip through the string form.
    parsed_plan = parse_json(jsondata)
    assert parse_json(dump_json(parsed_plan)) == parsed_plan

    # Test with write/load
    with tempfile.TemporaryDirectory() as tmpdir:
        filename = pathlib.Path(tmpdir) / "jsonfile.json"
        write_json(parsed_plan, filename)
        assert load_json(filename) == parsed_plan
|
||
|
||
def _strip_json_comments(jsonfile): | ||
# The JSON files in the cpp testsuite are prefixed with | ||
# a comment containing the SQL that matches the json plan. | ||
# As Python JSON parser doesn't support comments, | ||
# we have to strip them to make the content readable | ||
return "\n".join(l for l in jsonfile.readlines() if l[0] != "#") |
Submodule substrait-cpp
added at
cc8d08
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/bin/bash
# Refresh the substrait-cpp submodule with `git submodule update --remote`.
# The submodule path is relative to the current working directory.

submodule_path="third_party/substrait-cpp"

echo "Updating substrait-cpp submodule..."
git submodule update --remote "${submodule_path}"
|
File renamed without changes.