From e64553bd0d56650e1bd1a544d70f261002ac3301 Mon Sep 17 00:00:00 2001 From: Matthew A Johnson Date: Fri, 27 Sep 2024 09:38:23 +0100 Subject: [PATCH] Addressing some issues in the Python and Rust wrappers. (#164) **New Features** - The Python wrapper now packages the tzdata database inside the wheel to ensure consistent performance across platforms. - The Rust wrapper now has the ability to download a fresh copy of the tzdata database if needed - Added a `regoSetTZDataPath` method to the C API and exposed it for the Python and Rust wrappers. - The `regoNew` C API method now supports the `v1_compatible` flag for interpreter creation - The library embeds the `windowsZones.xml` mapping file so it can provide it where needed - The Python wrapper provides a more natural interface for sets and objects - The CMake system will now look for a `REGOCPP_TZDATA_PATH` environment variable to use for setting the default path **Bug Fix** - Fixed a bug where builtins would not be available if an interpreter was re-used - Fixed a bug with the Rust wrapper where it was aggressively trimming strings Signed-off-by: Matthew Johnson --- .github/workflows/build_wheels.yml | 19 ++- CHANGELOG | 17 +++ CMakeLists.txt | 28 ++++- VERSION | 2 +- examples/rust/Cargo.toml | 2 +- include/rego/rego.hh | 13 ++ include/rego/rego_c.h | 25 ++++ src/CMakeLists.txt | 1 + src/builtins/time.cc | 31 +++++ src/rego_c.cc | 22 +++- src/unify/skip_refs.cc | 5 +- src/windows_zones.h.in | 17 +++ wrappers/python/.gitignore | 2 + wrappers/python/CMakeLists.txt | 1 + wrappers/python/MANIFEST.in | 1 + wrappers/python/docs/source/conf.py | 2 +- wrappers/python/setup.py | 7 +- wrappers/python/src/_regopy.cc | 16 ++- wrappers/python/src/regopy/__init__.py | 15 ++- wrappers/python/src/regopy/interpreter.py | 5 +- wrappers/python/src/regopy/node.py | 63 ++++++---- wrappers/python/src/regopy/output.py | 11 +- wrappers/python/test/test_interpreter.py | 24 ++++ wrappers/rust/regorust/.gitignore | 5 +- 
wrappers/rust/regorust/Cargo.toml | 7 +- wrappers/rust/regorust/build.rs | 1 + wrappers/rust/regorust/src/lib.rs | 140 +++++++++++++++++++++- 27 files changed, 424 insertions(+), 58 deletions(-) create mode 100644 src/windows_zones.h.in create mode 100644 wrappers/python/MANIFEST.in diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index d8984087..f14918c0 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -28,8 +28,9 @@ jobs: with: package-dir: ${{github.workspace}}/wrappers/python/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: + name: ${{ matrix.python }}-windows path: ./wheelhouse/*.whl build_mac_wheels: name: Build wheels on mac @@ -47,11 +48,6 @@ jobs: python-version: "3.10 - 3.12" update-environment: false - - name: Install pipx - run: | - brew install pipx - pipx ensurepath - - name: Git config for fetching pull requests run: | git config --global --add remote.origin.fetch +refs/pull/*/merge:refs/remotes/pull/* @@ -66,14 +62,15 @@ jobs: CIBW_BUILD: ${{ matrix.python }}-macosx* CIBW_ARCHS_MACOS: arm64 CIBW_ENVIRONMENT_MACOS : > - MACOSX_DEPLOYMENT_TARGET=10.15 + MACOSX_DEPLOYMENT_TARGET=11.0 REGOCPP_REPO=https://github.com/${{github.repository}} REGOCPP_TAG=${{github.sha}} with: package-dir: ${{github.workspace}}/wrappers/python/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: + name: ${{ matrix.python }}-mac path: ./wheelhouse/*.whl build_manylinux_wheels: name: Build wheels on manylinux @@ -100,8 +97,9 @@ jobs: with: package-dir: ${{github.workspace}}/wrappers/python/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: + name: ${{ matrix.python }}-manylinux path: ./wheelhouse/*.whl build_musllinux_wheels: @@ -128,6 +126,7 @@ jobs: with: package-dir: ${{github.workspace}}/wrappers/python/ - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: + name: ${{ 
matrix.python }}-musllinux path: ./wheelhouse/*.whl \ No newline at end of file diff --git a/CHANGELOG b/CHANGELOG index 1d6b13be..823476af 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,22 @@ # Changelog +## 2024-09-26 - Version 0.4.5 +Point release addressing some issues in the Python and Rust wrappers. + +**New Features** +- The Python wrapper now packages the tzdata database inside the wheel to ensure consistent performance across platforms. +- The Rust wrapper now has the ability to download a fresh copy of the tzdata database if needed +- Added a `regoSetTZDataPath` method to the C API and exposed it for the Python and Rust wrappers. +- The `regoNew` C API method now supports the `v1_compatible` flag for interpreter creation +- The library embeds the `windowsZones.xml` mapping file so it can provide it where needed +- The Python wrapper provides a more natural interface for sets and objects +- The CMake system will now look for a `REGOCPP_TZDATA_PATH` environment variable to use for setting the default path + +**Bug Fix** +- Fixed a bug where builtins would not be available if an interpreter was re-used +- Fixed a bug with the Rust wrapper where it was aggressively trimming strings + + ## 2024-09-23 - Version 0.4.4 Point release adding the `uuid`, `time`, and `walk` builtins. 
diff --git a/CMakeLists.txt b/CMakeLists.txt index 878a365e..43fd1623 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,9 +110,18 @@ if(MSVC) set(REGOCPP_USE_MANUAL_TZDATA ON) endif() -set(REGOCPP_TZDATA_PATH "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Path to the tzdata directory") +if(DEFINED ENV{REGOCPP_TZDATA_PATH}) + set(REGOCPP_TZDATA_PATH $ENV{REGOCPP_TZDATA_PATH} CACHE STRING "Path to the tzdata directory") + message("REGOCPP_TZDATA_PATH set to ${REGOCPP_TZDATA_PATH}") +else() + set(REGOCPP_TZDATA_PATH "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Path to the tzdata directory") + message("REGOCPP_TZDATA_PATH not set, using ${REGOCPP_TZDATA_PATH}") +endif() + file(TO_CMAKE_PATH ${REGOCPP_TZDATA_PATH} REGOCPP_TZDATA_PATH) +set(REGOCPP_WINDOWS_ZONES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/templates/tzdata/windowsZones.xml") + if(REGOCPP_USE_MANUAL_TZDATA) set(MANUAL_TZ_DB ON) set(USE_SYSTEM_TZ_DB OFF) @@ -124,13 +133,18 @@ if(REGOCPP_USE_MANUAL_TZDATA) FILE(DOWNLOAD https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz ${TZDATA_GZ_PATH} SHOW_PROGRESS) message("Extracting ${TZDATA_GZ_PATH} to ${TZDATA_PATH}") FILE(ARCHIVE_EXTRACT INPUT ${TZDATA_GZ_PATH} DESTINATION ${TZDATA_PATH}) + endif() + if(NOT EXISTS ${REGOCPP_TZDATA_PATH}/tzdata) # Copy the tzdata directory to the specified path. 
message("Copying ${TZDATA_PATH} to ${REGOCPP_TZDATA_PATH}") file(COPY ${TZDATA_PATH} DESTINATION ${REGOCPP_TZDATA_PATH}) - if(MSVC) + endif() + + if(MSVC) + if(NOT EXISTS ${REGOCPP_TZDATA_PATH}/tzdata/windowsZones.xml) message("MSVC: Copying windowsZones.xml to ${REGOCPP_TZDATA_PATH}/tzdata") - file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/templates/tzdata/windowsZones.xml DESTINATION ${REGOCPP_TZDATA_PATH}/tzdata) + file(COPY ${REGOCPP_WINDOWS_ZONES_PATH} DESTINATION ${REGOCPP_TZDATA_PATH}/tzdata) endif() endif() else() @@ -139,6 +153,14 @@ else() set(MANUAL_TZ_DB OFF) endif() +file( READ ${REGOCPP_WINDOWS_ZONES_PATH} REGOCPP_WINDOWS_ZONES_RAW ) +string(LENGTH "${REGOCPP_WINDOWS_ZONES_RAW}" REGOCPP_WINDOWS_ZONES_RAW_LENGTH) +math(EXPR REGOCPP_WINDOWS_ZONES_RAW_LENGTH "${REGOCPP_WINDOWS_ZONES_RAW_LENGTH} - 1") +foreach(iter RANGE 0 ${REGOCPP_WINDOWS_ZONES_RAW_LENGTH} 2048) + string(SUBSTRING "${REGOCPP_WINDOWS_ZONES_RAW}" ${iter} 2048 line) + string(APPEND REGOCPP_WINDOWS_ZONES_SRC "${line})xml\",\n\tR\"xml(") +endforeach() + set(BUILD_TZ_LIB ON) set(COMPILE_WITH_C_LOCALE ON) FetchContent_MakeAvailable_ExcludeFromAll(date) diff --git a/VERSION b/VERSION index b300caa3..c8a5397f 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.4.4 \ No newline at end of file +0.4.5 \ No newline at end of file diff --git a/examples/rust/Cargo.toml b/examples/rust/Cargo.toml index e0ea8fc6..7ab20940 100644 --- a/examples/rust/Cargo.toml +++ b/examples/rust/Cargo.toml @@ -6,5 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -regorust = "0.4.4" +regorust = "0.4.5" clap = { version = "4.0", features = ["derive"] } \ No newline at end of file diff --git a/include/rego/rego.hh b/include/rego/rego.hh index 64551e23..e1ddad59 100644 --- a/include/rego/rego.hh +++ b/include/rego/rego.hh @@ -560,6 +560,19 @@ namespace rego */ Node set(const Nodes& set_members); + /** + * Sets the location where rego-cpp will look 
for timezone database + * information. + * + * The timezone database will be interpreted as one obtained from the IANA + * (https://www.iana.org/time-zones) which has been downloaded and unpacked + * into the path provided. If the library was built without manual tzdata + * support, this function will throw an exception. + * + * @param path The path to the timezone database. + */ + void set_tzdata_path(const std::filesystem::path& path); + /** This constant indicates that a built-in can receive any number of * arguments. */ const std::size_t AnyArity = std::numeric_limits::max(); diff --git a/include/rego/rego_c.h b/include/rego/rego_c.h index 1b10ab4c..e671b3ad 100644 --- a/include/rego/rego_c.h +++ b/include/rego/rego_c.h @@ -20,6 +20,7 @@ typedef unsigned int regoSize; #define REGO_ERROR 1 #define REGO_ERROR_BUFFER_TOO_SMALL 2 #define REGO_ERROR_INVALID_LOG_LEVEL 3 +#define REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED 4 // term node types #define REGO_NODE_BINDING 1000 @@ -93,6 +94,21 @@ extern "C" */ regoEnum regoSetLogLevelFromString(const char* level); + /** + * Sets the location where rego-cpp will look for timezone database + * information. + * + * The timezone database will be interpreted as one obtained from the IANA + * (https://www.iana.org/time-zones) which has been downloaded and unpacked + * into the path provided. If the library was built without manual tzdata + * support, this function will return an error code. + * + * @param path The path to the timezone database. + * @return REGO_OK if successful, REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED + * otherwise. + */ + regoEnum regoSetTZDataPath(const char* path); + /** * Allocates and initializes a new Rego interpreter. * @@ -102,6 +118,15 @@ extern "C" */ regoInterpreter* regoNew(void); + /** + * Allocates and initializes a new V1 Rego interpreter. + * + * The caller is responsible for freeing the interpreter with regoFree. + * + * @return A pointer to the new V1 interpreter. 
+ */ + regoInterpreter* regoNewV1(void); + /** * Frees a Rego interpreter. * diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e40262e0..7a4b399d 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,5 +1,6 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/version.h" @ONLY) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/builtins/tzdata.h.in" "${CMAKE_CURRENT_BINARY_DIR}/tzdata.h" @ONLY) +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/windows_zones.h.in" "${CMAKE_CURRENT_BINARY_DIR}/windows_zones.h" @ONLY) set( SOURCES bigint.cc diff --git a/src/builtins/time.cc b/src/builtins/time.cc index afa8be6a..3cea406d 100644 --- a/src/builtins/time.cc +++ b/src/builtins/time.cc @@ -4,8 +4,10 @@ #include "re2/stringpiece.h" #include "rego.hh" #include "tzdata.h" +#include "windows_zones.h" #include +#include #include namespace @@ -739,4 +741,33 @@ namespace rego BuiltInDef::create(Location("time.weekday"), 1, ::weekday)}; } } + + void set_tzdata_path(const std::filesystem::path& path) + { +#ifdef REGOCPP_USE_MANUAL_TZDATA + date::set_install(path.string()); + auto wz_path = path / "windowsZones.xml"; + if (!std::filesystem::exists(wz_path)) + { + std::ofstream file(wz_path); + if (file.is_open()) + { + for (auto& line : WINDOWS_ZONES_SRC) + { + file << line << std::endl; + } + file.close(); + } + else + { + throw std::runtime_error( + "Failed to create required windowsZones.xml at tzdata path"); + } + } +#else + throw std::runtime_error( + "Cannot set tzdata path to " + path.string() + + " because REGOCPP_USE_MANUAL_TZDATA was not defined"); +#endif + } } \ No newline at end of file diff --git a/src/rego_c.cc b/src/rego_c.cc index 31987267..51f0d13f 100644 --- a/src/rego_c.cc +++ b/src/rego_c.cc @@ -75,13 +75,33 @@ extern "C" return REGO_ERROR_INVALID_LOG_LEVEL; } + regoEnum regoSetTZDataPath(const char* path) + { + try + { + rego::set_tzdata_path(path); + return REGO_OK; + } + catch (const std::exception&) + { + return 
REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED; + } + } + regoInterpreter* regoNew() { - auto ptr = reinterpret_cast(new rego::Interpreter()); + auto ptr = reinterpret_cast(new rego::Interpreter(false)); logging::Debug() << "regoNew: " << ptr; return ptr; } + regoInterpreter* regoNewV1() + { + auto ptr = reinterpret_cast(new rego::Interpreter(true)); + logging::Debug() << "regoNewV1: " << ptr; + return ptr; + } + void regoFree(regoInterpreter* rego) { logging::Debug() << "regoFree: " << rego; diff --git a/src/unify/skip_refs.cc b/src/unify/skip_refs.cc index f3f33b29..e8f70f08 100644 --- a/src/unify/skip_refs.cc +++ b/src/unify/skip_refs.cc @@ -165,11 +165,10 @@ namespace rego return 0; }); - auto added_builtins = std::make_shared>(); - - skip_refs.post(Rego, [skips, builtins, added_builtins](Node node) { + skip_refs.post(Rego, [skips, builtins](Node node) { std::set used_builtins; find_used_builtins(node, builtins, used_builtins); + auto added_builtins = std::make_shared>(); Node skipseq = node / SkipSeq; int changes = 0; for (auto& builtin : used_builtins) diff --git a/src/windows_zones.h.in b/src/windows_zones.h.in new file mode 100644 index 00000000..72088c46 --- /dev/null +++ b/src/windows_zones.h.in @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#ifndef _REGOCPP_WINDOWS_ZONES_H_ +#define _REGOCPP_WINDOWS_ZONES_H_ + +#include +#include + +namespace rego +{ +const std::vector WINDOWS_ZONES_SRC = { + R"xml(@REGOCPP_WINDOWS_ZONES_SRC@)xml" +}; +} // namespace rego + +#endif \ No newline at end of file diff --git a/wrappers/python/.gitignore b/wrappers/python/.gitignore index eb404d99..e3f62d58 100644 --- a/wrappers/python/.gitignore +++ b/wrappers/python/.gitignore @@ -127,3 +127,5 @@ dmypy.json # Pyre type checker .pyre/ +tzdata +debug \ No newline at end of file diff --git a/wrappers/python/CMakeLists.txt b/wrappers/python/CMakeLists.txt index b926b6a0..0a1bfcfc 100644 --- a/wrappers/python/CMakeLists.txt +++ b/wrappers/python/CMakeLists.txt @@ -36,6 +36,7 @@ FetchContent_Declare( set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) set(SNMALLOC_ENABLE_DYNAMIC_LOADING ON) set(REGOCPP_USE_CXX17 ON) +set(REGOCPP_USE_MANUAL_TZDATA ON) FetchContent_MakeAvailable(regocpp) FetchContent_MakeAvailable(pybind11) diff --git a/wrappers/python/MANIFEST.in b/wrappers/python/MANIFEST.in new file mode 100644 index 00000000..11a5eb0d --- /dev/null +++ b/wrappers/python/MANIFEST.in @@ -0,0 +1 @@ +graft src diff --git a/wrappers/python/docs/source/conf.py b/wrappers/python/docs/source/conf.py index 028471d0..a2708179 100644 --- a/wrappers/python/docs/source/conf.py +++ b/wrappers/python/docs/source/conf.py @@ -9,7 +9,7 @@ project = 'regopy' copyright = '2024, Microsoft' author = 'Microsoft' -release = '0.4.4' +release = '0.4.5' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/wrappers/python/setup.py b/wrappers/python/setup.py index caa46028..b81d97e3 100644 --- a/wrappers/python/setup.py +++ b/wrappers/python/setup.py @@ -65,6 +65,7 @@ def build_extension(self, ext: CMakeExtension): cfg = "Debug" if self.debug else "Release" extdir = os.path.abspath(os.path.dirname( self.get_ext_fullpath(ext.name))) + 
cmake_args = ["-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir, "-DCMAKE_BUILD_TYPE=" + cfg, "-DPYTHON_EXECUTABLE=" + sys.executable] @@ -83,8 +84,11 @@ def build_extension(self, ext: CMakeExtension): else: cmake_args += ["-A", "Win32"] + env = os.environ.copy() + env["REGOCPP_TZDATA_PATH"] = extdir + subprocess.check_call(["cmake", ext.source_dir] + - cmake_args, cwd=self.build_temp) + cmake_args, cwd=self.build_temp, env=env) subprocess.check_call(["cmake", "--build", "."] + build_args, cwd=self.build_temp) @@ -100,6 +104,7 @@ def build_extension(self, ext: CMakeExtension): long_description_content_type="text/markdown", packages=find_packages("src"), package_dir={"": "src"}, + include_package_data=True, python_requires=">=3.6, <4", ext_modules=[CMakeExtension("regopy._regopy")], classifiers=[ diff --git a/wrappers/python/src/_regopy.cc b/wrappers/python/src/_regopy.cc index a14b06a3..4745f1d6 100644 --- a/wrappers/python/src/_regopy.cc +++ b/wrappers/python/src/_regopy.cc @@ -4,7 +4,8 @@ using namespace pybind11::literals; -namespace { +namespace +{ std::string get_value(regoNode* node) { regoSize size = regoNodeValueSize(node); @@ -35,6 +36,10 @@ PYBIND11_MODULE(_regopy, m) // error codes m.attr("REGO_OK") = REGO_OK; m.attr("REGO_ERROR") = REGO_ERROR; + m.attr("REGO_ERROR_BUFFER_TOO_SMALL") = REGO_ERROR_BUFFER_TOO_SMALL; + m.attr("REGO_ERROR_INVALID_LOG_LEVEL") = REGO_ERROR_INVALID_LOG_LEVEL; + m.attr("REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED") = + REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED; // term node types m.attr("REGO_NODE_BINDING") = REGO_NODE_BINDING; @@ -85,7 +90,11 @@ PYBIND11_MODULE(_regopy, m) &regoSetLogLevelFromString, "Sets the level of logging.", "level"_a); + m.def( + "regoSetTZDataPath", &regoSetTZDataPath, "Sets the TZData path.", "path"_a); m.def("regoNew", &regoNew, "Returns a pointer to a new rego instance."); + m.def( + "regoNewV1", &regoNewV1, "Returns a pointer to a new V1 rego instance."); m.def("regoFree", &regoFree, "Deletes a rego instance."); m.def( 
"regoAddModuleFile", @@ -233,10 +242,7 @@ PYBIND11_MODULE(_regopy, m) "Returns a human-readable node type name.", "node"_a); m.def( - "regoNodeValue", - &get_value, - "Gets the node value as a string.", - "node"_a); + "regoNodeValue", &get_value, "Gets the node value as a string.", "node"_a); m.def("regoNodeSize", &regoNodeSize, "Returns the node size.", "node"_a); m.def( "regoNodeGet", diff --git a/wrappers/python/src/regopy/__init__.py b/wrappers/python/src/regopy/__init__.py index 08c933a6..aafd464b 100644 --- a/wrappers/python/src/regopy/__init__.py +++ b/wrappers/python/src/regopy/__init__.py @@ -1,6 +1,7 @@ """regopy - Python wrapper for rego-cpp.""" from enum import IntEnum +import os from ._regopy import ( REGO_LOG_LEVEL_DEBUG, @@ -16,7 +17,8 @@ REGOCPP_PLATFORM, REGOCPP_VERSION, regoSetLogLevel, - regoSetLogLevelFromString + regoSetLogLevelFromString, + regoSetTZDataPath, ) from .interpreter import Interpreter, RegoError from .node import Node, NodeKind @@ -31,6 +33,17 @@ "regoNew", "regoFree", ] +regoSetTZDataPath(os.path.join(os.path.basename(__file__), "tzdata")) + + +def set_tzdata_path(path: str): + """Sets the path to the tzdata directory. + + Args: + path (str): The path to the tzdata directory. 
+ """ + regoSetTZDataPath(path) + class LogLevel(IntEnum): NONE = REGO_LOG_LEVEL_NONE diff --git a/wrappers/python/src/regopy/interpreter.py b/wrappers/python/src/regopy/interpreter.py index 8138e73e..d6c201bd 100644 --- a/wrappers/python/src/regopy/interpreter.py +++ b/wrappers/python/src/regopy/interpreter.py @@ -13,6 +13,7 @@ regoGetStrictBuiltInErrors, regoGetWellFormedChecksEnabled, regoNew, + regoNewV1, regoQuery, regoSetDebugEnabled, regoSetDebugPath, @@ -104,9 +105,9 @@ class Interpreter: {"bindings":{"x":[{"bar":"Foo", "baz":5, "be":true, "bop":23.4}, "20", {"name":"smoke1"}]}} """ - def __init__(self): + def __init__(self, v1_compatible=False): """Initializer.""" - self._impl = regoNew() + self._impl = regoNewV1() if v1_compatible else regoNew() def __del__(self): """Destructor.""" diff --git a/wrappers/python/src/regopy/node.py b/wrappers/python/src/regopy/node.py index 324db987..b05ba6b5 100644 --- a/wrappers/python/src/regopy/node.py +++ b/wrappers/python/src/regopy/node.py @@ -1,6 +1,7 @@ """Module providing an interface to the rego-cpp Node object.""" from enum import IntEnum +import json from typing import Union from ._regopy import ( @@ -113,7 +114,22 @@ def __init__(self, impl): else: self._kind = NodeKind(regoNodeType(self._impl)) - self._children = [Node(regoNodeGet(self._impl, i)) for i in range(len(self))] + size = regoNodeSize(self._impl) + if self._kind == NodeKind.Object: + self._children = {} + for i in range(size): + child = Node(regoNodeGet(self._impl, i)) + assert child._kind == NodeKind.ObjectItem + key = regoNodeJSON(child.at(0)._impl) + self._children[key] = child.at(1) + elif self._kind == NodeKind.Set: + self._children = {} + for i in range(size): + child = Node(regoNodeGet(self._impl, i)) + key = regoNodeJSON(child._impl) + self._children[key] = child + else: + self._children = [None] * size @property def kind(self) -> NodeKind: @@ -181,14 +197,17 @@ def value(self) -> Union[str, int, float, bool, None]: def __len__(self) -> int: 
"""Returns the number of child nodes.""" - return regoNodeSize(self._impl) + if self._kind == NodeKind.Term: + return len(self.at(0)) + + return len(self._children) def index(self, index: int) -> "Node": """Returns the node at an index of an array. - + Returns: Node: The child node. - + Raises: IndexError: If the index is out of bounds. TypeError: If the node is not an array. @@ -211,7 +230,7 @@ def lookup(self, key: str) -> "Node": Returns: Node: The child node. - + Raises: KeyError: If the key is not found. TypeError: If the node does not support lookup @@ -219,25 +238,20 @@ def lookup(self, key: str) -> "Node": if self._kind == NodeKind.Term: return self.at(0).lookup(key) + key = json.dumps(key) if self._kind == NodeKind.Object: - for child in self._children: - assert child.kind == NodeKind.ObjectItem - child_key = child.at(0).value - if child_key == key: - return child.at(1) + if key in self._children: + return self._children[key] - if child_key.startswith('"') and child_key.endswith('"'): - child_key = child_key[1:-1] - - if child_key == key: - return child.at(1) + key = '"' + key + '"' + if key in self._children: + return self._children[key] raise KeyError(f"Key {key} not found") if self._kind == NodeKind.Set: - for child in self._children: - if child.value == key: - return child + if key in self._children: + return self._children[key] return None @@ -245,6 +259,9 @@ def lookup(self, key: str) -> "Node": def at(self, index: int) -> "Node": """Returns the child node at the given index.""" + if self._children[index] is None: + self._children[index] = Node(regoNodeGet(self._impl, index)) + return self._children[index] def __iter__(self): @@ -265,13 +282,15 @@ def __getitem__(self, key: Union[int, str]) -> "Node": RegoError: If the key is out of bounds. TypeError: If the key is not an integer or a string. 
""" - if isinstance(key, int): - return self.index(key) + kind = self._kind + if kind == NodeKind.Term: + kind = self.at(0)._kind - if isinstance(key, str): + if kind in [NodeKind.Set, NodeKind.Object]: return self.lookup(key) - raise TypeError("key must be an integer or a string") + if isinstance(key, int): + return self.index(key) def json(self) -> str: """Returns the node as a JSON string.""" diff --git a/wrappers/python/src/regopy/output.py b/wrappers/python/src/regopy/output.py index 152ddd11..03db8478 100644 --- a/wrappers/python/src/regopy/output.py +++ b/wrappers/python/src/regopy/output.py @@ -12,9 +12,10 @@ ) from .node import Node + class Result: """A result from a Rego output. - + Each result contains a list of terms, and set of bindings. Examples: @@ -34,7 +35,7 @@ class Result: def __init__(self, obj: dict): self.expressions = obj.get("expressions", []) self.bindings = obj.get("bindings", {}) - + def __str__(self) -> str: return json.dumps({"expressions": self.expressions, "bindings": self.bindings}) @@ -75,7 +76,11 @@ def __init__(self, impl): """ self._impl = impl if regoOutputOk(impl): - output = json.loads(regoOutputString(impl)) + if regoOutputString(impl) == "undefined": + output = {} + else: + output = json.loads(regoOutputString(impl)) + if isinstance(output, list): self.results = [Result(obj) for obj in output] else: diff --git a/wrappers/python/test/test_interpreter.py b/wrappers/python/test/test_interpreter.py index a1698d07..8f55c7b5 100644 --- a/wrappers/python/test/test_interpreter.py +++ b/wrappers/python/test/test_interpreter.py @@ -120,3 +120,27 @@ def test_multiple_inputs(): output = rego.query("x = data.multi.a") assert output is not None assert output.binding("x").value == 70 + + +def test_tzdata(): + rego = Interpreter() + output = rego.query("""x=time.clock([1727267567139080131, "America/Los_Angeles"])""") + assert output is not None + clock = output.binding("x") + assert len(clock) == 3 + assert clock[0].value == 5 + assert 
clock[1].value == 32 + assert clock[2].value == 47 + + +def test_set(): + rego = Interpreter() + output = rego.query("""a = {1, "2", false, 4.3}""") + assert output is not None + a = output.binding("a") + assert len(a) == 4 + assert a[1].value == 1 + assert a["2"].value == '"2"' + assert not a[False].value + assert a[4.3].value == 4.3 + assert a[6] is None diff --git a/wrappers/rust/regorust/.gitignore b/wrappers/rust/regorust/.gitignore index d8996a7c..4ab7e466 100644 --- a/wrappers/rust/regorust/.gitignore +++ b/wrappers/rust/regorust/.gitignore @@ -15,4 +15,7 @@ Cargo.lock # Rego rego-cpp -rego \ No newline at end of file +rego + +# tzdata +tzdata* \ No newline at end of file diff --git a/wrappers/rust/regorust/Cargo.toml b/wrappers/rust/regorust/Cargo.toml index 202a96b9..dc797223 100644 --- a/wrappers/rust/regorust/Cargo.toml +++ b/wrappers/rust/regorust/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "regorust" -version = "0.4.4" +version = "0.4.5" edition = "2021" description = "Rust bindings for the rego-cpp Rego compiler and interpreter" license = "MIT" @@ -9,6 +9,9 @@ homepage = "https://microsoft.github.io/rego-cpp/" repository = "https://github.com/microsoft/rego-cpp" [dependencies] +reqwest = { version = "0.12.7", features = ["blocking", "json"] } # HTTP client +flate2 = { version = "1.0.33"} # Compression library +tar = "0.4.42" # Tar archive library [build-dependencies] -bindgen = "0.65.1" +bindgen = "0.70.1" diff --git a/wrappers/rust/regorust/build.rs b/wrappers/rust/regorust/build.rs index 197fb438..090e9b3c 100644 --- a/wrappers/rust/regorust/build.rs +++ b/wrappers/rust/regorust/build.rs @@ -53,6 +53,7 @@ fn main() { .as_str(), "-DCMAKE_INSTALL_PREFIX=rust", "-DREGOCPP_COPY_EXAMPLES=ON", + "-DREGOCPP_USE_MANUAL_TZDATA=ON" ]) .current_dir(&regocpp_path) .status() diff --git a/wrappers/rust/regorust/src/lib.rs b/wrappers/rust/regorust/src/lib.rs index 410b726c..b1f331d8 100644 --- a/wrappers/rust/regorust/src/lib.rs +++ 
b/wrappers/rust/regorust/src/lib.rs @@ -2,11 +2,19 @@ include!(concat!(env!("OUT_DIR"), "/bindings.rs")); +extern crate flate2; +extern crate reqwest; +extern crate tar; + +use flate2::read::GzDecoder; use std::ffi::{CStr, CString}; use std::fmt; +use std::fs::File; +use std::io; use std::ops::Index; use std::path::Path; use std::str; +use tar::Archive; /// Returns the build information as a string. /// @@ -23,6 +31,14 @@ pub fn build_info() -> String { ) } +fn remove_quotes(s: String) -> String { + if s.starts_with('"') && s.ends_with('"') { + s[1..s.len() - 1].to_string() + } else { + s + } +} + /// Interface for the Rego interpreter. /// /// This wraps the Rego C API, and handles passing calls to @@ -362,6 +378,100 @@ pub fn set_log_level(level: LogLevel) -> Result<(), &'static str> { } } +fn download_tzdata(path: &str) -> Result<(), &'static str> { + let url = "https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz"; + let gz_path = "tzdata-latest.tar.gz"; + + let client = reqwest::blocking::Client::new(); + let mut resp = client + .get(url) + .header(reqwest::header::USER_AGENT, "Rust Maven 1.42") + .send() + .expect("request failed"); + + let mut out = File::create(gz_path).expect("failed to create file"); + io::copy(&mut resp, &mut out).expect("failed to copy content"); + + let tar_gz = File::open(gz_path).expect("unable to open file"); + let tar = GzDecoder::new(tar_gz); + let mut archive = Archive::new(tar); + archive + .unpack(path) + .expect("failed to unpack tzdata archive"); + + Ok(()) +} + +/// Sets the path to the TZData directory. +/// +/// Some of the `time` built-ins require access to the IANA tzdata database +/// in order to resolve timezone names. If the TZ data path is not set, these +/// built-ins will not function properly. This function allows you to point the +/// Rego interpreter to the location of the TZ data. Optionally, you can request +/// that the latest TZ data be downloaded to the path you specify. 
+/// +/// # Example +/// ``` +/// use regorust::*; +/// let path = "./tzdata"; +/// set_tzdata_path(path, true).expect("cannot set TZData path"); +/// let rego = Interpreter::new(); +/// match rego.query(r#"x=time.clock([1727267567139080131, "America/Los_Angeles"])"#) { +/// Ok(result) => { +/// let x = result.binding("x").expect("cannot get x"); +/// println!("x = {}", x.json().unwrap()); +/// if let NodeValue::Int(hour) = x +/// .index(0) +/// .unwrap() +/// .value() +/// .unwrap() +/// { +/// println!("hour = {}", hour); +/// # assert_eq!(hour, 5); +/// } +/// +/// if let NodeValue::Int(minute) = x +/// .index(1) +/// .unwrap() +/// .value() +/// .unwrap() +/// { +/// println!("minute = {}", minute); +/// # assert_eq!(minute, 32); +/// } +/// +/// if let NodeValue::Int(second) = x +/// .index(2) +/// .unwrap() +/// .value() +/// .unwrap() +/// { +/// println!("second = {}", second); +/// # assert_eq!(second, 47); +/// } +/// } +/// Err(e) => { +/// panic!("error: {}", e); +/// } +/// } +/// ``` +pub fn set_tzdata_path(path: &str, download: bool) -> Result<(), &'static str> { + if download { + download_tzdata(path)?; + } + + let path_cstr = CString::new(path).unwrap(); + let path_ptr = path_cstr.as_ptr(); + let result: regoEnum = unsafe { regoSetTZDataPath(path_ptr) }; + match result { + REGO_OK => Ok(()), + REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED => { + Err("Error setting TZData path: Manual TZData not supported") + } + _ => Err("Unknown error"), + } +} + impl Interpreter { /// Creates a new Rego interpreter. pub fn new() -> Self { @@ -371,6 +481,13 @@ impl Interpreter { } } + pub fn new_v1() -> Self { + let interpreter_ptr = unsafe { regoNewV1() }; + Self { + c_ptr: interpreter_ptr, + } + } + /// Returns the error message for the last operation. 
fn get_error(&self) -> &str { let c_str = unsafe { regoGetError(self.c_ptr) }; @@ -1004,7 +1121,7 @@ impl Node { NodeKind::String => self .scalar_value() .ok() - .map(|s| NodeValue::String(s[1..s.len() - 1].to_string())), + .map(|s| NodeValue::String(remove_quotes(s))), _ => None, } } @@ -1471,4 +1588,25 @@ mod tests { } } } + + #[test] + fn doublequotes() { + let rego = Interpreter::new(); + match rego.query(r#"x := {"bar": sprintf("%s", ["foo"])}"#) { + Ok(value) => { + let x = value.binding("x").expect("cannot get x"); + if let NodeValue::String(bar) = x + .lookup("bar") + .expect("bar key missing") + .value() + .expect("bar value missing") + { + assert_eq!(bar, "foo"); + } + } + Err(e) => { + panic!("error: {}", e); + } + } + } }