Skip to content

Commit

Permalink
Addressing some issues in the Python and Rust wrappers. (#164)
Browse files Browse the repository at this point in the history
**New Features**
- The Python wrapper now packages the tzdata database inside the wheel to ensure consistent performance across platforms.
- The Rust wrapper now has the ability to download a fresh copy of the tzdata database if needed
- Added a `regoSetTZDataPath` method to the C API and exposed it for the Python and Rust wrappers.
- The `regoNew` C API method now supports the `v1_compatible` flag for interpreter creation
- The library embeds the `windowsZones.xml` mapping file so it can provide it where needed
- The Python wrapper provides a more natural interface for sets and objects
- The CMake system will now look for a `REGOCPP_TZDATA_PATH` environment variable to use for setting the default path

**Bug Fix**
- Fixed a bug where builtins would not be available if an interpreter was re-used
- Fixed a bug with the Rust wrapper where it was aggressively trimming strings

Signed-off-by: Matthew Johnson <matjoh@microsoft.com>
  • Loading branch information
matajoh authored Sep 27, 2024
1 parent 3eb9a7c commit e64553b
Show file tree
Hide file tree
Showing 27 changed files with 424 additions and 58 deletions.
19 changes: 9 additions & 10 deletions .github/workflows/build_wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ jobs:
with:
package-dir: ${{github.workspace}}/wrappers/python/

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.python }}-windows
path: ./wheelhouse/*.whl
build_mac_wheels:
name: Build wheels on mac
Expand All @@ -47,11 +48,6 @@ jobs:
python-version: "3.10 - 3.12"
update-environment: false

- name: Install pipx
run: |
brew install pipx
pipx ensurepath
- name: Git config for fetching pull requests
run: |
git config --global --add remote.origin.fetch +refs/pull/*/merge:refs/remotes/pull/*
Expand All @@ -66,14 +62,15 @@ jobs:
CIBW_BUILD: ${{ matrix.python }}-macosx*
CIBW_ARCHS_MACOS: arm64
CIBW_ENVIRONMENT_MACOS : >
MACOSX_DEPLOYMENT_TARGET=10.15
MACOSX_DEPLOYMENT_TARGET=11.0
REGOCPP_REPO=https://github.com/${{github.repository}}
REGOCPP_TAG=${{github.sha}}
with:
package-dir: ${{github.workspace}}/wrappers/python/

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.python }}-mac
path: ./wheelhouse/*.whl
build_manylinux_wheels:
name: Build wheels on manylinux
Expand All @@ -100,8 +97,9 @@ jobs:
with:
package-dir: ${{github.workspace}}/wrappers/python/

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.python }}-manylinux
path: ./wheelhouse/*.whl

build_musllinux_wheels:
Expand All @@ -128,6 +126,7 @@ jobs:
with:
package-dir: ${{github.workspace}}/wrappers/python/

- uses: actions/upload-artifact@v3
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.python }}-musllinux
path: ./wheelhouse/*.whl
17 changes: 17 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,5 +1,22 @@
# Changelog

## 2024-09-26 - Version 0.4.5
Point release addressing some issues in the Python and Rust wrappers.

**New Features**
- The Python wrapper now packages the tzdata database inside the wheel to ensure consistent performance across platforms.
- The Rust wrapper now has the ability to download a fresh copy of the tzdata database if needed
- Added a `regoSetTZDataPath` method to the C API and exposed it for the Python and Rust wrappers.
- The `regoNew` C API method now supports the `v1_compatible` flag for interpreter creation
- The library embeds the `windowsZones.xml` mapping file so it can provide it where needed
- The Python wrapper provides a more natural interface for sets and objects
- The CMake system will now look for a `REGOCPP_TZDATA_PATH` environment variable to use for setting the default path

**Bug Fix**
- Fixed a bug where builtins would not be available if an interpreter was re-used
- Fixed a bug with the Rust wrapper where it was aggressively trimming strings


## 2024-09-23 - Version 0.4.4
Point release adding the `uuid`, `time`, and `walk` builtins.

Expand Down
28 changes: 25 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,18 @@ if(MSVC)
set(REGOCPP_USE_MANUAL_TZDATA ON)
endif()

set(REGOCPP_TZDATA_PATH "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Path to the tzdata directory")
if(DEFINED ENV{REGOCPP_TZDATA_PATH})
set(REGOCPP_TZDATA_PATH $ENV{REGOCPP_TZDATA_PATH} CACHE STRING "Path to the tzdata directory")
message("REGOCPP_TZDATA_PATH set to ${REGOCPP_TZDATA_PATH}")
else()
set(REGOCPP_TZDATA_PATH "${CMAKE_INSTALL_PREFIX}" CACHE STRING "Path to the tzdata directory")
message("REGOCPP_TZDATA_PATH not set, using ${REGOCPP_TZDATA_PATH}")
endif()

file(TO_CMAKE_PATH ${REGOCPP_TZDATA_PATH} REGOCPP_TZDATA_PATH)

set(REGOCPP_WINDOWS_ZONES_PATH "${CMAKE_CURRENT_SOURCE_DIR}/templates/tzdata/windowsZones.xml")

if(REGOCPP_USE_MANUAL_TZDATA)
set(MANUAL_TZ_DB ON)
set(USE_SYSTEM_TZ_DB OFF)
Expand All @@ -124,13 +133,18 @@ if(REGOCPP_USE_MANUAL_TZDATA)
FILE(DOWNLOAD https://www.iana.org/time-zones/repository/tzdata-latest.tar.gz ${TZDATA_GZ_PATH} SHOW_PROGRESS)
message("Extracting ${TZDATA_GZ_PATH} to ${TZDATA_PATH}")
FILE(ARCHIVE_EXTRACT INPUT ${TZDATA_GZ_PATH} DESTINATION ${TZDATA_PATH})
endif()

if(NOT EXISTS ${REGOCPP_TZDATA_PATH}/tzdata)
# Copy the tzdata directory to the specified path.
message("Copying ${TZDATA_PATH} to ${REGOCPP_TZDATA_PATH}")
file(COPY ${TZDATA_PATH} DESTINATION ${REGOCPP_TZDATA_PATH})
if(MSVC)
endif()

if(MSVC)
if(NOT EXISTS ${REGOCPP_TZDATA_PATH}/tzdata/windowsZones.xml)
message("MSVC: Copying windowsZones.xml to ${REGOCPP_TZDATA_PATH}/tzdata")
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/templates/tzdata/windowsZones.xml DESTINATION ${REGOCPP_TZDATA_PATH}/tzdata)
file(COPY ${REGOCPP_WINDOWS_ZONES_PATH} DESTINATION ${REGOCPP_TZDATA_PATH}/tzdata)
endif()
endif()
else()
Expand All @@ -139,6 +153,14 @@ else()
set(MANUAL_TZ_DB OFF)
endif()

file( READ ${REGOCPP_WINDOWS_ZONES_PATH} REGOCPP_WINDOWS_ZONES_RAW )
string(LENGTH "${REGOCPP_WINDOWS_ZONES_RAW}" REGOCPP_WINDOWS_ZONES_RAW_LENGTH)
math(EXPR REGOCPP_WINDOWS_ZONES_RAW_LENGTH "${REGOCPP_WINDOWS_ZONES_RAW_LENGTH} - 1")
foreach(iter RANGE 0 ${REGOCPP_WINDOWS_ZONES_RAW_LENGTH} 2048)
string(SUBSTRING "${REGOCPP_WINDOWS_ZONES_RAW}" ${iter} 2048 line)
string(APPEND REGOCPP_WINDOWS_ZONES_SRC "${line})xml\",\n\tR\"xml(")
endforeach()

set(BUILD_TZ_LIB ON)
set(COMPILE_WITH_C_LOCALE ON)
FetchContent_MakeAvailable_ExcludeFromAll(date)
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.4
0.4.5
2 changes: 1 addition & 1 deletion examples/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,5 +6,5 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
regorust = "0.4.4"
regorust = "0.4.5"
clap = { version = "4.0", features = ["derive"] }
13 changes: 13 additions & 0 deletions include/rego/rego.hh
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,19 @@ namespace rego
*/
Node set(const Nodes& set_members);

/**
* Sets the location where rego-cpp will look for timezone database
* information.
*
* The timezone database will be interpreted as one obtained from the IANA
* (https://www.iana.org/time-zones) which has been downloaded and unpacked
* into at the path provided. If the library was built without manual tzdata
* support, this function will throw an exception.
*
* @param path The path to the timezone database.
*/
void set_tzdata_path(const std::filesystem::path& path);

/** This constant indicates that a built-in can receive any number of
* arguments. */
const std::size_t AnyArity = std::numeric_limits<std::size_t>::max();
Expand Down
25 changes: 25 additions & 0 deletions include/rego/rego_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ typedef unsigned int regoSize;
#define REGO_ERROR 1
#define REGO_ERROR_BUFFER_TOO_SMALL 2
#define REGO_ERROR_INVALID_LOG_LEVEL 3
#define REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED 4

// term node types
#define REGO_NODE_BINDING 1000
Expand Down Expand Up @@ -93,6 +94,21 @@ extern "C"
*/
regoEnum regoSetLogLevelFromString(const char* level);

/**
* Sets the location where rego-cpp will look for timezone database
* information.
*
* The timezone database will be interpreted as one obtained from the IANA
* (https://www.iana.org/time-zones) which has been downloaded and unpacked
* into at the path provided. If the library was built without manual tzdata
* support, this function will return an error code.
*
* @param path The path to the timezone database.
* @return REGO_OK if successful, REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED
* otherwise.
*/
regoEnum regoSetTZDataPath(const char* path);

/**
* Allocates and initializes a new Rego interpreter.
*
Expand All @@ -102,6 +118,15 @@ extern "C"
*/
regoInterpreter* regoNew(void);

/**
* Allocates and initializes a new V1 Rego interpreter.
*
* The caller is responsible for freeing the interpreter with regoFree.
*
* @return A pointer to the new V1 interpreter.
*/
regoInterpreter* regoNewV1(void);

/**
* Frees a Rego interpreter.
*
Expand Down
1 change: 1 addition & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/version.h.in" "${CMAKE_CURRENT_BINARY_DIR}/version.h" @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/builtins/tzdata.h.in" "${CMAKE_CURRENT_BINARY_DIR}/tzdata.h" @ONLY)
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/windows_zones.h.in" "${CMAKE_CURRENT_BINARY_DIR}/windows_zones.h" @ONLY)

set( SOURCES
bigint.cc
Expand Down
31 changes: 31 additions & 0 deletions src/builtins/time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
#include "re2/stringpiece.h"
#include "rego.hh"
#include "tzdata.h"
#include "windows_zones.h"

#include <chrono>
#include <fstream>
#include <inttypes.h>

namespace
Expand Down Expand Up @@ -739,4 +741,33 @@ namespace rego
BuiltInDef::create(Location("time.weekday"), 1, ::weekday)};
}
}

void set_tzdata_path(const std::filesystem::path& path)
{
#ifdef REGOCPP_USE_MANUAL_TZDATA
date::set_install(path.string());
auto wz_path = path / "windowsZones.xml";
if (!std::filesystem::exists(wz_path))
{
std::ofstream file(wz_path);
if (file.is_open())
{
for (auto& line : WINDOWS_ZONES_SRC)
{
file << line << std::endl;
}
file.close();
}
else
{
throw std::runtime_error(
"Failed to create required windowsZones.xml at tzdata path");
}
}
#else
throw std::runtime_error(
"Cannot set tzdata path to " + path.string() +
" because REGOCPP_USE_MANUAL_TZDATA was not defined");
#endif
}
}
22 changes: 21 additions & 1 deletion src/rego_c.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,33 @@ extern "C"
return REGO_ERROR_INVALID_LOG_LEVEL;
}

regoEnum regoSetTZDataPath(const char* path)
{
try
{
rego::set_tzdata_path(path);
return REGO_OK;
}
catch (const std::exception&)
{
return REGO_ERROR_MANUAL_TZDATA_NOT_SUPPORTED;
}
}

regoInterpreter* regoNew()
{
auto ptr = reinterpret_cast<regoInterpreter*>(new rego::Interpreter());
auto ptr = reinterpret_cast<regoInterpreter*>(new rego::Interpreter(false));
logging::Debug() << "regoNew: " << ptr;
return ptr;
}

regoInterpreter* regoNewV1()
{
auto ptr = reinterpret_cast<regoInterpreter*>(new rego::Interpreter(true));
logging::Debug() << "regoNewV1: " << ptr;
return ptr;
}

void regoFree(regoInterpreter* rego)
{
logging::Debug() << "regoFree: " << rego;
Expand Down
5 changes: 2 additions & 3 deletions src/unify/skip_refs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,10 @@ namespace rego
return 0;
});

auto added_builtins = std::make_shared<std::set<std::string>>();

skip_refs.post(Rego, [skips, builtins, added_builtins](Node node) {
skip_refs.post(Rego, [skips, builtins](Node node) {
std::set<std::string> used_builtins;
find_used_builtins(node, builtins, used_builtins);
auto added_builtins = std::make_shared<std::set<std::string>>();
Node skipseq = node / SkipSeq;
int changes = 0;
for (auto& builtin : used_builtins)
Expand Down
17 changes: 17 additions & 0 deletions src/windows_zones.h.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

#ifndef _REGOCPP_WINDOWS_ZONES_H_
#define _REGOCPP_WINDOWS_ZONES_H_

#include <string>
#include <vector>

namespace rego
{
const std::vector<std::string> WINDOWS_ZONES_SRC = {
R"xml(@REGOCPP_WINDOWS_ZONES_SRC@)xml"
};
} // namespace rego

#endif
2 changes: 2 additions & 0 deletions wrappers/python/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/
tzdata
debug
1 change: 1 addition & 0 deletions wrappers/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ FetchContent_Declare(
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
set(SNMALLOC_ENABLE_DYNAMIC_LOADING ON)
set(REGOCPP_USE_CXX17 ON)
set(REGOCPP_USE_MANUAL_TZDATA ON)

FetchContent_MakeAvailable(regocpp)
FetchContent_MakeAvailable(pybind11)
Expand Down
1 change: 1 addition & 0 deletions wrappers/python/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
graft src
2 changes: 1 addition & 1 deletion wrappers/python/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
project = 'regopy'
copyright = '2024, Microsoft'
author = 'Microsoft'
release = '0.4.4'
release = '0.4.5'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
7 changes: 6 additions & 1 deletion wrappers/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def build_extension(self, ext: CMakeExtension):
cfg = "Debug" if self.debug else "Release"
extdir = os.path.abspath(os.path.dirname(
self.get_ext_fullpath(ext.name)))

cmake_args = ["-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=" + extdir,
"-DCMAKE_BUILD_TYPE=" + cfg,
"-DPYTHON_EXECUTABLE=" + sys.executable]
Expand All @@ -83,8 +84,11 @@ def build_extension(self, ext: CMakeExtension):
else:
cmake_args += ["-A", "Win32"]

env = os.environ.copy()
env["REGOCPP_TZDATA_PATH"] = extdir

subprocess.check_call(["cmake", ext.source_dir] +
cmake_args, cwd=self.build_temp)
cmake_args, cwd=self.build_temp, env=env)
subprocess.check_call(["cmake", "--build", "."] +
build_args, cwd=self.build_temp)

Expand All @@ -100,6 +104,7 @@ def build_extension(self, ext: CMakeExtension):
long_description_content_type="text/markdown",
packages=find_packages("src"),
package_dir={"": "src"},
include_package_data=True,
python_requires=">=3.6, <4",
ext_modules=[CMakeExtension("regopy._regopy")],
classifiers=[
Expand Down
Loading

0 comments on commit e64553b

Please sign in to comment.