-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit c2ebdbf
Showing
38 changed files
with
1,209 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
name: CI | ||
|
||
on: | ||
pull_request: | ||
push: | ||
branches: | ||
- main | ||
|
||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.ref }} | ||
cancel-in-progress: true | ||
|
||
env: | ||
SCCACHE_GHA_ENABLED: "true" | ||
RUSTC_WRAPPER: "sccache" | ||
|
||
jobs: | ||
test: | ||
runs-on: ${{ matrix.os }} | ||
strategy: | ||
fail-fast: false | ||
matrix: | ||
os: [ubuntu-latest, macos-latest] | ||
steps: | ||
- uses: actions/checkout@v3 | ||
|
||
- uses: actions/setup-python@v4 | ||
with: | ||
python-version: '3.11' | ||
|
||
- name: Set up Rust | ||
run: rustup show | ||
|
||
- uses: mozilla-actions/sccache-action@v0.0.3 | ||
|
||
- run: cargo test | ||
working-directory: pyo3-polars | ||
|
||
- run: make install | ||
working-directory: example/extend_polars_python_dispatch | ||
|
||
- run: venv/bin/python run.py | ||
working-directory: example/extend_polars_python_dispatch |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
/pyo3-polars/target | ||
Cargo.lock | ||
.idea/ | ||
venv/ | ||
target/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[workspace] | ||
resolver = "2" | ||
members = [ | ||
"example/derive_expression/expression_lib", | ||
"example/extend_polars_python_dispatch/extend_polars", | ||
"pyo3-polars", | ||
"pyo3-polars-derive", | ||
] | ||
|
||
[workspace.dependencies] | ||
polars = { version = "0.33.2", default-features = false } | ||
polars-core = { version = "0.33.2", default-features = false } | ||
polars-ffi = { version = "0.33.2", default-features = false } | ||
polars-plan = { version = "0.33.2", default-features = false } | ||
polars-lazy = { version = "0.33.2", default-features = false } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
## 1. Shared library plugins for Polars | ||
|
||
This is new functionality and not entirely stable, but it should be preferred over the PyO3 extensions described in section `2.`, as it | ||
circumvents the GIL and is the way we want to support extending polars. | ||
|
||
Parallelism and optimizations are managed by the default polars runtime. That runtime will call into the plugin function. | ||
The plugin functions are compiled separately. | ||
|
||
We can therefore keep polars more lean and maybe add support for a `polars-distance`, `polars-geo`, `polars-ml`, etc. Those can then have specialized expressions and don't have to worry as much about code bloat, as they can be optionally installed. | ||
|
||
The idea is that you define an expression in another Rust crate with a proc_macro `polars_expr`. | ||
|
||
That macro can have the following attributes: | ||
|
||
- `output_type` -> to define the output type of that expression | ||
- `type_func` -> to define a function that computes the output type based on input types. | ||
|
||
Here is an example of a `String` conversion expression that converts any string to [pig latin](https://en.wikipedia.org/wiki/Pig_Latin): | ||
|
||
```rust | ||
/// Appends the pig latin form of `value` to `output`: everything after the
/// first character, followed by the first character and the suffix `"ay"`.
///
/// An empty `value` appends nothing.
fn pig_latin_str(value: &str, output: &mut String) {
    if let Some(first_char) = value.chars().next() {
        // Slice at the first character's UTF-8 width instead of at byte 1, so a
        // multi-byte leading character does not land inside a char boundary
        // and panic.
        let rest = &value[first_char.len_utf8()..];
        write!(output, "{}{}ay", rest, first_char).unwrap()
    }
}
|
||
#[polars_expr(output_type=Utf8)] | ||
fn pig_latinnify(inputs: &[Series]) -> PolarsResult<Series> { | ||
let ca = inputs[0].utf8()?; | ||
let out: Utf8Chunked = ca.apply_to_buffer(pig_latin_str); | ||
Ok(out.into_series()) | ||
} | ||
``` | ||
|
||
On the python side this expression can then be registered under a namespace: | ||
|
||
```python | ||
import polars as pl | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
|
||
@pl.api.register_expr_namespace("language") | ||
class Language: | ||
def __init__(self, expr: pl.Expr): | ||
self._expr = expr | ||
|
||
def pig_latinnify(self) -> pl.Expr: | ||
return self._expr._register_plugin( | ||
lib=lib, | ||
symbol="pig_latinnify", | ||
is_elementwise=True, | ||
) | ||
``` | ||
|
||
Compile/ship and then it is ready to use: | ||
|
||
```python | ||
import polars as pl | ||
from expression_lib import Language | ||
|
||
df = pl.DataFrame({ | ||
"names": ["Richard", "Alice", "Bob"], | ||
}) | ||
|
||
|
||
out = df.with_columns( | ||
pig_latin = pl.col("names").language.pig_latinnify() | ||
) | ||
``` | ||
|
||
See the full example in [example/derive_expression](https://github.com/pola-rs/pyo3-polars/tree/plugin/example/derive_expression). | ||
|
||
## 2. Pyo3 extensions for Polars | ||
|
||
<a href="https://crates.io/crates/pyo3-polars"> | ||
<img src="https://img.shields.io/crates/v/pyo3-polars.svg"/> | ||
</a> | ||
|
||
See the `example` directory for a concrete example. Here we send a polars `DataFrame` to rust and then compute a | ||
`jaccard similarity` in parallel using `rayon` and rust hash sets. | ||
|
||
## Run example | ||
|
||
`$ cd example && make install` | ||
`$ venv/bin/python run.py` | ||
|
||
This will output: | ||
|
||
``` | ||
shape: (2, 2) | ||
┌───────────┬───────────────┐ | ||
│ list_a ┆ list_b │ | ||
│ --- ┆ --- │ | ||
│ list[i64] ┆ list[i64] │ | ||
╞═══════════╪═══════════════╡ | ||
│ [1, 2, 3] ┆ [1, 2, ... 8] │ | ||
│ [5, 5] ┆ [5, 1, 1] │ | ||
└───────────┴───────────────┘ | ||
shape: (2, 1) | ||
┌─────────┐ | ||
│ jaccard │ | ||
│ --- │ | ||
│ f64 │ | ||
╞═════════╡ | ||
│ 0.75 │ | ||
│ 0.5 │ | ||
└─────────┘ | ||
``` | ||
|
||
## Compile for release | ||
|
||
`$ make install-release` | ||
|
||
# What to expect | ||
|
||
This crate offers a `PySeries` and a `PyDataFrame` which are simple wrappers around `Series` and `DataFrame`. The | ||
advantage of these wrappers is that they can be converted to and from python as they implement `FromPyObject` and `IntoPy`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"json": { | ||
}, | ||
"markdown": { | ||
}, | ||
"toml": { | ||
}, | ||
"includes": ["**/*.{json,md,toml}"], | ||
"excludes": [ | ||
"**/*-lock.json" | ||
], | ||
"plugins": [ | ||
"https://plugins.dprint.dev/json-0.17.4.wasm", | ||
"https://plugins.dprint.dev/markdown-0.16.1.wasm", | ||
"https://plugins.dprint.dev/toml-0.5.4.wasm" | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
SHELL=/bin/bash | ||
|
||
venv: ## Set up virtual environment | ||
python3 -m venv venv | ||
venv/bin/pip install -r requirements.txt | ||
|
||
install: venv | ||
unset CONDA_PREFIX && \ | ||
source venv/bin/activate && maturin develop -m expression_lib/Cargo.toml | ||
|
||
install-release: venv | ||
unset CONDA_PREFIX && \ | ||
source venv/bin/activate && maturin develop --release -m expression_lib/Cargo.toml | ||
|
||
clean: | ||
-@rm -r venv | ||
-@cd expression_lib && cargo clean | ||
|
||
|
||
run: install | ||
source venv/bin/activate && python run.py | ||
|
||
run-release: install-release | ||
source venv/bin/activate && python run.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/target | ||
|
||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
.pytest_cache/ | ||
*.py[cod] | ||
|
||
# C extensions | ||
*.so | ||
|
||
# Distribution / packaging | ||
.Python | ||
.venv/ | ||
env/ | ||
bin/ | ||
build/ | ||
develop-eggs/ | ||
dist/ | ||
eggs/ | ||
lib/ | ||
lib64/ | ||
parts/ | ||
sdist/ | ||
var/ | ||
include/ | ||
man/ | ||
venv/ | ||
*.egg-info/ | ||
.installed.cfg | ||
*.egg | ||
|
||
# Installer logs | ||
pip-log.txt | ||
pip-delete-this-directory.txt | ||
pip-selfcheck.json | ||
|
||
# Unit test / coverage reports | ||
htmlcov/ | ||
.tox/ | ||
.coverage | ||
.cache | ||
nosetests.xml | ||
coverage.xml | ||
|
||
# Translations | ||
*.mo | ||
|
||
# Mr Developer | ||
.mr.developer.cfg | ||
.project | ||
.pydevproject | ||
|
||
# Rope | ||
.ropeproject | ||
|
||
# Django stuff: | ||
*.log | ||
*.pot | ||
|
||
.DS_Store | ||
|
||
# Sphinx documentation | ||
docs/_build/ | ||
|
||
# PyCharm | ||
.idea/ | ||
|
||
# VSCode | ||
.vscode/ | ||
|
||
# Pyenv | ||
.python-version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
[package] | ||
name = "expression_lib" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
[lib] | ||
name = "expression_lib" | ||
crate-type = ["cdylib"] | ||
|
||
[dependencies] | ||
polars = { workspace = true, features = ["fmt"], default-features = false } | ||
polars-plan = { workspace = true, default-features = false } | ||
pyo3 = { version = "0.19.0", features = ["extension-module"] } | ||
pyo3-polars = { version = "*", path = "../../../pyo3-polars", features = ["derive"] } |
18 changes: 18 additions & 0 deletions
18
example/derive_expression/expression_lib/expression_lib/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import polars as pl | ||
from polars.type_aliases import IntoExpr | ||
from polars.utils.udfs import _get_shared_lib_location | ||
|
||
lib = _get_shared_lib_location(__file__) | ||
|
||
@pl.api.register_expr_namespace("accumulate")
class Accumulate:
    """Expression namespace registered on polars expressions as ``accumulate``."""

    def __init__(self, expr: pl.Expr):
        # Keep the expression this namespace was accessed on; it becomes the
        # receiver of the plugin call.
        self._expr = expr

    def min_time(self, other: IntoExpr) -> pl.Expr:
        """Return an expression that invokes the ``min_time`` plugin symbol.

        ``other`` is forwarded as the single extra argument of the plugin, and
        the plugin is registered as elementwise.
        """
        register = self._expr._register_plugin
        return register(
            symbol="min_time",
            lib=lib,
            args=[other],
            is_elementwise=True,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[build-system] | ||
requires = ["maturin>=1.0,<2.0"] | ||
build-backend = "maturin" | ||
|
||
[project] | ||
name = "expression_lib" | ||
requires-python = ">=3.8" | ||
classifiers = [ | ||
"Programming Language :: Rust", | ||
"Programming Language :: Python :: Implementation :: CPython", | ||
"Programming Language :: Python :: Implementation :: PyPy", | ||
] |
24 changes: 24 additions & 0 deletions
24
example/derive_expression/expression_lib/src/expressions.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
use polars::prelude::*; | ||
use pyo3_polars::derive::polars_expr; | ||
|
||
#[polars_expr(output_type=Int64)] | ||
fn min_time(inputs: &[Series]) -> PolarsResult<Series> { | ||
let time = inputs[0].i64()?; | ||
let minutes = inputs[1].i64()?; | ||
|
||
let initial_value = time.get(0).unwrap() + minutes.get(0).unwrap(); | ||
|
||
let out = time | ||
.into_iter() | ||
.zip(minutes.into_iter()) | ||
.scan(initial_value, |state, (time, minutes)| { | ||
let time = time?; | ||
let minutes = minutes?; | ||
if *state > time { | ||
*state = time + minutes | ||
} | ||
Some(*state) | ||
}) | ||
.collect(); | ||
Ok(out) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
mod expressions; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
maturin | ||
polars[pyarrow] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import polars as pl | ||
from expression_lib import Accumulate | ||
|
||
# Small sample frame: one time column and one minutes column.
df = pl.DataFrame(
    {
        "Time": [5, 3, 4, 1, 2],
        "Minutes": [2, 1, 2, 1, 3],
    }
)

# Build the plugin expression through the registered `accumulate` namespace,
# attach it as the `cum_min_time` column and print the resulting frame.
cum_min_time = pl.col("Time").accumulate.min_time("Minutes")
result = df.with_columns(cum_min_time=cum_min_time)
print(result)
Oops, something went wrong.