Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more docstrings #73

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Top Level Functions

read_parquet
read_json
read_avro
get_parquet_schema
get_json_schema
get_avro_schema

Accessor
~~~~~~~~
Expand Down
22 changes: 14 additions & 8 deletions src/akimbo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
from __future__ import annotations

from awkward import ( # re-export
behavior,
metadata_from_parquet,
mixin_class,
mixin_class_method,
)
from awkward import behavior
from awkward import metadata_from_parquet as get_parquet_schema # re-export
from awkward import mixin_class, mixin_class_method

import akimbo.datetimes as datetimes
import akimbo.mixin as mixin
import akimbo.strings as strings
from akimbo.io import join, read_avro, read_json, read_parquet
from akimbo.io import (
get_avro_schema,
get_json_schema,
join,
read_avro,
read_json,
read_parquet,
)
from akimbo.version import version as __version__ # noqa

__all__ = (
Expand All @@ -23,6 +27,8 @@
"behavior",
"mixin_class",
"mixin_class_method",
"metadata_from_parquet",
"get_parquet_schema",
"get_json_schema",
"get_avro_schema",
"strings",
)
32 changes: 31 additions & 1 deletion src/akimbo/mixin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import functools
import operator
from typing import Callable, Iterable
Expand Down Expand Up @@ -250,7 +252,35 @@ def unmerge(self):
out = {k: self.to_output(arr[k]) for k in arr.fields}
return self.dataframe_type(out)

def join(self, other, key: str, colname="match", sort=False, rkey=None):
def join(
self,
other,
key: str,
colname: str = "match",
sort: bool = False,
rkey: str | None = None,
numba: bool = True,
):
"""DB ORM-style left join to other dataframe/series with nesting but no copy

Related records of the ``other`` table will appear as a list under the new field
``colname`` for all matching keys. This is the speed and memory efficient way
of doing a pandas-style merge/join, which explodes out the values to a much
bigger memory footprint.

Parameters
----------
other: series or table
key: name of the field in this table to match on
colname: the field that will be added to each record. This field will exist even
if there are no matches, in which case the list will be empty.
sort: if False, assumes that the key is sorted in both tables. If True, an
argsort is performed first, and the match is done by indexing. This may be
significantly slower.
rkey: the name of the field to match on in the ``other`` table, if it is different from ``key``.
numba: the matching algorithm will go much faster using numba. However, you can
set this to False if you do not have numba installed.
"""
from akimbo.io import join

out = join(
Expand Down
2 changes: 1 addition & 1 deletion src/akimbo/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
@pd.api.extensions.register_series_accessor("ak")
@pd.api.extensions.register_dataframe_accessor("ak")
class PandasAwkwardAccessor(Accessor):
"""Perhaps awkward operations on pandas data
"""Perform awkward operations on pandas data

Nested structures are handled using arrow as the
storage backend. If you use pandas object columns
Expand Down
2 changes: 1 addition & 1 deletion tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def test_read_parquet(m): # noqa (m is a fixture)
df.to_parquet(fn)

out = akimbo.read_parquet(fn)
meta = akimbo.metadata_from_parquet(fn)
meta = akimbo.get_parquet_schema(fn)
assert meta["columns"] == ["a.list.element"] # parquet column naming convention
assert out.columns == ["a"]
assert out.a.to_list() == data
Expand Down
Loading