Skip to content

Commit

Permalink
🔖 0.7.0 (#97)
Browse files Browse the repository at this point in the history
* Support modin as backend (#96)

* ✨ Support `modin` as backend

* 🩹 Use modin.pandas.read_csv to read datasets if backend is modin

* 📌 Update poetry.lock

* 👷 Make show versions to include modin version when backend is modin

* ✨ Add `_return` argument for `datar.options()`

* πŸ“ Add doc about backends

* πŸ› Fix `tidyr.expand()` when `nesting(f.name)` as argument

* 🔖 0.7.0
  • Loading branch information
pwwang authored Mar 24, 2022
1 parent 7357907 commit a03f1c0
Show file tree
Hide file tree
Showing 161 changed files with 704 additions and 410 deletions.
48 changes: 44 additions & 4 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,10 @@ jobs:
python -m pip install flake8
python -m pip install poetry
poetry config virtualenvs.create false
poetry install -v
poetry install -E pdtypes -v
pip install wcwidth
pip install scipy
pip install python-slugify
pip install pdtypes
# reinstall pandas to specific version
pip install -U $PANDAS
env:
Expand All @@ -62,13 +61,54 @@ jobs:
if: ${{ always() }}
- name: Run codacy-coverage-reporter
uses: codacy/codacy-coverage-reporter-action@master
if: matrix.python-version == 3.8 && matrix.pandas == 'pandas'
if: matrix.python-version == 3.9 && matrix.pandas == 'pandas'
with:
project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}
coverage-reports: cov.xml

build-modin:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
# can't build numpy for 3.10 in CI
# python-version: [3.7, 3.8, 3.9, "3.10"]
python-version: [3.7, 3.8, 3.9]
modin: [
modin==0.10.2,
modin # latest
]
exclude:
- python-version: 3.7
modin: modin==0.10.2

steps:
- uses: actions/checkout@v2
- name: Setup Python # Set Python version
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8
python -m pip install poetry
poetry config virtualenvs.create false
poetry install -E modin -E pdtypes -v
pip install wcwidth
pip install scipy
pip install python-slugify
# reinstall modin to specific version
pip install -U $MODIN
env:
MODIN: ${{ matrix.modin }}
- name: Show versions
run: python -c 'import datar; datar.options(backend="modin"); datar.get_versions()'
- name: Test with pytest
run: pytest tests/ --backend modin --junitxml=junit/test-results-${{ matrix.python-version }}.xml

deploy:
needs: build
needs: [build, build-modin]
runs-on: ubuntu-latest
if: github.event_name == 'release'
strategy:
Expand Down
13 changes: 10 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,16 @@ repos:
if [[ $v1 == $v2 ]]; then exit 0; else exit 1; fi
pass_filenames: false
files: ^pyproject\.toml|datar/__init__\.py$
- id: pytest
name: Run pytest
entry: pytest
- id: pytest-pandas
name: Run pytest with pandas backend
entry: pytest --backend pandas
language: system
args: [tests/]
pass_filenames: false
files: ^tests/.+$|^datar/.+$
- id: pytest-modin
name: Run pytest with modin backend
entry: pytest --backend modin
language: system
args: [tests/]
pass_filenames: false
Expand Down
13 changes: 8 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,14 @@ A Grammar of Data Manipulation in python

```shell
pip install -U datar
```
or
```shell
conda install -c conda-forge datar
# mamba install -c conda-forge datar

# install pdtypes support
pip install -U datar[pdtypes]

# install dependencies for modin as backend
pip install -U datar[modin]
# you may also need to install dependencies for modin engines
# pip install -U modin[ray]
```

## Example usage
Expand Down
38 changes: 15 additions & 23 deletions datar/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
# register operator
from collections import namedtuple as _namedtuple

from .core import operator as _
from .core import f, options_context, options, add_option, get_option, logger

Expand All @@ -16,24 +13,11 @@

options(enable_pdtypes=True)

_VersionsTuple = _namedtuple(
"_VersionsTuple",
[
"python",
"datar",
"numpy",
"pandas",
"pipda",
"executing",
"varname",
],
)

__all__ = ("f", "get_versions")
__version__ = "0.6.4"
__version__ = "0.7.0"


def get_versions(prnt: bool = True) -> _VersionsTuple:
def get_versions(prnt: bool = True):
"""Print or return related versions which help for bug reporting.
Args:
Expand All @@ -45,25 +29,33 @@ def get_versions(prnt: bool = True) -> _VersionsTuple:
import sys

import numpy
import pandas
import pipda
import executing
import varname
from diot import Diot

out = _VersionsTuple(
out = Diot(
python=sys.version,
datar=__version__,
numpy=numpy.__version__,
pandas=pandas.__version__,
pipda=pipda.__version__,
executing=executing.__version__,
varname=varname.__version__,
)

backend = get_option("backend")
if backend == "pandas":
import pandas
out["pandas"] = pandas.__version__
elif backend == "modin": # pragma: no cover
import modin
out["modin"] = modin.__version__

if not prnt:
return out

keylen = max(map(len, out._fields))
for key in out._fields:
keylen = max(map(len, out))
for key in out:
ver = getattr(out, key)
verlines = ver.splitlines()
print(f"{key.ljust(keylen)}: {verlines.pop(0)}")
Expand Down
8 changes: 4 additions & 4 deletions datar/base/arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
from typing import TYPE_CHECKING, Union

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from pandas.api.types import is_scalar
from pandas.core.groupby import SeriesGroupBy, GroupBy
from ..core.backends import pandas as pd
from ..core.backends.pandas import DataFrame, Series
from ..core.backends.pandas.api.types import is_scalar
from ..core.backends.pandas.core.groupby import SeriesGroupBy, GroupBy

from ..core.factory import func_factory, verb_factory
from ..core.tibble import Tibble, TibbleGrouped
Expand Down
8 changes: 5 additions & 3 deletions datar/base/casting.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Cast values between types"""
import numpy as np
import pandas as pd
from pandas.api.types import is_scalar, is_categorical_dtype
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func

from ..core.backends import pandas as pd

from ..core.backends.pandas.api.types import is_scalar, is_categorical_dtype
from ..core.backends.pandas.core.groupby import SeriesGroupBy

from ..core.contexts import Context

from .factor import _ensure_categorical
Expand Down
3 changes: 2 additions & 1 deletion datar/base/complex.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Functions related to complex numbers"""
import numpy as np
from pandas.api.types import is_complex_dtype
from pipda import register_func

from ..core.backends.pandas.api.types import is_complex_dtype

from ..core.contexts import Context
from ..core.factory import func_factory

Expand Down
4 changes: 2 additions & 2 deletions datar/base/cum.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
"""Cumulative functions"""
import numpy as np
from pandas.core.groupby import GroupBy

from datar.core.tibble import TibbleGrouped
from ..core.backends.pandas.core.groupby import GroupBy

from ..core.tibble import TibbleGrouped
from ..core.factory import func_factory
from .arithmetic import SINGLE_ARG_SIGNATURE

Expand Down
5 changes: 2 additions & 3 deletions datar/base/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import functools

import numpy as np
import pandas as pd
# from pandas import Series
from pandas.api.types import is_scalar, is_integer

from ..core.backends import pandas as pd
from ..core.backends.pandas.api.types import is_scalar, is_integer
from ..core.factory import func_factory


Expand Down
5 changes: 3 additions & 2 deletions datar/base/factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
R's factors support NAs in levels but Categorical cannot have NAs in categories.
"""
import numpy as np
from pandas import Categorical, Series
from pandas.api.types import is_categorical_dtype, is_scalar
from pipda import register_func

from ..core.backends.pandas import Categorical, Series
from ..core.backends.pandas.api.types import is_categorical_dtype, is_scalar

from ..core.contexts import Context


Expand Down
7 changes: 4 additions & 3 deletions datar/base/funs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
import itertools

import numpy as np
import pandas as pd
from pandas.api.types import is_scalar
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func

from ..core.backends import pandas as pd
from ..core.backends.pandas.api.types import is_scalar
from ..core.backends.pandas.core.groupby import SeriesGroupBy

from ..core.middlewares import WithDataEnv
from ..core.contexts import Context
from ..core.factory import func_factory
Expand Down
2 changes: 1 addition & 1 deletion datar/base/logical.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
"""Logical/Boolean functions"""
import numpy as np
from pandas.api.types import is_scalar, is_bool_dtype
from pipda import register_func

from ..core.backends.pandas.api.types import is_scalar, is_bool_dtype
from ..core.contexts import Context

from .testing import _register_type_testing
Expand Down
2 changes: 1 addition & 1 deletion datar/base/na.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""NA related constants or functions"""
import numpy as np
import pandas as pd

from ..core.backends import pandas as pd
from ..core.defaults import NA_REPR
from ..core.factory import func_factory

Expand Down
9 changes: 5 additions & 4 deletions datar/base/rep.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from functools import singledispatch

import numpy as np
import pandas as pd
from pandas import DataFrame, Series, Categorical
from pandas.api.types import is_scalar, is_integer
from pandas.core.groupby import SeriesGroupBy
from pipda import register_func

from ..core.backends import pandas as pd
from ..core.backends.pandas import DataFrame, Series, Categorical
from ..core.backends.pandas.api.types import is_scalar, is_integer
from ..core.backends.pandas.core.groupby import SeriesGroupBy

from ..core.contexts import Context
from ..core.tibble import TibbleGrouped, reconstruct_tibble
from ..core.utils import ensure_nparray, logger
Expand Down
7 changes: 4 additions & 3 deletions datar/base/seq.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import numpy as np
from pandas import DataFrame, Series
from pandas.api.types import is_scalar
from pandas.core.groupby import SeriesGroupBy, GroupBy
from pipda import register_func

from ..core.backends.pandas import DataFrame, Series
from ..core.backends.pandas.api.types import is_scalar
from ..core.backends.pandas.core.groupby import SeriesGroupBy, GroupBy

from ..core.utils import logger, regcall
from ..core.factory import func_factory
from ..core.contexts import Context
Expand Down
3 changes: 2 additions & 1 deletion datar/base/special.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from pipda import register_func

import numpy as np
from pandas.api.types import is_scalar

from ..core.backends.pandas.api.types import is_scalar
from ..core.contexts import Context
from .bessel import _get_special_func_from_scipy

Expand Down
6 changes: 3 additions & 3 deletions datar/base/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
from functools import singledispatch

import numpy as np
from pandas import DataFrame, Series
from pandas.api.types import is_scalar

from datar.core.tibble import TibbleGrouped, TibbleRowwise
from ..core.backends.pandas import DataFrame, Series
from ..core.backends.pandas.api.types import is_scalar

from ..core.tibble import TibbleGrouped, TibbleRowwise
from ..core.factory import func_factory


Expand Down
10 changes: 5 additions & 5 deletions datar/base/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
import re

import numpy as np
import pandas as pd
from pandas import Series
from pandas.core.base import PandasObject
from pandas.core.groupby import SeriesGroupBy
from pandas.api.types import is_string_dtype, is_scalar
from pipda import register_func

from ..core.backends import pandas as pd
from ..core.backends.pandas import Series
from ..core.backends.pandas.core.base import PandasObject
from ..core.backends.pandas.core.groupby import SeriesGroupBy
from ..core.backends.pandas.api.types import is_string_dtype, is_scalar

from ..core.tibble import TibbleGrouped, TibbleRowwise
from ..core.contexts import Context
Expand Down
7 changes: 4 additions & 3 deletions datar/base/table.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""Port `table` function from r-base"""
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from pandas.api.types import is_scalar, is_categorical_dtype
from pipda import register_func

from ..core.backends import pandas as pd
from ..core.backends.pandas import DataFrame, Series
from ..core.backends.pandas.api.types import is_scalar, is_categorical_dtype

from ..core.contexts import Context
from ..core.utils import ensure_nparray, regcall
from ..core.defaults import NA_REPR
Expand Down
Loading

0 comments on commit a03f1c0

Please sign in to comment.