Skip to content

Commit

Permalink
0.2.3 (#21)
Browse files Browse the repository at this point in the history
* Update logo

* Add favicon for docs

* Fix compatibility with pandas v1.2.0~4

* Fix ci configuration

* Fix ci configuration

* Fix linting

* Fix linting

* Remove pandas 1.2.1~4 from CI

* Fix base.table when inputs are factors and exclude is NA; Add base.scale/col_sums/row_sums/col_means/row_means/col_sds/row_sds/col_medians/row_medians

* Fix linting
  • Loading branch information
pwwang authored Jun 29, 2021
1 parent 50daefb commit 72ab957
Show file tree
Hide file tree
Showing 28 changed files with 449 additions and 66 deletions.
28 changes: 21 additions & 7 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,17 @@ jobs:
build:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.7, 3.8, 3.9]
pandas: [
pandas==1.2.0,
# pandas==1.2.1,
# pandas==1.2.2,
# pandas==1.2.3,
# pandas==1.2.4,
pandas # latest 1.2.5
]

steps:
- uses: actions/checkout@v2
Expand All @@ -19,15 +28,20 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install pylint
python -m pip install pylint=='2.8.*'
python -m pip install poetry
poetry config virtualenvs.create false
poetry install -v
poetry run pip install scipy
poetry run pip install wcwidth
# - name: Run pylint
# run: pylint datar
pip install wcwidth
pip install scipy
# reinstall pandas to specific version
pip install $PANDAS
env:
PANDAS: ${{ matrix.pandas }}
- name: Run pylint
run: pylint datar
- name: Test with pytest
run: poetry run pytest tests/ --junitxml=junit/test-results-${{ matrix.python-version }}.xml
run: pytest tests/ --junitxml=junit/test-results-${{ matrix.python-version }}.xml
- name: Upload pytest test results
uses: actions/upload-artifact@v2
with:
Expand All @@ -45,7 +59,7 @@ jobs:
deploy:
needs: build
runs-on: ubuntu-latest
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
if: github.event_name == 'release'
strategy:
matrix:
python-version: [3.8]
Expand Down
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ disable=print-statement,
not-callable,
unsubscriptable-object,
unused-arguments,
fixme
fixme,
consider-using-dict-items

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
2 changes: 1 addition & 1 deletion datar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .core import frame_format_patch as _
from .core.defaults import f

__version__ = '0.2.2'
__version__ = '0.2.3'
9 changes: 9 additions & 0 deletions datar/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@
pmin,
sqrt,
var,
scale,
col_sums,
row_sums,
col_means,
row_means,
col_sds,
row_sds,
col_medians,
row_medians,
min as min_,
max as max_,
round as round_,
Expand Down
277 changes: 268 additions & 9 deletions datar/base/arithmetic.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
"""Arithmetic or math functions"""

from multiprocessing.dummy import Array
from typing import Any, Callable, Iterable, Optional
from typing import Any, Callable, Iterable, Optional, Union

import numpy
from pandas import DataFrame, Series
from pipda import register_func, register_verb

from ..core.contexts import Context
from ..core.types import NumericOrIter, NumericType, is_not_null
from ..core.types import NumericOrIter, NumericType, is_not_null, is_scalar
from ..core.utils import Array, register_numpy_func_x, recycle_value, length_of
from ..core.collections import Collection

# TODO: docstring
# weighted_mean, sd, cor?, range, quantile, summary, iqr
# cor?, range, summary, iqr

def _register_arithmetic_agg(
name: str,
Expand All @@ -22,11 +20,15 @@ def _register_arithmetic_agg(
) -> Callable:
"""Register an arithmetic function"""
@register_func(None, context=Context.EVAL)
def _arithmetric(x: Iterable, na_rm: bool = False):
def _arithmetric(x: Iterable, na_rm: bool = False) -> Iterable:
"""Arithmetric function"""
if na_rm:
x = Array(x)[is_not_null(x)]
return getattr(numpy, np_name)(x)
# na_rm not working for numpy functions
# with x is a Series object
if isinstance(x, Series):
return getattr(x, np_name)(skipna=na_rm)

fun_name = f"nan{np_name}" if na_rm else np_name
return getattr(numpy, fun_name)(x)

_arithmetric.__name__ = name
_arithmetric.__doc__ = doc
Expand Down Expand Up @@ -232,3 +234,260 @@ def _(x: Iterable, y: Iterable, ddof: int = 1) -> DataFrame:
"""Compute covariance for two iterables"""
# ddof: numpy v1.5+
return numpy.cov(x, y, ddof=ddof)[0][1]

@register_verb(DataFrame, context=Context.EVAL)
def scale(
        x: DataFrame,
        center: Union[bool, Iterable[NumericType]] = True,
        # pylint: disable=redefined-outer-name
        scale: Union[bool, Iterable[NumericType]] = True
) -> DataFrame:
    """Scaling and Centering of a numeric data frame

    See Details in `?scale` in `R`

    Args:
        x: The numeric data frame to scale
        center: either a logical value or numeric-alike vector of length
            equal to the number of columns of `x`.
            `True` centers each column on its mean (`col_means(x)`);
            `False` skips centering.
        scale: either a logical value or a numeric-alike vector of length
            equal to the number of columns of `x`.
            `True` divides each column by `col_sds(x)` when `center` was
            `True`, otherwise by the column's root-mean-square computed
            from its non-null values; `False` skips scaling.

    Returns:
        The centered, scaled data frame. The centers and scales that were
        applied are stored in its `attrs` under the keys `scaled:center`
        and `scaled:scale`.

    Raises:
        ValueError: If `center` or `scale` is given as values whose length
            does not equal the number of columns of `x`.
    """
    # center
    ncols = x.shape[1]
    # Remember how `center` was requested before it gets replaced by the
    # actual centering values; the default scaling depends on it.
    center_is_true = center is True
    out_attrs = {}

    if center is True:
        center = col_means(x)

    elif center is not False:
        if is_scalar(center):
            center = [center]
        if len(center) != ncols:
            raise ValueError(
                f"length of `center` ({len(center)}) must equal "
                f"the number of columns of `x` ({ncols})"
            )

    if center is not False:
        x = x - center
        out_attrs['scaled:center'] = Array(center)

    # scale
    if scale is True:
        def _rms(col: Series) -> Series:
            # root-mean-square over the non-null values,
            # using n - 1 as the denominator
            nonnas = col[is_not_null(col)] ** 2
            return sqrt(nonnas.sum() / (len(nonnas) - 1))

        # x has already been centered at this point (if requested)
        scale = col_sds(x) if center_is_true else x.agg(_rms)

    elif scale is not False:
        if is_scalar(scale):
            scale = [scale]
        if len(scale) != ncols:
            # Report the length of `scale` itself; using `center` here
            # would show the wrong number, or fail with a TypeError when
            # `center` is False.
            raise ValueError(
                f"length of `scale` ({len(scale)}) must equal "
                f"the number of columns of `x` ({ncols})"
            )

    if scale is not False:
        x = x / scale
        out_attrs['scaled:scale'] = Array(scale)

    if center is False and scale is False:
        # nothing was computed, so copy to avoid touching the input's attrs
        x = x.copy()

    x.attrs.update(out_attrs)
    return x

# Keep a module-level alias of the verb so that it can still be called
# from inside functions that also take an argument named `scale`,
# which shadows the verb's name there.
_scale = scale

@scale.register(Series)
def _(
        x: Series,
        center: Union[bool, Iterable[NumericType]] = True,
        # pylint: disable=redefined-outer-name
        scale: Union[bool, Iterable[NumericType]] = True
) -> DataFrame:
    """Scale a series by converting it to a data frame first

    Args:
        x: The series to scale
        center: Passed through to the data frame implementation
        scale: Passed through to the data frame implementation

    Returns:
        The result of scaling `x.to_frame()`
    """
    as_frame = x.to_frame()
    return _scale(as_frame, center, scale)

@scale.register((numpy.ndarray, list, tuple))
def _(
        x: Iterable,
        center: Union[bool, Iterable[NumericType]] = True,
        # pylint: disable=redefined-outer-name
        scale: Union[bool, Iterable[NumericType]] = True
) -> DataFrame:
    """Scale a numpy array, list or tuple

    The values are wrapped into a series named `scaled`, which is then
    handed to `_scale`.

    Args:
        x: The values to scale
        center: Passed through unchanged
        scale: Passed through unchanged

    Returns:
        The result of scaling the wrapped series
    """
    wrapped = Series(x, name='scaled')
    return _scale(wrapped, center, scale)

@register_verb(DataFrame)
def col_sums(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Sum up the values of each column of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `sum` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The column-wise sums.
    """
    column_totals = x.agg(sum, axis=0, na_rm=na_rm)
    return column_totals

@register_verb(DataFrame)
def row_sums(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Sum up the values of each row of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `sum` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The row-wise sums.
    """
    row_totals = x.agg(sum, na_rm=na_rm, axis=1)
    return row_totals


@register_verb(DataFrame)
def col_means(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Average the values of each column of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `mean` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The column-wise means.
    """
    column_averages = x.agg(mean, axis=0, na_rm=na_rm)
    return column_averages

@register_verb(DataFrame)
def row_means(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Average the values of each row of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `mean` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The row-wise means.
    """
    row_averages = x.agg(mean, na_rm=na_rm, axis=1)
    return row_averages

@register_verb(DataFrame)
def col_sds(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Compute the standard deviation of each column of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `sd` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The column-wise standard deviations.
    """
    # imported at call time rather than at module level
    from ..stats import sd

    column_sds = x.agg(sd, axis=0, na_rm=na_rm)
    return column_sds

@register_verb(DataFrame)
def row_sds(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Compute the standard deviation of each row of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `sd` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The row-wise standard deviations.
    """
    # imported at call time rather than at module level
    from ..stats import sd

    row_wise_sds = x.agg(sd, na_rm=na_rm, axis=1)
    return row_wise_sds


@register_verb(DataFrame)
def col_medians(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Compute the median of each column of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `median` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The column-wise medians.
    """
    column_medians = x.agg(median, axis=0, na_rm=na_rm)
    return column_medians

@register_verb(DataFrame)
def row_medians(
        x: DataFrame,
        na_rm: bool = False,
        # Not supported yet: dims, weights, freq, n
) -> Iterable[NumericType]:
    """Compute the median of each row of a data frame

    Args:
        x: The data frame
        na_rm: Forwarded to `median` as `na_rm` to control how missing
            values in `x` are handled.

    Returns:
        The row-wise medians.
    """
    row_wise_medians = x.agg(median, na_rm=na_rm, axis=1)
    return row_wise_medians
Loading

0 comments on commit 72ab957

Please sign in to comment.