Skip to content

Commit

Permalink
0.3.0 (#22)
Browse files Browse the repository at this point in the history
* Update CI configuration

* Rename argument dtypes of unchop and unnest to ptype

* Change all `_base0` to `base0_`

* Change argument `how` of tidyr.drop_na to `how_`

* Add advanced usage in docs; Adopt pipda v0.3.0;

* 0.3.0

* Update docs

* Remove pull_request from docs building CI
  • Loading branch information
pwwang authored Jul 1, 2021
1 parent 72ab957 commit 9e71287
Show file tree
Hide file tree
Showing 93 changed files with 4,488 additions and 1,280 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: Build and Deploy

on: [push, pull_request]
on:
push:
release:
types: [published]

jobs:

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: Build Docs

on: [push, pull_request]
on: [push]

jobs:
docs:
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
fail_fast: false
fail_fast: true
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: 5df1a4bf6f04a1ed3a643167b38d502575e29aef
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Port of [dplyr][2] and other related R packages in python, using [pipda][3].

<img width="30%" style="margin: 10px 10px 10px 30px" align="right" src="logo.png">

Unlike other similar packages in python that just mimic the piping sign, `datar` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python.
Unlike other similar packages in python that just mimic the piping syntax, `datar` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python.


## Installation
Expand Down
2 changes: 1 addition & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ Port of `dplyr <https://dplyr.tidyverse.org/index.html>`_ and other related R pa

:raw-html-m2r:`<img width="30%" style="margin: 10px 10px 10px 30px" align="right" src="logo.png">`

Unlike other similar packages in python that just mimic the piping sign, ``datar`` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python.
Unlike other similar packages in python that just mimic the piping syntax, ``datar`` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python.

Installation
------------
Expand Down
2 changes: 1 addition & 1 deletion datar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
from .core import frame_format_patch as _
from .core.defaults import f

__version__ = '0.2.3'
__version__ = '0.3.0'
26 changes: 13 additions & 13 deletions datar/base/seq.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,37 +17,37 @@
@register_func(None, context=Context.EVAL)
def seq_along(
along_with: Iterable[Any],
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> ArrayLikeType:
"""Generate sequences along an iterable
Args:
along_with: An iterable to seq along with
_base0: Whether the generated sequence should be 0-based.
base0_: Whether the generated sequence should be 0-based.
If not provided, will use `datar.base.get_option('index.base.0')`
Returns:
The generated sequence.
"""
_base0 = get_option('index.base.0', _base0)
return Array(range(len(along_with))) + int(not _base0)
base0_ = get_option('index.base.0', base0_)
return Array(range(len(along_with))) + int(not base0_)

@register_func(None, context=Context.EVAL)
def seq_len(
length_out: IntOrIter,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> ArrayLikeType:
"""Generate sequences with the length"""
_base0 = get_option('index.base.0', _base0)
base0_ = get_option('index.base.0', base0_)
if is_scalar(length_out):
return Array(range(int(length_out))) + int(not _base0)
return Array(range(int(length_out))) + int(not base0_)
if len(length_out) > 1:
logger.warning(
"In seq_len(%r) : first element used of 'length_out' argument",
length_out
)
length_out = int(list(length_out)[0])
return Array(range(length_out)) + int(not _base0)
return Array(range(length_out)) + int(not base0_)


@register_func(None, context=Context.EVAL)
Expand All @@ -57,23 +57,23 @@ def seq(
by: IntType = None,
length_out: IntType = None,
along_with: IntType = None,
_base0: Optional[bool] = None,
base0_: Optional[bool] = None,
) -> ArrayLikeType:
"""Generate a sequence
https://rdrr.io/r/base/seq.html
Note that this API is consistent with r-base's seq. 1-based and inclusive.
"""
_base0 = get_option('index.base.0', _base0)
base0_ = get_option('index.base.0', base0_)
if along_with is not None:
return seq_along(along_with, _base0)
return seq_along(along_with, base0_)
if from_ is not None and not is_scalar(from_):
return seq_along(from_, _base0)
return seq_along(from_, base0_)
if length_out is not None and from_ is None and to is None:
return seq_len(length_out)

base = int(not _base0)
base = int(not base0_)

if from_ is None:
from_ = base
Expand Down
28 changes: 14 additions & 14 deletions datar/base/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def grep(
value: bool = False,
fixed: bool = False,
invert: bool = False,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> Iterable[Union[int, str]]:
"""R's grep, get the element in x matching the pattern
Expand All @@ -83,7 +83,7 @@ def grep(
value: Return values instead of indices?
fixed: Fixed matching (instead of regex matching)?
invert: Return elements that don't match instead?
_base0: When return indices, whether return 0-based indices?
base0_: When return indices, whether return 0-based indices?
If not set, will use `datar.base.get_option('which.base.0')`
Returns:
Expand All @@ -104,8 +104,8 @@ def grep(
if value:
return x[matched]

_base0 = get_option('which.base.0', _base0)
return numpy.flatnonzero(matched) + int(not _base0)
base0_ = get_option('which.base.0', base0_)
return numpy.flatnonzero(matched) + int(not base0_)

@register_func(None, context=Context.EVAL)
def grepl(
Expand Down Expand Up @@ -439,15 +439,15 @@ def substr(
x: StringOrIter,
start: IntOrIter,
stop: IntOrIter,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> StringOrIter:
"""Extract substrings in strings.
Args:
x: The strings
start: The start positions to extract
stop: The stop positions to extract
_base0: Whether `start` and `stop` are 0-based
base0_: Whether `start` and `stop` are 0-based
If not provided, will use `datar.base.get_option('index.base.0')`
Returns:
Expand All @@ -456,15 +456,15 @@ def substr(
if is_scalar(x) and is_scalar(start) and is_scalar(stop):
if is_null(x):
return NA
_base0 = get_option('index.base.0', _base0)
base0_ = get_option('index.base.0', base0_)
x = as_character(x)
lenx = len(x)
# int() converts numpy.int64 to int
start0 = position_at(int(start), lenx, base0=_base0)
start0 = position_at(int(start), lenx, base0=base0_)
stop0 = position_at(
min(int(stop), lenx - int(_base0)),
min(int(stop), lenx - int(base0_)),
lenx,
base0=_base0
base0=base0_
)
return x[start0:stop0+1]

Expand All @@ -479,7 +479,7 @@ def substr(
start = recycle_value(start, maxlen)
stop = recycle_value(stop, maxlen)
out = [
substr(elem, start_, stop_, _base0)
substr(elem, start_, stop_, base0_)
for elem, start_, stop_ in zip(x, start, stop)
]
if is_null(out).any():
Expand All @@ -491,21 +491,21 @@ def substring(
x: StringOrIter,
first: IntOrIter,
last: IntOrIter = 1000000,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> StringOrIter:
"""Extract substrings in strings.
Args:
x: The strings
first: The start positions to extract
last: The stop positions to extract
_base0: Whether `first` and `last` are 0-based
base0_: Whether `first` and `last` are 0-based
If not provided, will use `datar.base.get_option('index.base.0')`
Returns:
The substrings from `x`
"""
return substr(x, first, last, _base0)
return substr(x, first, last, base0_)

# strsplit --------------------------------

Expand Down
18 changes: 9 additions & 9 deletions datar/base/which.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,48 +8,48 @@
from ..core.contexts import Context

@register_func(None, context=Context.EVAL)
def which(x: Iterable[bool], _base0: Optional[bool] = None) -> Iterable[int]:
def which(x: Iterable[bool], base0_: Optional[bool] = None) -> Iterable[int]:
"""Convert a bool iterable to indexes
Args:
x: An iterable of bools.
Note that non-bool values will be converted into bools
_base0: Whether the returned indexes are 0-based.
base0_: Whether the returned indexes are 0-based.
Controlled by `get_option('which.base.0')` if not provided
Returns:
The indexes
"""
return numpy.flatnonzero(x) + int(not get_option('which.base.0', _base0))
return numpy.flatnonzero(x) + int(not get_option('which.base.0', base0_))

@register_func(None)
def which_min(x: Iterable, _base0: Optional[bool] = None) -> int:
def which_min(x: Iterable, base0_: Optional[bool] = None) -> int:
"""R's `which.min()`
Get the index of the element with the minimum value
Args:
x: The iterable
_base0: Whether the index to return is 0-based or not.
base0_: Whether the index to return is 0-based or not.
Controlled by `get_option('which.base.0')` if not provided
Returns:
The index of the element with the minimum value
"""
return numpy.argmin(x) + int(not get_option('which.base.0', _base0))
return numpy.argmin(x) + int(not get_option('which.base.0', base0_))

@register_func(None)
def which_max(x: Iterable, _base0: bool = True) -> int:
def which_max(x: Iterable, base0_: bool = True) -> int:
"""R's `which.max()`
Get the index of the element with the maximum value
Args:
x: The iterable
_base0: Whether the index to return is 0-based or not
base0_: Whether the index to return is 0-based or not
Note that this is not controlled by `get_option('index.base.0')`
Returns:
The index of the element with the maximum value
"""
return numpy.argmax(x) + int(not get_option('which.base.0', _base0))
return numpy.argmax(x) + int(not get_option('which.base.0', base0_))
9 changes: 4 additions & 5 deletions datar/core/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,11 @@ def __init__(
def _pipda_eval(
self,
data: Any,
context: ContextAnnoType,
level: int = 0
context: ContextAnnoType
) -> Any:
"""Defines how the object should be evaluated when evaluated by
pipda's evaluation"""
self.elems = evaluate_args(self.elems, data, context, level)
self.elems = evaluate_args(self.elems, data, context)
return self

@abstractmethod
Expand All @@ -60,9 +59,9 @@ class Collection(CollectionBase, list):
convert them into 0-based finally
The Inverted, Negated and slice objects will be expanded immediately. This
means there is no chance to apply `_base0` that is received later on. So
means there is no chance to apply `base0_` that is received later on. So
the original elements are stored in `self.elems` to wait for a second
evaluation with the correct `_base0`.
evaluation with the correct `base0_`.
Args:
*args: The elements
Expand Down
22 changes: 11 additions & 11 deletions datar/core/names.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ def _repair_names_unique(
names: Iterable[str],
quiet: bool = False,
sanitizer: Optional[Callable[[str], str]] = None,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> List[str]:
"""Make sure names are unique"""
base = int(not _base0)
base = int(not base0_)
min_names = _repair_names_minimal(names)
neat_names = [
re.sub(r'(?:(?<!_)_{1,2}\d+|(?<!_)__)+$', '', name)
Expand All @@ -53,7 +53,7 @@ def _repair_names_unique(
def _repair_names_universal(
names: Iterable[str],
quiet: bool = False,
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> List[str]:
"""Make sure names are safe to be used as variables or attributes"""
min_names = _repair_names_minimal(names)
Expand All @@ -66,7 +66,7 @@ def _repair_names_universal(
if keyword.iskeyword(name) or (name and name[0].isdigit())
else name
),
_base0=_base0
base0_=base0_
)
if not quiet:
changed_names = [
Expand Down Expand Up @@ -100,7 +100,7 @@ def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]:
def repair_names(
names: Iterable[str],
repair: Union[str, Callable],
_base0: Optional[bool] = None
base0_: Optional[bool] = None
) -> List[str]:
"""Repair names based on the method
Expand All @@ -118,7 +118,7 @@ def repair_names(
- A function, accepts either a list of names or a single name.
A function that accepts a list of names must annotate its first
argument with `typing.Iterable` or `typing.Sequence`.
_base0: Whether the numeric suffix starts from 0 or not.
base0_: Whether the numeric suffix starts from 0 or not.
If not specified, will use `datar.base.get_option('index.base.0')`.
Examples:
Expand All @@ -141,7 +141,7 @@ def repair_names(
NameNonUniqueError: when check_unique fails
"""
from .utils import get_option
_base0 = get_option('index.base.0', _base0)
base0_ = get_option('index.base.0', base0_)
if isinstance(repair, str):
repair = BUILTIN_REPAIR_METHODS[repair]
elif is_iterable(repair) and all(isinstance(elem, str) for elem in repair):
Expand All @@ -156,15 +156,15 @@ def repair_names(
annotation._name not in ('Iterable', 'Sequence')
): # scalar input
return [
repair(name, _base0=_base0)
if '_base0' in parameters
repair(name, base0_=base0_)
if 'base0_' in parameters
else repair(name)
for name in names
]

names = list(names)
return (
repair(names, _base0=_base0)
if '_base0' in parameters
repair(names, base0_=base0_)
if 'base0_' in parameters
else repair(names)
)
Loading

0 comments on commit 9e71287

Please sign in to comment.