0.3.0 (#22)

* Update CI configuration
* Rename argument dtypes of unchop and unnest to ptype
* Change all `_base0` to `base0_`
* Change argument `how` of tidyr.drop_na to `how_`
* Add advanced usage in docs; Adopt pipda v0.3.0
* 0.3.0
* Update docs
* Remove pull_request from docs building CI
pwwang · Jul 1, 2021 · 9e71287 · 9e71287
1 parent 72ab957
commit 9e71287
Show file tree

Hide file tree

Showing 93 changed files with 4,488 additions and 1,280 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -1,6 +1,9 @@
 name: Build and Deploy
 
-on: [push, pull_request]
+on:
+ push:
+ release:
+ types: [published]
 
 jobs:
 

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -1,6 +1,6 @@
 name: Build Docs
 
-on: [push, pull_request]
+on: [push]
 
 jobs:
  docs:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-fail_fast: false
+fail_fast: true
 repos:
 - repo: https://github.com/pre-commit/pre-commit-hooks
  rev: 5df1a4bf6f04a1ed3a643167b38d502575e29aef

diff --git a/README.md b/README.md
@@ -9,7 +9,7 @@ Port of [dplyr][2] and other related R packages in python, using [pipda][3].
 
 <img width="30%" style="margin: 10px 10px 10px 30px" align="right" src="logo.png">
 
-Unlike other similar packages in python that just mimic the piping sign, `datar` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python.
+Unlike other similar packages in python that just mimic the piping syntax, `datar` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python.
 
 
 ## Installtion

diff --git a/README.rst b/README.rst
@@ -38,7 +38,7 @@ Port of `dplyr <https://dplyr.tidyverse.org/index.html>`_ and other related R pa
 
 :raw-html-m2r:`<img width="30%" style="margin: 10px 10px 10px 30px" align="right" src="logo.png">`
 
-Unlike other similar packages in python that just mimic the piping sign, ``datar`` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python.
+Unlike other similar packages in python that just mimic the piping syntax, ``datar`` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python.
 
 Installtion
 -----------

diff --git a/datar/__init__.py b/datar/__init__.py
@@ -4,4 +4,4 @@
 from .core import frame_format_patch as _
 from .core.defaults import f
 
-__version__ = '0.2.3'
+__version__ = '0.3.0'
diff --git a/datar/base/seq.py b/datar/base/seq.py
@@ -17,37 +17,37 @@
 @register_func(None, context=Context.EVAL)
 def seq_along(
  along_with: Iterable[Any],
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> ArrayLikeType:
  """Generate sequences along an iterable
 
  Args:
  along_with: An iterable to seq along with
- _base0: Whether the generated sequence should be 0-based.
+ base0_: Whether the generated sequence should be 0-based.
  If not provided, will use `datar.base.get_option('index.base.0')`
 
  Returns:
  The generated sequence.
  """
- _base0 = get_option('index.base.0', _base0)
- return Array(range(len(along_with))) + int(not _base0)
+ base0_ = get_option('index.base.0', base0_)
+ return Array(range(len(along_with))) + int(not base0_)
 
 @register_func(None, context=Context.EVAL)
 def seq_len(
  length_out: IntOrIter,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> ArrayLikeType:
  """Generate sequences with the length"""
- _base0 = get_option('index.base.0', _base0)
+ base0_ = get_option('index.base.0', base0_)
  if is_scalar(length_out):
- return Array(range(int(length_out))) + int(not _base0)
+ return Array(range(int(length_out))) + int(not base0_)
  if len(length_out) > 1:
  logger.warning(
  "In seq_len(%r) : first element used of 'length_out' argument",
  length_out
  )
  length_out = int(list(length_out)[0])
- return Array(range(length_out)) + int(not _base0)
+ return Array(range(length_out)) + int(not base0_)
 
 
 @register_func(None, context=Context.EVAL)
@@ -57,23 +57,23 @@ def seq(
  by: IntType = None,
  length_out: IntType = None,
  along_with: IntType = None,
- _base0: Optional[bool] = None,
+ base0_: Optional[bool] = None,
 ) -> ArrayLikeType:
  """Generate a sequence
 
  https://rdrr.io/r/base/seq.html
 
  Note that this API is consistent with r-base's seq. 1-based and inclusive.
  """
- _base0 = get_option('index.base.0', _base0)
+ base0_ = get_option('index.base.0', base0_)
  if along_with is not None:
- return seq_along(along_with, _base0)
+ return seq_along(along_with, base0_)
  if from_ is not None and not is_scalar(from_):
- return seq_along(from_, _base0)
+ return seq_along(from_, base0_)
  if length_out is not None and from_ is None and to is None:
  return seq_len(length_out)
 
- base = int(not _base0)
+ base = int(not base0_)
 
  if from_ is None:
  from_ = base

diff --git a/datar/base/string.py b/datar/base/string.py
@@ -72,7 +72,7 @@ def grep(
  value: bool = False,
  fixed: bool = False,
  invert: bool = False,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> Iterable[Union[int, str]]:
  """R's grep, get the element in x matching the pattern
 
@@ -83,7 +83,7 @@ def grep(
  value: Return values instead of indices?
  fixed: Fixed matching (instead of regex matching)?
  invert: Return elements thata don't match instead?
- _base0: When return indices, whether return 0-based indices?
+ base0_: When return indices, whether return 0-based indices?
  If not set, will use `datar.base.get_option('which.base.0')`
 
  Returns:
@@ -104,8 +104,8 @@ def grep(
  if value:
  return x[matched]
 
- _base0 = get_option('which.base.0', _base0)
- return numpy.flatnonzero(matched) + int(not _base0)
+ base0_ = get_option('which.base.0', base0_)
+ return numpy.flatnonzero(matched) + int(not base0_)
 
 @register_func(None, context=Context.EVAL)
 def grepl(
@@ -439,15 +439,15 @@ def substr(
  x: StringOrIter,
  start: IntOrIter,
  stop: IntOrIter,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> StringOrIter:
  """Extract substrings in strings.
 
  Args:
  x: The strings
  start: The start positions to extract
  stop: The stop positions to extract
- _base0: Whether `start` and `stop` are 0-based
+ base0_: Whether `start` and `stop` are 0-based
  If not provided, will use `datar.base.get_option('index.base.0')`
 
  Returns:
@@ -456,15 +456,15 @@ def substr(
  if is_scalar(x) and is_scalar(start) and is_scalar(stop):
  if is_null(x):
  return NA
- _base0 = get_option('index.base.0', _base0)
+ base0_ = get_option('index.base.0', base0_)
  x = as_character(x)
  lenx = len(x)
  # int() converts numpy.int64 to int
- start0 = position_at(int(start), lenx, base0=_base0)
+ start0 = position_at(int(start), lenx, base0=base0_)
  stop0 = position_at(
- min(int(stop), lenx - int(_base0)),
+ min(int(stop), lenx - int(base0_)),
  lenx,
- base0=_base0
+ base0=base0_
  )
  return x[start0:stop0+1]
 
@@ -479,7 +479,7 @@ def substr(
  start = recycle_value(start, maxlen)
  stop = recycle_value(stop, maxlen)
  out = [
- substr(elem, start_, stop_, _base0)
+ substr(elem, start_, stop_, base0_)
  for elem, start_, stop_ in zip(x, start, stop)
  ]
  if is_null(out).any():
@@ -491,21 +491,21 @@ def substring(
  x: StringOrIter,
  first: IntOrIter,
  last: IntOrIter = 1000000,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> StringOrIter:
  """Extract substrings in strings.
 
  Args:
  x: The strings
  start: The start positions to extract
  stop: The stop positions to extract
- _base0: Whether `start` and `stop` are 0-based
+ base0_: Whether `start` and `stop` are 0-based
  If not provided, will use `datar.base.get_option('index.base.0')`
 
  Returns:
  The substrings from `x`
  """
- return substr(x, first, last, _base0)
+ return substr(x, first, last, base0_)
 
 # strsplit --------------------------------
 

diff --git a/datar/base/which.py b/datar/base/which.py
@@ -8,48 +8,48 @@
 from ..core.contexts import Context
 
 @register_func(None, context=Context.EVAL)
-def which(x: Iterable[bool], _base0: Optional[bool] = None) -> Iterable[int]:
+def which(x: Iterable[bool], base0_: Optional[bool] = None) -> Iterable[int]:
  """Convert a bool iterable to indexes
 
  Args:
  x: An iterable of bools.
  Note that non-bool values will be converted into
- _base0: Whether the returned indexes are 0-based.
+ base0_: Whether the returned indexes are 0-based.
  Controlled by `get_option('which.base.0')` if not provided
 
  Returns:
  The indexes
  """
- return numpy.flatnonzero(x) + int(not get_option('which.base.0', _base0))
+ return numpy.flatnonzero(x) + int(not get_option('which.base.0', base0_))
 
 @register_func(None)
-def which_min(x: Iterable, _base0: Optional[bool] = None) -> int:
+def which_min(x: Iterable, base0_: Optional[bool] = None) -> int:
  """R's `which.min()`
 
  Get the index of the element with the maximum value
 
  Args:
  x: The iterable
- _base0: Whether the index to return is 0-based or not.
+ base0_: Whether the index to return is 0-based or not.
  Controlled by `get_option('which.base.0')` if not provided
 
  Returns:
  The index of the element with the maximum value
  """
- return numpy.argmin(x) + int(not get_option('which.base.0', _base0))
+ return numpy.argmin(x) + int(not get_option('which.base.0', base0_))
 
 @register_func(None)
-def which_max(x: Iterable, _base0: bool = True) -> int:
+def which_max(x: Iterable, base0_: bool = True) -> int:
  """R's `which.max()`
 
  Get the index of the element with the minimum value
 
  Args:
  x: The iterable
- _base0: Whether the index to return is 0-based or not
+ base0_: Whether the index to return is 0-based or not
  Not that this is not controlled by `get_option('index.base.0')`
 
  Returns:
  The index of the element with the minimum value
  """
- return numpy.argmax(x) + int(not get_option('which.base.0', _base0))
+ return numpy.argmax(x) + int(not get_option('which.base.0', base0_))
diff --git a/datar/core/collections.py b/datar/core/collections.py
@@ -36,12 +36,11 @@ def __init__(
  def _pipda_eval(
  self,
  data: Any,
- context: ContextAnnoType,
- level: int = 0
+ context: ContextAnnoType
  ) -> Any:
  """Defines how the object should be evaluated when evaluated by
  pipda's evaluation"""
- self.elems = evaluate_args(self.elems, data, context, level)
+ self.elems = evaluate_args(self.elems, data, context)
  return self
 
  @abstractmethod
@@ -60,9 +59,9 @@ class Collection(CollectionBase, list):
  convert them into 0-based finally
 
  The Inverted, Negated and slice objects will be expanded immediately. This
- means there is no chance to apply `_base0` that is received later on. So
+ means there is no chance to apply `base0_` that is received later on. So
  the original elements are stored in `self.elems` to wait for a second
- evaluation with the correct `_base0`.
+ evaluation with the correct `base0_`.
 
  Args:
  *args: The elements

diff --git a/datar/core/names.py b/datar/core/names.py
@@ -26,10 +26,10 @@ def _repair_names_unique(
  names: Iterable[str],
  quiet: bool = False,
  sanitizer: Optional[Callable[[str], str]] = None,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> List[str]:
  """Make sure names are unique"""
- base = int(not _base0)
+ base = int(not base0_)
  min_names = _repair_names_minimal(names)
  neat_names = [
  re.sub(r'(?:(?<!_)_{1,2}\d+|(?<!_)__)+$', '', name)
@@ -53,7 +53,7 @@ def _repair_names_unique(
 def _repair_names_universal(
  names: Iterable[str],
  quiet: bool = False,
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> List[str]:
  """Make sure names are safely to be used as variable or attribute"""
  min_names = _repair_names_minimal(names)
@@ -66,7 +66,7 @@ def _repair_names_universal(
  if keyword.iskeyword(name) or (name and name[0].isdigit())
  else name
  ),
- _base0=_base0
+ base0_=base0_
  )
  if not quiet:
  changed_names = [
@@ -100,7 +100,7 @@ def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]:
 def repair_names(
  names: Iterable[str],
  repair: Union[str, Callable],
- _base0: Optional[bool] = None
+ base0_: Optional[bool] = None
 ) -> List[str]:
  """Repair names based on the method
 
@@ -118,7 +118,7 @@ def repair_names(
  - A function, accepts either a list of names or a single name.
  Function accepts a list of names must annotate the first
  argument with `typing.Iterable` or `typing.Sequence`.
- _base0: Whether the numeric suffix starts from 0 or not.
+ base0_: Whether the numeric suffix starts from 0 or not.
  If not specified, will use `datar.base.get_option('index.base.0')`.
 
  Examples:
@@ -141,7 +141,7 @@ def repair_names(
  NameNonUniqueError: when check_unique fails
  """
  from .utils import get_option
- _base0 = get_option('index.base.0', _base0)
+ base0_ = get_option('index.base.0', base0_)
  if isinstance(repair, str):
  repair = BUILTIN_REPAIR_METHODS[repair]
  elif is_iterable(repair) and all(isinstance(elem, str) for elem in repair):
@@ -156,15 +156,15 @@ def repair_names(
  annotation._name not in ('Iterable', 'Sequence')
  ): # scalar input
  return [
- repair(name, _base0=_base0)
- if '_base0' in parameters
+ repair(name, base0_=base0_)
+ if 'base0_' in parameters
  else repair(name)
  for name in names
  ]
 
  names = list(names)
  return (
- repair(names, _base0=_base0)
- if '_base0' in parameters
+ repair(names, base0_=base0_)
+ if 'base0_' in parameters
  else repair(names)
  )