diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 077acc6f..27adaee0 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,6 +1,9 @@ name: Build and Deploy -on: [push, pull_request] +on: + push: + release: + types: [published] jobs: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0f0b3a50..63675cae 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,6 +1,6 @@ name: Build Docs -on: [push, pull_request] +on: [push] jobs: docs: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c1435f91..ff18698a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -fail_fast: false +fail_fast: true repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: 5df1a4bf6f04a1ed3a643167b38d502575e29aef diff --git a/README.md b/README.md index c0cfb3fb..934f4639 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Port of [dplyr][2] and other related R packages in python, using [pipda][3]. -Unlike other similar packages in python that just mimic the piping sign, `datar` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python. +Unlike other similar packages in python that just mimic the piping syntax, `datar` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python. ## Installtion diff --git a/README.rst b/README.rst index e4f6c68e..4f62a26e 100644 --- a/README.rst +++ b/README.rst @@ -38,7 +38,7 @@ Port of `dplyr `_ and other related R packages in python, using `pipda `_. :raw-html-m2r:`` -Unlike other similar packages in python that just mimic the piping sign, ``datar`` follows the API designs from the original packages as much as possible. So that minimal effort is needed for those who are familar with those R packages to transition to python. +Unlike other similar packages in python that just mimic the piping syntax, ``datar`` follows the API designs from the original packages as much as possible, so that minimal effort is needed for those who are familiar with those R packages to transition to python. Installtion ----------- diff --git a/datar/__init__.py b/datar/__init__.py index cf4b4936..c74573d6 100644 --- a/datar/__init__.py +++ b/datar/__init__.py @@ -4,4 +4,4 @@ from .core import frame_format_patch as _ from .core.defaults import f -__version__ = '0.2.3' +__version__ = '0.3.0' diff --git a/datar/base/seq.py b/datar/base/seq.py index 94959478..cd6bf7e5 100644 --- a/datar/base/seq.py +++ b/datar/base/seq.py @@ -17,37 +17,37 @@ @register_func(None, context=Context.EVAL) def seq_along( along_with: Iterable[Any], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> ArrayLikeType: """Generate sequences along an iterable Args: along_with: An iterable to seq along with - _base0: Whether the generated sequence should be 0-based. + base0_: Whether the generated sequence should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')` Returns: The generated sequence.
""" - _base0 = get_option('index.base.0', _base0) - return Array(range(len(along_with))) + int(not _base0) + base0_ = get_option('index.base.0', base0_) + return Array(range(len(along_with))) + int(not base0_) @register_func(None, context=Context.EVAL) def seq_len( length_out: IntOrIter, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> ArrayLikeType: """Generate sequences with the length""" - _base0 = get_option('index.base.0', _base0) + base0_ = get_option('index.base.0', base0_) if is_scalar(length_out): - return Array(range(int(length_out))) + int(not _base0) + return Array(range(int(length_out))) + int(not base0_) if len(length_out) > 1: logger.warning( "In seq_len(%r) : first element used of 'length_out' argument", length_out ) length_out = int(list(length_out)[0]) - return Array(range(length_out)) + int(not _base0) + return Array(range(length_out)) + int(not base0_) @register_func(None, context=Context.EVAL) @@ -57,7 +57,7 @@ def seq( by: IntType = None, length_out: IntType = None, along_with: IntType = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, ) -> ArrayLikeType: """Generate a sequence @@ -65,15 +65,15 @@ Note that this API is consistent with r-base's seq. 1-based and inclusive. """ - _base0 = get_option('index.base.0', _base0) + base0_ = get_option('index.base.0', base0_) if along_with is not None: - return seq_along(along_with, _base0) + return seq_along(along_with, base0_) if from_ is not None and not is_scalar(from_): - return seq_along(from_, _base0) + return seq_along(from_, base0_) if length_out is not None and from_ is None and to is None: return seq_len(length_out) - base = int(not _base0) + base = int(not base0_) if from_ is None: from_ = base diff --git a/datar/base/string.py b/datar/base/string.py index ccb211d2..4376b73f 100644 --- a/datar/base/string.py +++ b/datar/base/string.py @@ -72,7 +72,7 @@ def grep( value: bool = False, fixed: bool = False, invert: bool = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> Iterable[Union[int, str]]: """R's grep, get the element in x matching the pattern @@ -83,7 +83,7 @@ value: Return values instead of indices? fixed: Fixed matching (instead of regex matching)? invert: Return elements that don't match instead? - _base0: When return indices, whether return 0-based indices? + base0_: When return indices, whether return 0-based indices? If not set, will use `datar.base.get_option('which.base.0')` Returns: @@ -104,8 +104,8 @@ if value: return x[matched] - _base0 = get_option('which.base.0', _base0) - return numpy.flatnonzero(matched) + int(not _base0) + base0_ = get_option('which.base.0', base0_) + return numpy.flatnonzero(matched) + int(not base0_) @register_func(None, context=Context.EVAL) def grepl( @@ -439,7 +439,7 @@ def substr( x: StringOrIter, start: IntOrIter, stop: IntOrIter, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> StringOrIter: """Extract substrings in strings.
@@ -447,7 +447,7 @@ def substr( x: The strings start: The start positions to extract stop: The stop positions to extract - _base0: Whether `start` and `stop` are 0-based + base0_: Whether `start` and `stop` are 0-based If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -456,15 +456,15 @@ if is_scalar(x) and is_scalar(start) and is_scalar(stop): if is_null(x): return NA - _base0 = get_option('index.base.0', _base0) + base0_ = get_option('index.base.0', base0_) x = as_character(x) lenx = len(x) # int() converts numpy.int64 to int - start0 = position_at(int(start), lenx, base0=_base0) + start0 = position_at(int(start), lenx, base0=base0_) stop0 = position_at( - min(int(stop), lenx - int(_base0)), + min(int(stop), lenx - int(base0_)), lenx, - base0=_base0 + base0=base0_ ) return x[start0:stop0+1] @@ -479,7 +479,7 @@ start = recycle_value(start, maxlen) stop = recycle_value(stop, maxlen) out = [ - substr(elem, start_, stop_, _base0) + substr(elem, start_, stop_, base0_) for elem, start_, stop_ in zip(x, start, stop) ] if is_null(out).any(): @@ -491,7 +491,7 @@ def substring( x: StringOrIter, first: IntOrIter, last: IntOrIter = 1000000, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> StringOrIter: """Extract substrings in strings. @@ -499,13 +499,13 @@ x: The strings start: The start positions to extract stop: The stop positions to extract - _base0: Whether `start` and `stop` are 0-based + base0_: Whether `start` and `stop` are 0-based If not provided, will use `datar.base.get_option('index.base.0')` Returns: The substrings from `x` """ - return substr(x, first, last, _base0) + return substr(x, first, last, base0_) # strsplit -------------------------------- diff --git a/datar/base/which.py b/datar/base/which.py index 2dc8a44f..61a03b4c 100644 --- a/datar/base/which.py +++ b/datar/base/which.py @@ -8,48 +8,48 @@ from ..core.contexts import Context @register_func(None, context=Context.EVAL) -def which(x: Iterable[bool], _base0: Optional[bool] = None) -> Iterable[int]: +def which(x: Iterable[bool], base0_: Optional[bool] = None) -> Iterable[int]: """Convert a bool iterable to indexes Args: x: An iterable of bools. Note that non-bool values will be converted into - _base0: Whether the returned indexes are 0-based. + base0_: Whether the returned indexes are 0-based. Controlled by `get_option('which.base.0')` if not provided Returns: The indexes """ - return numpy.flatnonzero(x) + int(not get_option('which.base.0', _base0)) + return numpy.flatnonzero(x) + int(not get_option('which.base.0', base0_)) @register_func(None) -def which_min(x: Iterable, _base0: Optional[bool] = None) -> int: +def which_min(x: Iterable, base0_: Optional[bool] = None) -> int: """R's `which.min()` Get the index of the element with the minimum value Args: x: The iterable - _base0: Whether the index to return is 0-based or not. + base0_: Whether the index to return is 0-based or not.
Controlled by `get_option('which.base.0')` if not provided Returns: The index of the element with the minimum value """ - return numpy.argmin(x) + int(not get_option('which.base.0', _base0)) + return numpy.argmin(x) + int(not get_option('which.base.0', base0_)) @register_func(None) -def which_max(x: Iterable, _base0: bool = True) -> int: +def which_max(x: Iterable, base0_: bool = True) -> int: """R's `which.max()` Get the index of the element with the maximum value Args: x: The iterable - _base0: Whether the index to return is 0-based or not + base0_: Whether the index to return is 0-based or not Note that this is not controlled by `get_option('index.base.0')` Returns: The index of the element with the maximum value """ - return numpy.argmax(x) + int(not get_option('which.base.0', _base0)) + return numpy.argmax(x) + int(not get_option('which.base.0', base0_)) diff --git a/datar/core/collections.py b/datar/core/collections.py index dc22ca2a..369f1a7e 100644 --- a/datar/core/collections.py +++ b/datar/core/collections.py @@ -36,12 +36,11 @@ def __init__( def _pipda_eval( self, data: Any, - context: ContextAnnoType, - level: int = 0 + context: ContextAnnoType ) -> Any: """Defines how the object should be evaluated when evaluated by pipda's evaluation""" - self.elems = evaluate_args(self.elems, data, context, level) + self.elems = evaluate_args(self.elems, data, context) return self @abstractmethod @@ -60,9 +59,9 @@ class Collection(CollectionBase, list): convert them into 0-based finally The Inverted, Negated and slice objects will be expanded immediately. This - means there is no chance to apply `_base0` that is received later on. So + means there is no chance to apply `base0_` that is received later on. So the original elements are stored in `self.elems` to wait for a second - evaluation with the correct `_base0`. + evaluation with the correct `base0_`. Args: *args: The elements diff --git a/datar/core/names.py b/datar/core/names.py index d0dbbe25..5ec675fc 100644 --- a/datar/core/names.py +++ b/datar/core/names.py @@ -26,10 +26,10 @@ def _repair_names_unique( names: Iterable[str], quiet: bool = False, sanitizer: Optional[Callable[[str], str]] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> List[str]: """Make sure names are unique""" - base = int(not _base0) + base = int(not base0_) min_names = _repair_names_minimal(names) neat_names = [ re.sub(r'(?:(? List[str]: """Make sure names are safely to be used as variable or attribute""" min_names = _repair_names_minimal(names) @@ -66,7 +66,7 @@ if keyword.iskeyword(name) or (name and name[0].isdigit()) else name ), - _base0=_base0 + base0_=base0_ ) if not quiet: changed_names = [ @@ -100,7 +100,7 @@ def _repair_names_check_unique(names: Iterable[str]) -> Iterable[str]: def repair_names( names: Iterable[str], repair: Union[str, Callable], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> List[str]: """Repair names based on the method @@ -118,7 +118,7 @@ - A function, accepts either a list of names or a single name. Function accepts a list of names must annotate the first argument with `typing.Iterable` or `typing.Sequence`. - _base0: Whether the numeric suffix starts from 0 or not. + base0_: Whether the numeric suffix starts from 0 or not. If not specified, will use `datar.base.get_option('index.base.0')`.
Examples: @@ -141,7 +141,7 @@ def repair_names( NameNonUniqueError: when check_unique fails """ from .utils import get_option - _base0 = get_option('index.base.0', _base0) + base0_ = get_option('index.base.0', base0_) if isinstance(repair, str): repair = BUILTIN_REPAIR_METHODS[repair] elif is_iterable(repair) and all(isinstance(elem, str) for elem in repair): @@ -156,15 +156,15 @@ def repair_names( annotation._name not in ('Iterable', 'Sequence') ): # scalar input return [ - repair(name, _base0=_base0) - if '_base0' in parameters + repair(name, base0_=base0_) + if 'base0_' in parameters else repair(name) for name in names ] names = list(names) return ( - repair(names, _base0=_base0) - if '_base0' in parameters + repair(names, base0_=base0_) + if 'base0_' in parameters else repair(names) ) diff --git a/datar/core/operator.py b/datar/core/operator.py index 83e2e5d9..1cd44935 100644 --- a/datar/core/operator.py +++ b/datar/core/operator.py @@ -1,12 +1,11 @@ """Operators for datar""" -from typing import Any, Optional, Tuple +from typing import Any, Tuple from functools import partial import operator import numpy from pandas import Series from pipda import register_operator, Operator -from pipda.context import ContextBase from .utils import length_of, recycle_value from .collections import Collection, Inverted, Negated, Intersect @@ -30,7 +29,7 @@ def _arithmetize2(self, left: Any, right: Any, op: str) -> Any: left, right = _recycle_left_right(left, right) return op_func(left, right) - def invert(self, operand: Any, _context: Optional[ContextBase]) -> Any: + def invert(self, operand: Any) -> Any: """Interpretation for ~x""" if isinstance(operand, (slice, str, list, tuple, Collection)): return Inverted(operand) diff --git a/datar/dplyr/across.py b/datar/dplyr/across.py index a7545cb0..305f1070 100644 --- a/datar/dplyr/across.py +++ b/datar/dplyr/across.py @@ -8,7 +8,7 @@ import numpy from pandas import DataFrame, Series from pipda import register_func, evaluate_expr, evaluate_args, evaluate_kwargs -from pipda.utils import functype +from pipda.utils import functype, PipingEnvs from pipda.context import ContextBase from pipda.symbolic import DirectRefAttr @@ -105,7 +105,7 @@ def evaluate( DirectRefAttr(self.data, column), *args, **kwargs, - _env='piping' + _env=PipingEnvs.PIPING )._pipda_eval(self.data, context) if ret is None: @@ -157,7 +157,7 @@ def across( *args: Any, _names: Optional[str] = None, _fn_context: Optional[Union[Context, ContextBase]] = Context.EVAL, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """Apply the same transformation to multiple columns @@ -178,7 +178,7 @@ def across( single function case and `{_col}_{_fn}` for the case where a list is used for _fns. In such a case, `{_fn}` is 0-based. To use 1-based index, use `{_fn1}` - _base0: Indicating whether the columns are 0-based if selected + base0_: Indicating whether the columns are 0-based if selected by indexes. if not provided, will use `datar.base.get_option('index.base.0')`. 
_fn_context: Defines the context to evaluate the arguments for functions @@ -197,21 +197,21 @@ def across( _cols = evaluate_expr(_cols, _data, Context.SELECT) return Across( - _data, _cols, _fns, _names, _base0, args, kwargs + _data, _cols, _fns, _names, base0_, args, kwargs ).evaluate(_fn_context) @register_func(context=Context.SELECT, verb_arg_only=True) def c_across( _data: DataFrame, _cols: Optional[Iterable[str]] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> Series: """Apply the same transformation to multiple columns rowwisely Args: _data: The dataframe _cols: The columns - _base0: Indicating whether the columns are 0-based if selected + base0_: Indicating whether the columns are 0-based if selected by indexes. if not provided, will use `datar.base.get_option('index.base.0')`. @@ -221,7 +221,7 @@ def c_across( if not _cols: _cols = everything(_data) - _cols = vars_select(_data.columns.tolist(), _cols, base0=_base0) + _cols = vars_select(_data.columns.tolist(), _cols, base0=base0_) series = [_data.iloc[:, col] for col in _cols] return numpy.concatenate(series) @@ -237,7 +237,7 @@ def if_any( *args: Any, _names: Optional[str] = None, _context: Optional[ContextBase] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> Iterable[bool]: """Apply the same predicate function to a selection of columns and combine @@ -253,7 +253,7 @@ def if_any( _cols, _fns, *args = args return IfAny( - _data, _cols, _fns, _names, _base0, args, kwargs + _data, _cols, _fns, _names, base0_, args, kwargs ).evaluate(_context) @@ -270,7 +270,7 @@ def if_all( *args: Any, _names: Optional[str] = None, _context: Optional[ContextBase] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> Iterable[bool]: """Apply the same predicate function to a selection of columns and combine @@ -286,5 +286,5 @@ def if_all( _cols, _fns, *args = args return IfAll( - _data, _cols, _fns, _names, _base0, args, kwargs + _data, _cols, _fns, _names, base0_, args, kwargs ).evaluate(_context) diff --git a/datar/dplyr/bind.py b/datar/dplyr/bind.py index 9bdecbb3..6a94100b 100644 --- a/datar/dplyr/bind.py +++ b/datar/dplyr/bind.py @@ -24,7 +24,7 @@ def bind_rows( _data: Optional[Union[DataFrame, list, dict]], *datas: Optional[Union[DataFrame, dict]], _id: Optional[str] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _copy: bool = True, **kwargs: Union[DataFrame, dict] ) -> DataFrame: @@ -38,8 +38,8 @@ def bind_rows( Could be a dict or a list, keys/indexes will be used for _id col *datas: Other dataframes to combine _id: The name of the id columns - _base0: Whether `_id` starts from 0 or not, if no keys are provided. - If `_base0` is not provided, will use + base0_: Whether `_id` starts from 0 or not, if no keys are provided. + If `base0_` is not provided, will use `datar.base.get_option('index.base.0')` _copy: If `False`, do not copy data unnecessarily. Original API does not support this. 
This argument will be @@ -49,7 +49,7 @@ Returns: The combined dataframe """ - base = int(not get_option('index.base.0', _base0)) + base = int(not get_option('index.base.0', base0_)) if _id is not None and not isinstance(_id, str): raise ValueError("`_id` must be a scalar string.") @@ -130,9 +130,9 @@ def _( @register_verb((DataFrame, dict, NoneType), context=Context.EVAL) def bind_cols( _data: Optional[Union[DataFrame, dict]], - *datas: Optional[Union[DataFrame, dict]], + *datas: Union[DataFrame, dict], _name_repair: Union[str, Callable] = "unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _copy: bool = True ) -> DataFrame: """Bind columns of given dataframes @@ -151,7 +151,7 @@ but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the numeric suffix starts from 0 or not. + base0_: Whether the numeric suffix starts from 0 or not. If not specified, will use `datar.base.get_option('index.base.0')`. _copy: If `False`, do not copy data unnecessarily. Original API does not support this. This argument will be @@ -176,6 +176,6 @@ ret.columns = repair_names( ret.columns.tolist(), repair=_name_repair, - _base0=_base0 + base0_=base0_ ) return ret diff --git a/datar/dplyr/dslice.py b/datar/dplyr/dslice.py index b08c75e1..7468df40 100644 --- a/datar/dplyr/dslice.py +++ b/datar/dplyr/dslice.py @@ -22,7 +22,7 @@ def slice( # pylint: disable=redefined-builtin _data: DataFrame, *rows: Any, _preserve: bool = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Index rows by their (integer) locations @@ -47,7 +47,7 @@ If _preserve = FALSE (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is. - _base0: If rows are selected by indexes, whether they are 0-based. + base0_: If rows are selected by indexes, whether they are 0-based. If not provided, `datar.base.get_option('index.base.0')` is used.
Returns: @@ -56,7 +56,7 @@ def slice( # pylint: disable=redefined-builtin if not rows: return _data - rows = _sanitize_rows(rows, _data.shape[0], _base0) + rows = _sanitize_rows(rows, _data.shape[0], base0_) out = _data.iloc[rows, :] if isinstance(_data.index, RangeIndex): out.reset_index(drop=True, inplace=True) @@ -68,11 +68,11 @@ def _( _data: DataFrameGroupBy, *rows: Any, _preserve: bool = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrameGroupBy: """Slice on grouped dataframe""" out = _data.datar_apply( - lambda df: slice(df, *rows, _base0=_base0) + lambda df: slice(df, *rows, base0_=base0_) ) out = reconstruct_tibble(_data, out, keep_rowwise=True) gdata = _filter_groups(out, _data) @@ -106,7 +106,7 @@ def slice_head( The sliced dataframe """ n = _n_from_prop(_data.shape[0], n, prop) - return slice(_data, builtins.slice(None, n), _base0=True) + return slice(_data, builtins.slice(None, n), base0_=True) @slice_head.register(DataFrameGroupBy, context=Context.PENDING) def _( @@ -132,7 +132,7 @@ def slice_tail( [`slice_head()`](datar.dplyr.slice.slice_head) """ n = _n_from_prop(_data.shape[0], n, prop) - return slice(_data, builtins.slice(-n, None), _base0=True) + return slice(_data, builtins.slice(-n, None), base0_=True) @slice_tail.register(DataFrameGroupBy, context=Context.PENDING) def _( diff --git a/datar/dplyr/funs.py b/datar/dplyr/funs.py index 76c72959..4c2b08c0 100644 --- a/datar/dplyr/funs.py +++ b/datar/dplyr/funs.py @@ -157,7 +157,7 @@ def nth( n: int, order_by: Optional[Iterable[Any]] = None, default: Any = NA, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> Any: """Get the nth element of x @@ -169,7 +169,7 @@ def nth( order_by: An optional vector used to determine the order default: A default value to use if the position does not exist in the input. - _base0: Whether `n` is 0-based or not. + base0_: Whether `n` is 0-based or not. Returns: A single element of x at `n'th` @@ -182,7 +182,7 @@ def nth( raise TypeError("`nth` expects `n` to be an integer") try: - return x[position_at(n, len(x), base0=_base0)] + return x[position_at(n, len(x), base0=base0_)] except (ValueError, IndexError, TypeError): return default diff --git a/datar/dplyr/group_by.py b/datar/dplyr/group_by.py index 806e3589..07db1415 100644 --- a/datar/dplyr/group_by.py +++ b/datar/dplyr/group_by.py @@ -21,6 +21,8 @@ from .group_data import group_vars +# pylint: disable=unused-argument + @register_verb(DataFrame, context=Context.PENDING) def group_by( _data: DataFrame, @@ -49,7 +51,7 @@ def group_by( *args: variables or computations to group by. Note that columns here cannot be selected by indexes. As they are treated as computations to be added as new columns. - So no `_base0` argument is supported. + So no `base0_` argument is supported. **kwargs: Extra variables to group the dataframe Return: @@ -71,7 +73,7 @@ def group_by( def rowwise( _data: DataFrame, *columns: Union[str, int], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrameRowwise: """Compute on a data frame a row-at-a-time @@ -82,14 +84,14 @@ def rowwise( *columns: Variables to be preserved when calling summarise(). This is typically a set of variables whose combination uniquely identify each row. - _base0: Whether indexes are 0-based if columns are selected by indexes. + base0_: Whether indexes are 0-based if columns are selected by indexes. 
If not given, will use `datar.base.get_option('index.base.0')` Returns: A row-wise data frame """ check_column_uniqueness(_data) - idxes = vars_select(_data.columns, *columns, base0=_base0) + idxes = vars_select(_data.columns, *columns, base0=base0_) if len(idxes) == 0: return DataFrameRowwise(_data) return DataFrameRowwise(_data, _group_vars=_data.columns[idxes].tolist()) @@ -98,7 +100,7 @@ def _( _data: DataFrameGroupBy, *columns: str, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrameRowwise: # grouped dataframe's columns are unique already if columns: @@ -114,9 +116,9 @@ def _( _data: DataFrameRowwise, *columns: str, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrameRowwise: - idxes = vars_select(_data.columns, *columns, base0=_base0) + idxes = vars_select(_data.columns, *columns, base0=base0_) if len(idxes) == 0: # copy_attrs? return DataFrameRowwise(_data) @@ -127,7 +129,7 @@ def ungroup( x: DataFrame, *cols: Union[str, int], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Ungroup a grouped data @@ -136,7 +138,7 @@ Args: x: The data frame *cols: Variables to remove from the grouping variables. - _base0: If columns are selected with indexes, whether they are 0-based. + base0_: If columns are selected with indexes, whether they are 0-based. If not given, will use `datar.base.get_option('index.base.0')` Returns: @@ -150,12 +152,12 @@ def _( x: DataFrameGroupBy, *cols: Union[str, int], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: if not cols: return DataFrame(x, index=x.index) old_groups = group_vars(x) - to_remove = vars_select(x.columns, *cols, base0=_base0) + to_remove = vars_select(x.columns, *cols, base0=base0_) new_groups = setdiff(old_groups, x.columns[to_remove]) return group_by(x, *new_groups) @@ -164,7 +166,7 @@ def _( x: DataFrameRowwise, *cols: Union[str, int], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: if cols: raise ValueError('`*cols` is not empty.') diff --git a/datar/dplyr/group_iter.py b/datar/dplyr/group_iter.py index 92f6a5f7..f8d5e3c1 100644 --- a/datar/dplyr/group_iter.py +++ b/datar/dplyr/group_iter.py @@ -33,7 +33,7 @@ def group_map( *args: Any, _keep: bool = False, **kwargs: Any, -) -> List[Any]: +) -> Iterable: """A generator to map function to data in each group""" keys = group_keys(_data) if nargs(_f) > 1 else None for i, chunk in enumerate(group_split(_data, _keep=_keep)): if keys is None: yield _f(chunk, *args, **kwargs) else: yield _f(chunk, keys.iloc[[i], :], *args, **kwargs) -group_map.list = register_verb(DataFrame, context=Context.PENDING)( - lambda *args, **kwargs: list(group_map(*args, **kwargs)) -) +def _group_map_list( + _data: DataFrame, + _f: Callable, + *args: Any, + _keep: bool = False, + **kwargs: Any +) -> List: + """List version of group_map""" + return list(_data >> group_map(_f, *args, _keep=_keep, **kwargs)) + +group_map.list = register_verb( + DataFrame, + context=Context.PENDING +)(_group_map_list) @register_verb(DataFrame, context=Context.EVAL) def group_modify( @@ -204,9 +215,19 @@ def _( return group_split_impl(_data, _keep=True) -group_split.list = register_verb(DataFrame, context=Context.PENDING)( - lambda *args, **kwargs: list(group_split(*args, **kwargs)) -) +def _group_split_list( + _data: DataFrame, + *args: Any, + _keep: bool = True, + **kwargs: Any +) -> Iterable[DataFrame]: + """List version of group_split""" + return list(_data >> group_split(*args, _keep=_keep, **kwargs))
+ +group_split.list = register_verb( + DataFrame, + context=Context.PENDING +)(_group_split_list) def group_split_impl(data: DataFrame, _keep: bool): """Implement splitting data frame by groups""" diff --git a/datar/dplyr/mutate.py b/datar/dplyr/mutate.py index 0c50c3e3..eb785515 100644 --- a/datar/dplyr/mutate.py +++ b/datar/dplyr/mutate.py @@ -30,7 +30,7 @@ def mutate( _keep: str = 'all', _before: Optional[Union[int, str]] = None, _after: Optional[Union[int, str]] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: # pylint: disable=too-many-branches @@ -54,7 +54,7 @@ _after: Optionally, control where new columns should appear (the default is to add to the right hand side). See relocate() for more details. - _base0: Whether `_before` and `_after` are 0-based if given by indexes. + base0_: Whether `_before` and `_after` are 0-based if given by indexes. If not provided, will use `datar.base.get_option('index.base.0')` *args: and **kwargs: Name-value pairs. The name gives the name of the column @@ -97,7 +97,7 @@ out = out[setdiff(out.columns, removed)] if _before is not None or _after is not None: new = setdiff(cols.columns, _data.columns) - out = relocate(out, *new, _before=_before, _after=_after, _base0=_base0) + out = relocate(out, *new, _before=_before, _after=_after, base0_=base0_) if keep == 'all': keep = out.columns @@ -130,7 +130,7 @@ def _( _keep: str = 'all', _before: Optional[str] = None, _after: Optional[str] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrameGroupBy: """Mutate on DataFrameGroupBy object""" @@ -143,7 +143,7 @@ def apply_func(df): _keep=_keep, _before=_before, _after=_after, - _base0=_base0, + base0_=base0_, **kwargs ) ret.index = rows @@ -175,7 +175,7 @@ def transmute( *args: Any, _before: Optional[Union[int, str]] = None, _after: Optional[Union[int, str]] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """Mutate with _keep='none' @@ -188,7 +188,7 @@ _keep='none', _before=_before, _after=_after, - _base0=_base0, + base0_=base0_, **kwargs ) diff --git a/datar/dplyr/pull.py b/datar/dplyr/pull.py index 61ef8e33..ad9b226b 100644 --- a/datar/dplyr/pull.py +++ b/datar/dplyr/pull.py @@ -22,7 +22,7 @@ def pull( var: Union[int, str] = -1, name: Optional[StringOrIter] = None, to: Optional[str] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> Union[DataFrame, ArrayLikeType, Mapping[str, ArrayLikeType]]: # pylint: disable=too-many-branches """Pull a series or a dataframe from a dataframe @@ -52,7 +52,7 @@ - If not provided: `series` when pulled data has only one column. `dict` if `name` provided and has the same length as the pulled single column. Otherwise `frame`. - _base0: Whether `var` is 0-based if given by index + base0_: Whether `var` is 0-based if given by index If not provided, `datar.base.get_option('index.base.0')` is used.
Returns: @@ -66,7 +66,7 @@ def pull( name = [name] if isinstance(var, int): - var = position_at(var, _data.shape[1], base0=_base0) + var = position_at(var, _data.shape[1], base0=base0_) var = _data.columns[var] var = var.split('$', 1)[0] diff --git a/datar/dplyr/recode.py b/datar/dplyr/recode.py index eec32c2a..60f2895b 100644 --- a/datar/dplyr/recode.py +++ b/datar/dplyr/recode.py @@ -29,13 +29,13 @@ def _args_to_recodings( **kwargs: Any ) -> Mapping[Any, Any]: """Convert arguments to replaceable""" - _base0 = get_option('index.base.0') + base0_ = get_option('index.base.0') values = {} for i, arg in enumerate(args): if isinstance(arg, dict): values.update(arg) else: - values[i + int(not _base0)] = arg + values[i + int(not base0_)] = arg values.update(kwargs) if _force_index: diff --git a/datar/dplyr/relocate.py b/datar/dplyr/relocate.py index 37f2a25b..c0bae1da 100644 --- a/datar/dplyr/relocate.py +++ b/datar/dplyr/relocate.py @@ -15,7 +15,7 @@ def relocate( *args: Any, _before: Optional[Union[int, str]] = None, _after: Optional[Union[int, str]] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """change column positions @@ -30,7 +30,7 @@ def relocate( _before: and _after: Destination. Supplying neither will move columns to the left-hand side; specifying both is an error. - _base0: Whether `_before` and `_after` are 0-based if given by indexes. + base0_: Whether `_before` and `_after` are 0-based if given by indexes. If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -48,7 +48,7 @@ def relocate( all_columns, *args, **kwargs, - _base0=_base0, + base0_=base0_, _group_vars=gvars ) if _before is not None and _after is not None: @@ -59,14 +59,14 @@ def relocate( # length = len(all_columns) if _before is not None: where = min(_eval_select( - all_columns, _before, _group_vars=[], _base0=_base0 + all_columns, _before, _group_vars=[], base0_=base0_ )[0]) if where not in to_move: to_move.append(where) elif _after is not None: where = max(_eval_select( - all_columns, _after, _group_vars=[], _base0=_base0 + all_columns, _after, _group_vars=[], base0_=base0_ )[0]) if where not in to_move: to_move.insert(0, where) diff --git a/datar/dplyr/rename.py b/datar/dplyr/rename.py index 108c0799..855c95f2 100644 --- a/datar/dplyr/rename.py +++ b/datar/dplyr/rename.py @@ -16,7 +16,7 @@ @register_verb(DataFrame, context=Context.SELECT) def rename( _data: DataFrame, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: str ) -> DataFrame: """Changes the names of individual variables using new_name = old_name @@ -25,7 +25,7 @@ def rename( Args: _data: The dataframe **kwargs: The new_name = old_name pairs - _base0: Whether the old_name is 0-based if given by indexes. + base0_: Whether the old_name is 0-based if given by indexes. If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -36,7 +36,7 @@ def rename( selected, new_names = _eval_select( all_columns, _group_vars=gvars, - _base0=_base0, + base0_=base0_, **kwargs, ) @@ -61,7 +61,7 @@ def rename_with( _data: DataFrame, _fn: Callable[[str], str], *args: Any, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """Renames columns using a function. @@ -74,7 +74,7 @@ def rename_with( no non-keyword arguments are allowed to pass to the function, use keyword arguments instead. **kwargs: keyword arguments for `_fn` - _base0: Whether the old_name is 0-based if given by indexes. 
+ base0_: Whether the old_name is 0-based if given by indexes. If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -86,6 +86,6 @@ def rename_with( cols = args[0] args = args[1:] - cols = _data.columns[vars_select(_data.columns, cols, base0=_base0)] + cols = _data.columns[vars_select(_data.columns, cols, base0=base0_)] new_columns = {_fn(col, *args, **kwargs): col for col in cols} - return rename(_data, **new_columns, _base0=True) + return rename(_data, **new_columns, base0_=True) diff --git a/datar/dplyr/rows.py b/datar/dplyr/rows.py index 867fdc58..47ae5ccb 100644 --- a/datar/dplyr/rows.py +++ b/datar/dplyr/rows.py @@ -274,4 +274,5 @@ def _rows_match(x: DataFrame, y: DataFrame) -> numpy.ndarray: """Mimic vctrs::vec_match""" id_col = '__id__' y_with_id = rownames_to_column(y, var=id_col) - return left_join(x, y_with_id)[id_col].values + # pylint: disable=no-value-for-parameter + return (x >> left_join(y_with_id))[id_col].values diff --git a/datar/dplyr/select.py b/datar/dplyr/select.py index 371df317..180b54f6 100644 --- a/datar/dplyr/select.py +++ b/datar/dplyr/select.py @@ -20,7 +20,7 @@ def select( _data: DataFrame, *args: Union[StringOrIter, Inverted], - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Mapping[str, str] ) -> DataFrame: """Select (and optionally rename) variables in a data frame @@ -37,7 +37,7 @@ def select( Args: *columns: The columns to select **renamings: The columns to rename and select in new => old column way. - _base0: Whether the columns are 0-based if given by indexes + base0_: Whether the columns are 0-based if given by indexes If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -50,7 +50,7 @@ def select( *args, **kwargs, _group_vars=gvars, - _base0=_base0 + base0_=base0_ ) out = _data.iloc[:, selected].copy() @@ -73,7 +73,7 @@ def _eval_select( _all_columns: Index, *args: Any, _group_vars: Iterable[str], - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> Tuple[List[int], Mapping[str, str]]: """Evaluate selections to get locations @@ -81,7 +81,7 @@ def _eval_select( Returns: A tuple of (selected columns, dict of old-to-new renaming columns) """ - selected = vars_select(_all_columns, *args, *kwargs.values(), base0=_base0) + selected = vars_select(_all_columns, *args, *kwargs.values(), base0=base0_) missing = setdiff(_group_vars, _all_columns[selected]) if missing: logger.info( @@ -110,7 +110,7 @@ def _eval_select( # try: # If out of bounds, it should be raised at getting missing val = _all_columns[ - position_at(val, len(_all_columns), base0=_base0) + position_at(val, len(_all_columns), base0=base0_) ] # except IndexError: # raise ColumnNotExistingError( diff --git a/datar/dplyr/tidyselect.py b/datar/dplyr/tidyselect.py index b92cce58..18324ec8 100644 --- a/datar/dplyr/tidyselect.py +++ b/datar/dplyr/tidyselect.py @@ -189,7 +189,7 @@ def matches( def all_of( _data: DataFrame, x: Iterable[Union[int, str]], - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> List[str]: """For strict selection. @@ -199,7 +199,7 @@ def all_of( Args: _data: The data piped in x: A set of variables to match the columns - _base0: Whether `x` is 0-based or not. + base0_: Whether `x` is 0-based or not. 
if not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -210,7 +210,7 @@ def all_of( in `_data` columns """ all_columns = _data.columns - x = all_columns[vars_select(all_columns, x, base0=_base0)] + x = all_columns[vars_select(all_columns, x, base0=base0_)] # where do errors raise? # nonexists = setdiff(x, all_columns) @@ -228,7 +228,7 @@ def any_of( x: Iterable[Union[int, str]], # pylint: disable=redefined-builtin vars: Optional[Iterable[str]] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> List[str]: """Select but doesn't check for missing variables. @@ -238,14 +238,14 @@ def any_of( Args: _data: The data piped in x: A set of variables to match the columns - _base0: Whether `x` is 0-based or not. + base0_: Whether `x` is 0-based or not. if not provided, will use `datar.base.get_option('index.base.0')` Returns: The matched column names """ vars = vars or _data.columns - x = vars_select(vars, x, raise_nonexists=False, base0=_base0) + x = vars_select(vars, x, raise_nonexists=False, base0=base0_) # exists = [] # for idx in x: # try: @@ -260,7 +260,7 @@ def num_range( prefix: str, range: Iterable[int], # pylint: disable=redefined-builtin width: Optional[int] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> List[str]: """Matches a numerical range like x01, x02, x03. @@ -270,16 +270,16 @@ def num_range( range_: A sequence of integers, like `range(3)` (produces `0,1,2`). width: Optionally, the "width" of the numeric range. For example, a range of 2 gives "01", a range of three "001", etc. - _base0: Whether it is 0-based + base0_: Whether it is 0-based Returns: A list of ranges with prefix. """ - _base0 = get_option('index.base.0', _base0) + base0_ = get_option('index.base.0', base0_) zfill = lambda elem: ( - elem + int(not _base0) + elem + int(not base0_) if not width - else str(elem + int(not _base0)).zfill(width) + else str(elem + int(not base0_)).zfill(width) ) return Array([ f"{prefix}{zfill(elem)}" diff --git a/datar/tibble/tibble.py b/datar/tibble/tibble.py index e15cdce5..32512ff4 100644 --- a/datar/tibble/tibble.py +++ b/datar/tibble/tibble.py @@ -25,7 +25,7 @@ def tibble( *args: Any, _name_repair: Union[str, Callable] = 'check_unique', _rows: Optional[int] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, **kwargs: Any ) -> DataFrame: @@ -43,7 +43,7 @@ def tibble( - a function: apply custom name repair _rows: Number of rows of a 0-col dataframe when args and kwargs are not provided. When args or kwargs are provided, this is ignored. - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. Returns: @@ -84,7 +84,7 @@ def tibble( names, values, _name_repair=_name_repair, - _base0=_base0, + base0_=base0_, _dtypes=_dtypes ) @@ -101,7 +101,7 @@ def tibble( def tibble_row( *args: Any, _name_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, **kwargs: Any ) -> DataFrame: @@ -119,7 +119,7 @@ def tibble_row( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. 
If not provided, will use `datar.base.get_option('index.base.0')`. Returns: @@ -128,7 +128,7 @@ def tibble_row( if not args and not kwargs: df = DataFrame(index=[0]) # still one row else: - df = tibble(*args, **kwargs, _name_repair=_name_repair, _base0=_base0) + df = tibble(*args, **kwargs, _name_repair=_name_repair, base0_=base0_) if df.shape[0] > 1: raise ValueError("All arguments must be size one, use `[]` to wrap.") @@ -144,7 +144,7 @@ def tibble_row( def fibble( *args: Any, _name_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _rows: Optional[int] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, **kwargs: Any @@ -170,14 +170,14 @@ def fibble( *args, **kwargs, _name_repair=_name_repair, _rows=_rows, - _base0=_base0, + base0_=base0_, _dtypes=_dtypes ) def tribble( *dummies: Any, _name_repair: Union[str, Callable] = 'minimal', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None ) -> DataFrame: """Create dataframe using an easier to read row-by-row layout @@ -226,7 +226,7 @@ def tribble( columns, [[]]*len(columns), _name_repair=_name_repair, - _base0=_base0, + base0_=base0_, _dtypes=_dtypes ) @@ -241,7 +241,7 @@ def tribble( columns, list(zip(*data)), _name_repair=_name_repair, - _base0=_base0, + base0_=base0_, _dtypes=_dtypes ) @@ -249,7 +249,7 @@ def zibble( names: Iterable[Optional[str]], values: Iterable, _name_repair: Union[str, Callable] = 'minimal', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None ) -> DataFrame: """Zip version of tibble, where names specify together and so do values. @@ -275,7 +275,7 @@ def zibble( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. Returns: @@ -321,7 +321,7 @@ def zibble( if out is None: out = DataFrame() - names = repair_names(out.columns.tolist(), _name_repair, _base0) + names = repair_names(out.columns.tolist(), _name_repair, base0_) out.columns = names apply_dtypes(out, _dtypes) diff --git a/datar/tibble/verbs.py b/datar/tibble/verbs.py index b3f9755c..698ddb7b 100644 --- a/datar/tibble/verbs.py +++ b/datar/tibble/verbs.py @@ -19,7 +19,7 @@ def enframe( x: Optional[Union[Iterable, Mapping]], name: Optional[str] = "name", value: str = "value", - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Converts mappings or lists to one- or two-column data frames. @@ -29,7 +29,7 @@ def enframe( value: value Names of the columns that store the names and values. If `None`, a one-column dataframe is returned. `value` cannot be `None` - _base0: Whether the indexes for lists converted to name are 0-based + base0_: Whether the indexes for lists converted to name are 0-based or not. 
Returns: @@ -54,8 +54,8 @@ def enframe( elif name: if not isinstance(x, dict): - _base0 = get_option('index.base.0', _base0) - names = (i + int(not _base0) for i in range(len(x))) + base0_ = get_option('index.base.0', base0_) + names = (i + int(not base0_) for i in range(len(x))) values = x else: names = x.keys() @@ -95,7 +95,7 @@ def add_row( *args: Any, _before: Optional[int] = None, _after: Optional[int] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """Add one or more rows of data to an existing data frame. @@ -109,7 +109,7 @@ def add_row( _before: and _after: row index where to add the new rows. (default to add after the last row) - _base0: Whether `_before` and `_after` are 0-based or not. + base0_: Whether `_before` and `_after` are 0-based or not. Returns: The dataframe with the added rows @@ -133,7 +133,7 @@ def add_row( if extra_vars: raise ValueError(f"New rows can't add columns: {extra_vars}") - pos = _pos_from_before_after(_before, _after, _data.shape[0], _base0) + pos = _pos_from_before_after(_before, _after, _data.shape[0], base0_) out = _rbind_at(_data, df, pos) if isinstance(_data, DataFrameRowwise): @@ -155,7 +155,7 @@ def add_column( _before: Optional[Union[str, int]] = None, _after: Optional[Union[str, int]] = None, _name_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, _dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, **kwargs: Any ) -> DataFrame: @@ -168,7 +168,7 @@ def add_column( _before: and _after: Column index or name where to add the new columns (default to add after the last column) - _base0: Whether `_before` and `_after` are 0-based if they are index. + base0_: Whether `_before` and `_after` are 0-based if they are index. if not given, will be determined by `get_option('index_base_0')`, which is `False` by default. _dtypes: The dtypes for the new columns, either a uniform dtype or a @@ -187,7 +187,7 @@ def add_column( _before, _after, _data.columns.tolist(), - _base0 + base0_ ) out = _cbind_at(_data, df, pos, _name_repair) @@ -255,7 +255,7 @@ def rownames_to_column(_data: DataFrame, var="rowname") -> DataFrame: def rowid_to_column( _data: DataFrame, var="rowid", - _base0: bool = False + base0_: bool = False ) -> DataFrame: """Add rownames as a column @@ -271,7 +271,7 @@ def rowid_to_column( raise ValueError(f"Column name `{var}` must not be duplicated.") from ..dplyr.mutate import mutate - base = int(not _base0) + base = int(not base0_) return remove_rownames(mutate( _data, **{var: range(base, _data.shape[0] + base)}, @@ -348,7 +348,7 @@ def _cbind_at( from ..dplyr import bind_cols part1 = data.iloc[:, :pos] part2 = data.iloc[:, pos:] - return bind_cols(part1, df, part2, _name_repair=_name_repair) + return part1 >> bind_cols(df, part2, _name_repair=_name_repair) def _pos_from_before_after( before: Optional[int], diff --git a/datar/tidyr/chop.py b/datar/tidyr/chop.py index 91906057..efdee1e5 100644 --- a/datar/tidyr/chop.py +++ b/datar/tidyr/chop.py @@ -29,7 +29,7 @@ def chop( data: DataFrame, cols: Optional[Union[IntOrIter, StringOrIter]] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Makes data frame shorter by converting rows within each group into list-columns. 
@@ -37,7 +37,7 @@ def chop( Args: data: A data frame cols: Columns to chop - _base0: Whether `cols` are 0-based + base0_: Whether `cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -47,7 +47,7 @@ def chop( return data.copy() all_columns = data.columns - cols = vars_select(all_columns, cols, base0=_base0) + cols = vars_select(all_columns, cols, base0=base0_) cols = all_columns[cols] # when cols is empty # order may change for all_columns.difference([]) @@ -75,7 +75,7 @@ def chop( else: vals = pandas.concat(compacted, ignore_index=True) - out = bind_cols(split_key, vals) + out = split_key >> bind_cols(vals) return reconstruct_tibble(data, out, keep_rowwise=True) @register_verb(DataFrame, context=Context.SELECT) @@ -83,8 +83,8 @@ def unchop( data: DataFrame, cols: Optional[Union[IntOrIter, StringOrIter]] = None, keep_empty: bool = False, - dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, - _base0: Optional[bool] = None + ptype: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, + base0_: Optional[bool] = None ) -> DataFrame: """Makes df longer by expanding list-columns so that each element of the list-column gets its own row in the output. @@ -108,21 +108,21 @@ def unchop( dropped from the output. If you want to preserve all rows, use `keep_empty` = `True` to replace size-0 elements with a single row of missing values. - dtypes: NOT `ptype`. Providing the dtypes for the output columns. + ptype: Providing the dtypes for the output columns. Could be a single dtype, which will be applied to all columns, or a dictionary of dtypes with keys for the columns and values the dtypes. For nested data frames, we need to specify `col$a` as key. If `col` is used as key, all columns of the nested data frames will be casted into that dtype. - _base0: Whether `cols` are 0-based + base0_: Whether `cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` Returns: A data frame with selected columns unchopped. """ all_columns = data.columns - cols = vars_select(all_columns, cols, base0=_base0) + cols = vars_select(all_columns, cols, base0=base0_) if len(cols) == 0 or data.shape[0] == 0: return data.copy() @@ -131,7 +131,7 @@ def unchop( key_cols = all_columns.difference(cols).tolist() out = _unchopping(data, cols, key_cols, keep_empty) - apply_dtypes(out, dtypes) + apply_dtypes(out, ptype) return reconstruct_tibble(data, out, keep_rowwise=True) def _vec_split( @@ -148,7 +148,7 @@ def _vec_split( if isinstance(by, Series): # pragma: no cover, always a data frame? by = by.to_frame() - df = bind_cols(x, by) + df = x >> bind_cols(by) if df.shape[0] == 0: return DataFrame(columns=['key', 'val']) df = df >> group_by(*by.columns) @@ -213,7 +213,7 @@ def _unchopping( # say y$a, then ['y'] will not select it out = keep_column_order(DataFrame(key_data), data.columns) if not keep_empty: - out = drop_na(out, *val_data, how='all') + out = drop_na(out, *val_data, how_='all') apply_dtypes(out, dtypes) copy_attrs(out, data) return out diff --git a/datar/tidyr/drop_na.py b/datar/tidyr/drop_na.py index 61af591d..108e856d 100644 --- a/datar/tidyr/drop_na.py +++ b/datar/tidyr/drop_na.py @@ -14,8 +14,8 @@ def drop_na( _data: DataFrame, *columns: str, - how: str = 'any', - _base0: Optional[bool] = None + how_: str = 'any', + base0_: Optional[bool] = None ) -> DataFrame: """Drop rows containing missing values @@ -24,23 +24,23 @@ def drop_na( Args: data: A data frame. *columns: Columns to inspect for missing values. 
- how: How to select the rows to drop + how_: How to select the rows to drop - all: All columns of `columns` to be `NA`s - any: Any columns of `columns` to be `NA`s (tidyr doesn't support this argument) - _base0: Whether `*columns` are 0-based if given by indexes + base0_: Whether `*columns` are 0-based if given by indexes If not provided, will use `datar.base.get_option('index.base.0')` Returns: Dataframe with rows with NAs dropped and indexes dropped """ - arg_match(how, 'how', ['any', 'all']) + arg_match(how_, 'how_', ['any', 'all']) all_columns = _data.columns if columns: - columns = vars_select(all_columns, *columns, base0=_base0) + columns = vars_select(all_columns, *columns, base0=base0_) columns = all_columns[columns] - out = _data.dropna(subset=columns, how=how).reset_index(drop=True) + out = _data.dropna(subset=columns, how=how_).reset_index(drop=True) else: - out = _data.dropna(how=how).reset_index(drop=True) + out = _data.dropna(how=how_).reset_index(drop=True) return reconstruct_tibble(_data, out, keep_rowwise=True) diff --git a/datar/tidyr/expand.py b/datar/tidyr/expand.py index 202c6f31..97f96014 100644 --- a/datar/tidyr/expand.py +++ b/datar/tidyr/expand.py @@ -27,7 +27,7 @@ def expand_grid( *args: Iterable[Any], _name_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Iterable[Any] ) -> DataFrame: """Create a tibble from all combinations of inputs @@ -44,7 +44,7 @@ def expand_grid( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. Returns: @@ -84,15 +84,15 @@ def expand_grid( ## tibble will somehow flatten the nested dataframes into fake nested df. ## do it inside _flatten_nested - # out = tibble(out, _name_repair=_name_repair, _base0=_base0) - return _flatten_nested(out, named, _name_repair, _base0) + # out = tibble(out, _name_repair=_name_repair, base0_=base0_) + return _flatten_nested(out, named, _name_repair, base0_) @register_verb(DataFrame, context=Context.EVAL) def expand( data: DataFrame, *args: Union[Series, DataFrame], _name_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Union[Series, DataFrame] ) -> DataFrame: """Generates all combination of variables found in a dataset. @@ -117,7 +117,7 @@ def expand( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. 
Returns: @@ -129,8 +129,8 @@ def expand( key: _sorted_unique(val) for key, val in cols.items() } - out = expand_grid(**cols, _name_repair=_name_repair, _base0=_base0) - out = _flatten_nested(out, named, _name_repair, _base0) + out = expand_grid(**cols, _name_repair=_name_repair, base0_=base0_) + out = _flatten_nested(out, named, _name_repair, base0_) copy_attrs(out, data) return out @@ -140,7 +140,7 @@ def _( data: DataFrameGroupBy, *args: Union[Series, DataFrame], _name_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Union[Series, DataFrame] ) -> DataFrameGroupBy: """Expand on grouped data frame""" @@ -149,7 +149,7 @@ def apply_func(df): df, *args, _name_repair=_name_repair, - _base0=_base0, + base0_=base0_, **kwargs ) @@ -161,7 +161,7 @@ def _( data: DataFrameRowwise, *args: Union[Series, DataFrame], _name_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Union[Series, DataFrame] ) -> DataFrame: """Expand on rowwise dataframe""" @@ -169,7 +169,7 @@ def _( data, *args, _name_repair=_name_repair, - _base0=_base0, + base0_=base0_, **kwargs ) @@ -177,7 +177,7 @@ def _( def nesting( *args: Any, _name_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """A helper that only finds combinations already present in the data. @@ -201,7 +201,7 @@ def nesting( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. Returns: @@ -210,16 +210,16 @@ def nesting( cols = _dots_cols(*args, **kwargs) named = cols.pop('__named__') out = _sorted_unique( - tibble(**cols, _name_repair=_name_repair, _base0=_base0) + tibble(**cols, _name_repair=_name_repair, base0_=base0_) ) - return _flatten_nested(out, named, _name_repair, _base0) + return _flatten_nested(out, named, _name_repair, base0_) @register_func(None, context=Context.EVAL) def crossing( *args: Any, _name_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **kwargs: Any ) -> DataFrame: """A wrapper around `expand_grid()` that de-duplicates and sorts its inputs @@ -245,7 +245,7 @@ def crossing( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether the suffixes of repaired names should be 0-based. + base0_: Whether the suffixes of repaired names should be 0-based. If not provided, will use `datar.base.get_option('index.base.0')`. 
Returns: @@ -258,8 +258,8 @@ def crossing( for key, val in cols.items() } - out = expand_grid(**out, _name_repair=_name_repair, _base0=_base0) - return _flatten_nested(out, named, _name_repair, _base0) + out = expand_grid(**out, _name_repair=_name_repair, base0_=base0_) + return _flatten_nested(out, named, _name_repair, base0_) @@ -332,7 +332,7 @@ def _flatten_nested( for key, val in x.items() } out = _flatten_at(x, to_flatten) - return tibble(**out, _name_repair=name_repair, _base0=base0) + return tibble(**out, _name_repair=name_repair, base0_=base0) def _flatten_at( x: Mapping[str, Iterable[Any]], diff --git a/datar/tidyr/extract.py b/datar/tidyr/extract.py index 0d3494ae..863db859 100644 --- a/datar/tidyr/extract.py +++ b/datar/tidyr/extract.py @@ -23,7 +23,7 @@ def extract( regex: str = r'(\w+)', remove: bool = True, convert: Union[bool, Dtype, Mapping[str, Dtype]] = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Given a regular expression with capturing groups, extract() turns each group into a new column. If the groups don't match, or the input is NA, @@ -41,7 +41,7 @@ def extract( remove: If TRUE, remove input column from output data frame. convert: The universal type for the extracted columns or a dict for individual ones - _base0: Whether `col` is 0-based when given by index + base0_: Whether `col` is 0-based when given by index If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -51,7 +51,7 @@ def extract( into = [into] all_columns = data.columns - col = vars_select(all_columns, col, base0=_base0) + col = vars_select(all_columns, col, base0=base0_) col = all_columns[col[0]] outcols = {} diff --git a/datar/tidyr/fill.py b/datar/tidyr/fill.py index 3b1e126d..eabb68f3 100644 --- a/datar/tidyr/fill.py +++ b/datar/tidyr/fill.py @@ -21,7 +21,7 @@ def fill( _data: DataFrame, *columns: Union[str, int], _direction: str = "down", - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Fills missing values in selected columns using the next or previous entry. @@ -35,7 +35,7 @@ def fill( Currently either "down" (the default), "up", "downup" (i.e. first down and then up) or "updown" (first up and then down). - _base0: Whether `*columns` are 0-based if given by indexes + base0_: Whether `*columns` are 0-based if given by indexes If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -51,7 +51,7 @@ def fill( method='ffill' if _direction.endswith('down') else 'bfill', ) else: - colidx = vars_select(data.columns, *columns, base0=_base0) + colidx = vars_select(data.columns, *columns, base0=base0_) data.iloc[:, colidx] = fill(data.iloc[:, colidx], _direction=_direction) return data diff --git a/datar/tidyr/nest.py b/datar/tidyr/nest.py index aaed353c..78c36689 100644 --- a/datar/tidyr/nest.py +++ b/datar/tidyr/nest.py @@ -24,7 +24,7 @@ def nest( _data: DataFrame, _names_sep: Optional[str] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **cols: Union[str, int] ) -> DataFrame: """Nesting creates a list-column of data frames @@ -38,7 +38,7 @@ def nest( The names of the new outer columns will be formed by pasting together the outer and the inner column names, separated by `_names_sep`. 
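As a rough usage sketch for the two verbs in these hunks (the frames are invented): `extract` turns each regex capturing group into a column, and `fill` fills NAs directionally:

```python
from datar.all import f, tibble, extract, fill

df = tibble(x=['a-1', 'b-2'])
df >> extract(f.x, into=['letter', 'num'], regex=r'(\w+)-(\d+)', convert=True)

df2 = tibble(y=[1, None, None, 2])
df2 >> fill(f.y, _direction='down')  # y becomes 1, 1, 1, 2
```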
- _base0: Whether `**cols` are 0-based + base0_: Whether `**cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -51,7 +51,7 @@ def nest( colgroups = {} usedcols = set() for group, columns in cols.items(): - oldcols = all_columns[vars_select(all_columns, columns, base0=_base0)] + oldcols = all_columns[vars_select(all_columns, columns, base0=base0_)] usedcols = usedcols.union(oldcols) newcols = ( oldcols if _names_sep is None else @@ -83,20 +83,20 @@ def nest( out.columns = list(colgroups) if u_keys.shape[1] == 0: return out if isinstance(out, DataFrame) else out.to_frame() - return bind_cols(u_keys, recycle_value(out, u_keys.shape[0])) + return u_keys >> bind_cols(recycle_value(out, u_keys.shape[0])) @nest.register(DataFrameGroupBy, context=Context.SELECT) def _( _data: DataFrameGroupBy, _names_sep: Optional[str] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **cols: Mapping[str, Union[str, int]] ) -> DataFrameGroupBy: """Nesting grouped dataframe""" if not cols: cols = {'data': setdiff(_data.columns, group_vars(_data))} out = nest.dispatch(DataFrame)( - _data, **cols, _names_sep=_names_sep, _base0=_base0 + _data, **cols, _names_sep=_names_sep, base0_=base0_ ) return reconstruct_tibble(_data, out, keep_rowwise=True) @@ -105,10 +105,10 @@ def unnest( data: DataFrame, *cols: Union[str, int], keep_empty: bool = False, - dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, + ptype: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, names_sep: Optional[str] = None, names_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Flattens list-column of data frames back out into regular columns. @@ -122,7 +122,7 @@ def unnest( dropped from the output. If you want to preserve all rows, use `keep_empty` = `True` to replace size-0 elements with a single row of missing values. - dtypes: NOT `ptype`. Providing the dtypes for the output columns. + ptype: Providing the dtypes for the output columns. Could be a single dtype, which will be applied to all columns, or a dictionary of dtypes with keys for the columns and values the dtypes. 
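A minimal nest/unnest round trip, assuming the API shown in the hunks above (the frame and the `data` group name are made up):

```python
from datar.all import f, tibble, nest, unnest

df = tibble(g=[1, 1, 2], x=[1, 2, 3])
nested = df >> nest(data=f.x)  # one row per g, with a list-column `data` of frames
nested >> unnest(f.data)       # flattens back out to three rows
```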
@@ -139,7 +139,7 @@ def unnest( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether `cols` are 0-based + base0_: Whether `cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -149,7 +149,7 @@ def unnest( raise ValueError("`*cols` is required when using unnest().") all_columns = data.columns - cols = vars_select(all_columns, cols, base0=_base0) + cols = vars_select(all_columns, cols, base0=base0_) cols = all_columns[cols] out = data.copy() @@ -158,7 +158,7 @@ def unnest( out = unchop( out, cols, - keep_empty=keep_empty, dtypes=dtypes, _base0=_base0 + keep_empty=keep_empty, ptype=ptype, base0_=base0_ ) return unpack( out, cols, @@ -170,19 +170,19 @@ def _( data: DataFrameRowwise, *cols: Union[str, int], keep_empty: bool = False, - dtypes: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, + ptype: Optional[Union[Dtype, Mapping[str, Dtype]]] = None, names_sep: Optional[str] = None, names_repair: Union[str, Callable] = 'check_unique', - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Unnest rowwise dataframe""" out = unnest.dispatch(DataFrame)( data, *cols, keep_empty=keep_empty, - dtypes=dtypes, + ptype=ptype, names_sep=names_sep, names_repair=names_repair, - _base0=_base0 + base0_=base0_ ) return DataFrameGroupBy( out, diff --git a/datar/tidyr/pack.py b/datar/tidyr/pack.py index dd3341f1..477f8850 100644 --- a/datar/tidyr/pack.py +++ b/datar/tidyr/pack.py @@ -19,7 +19,7 @@ def pack( _data: DataFrame, _names_sep: Optional[str] = None, - _base0: Optional[bool] = None, + base0_: Optional[bool] = None, **cols: Union[str, int] ) -> DataFrame: """Makes df narrow by collapsing a set of columns into a single df-column. @@ -33,7 +33,7 @@ def pack( The names of the new outer columns will be formed by pasting together the outer and the inner column names, separated by `_names_sep`. - _base0: Whether `**cols` are 0-based + base0_: Whether `**cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` """ if not cols: @@ -44,7 +44,7 @@ def pack( colgroups = {} usedcols = set() for group, columns in cols.items(): - oldcols = all_columns[vars_select(all_columns, columns, base0=_base0)] + oldcols = all_columns[vars_select(all_columns, columns, base0=base0_)] usedcols = usedcols.union(oldcols) newcols = ( oldcols if _names_sep is None else @@ -58,7 +58,7 @@ def pack( cols[f'{group}${newcol}'] = _data[oldcol] asis = setdiff(_data.columns, usedcols) - out = bind_cols(_data[asis], DataFrame(cols)) + out = _data[asis] >> bind_cols(DataFrame(cols)) return reconstruct_tibble(_data, out, keep_rowwise=True) @register_verb(DataFrame, context=Context.SELECT) @@ -67,7 +67,7 @@ def unpack( cols: Union[StringOrIter, IntOrIter], names_sep: Optional[str] = None, names_repair: Union[str, Callable] = "check_unique", - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Makes df wider by expanding df-columns back out into individual columns. 
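And a hedged sketch of the pack/unpack pair, assuming `c()` from `datar.base` for column selection (frame invented):

```python
from datar.all import f, c, tibble, pack, unpack

df = tibble(x=[1, 2], y=[3, 4], z=[5, 6])
packed = df >> pack(yz=c(f.y, f.z))  # y and z collapse into a single df-column `yz`
packed >> unpack(f.yz)               # expands the df-column back out
```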
@@ -89,7 +89,7 @@ def unpack( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether `cols` are 0-based + base0_: Whether `cols` are 0-based if not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -101,7 +101,7 @@ def unpack( all_columns = data.columns cols = _check_present( data, cols, all_columns, - base0=_base0, + base0=base0_, ) out = data.copy() @@ -120,7 +120,7 @@ def unpack( else: new_cols.append(col) - new_cols = repair_names(new_cols, names_repair, _base0) + new_cols = repair_names(new_cols, names_repair, base0_) out.columns = new_cols copy_attrs(out, data) diff --git a/datar/tidyr/pivot_long.py b/datar/tidyr/pivot_long.py index d07ed505..16aa55ca 100644 --- a/datar/tidyr/pivot_long.py +++ b/datar/tidyr/pivot_long.py @@ -48,7 +48,7 @@ def pivot_longer( values_transform: Optional[ Union[Callable, Mapping[str, Callable]] ] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ): """"lengthens" data, increasing the number of rows and decreasing the number of columns. @@ -133,7 +133,7 @@ def pivot_longer( but check they are unique, - "universal": Make the names unique and syntactic - a function: apply custom name repair - _base0: Whether `cols` are 0-based if given by indexes + base0_: Whether `cols` are 0-based if given by indexes If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -142,7 +142,7 @@ def pivot_longer( rowid_column = '_PIVOT_ROWID_' ret = _data.assign(**{rowid_column: range(_data.shape[0])}) all_columns = ret.columns - columns = _data.columns[vars_select(_data.columns, cols, base0=_base0)] + columns = _data.columns[vars_select(_data.columns, cols, base0=base0_)] id_columns = all_columns.difference(columns) if is_scalar(names_to): @@ -203,7 +203,7 @@ def pivot_longer( sep=names_sep ) # extract/separate puts `into` last - ret = relocate(ret, values_to, _after=-1, _base0=True) + ret = relocate(ret, values_to, _after=-1, base0_=True) if '.value' in names_to: @@ -258,7 +258,7 @@ def pivot_longer( elif name in values_transform: ret[name] = ret[name].apply(values_transform[name]) - names = repair_names(ret.columns.tolist(), names_repair, _base0=_base0) + names = repair_names(ret.columns.tolist(), names_repair, base0_=base0_) ret.columns = names return reconstruct_tibble(_data, ret) diff --git a/datar/tidyr/pivot_wide.py b/datar/tidyr/pivot_wide.py index f6bb9222..80fc3f79 100644 --- a/datar/tidyr/pivot_wide.py +++ b/datar/tidyr/pivot_wide.py @@ -31,7 +31,7 @@ def pivot_wider( values_from: StringOrIter = "value", values_fill: Any = None, values_fn: Union[Callable, Mapping[str, Callable]] = identity, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """"widens" data, increasing the number of columns and decreasing the number of rows. @@ -65,7 +65,7 @@ def pivot_wider( This can be a dict you want to apply different aggregations to different value columns. If not specified, will be `numpy.mean` - _base0: Whether `id_cols`, `names_from` and `values_from` + base0_: Whether `id_cols`, `names_from` and `values_from` are 0-based if given by indexes. 
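For `pivot_longer`, a small sketch under the signature above (column names are invented):

```python
from datar.all import f, c, tibble, pivot_longer

df = tibble(id=[1, 2], a=[3, 4], b=[5, 6])
df >> pivot_longer(c(f.a, f.b), names_to='name', values_to='value')
# 4 rows, one per (id, original column) pair
```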
If not provided, will use `datar.base.get_option('index.base.0')` @@ -81,7 +81,7 @@ def pivot_wider( if id_cols is None: all_cols = _data.columns - names_from = all_cols[vars_select(all_cols, names_from, base0=_base0)] + names_from = all_cols[vars_select(all_cols, names_from, base0=base0_)] # values_from could be a df-column new_values_from = [] for value_from in values_from: @@ -95,7 +95,7 @@ def pivot_wider( else: new_values_from.append(value_from) values_from = all_cols[ - vars_select(all_cols, *new_values_from, base0=_base0) + vars_select(all_cols, *new_values_from, base0=base0_) ] id_cols = ( all_cols diff --git a/datar/tidyr/separate.py b/datar/tidyr/separate.py index b2570c45..10560ba2 100644 --- a/datar/tidyr/separate.py +++ b/datar/tidyr/separate.py @@ -31,7 +31,7 @@ def separate( convert: Union[bool, Dtype, Mapping[str, Dtype]] = False, extra: str = "warn", fill: str = "warn", - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Given either a regular expression or a vector of character positions, turns a single character column into multiple columns. @@ -62,7 +62,7 @@ def separate( - "warn" (the default): emit a warning and fill from the right - "right": fill with missing values on the right - "left": fill with missing values on the left - _base0: Whether `col` is 0-based when given by index and Whether `sep` + base0_: Whether `col` is 0-based when given by index and Whether `sep` is 0-based if given by position If not provided, will use `datar.base.get_option('index.base.0')` @@ -76,7 +76,7 @@ def separate( raise ValueError("`into` must be a string or a list of strings.") all_columns = data.columns - col = vars_select(all_columns, col, base0=_base0) + col = vars_select(all_columns, col, base0=base0_) col = all_columns[col[0]] colindex = [ @@ -96,7 +96,7 @@ def separate( sep=sep, extra=extra, fill=fill, - base0=_base0, + base0=base0_, extra_warns=extra_warns, missing_warns=missing_warns ) @@ -133,7 +133,7 @@ def separate_rows( *columns: str, sep: str = r'[^0-9A-Za-z]+', convert: Union[bool, Dtype, Mapping[str, Dtype]] = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Separates the values and places each one in its own row. @@ -143,14 +143,14 @@ def separate_rows( sep: Separator between columns. convert: The universal type for the extracted columns or a dict for individual ones - _base0: Whether `columns` is 0-based when given by index + base0_: Whether `columns` is 0-based when given by index If not provided, will use `datar.base.get_option('index.base.0')` Returns: Dataframe with rows separated and repeated. 
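The reverse direction, and `separate`, again as hedged sketches with invented frames:

```python
from datar.all import f, tibble, pivot_wider, separate

long = tibble(id=[1, 1, 2, 2], key=['a', 'b', 'a', 'b'], val=[3, 5, 4, 6])
long >> pivot_wider(names_from=f.key, values_from=f.val)  # columns: id, a, b

df = tibble(x=['a.1', 'b.2'])
df >> separate(f.x, into=['letter', 'num'])  # default sep splits on non-alphanumerics
```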
""" all_columns = data.columns - selected = all_columns[vars_select(all_columns, *columns, base0=_base0)] + selected = all_columns[vars_select(all_columns, *columns, base0=base0_)] out = data.copy() for sel in selected: out[sel] = out[sel].apply( @@ -159,12 +159,12 @@ def separate_rows( sep=sep, extra="merge", fill="right", - base0=_base0, + base0=base0_, extra_warns=[], missing_warns=[] ) - out = unchop(out, selected, keep_empty=True, dtypes=convert, _base0=_base0) + out = unchop(out, selected, keep_empty=True, ptype=convert, base0_=base0_) return reconstruct_tibble(out, ungroup(out), selected, keep_rowwise=True) def _separate_col( diff --git a/datar/tidyr/uncount.py b/datar/tidyr/uncount.py index 5fb22be3..b03dfe7b 100644 --- a/datar/tidyr/uncount.py +++ b/datar/tidyr/uncount.py @@ -20,7 +20,7 @@ def uncount( weights: IntOrIter, _remove: bool = True, _id: Optional[str] = None, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Duplicating rows according to a weighting variable @@ -31,7 +31,7 @@ def uncount( then this column is removed. _id: Supply a string to create a new variable which gives a unique identifier for each created row (0-based). - _base0: Whether the generated `_id` columns are 0-based. + base0_: Whether the generated `_id` columns are 0-based. If not provided, will use `datar.base.get_option('index.base.0')` Returns: @@ -60,7 +60,7 @@ def uncount( out.reset_index(drop=True, inplace=True) if _id: - base = int(not get_option('index.base.0', _base0)) + base = int(not get_option('index.base.0', base0_)) # pylint: disable=no-value-for-parameter out = ( out >> diff --git a/datar/tidyr/unite.py b/datar/tidyr/unite.py index 781d143f..f36bcf2e 100644 --- a/datar/tidyr/unite.py +++ b/datar/tidyr/unite.py @@ -19,7 +19,7 @@ def unite( sep: str = '_', remove: bool = True, na_rm: bool = False, - _base0: Optional[bool] = None + base0_: Optional[bool] = None ) -> DataFrame: """Unite multiple columns into one by pasting strings together @@ -31,7 +31,7 @@ def unite( remove: If True, remove input columns from output data frame. na_rm: If True, missing values will be remove prior to uniting each value. 
-        _base0: Whether `columns` is 0-based when given by index
+        base0_: Whether `columns` is 0-based when given by index
            If not provided, will use `datar.base.get_option('index.base.0')`

    Returns:
@@ -41,7 +41,7 @@
     if not columns:
         columns = all_columns
     else:
-        columns = all_columns[vars_select(all_columns, *columns, base0=_base0)]
+        columns = all_columns[vars_select(all_columns, *columns, base0=base0_)]

     out = data.copy()
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index a5df6d91..65b6ac35 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -1,3 +1,12 @@
+## 0.3.0
+- Adopt `pipda` v0.3.0
+
+Breaking changes:
+
+- Rename argument `dtypes` of `unchop` and `unnest` back to `ptype`
+- Change all `_base0` to `base0_`
+- Change argument `how` of `tidyr.drop_na` to `how_`
+
 ## 0.2.3
 - Fix compatibility with `pandas` `v1.2.0~4` (#20, thanks to @antonio-yu)
 - Fix base.table when inputs are factors and exclude is NA;
@@ -63,7 +72,7 @@ Changed:
 ## 0.0.6
 - Add `options`, `get_option` and `options_context` to `datar.base` to allow set/get global options
 - Add options: `dplyr.summarise.inform`
-- Add `_base0` argument to all related APIs
+- Add `base0_` argument to all related APIs
 - Add `nycflights13` datasets
 - Support slice_head/slice_tail for grouped data
diff --git a/docs/TODO.md b/docs/TODO.md
deleted file mode 100644
index 8c5afd0a..00000000
--- a/docs/TODO.md
+++ /dev/null
@@ -1,10 +0,0 @@
-
-- [x] Add tests for tidyr from original tidyverse/tidyr cases
-- [x] Add more tests for base/core
-- [ ] Port more functions from `r-base`, `r-stats`, etc
-- [ ] Port more datasets from `r-datasets` namespace
-- [x] Add more detailed documentations
-- [x] Add reference maps to compare to the references from the original packages.
-- [x] Add `_base0` argument to APIs with index involved
-- [x] Add `options` for `index_base`, `dplyr.summarise.inform`, etc
-- [x] Rename `base.context` to `base.data_context`
diff --git a/docs/advanced.md b/docs/advanced.md
new file mode 100644
index 00000000..091fd574
--- /dev/null
+++ b/docs/advanced.md
@@ -0,0 +1,68 @@
+## Register your own verb
+
+Verbs are registered by `register_verb()` from [`pipda`][1].
+
+The full signature of `register_verb()` is as follows:
+
+```python
+register_verb(
+    types: Union[function, Type, Iterable[Type]] = ,
+    context: Union[pipda.context.Context, pipda.context.ContextBase] = None,
+    func: Union[function, NoneType] = None,
+    extra_contexts: Union[Mapping[str, Union[pipda.context.Context, pipda.context.ContextBase]], NoneType] = None,
+    **attrs: Any
+) -> Callable
+    """
+    Register a verb with specific types of data
+
+    If `func` is not given (works like `register_verb(types, context=...)`),
+    it returns a function that works as a decorator.
+
+    For example
+    >>> @register_verb(DataFrame, context=Context.EVAL)
+    >>> def verb(data, ...):
+    >>>     ...
+
+    When the function is passed as a non-keyword argument, the other
+    arguments serve as defaults
+    >>> @register_verb
+    >>> def verb(data, ...):
+    >>>     ...
+
+    In such a case, it works like a generic function for all types of
+    data.
+
+    Args:
+        types: The classes of data for the verb
+            Multiple classes can be passed as a list/tuple/set.
+        context: The context to evaluate the Expression objects
+        func: The function to be decorated if passed explicitly
+        extra_contexts: Extra contexts (if not the same as `context`)
+            for specific arguments
+        **attrs: Other attributes to be attached to the function
+
+    Returns:
+        A decorator function if `func` is not given, or a wrapper function
+        like a singledispatch generic function that can register other types,
+        show all registry and dispatch for a specific type
+    """
+```
+
+Note that when defining a verb, a data argument as the first argument is required.
+
+## Register your own function
+
+There are two types of functions: with a data argument (as the first argument) or without.
+
+Functions with a data argument are just like verbs, but they don't support piping and they are supposed to be used inside verbs. To limit a function to be used only inside a verb, use `verb_arg_only=True` with `register_func`.
+
+!!! Note:
+
+    In `dplyr`, you can call a verb regularly: `mutate(df, across(...))`, and `across` can still be recognized as a verb argument, but we can't do that in python. When used in this situation, an error will still be raised. Instead, use the piping syntax: `df >> mutate(across(...))`
+
+For the functions without a data argument, the `types` argument should be specified as `None`.
+
+Both types of functions are supposed to be used inside a verb (as its arguments), but you can still use them somewhere else, which means you are calling them regularly. For the caveats of doing that, please check out [this section][2].
+
+[1]: https://github.com/pwwang/pipda
+[2]: https://github.com/pwwang/pipda#caveats
diff --git a/docs/f.md b/docs/f.md
index aca62a93..af56ceeb 100644
--- a/docs/f.md
+++ b/docs/f.md
@@ -1,3 +1,7 @@
+## Why `f`?
+
+It is simply fast to type: usually a `.` comes right after `f`, so your left hand and right hand work together in sequence.
+
 ## The `Symbolic` object `f`
 
 You can import it by `from datar import f`, or `from datar.all import *`
@@ -19,6 +23,8 @@ There are different uses for the `f`.
 )
 ```
 
+## If you don't like `f` ...
+
 Sometimes you have verbs mixed with piping and want to distinguish the proxies for different verbs:
 
 ```python
diff --git a/docs/indexing.md b/docs/indexing.md
index 723d9a38..8d6a0fca 100644
--- a/docs/indexing.md
+++ b/docs/indexing.md
@@ -39,13 +39,13 @@ with options_context(index_base_0=True):
     # rows #2,3,4: [1,2,3]
 ```
 
-APIs with arguments related to indexing selection usually have a `_base0` argument, which also switch the index base temporarily. For example:
+APIs with arguments related to indexing selection usually have a `base0_` argument, which also switches the index base temporarily. For example:
 
 ```python
 from datar.datasets import iris
 from datar.all import mutate, across, c
 
-iris >> mutate(across(c(0,1), round, _base0=True))
+iris >> mutate(across(c(0,1), round, base0_=True))
 # Sepal_Length Sepal_Width Petal_Length Petal_Width Species
 # 0          5.0         4.0          1.4         0.2  setosa
 # 1          5.0         3.0          1.4         0.2  setosa
diff --git a/docs/notebooks/across.ipynb b/docs/notebooks/across.ipynb
index 7de9c4d6..979ced31 100644
--- a/docs/notebooks/across.ipynb
+++ b/docs/notebooks/across.ipynb
@@ -78,7 +78,7 @@
     "    a list is used for _fns. In such a case, `{_fn}` is 0-based. \n",
     "    To use 1-based index, use `{_fn1}` \n",
     "\n",
-    "  `_base0`: Indicating whether the columns are 0-based if selected \n",
+    "  `base0_`: Indicating whether the columns are 0-based if selected \n",
     "    by indexes. 
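To tie the `docs/advanced.md` notes above together, here is a minimal, hedged sketch of both registration flavours (the verb `n_rows` and the function `double` are hypothetical names, not part of this patch; only `register_verb`, `register_func` and `Context` come from the docs):

```python
from pandas import DataFrame
from pipda import register_verb, register_func, Context

@register_verb(DataFrame, context=Context.EVAL)
def n_rows(data: DataFrame) -> int:
    # a verb: the data argument comes first, so piping works
    return data.shape[0]

@register_func(None, context=Context.EVAL)
def double(x):
    # a data-less function (types=None), meant to be used inside verbs
    return x * 2

# df >> n_rows()                 # verbs support piping
# df >> mutate(y=double(f.x))    # data-less functions go inside verbs
```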
if not provided, will use \n", "    `datar.base.get_option('index.base.0')`. \n", "\n", @@ -174,7 +174,7 @@ "##### Args:\n", "  `_data`: The dataframe \n", "  `_cols`: The columns \n", - "  `_base0`: Indicating whether the columns are 0-based if selected \n", + "  `base0_`: Indicating whether the columns are 0-based if selected \n", "    by indexes. if not provided, will use \n", "    `datar.base.get_option('index.base.0')`. \n", "\n", @@ -509,7 +509,7 @@ "execution_count": 4, "source": [ "# use 0-base index\n", - "iris >> mutate(across(c(0,1), round, _base0=True))\n", + "iris >> mutate(across(c(0,1), round, base0_=True))\n", "\n" ], "outputs": [ @@ -1990,7 +1990,7 @@ " starts_with(\"Sepal\"), \n", " [mean, sd], \n", " _names=\"{_col}.fn{_fn}\", \n", - " _base0=True\n", + " base0_=True\n", " )\n", ")\n", "# or use _fn0\n", diff --git a/docs/notebooks/add_column.ipynb b/docs/notebooks/add_column.ipynb index f52d2e22..f6676110 100644 --- a/docs/notebooks/add_column.ipynb +++ b/docs/notebooks/add_column.ipynb @@ -77,7 +77,7 @@ "  `_after`: Column index or name where to add the new columns \n", "    (default to add after the last column) \n", "\n", - "  `_base0`: Whether `_before` and `_after` are 0-based if they are index. \n", + "  `base0_`: Whether `_before` and `_after` are 0-based if they are index. \n", "    if not given, will be determined by `get_option('index_base_0')`, \n", "    which is `False` by default. \n", "\n", diff --git a/docs/notebooks/add_row.ipynb b/docs/notebooks/add_row.ipynb index 519490c1..5bb0579f 100644 --- a/docs/notebooks/add_row.ipynb +++ b/docs/notebooks/add_row.ipynb @@ -77,7 +77,7 @@ "  `_after`: row index where to add the new rows. \n", "    (default to add after the last row) \n", "\n", - "  `_base0`: Whether `_before` and `_after` are 0-based or not. \n", + "  `base0_`: Whether `_before` and `_after` are 0-based or not. 
\n", "\n", "##### Returns:\n", "  The dataframe with the added rows \n" diff --git a/docs/notebooks/base.ipynb b/docs/notebooks/base.ipynb index 45d96363..c1b22947 100644 --- a/docs/notebooks/base.ipynb +++ b/docs/notebooks/base.ipynb @@ -1003,7 +1003,7 @@ "debug(\n", " str(c(c(1,2), seq(3,4))),\n", " seq_len(5),\n", - " seq_len(5, _base0=True),\n", + " seq_len(5, base0_=True),\n", " seq_along([4,2,1]),\n", " rev(seq_len(5)),\n", " rep([1,2], 2),\n", @@ -1030,7 +1030,7 @@ "--------------------\n", "array([1, 2, 3, 4, 5])\n", "\n", - "seq_len(5, _base0=True)\n", + "seq_len(5, base0_=True)\n", "--------------------\n", "array([0, 1, 2, 3, 4])\n", "\n", @@ -1175,7 +1175,7 @@ " as_character([1,2]),\n", " grep(\".\", [\"ab\", \"c.d\"]),\n", " grep(\".\", [\"ab\", \"c.d\"], fixed=True),\n", - " grep(\".\", [\"ab\", \"c.d\"], fixed=True, _base0=False),\n", + " grep(\".\", [\"ab\", \"c.d\"], fixed=True, base0_=False),\n", " grepl(\".\", [\"ab\", \"c.d\"], fixed=True),\n", " sub(\".\", \"x\", [\"ab\", \"c.d.e\"]),\n", " sub(\".\", \"x\", [\"ab\", \"c.d.e\"], fixed=True),\n", @@ -1189,7 +1189,7 @@ " paste0([\"a\", \"c\"], [\"b\", \"d\"], collapse=\"; \"),\n", " sprintf(\"%s, %d, %.3f\", pi, pi, pi),\n", " substr(\"abcde\", 1, 3),\n", - " substr(\"abcde\", 1, 3, _base0=True),\n", + " substr(\"abcde\", 1, 3, base0_=True),\n", " substring(\"abcde\", 1),\n", " strsplit([\"a.b.c\", \"e.f\"], \".\", fixed=True),\n", " **debug_kwargs\n", @@ -1221,7 +1221,7 @@ "--------------------\n", "array([1])\n", "\n", - "grep(\".\", [\"ab\", \"c.d\"], fixed=True, _base0=False)\n", + "grep(\".\", [\"ab\", \"c.d\"], fixed=True, base0_=False)\n", "--------------------\n", "array([2])\n", "\n", @@ -1277,7 +1277,7 @@ "--------------------\n", "'abc'\n", "\n", - "substr(\"abcde\", 1, 3, _base0=True)\n", + "substr(\"abcde\", 1, 3, base0_=True)\n", "--------------------\n", "'bcd'\n", "\n", @@ -1510,7 +1510,7 @@ "\n", "debug(\n", " which([True, False, True]),\n", - " which([True, False, True], _base0=False),\n", + " which([True, False, True], base0_=False),\n", " which_max([3,2,4,1]),\n", " which_min([3,2,4,1]),\n", " **debug_kwargs\n", @@ -1526,7 +1526,7 @@ "--------------------\n", "array([0, 2])\n", "\n", - "which([True, False, True], _base0=False)\n", + "which([True, False, True], base0_=False)\n", "--------------------\n", "array([1, 3])\n", "\n", diff --git a/docs/notebooks/bind.ipynb b/docs/notebooks/bind.ipynb index 06be91a1..9e69369e 100644 --- a/docs/notebooks/bind.ipynb +++ b/docs/notebooks/bind.ipynb @@ -2,8 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, - "id": "departmental-balloon", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:36.534923Z", @@ -14,44 +13,107 @@ }, "outputs": [ { - "output_type": "display_data", "data": { - "text/plain": "", - "text/html": "
Try this notebook on binder.
" + "text/html": [ + "
Try this notebook on binder.
" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # bind_rows " + "text/markdown": [ + "### # bind_rows " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Bind rows of give dataframes\n\nOriginal APIs https://dplyr.tidyverse.org/reference/bind.html \n\n##### Args:\n  `_data`: The seed dataframe to bind others \n    Could be a dict or a list, keys/indexes will be used for _id col \n\n  `*datas`: Other dataframes to combine \n  `_id`: The name of the id columns \n  `_base0`: Whether `_id` starts from 0 or not, if no keys are provided. \n    If `_base0` is not provided, will use \n    `datar.base.get_option('index.base.0')` \n\n  `**kwargs`: A mapping of dataframe, keys will be used as _id col. \n\n##### Returns:\n  The combined dataframe \n" + "text/markdown": [ + "##### Bind rows of give dataframes\n", + "\n", + "Original APIs https://dplyr.tidyverse.org/reference/bind.html \n", + "\n", + "##### Args:\n", + "  `_data`: The seed dataframe to bind others \n", + "    Could be a dict or a list, keys/indexes will be used for _id col \n", + "\n", + "  `*datas`: Other dataframes to combine \n", + "  `_id`: The name of the id columns \n", + "  `base0_`: Whether `_id` starts from 0 or not, if no keys are provided. \n", + "    If `base0_` is not provided, will use \n", + "    `datar.base.get_option('index.base.0')` \n", + "\n", + "  `**kwargs`: A mapping of dataframe, keys will be used as _id col. \n", + "\n", + "##### Returns:\n", + "  The combined dataframe \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # bind_cols " + "text/markdown": [ + "### # bind_cols " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Bind columns of give dataframes\n\nNote that unlike `dplyr`, mismatched dimensions are allowed and \nmissing rows will be filled with `NA`s \n\n##### Args:\n  `_data`: The seed dataframe to bind others \n    Could be a dict, keys will be used for _id col \n\n  `*datas`: other dataframes to bind \n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether the numeric suffix starts from 0 or not. \n    If not specified, will use `datar.base.get_option('index.base.0')`. 
\n\n##### Returns:\n  The combined dataframe \n" + "text/markdown": [ + "##### Bind columns of give dataframes\n", + "\n", + "Note that unlike `dplyr`, mismatched dimensions are allowed and \n", + "missing rows will be filled with `NA`s \n", + "\n", + "##### Args:\n", + "  `_data`: The seed dataframe to bind others \n", + "    Could be a dict, keys will be used for _id col \n", + "\n", + "  `*datas`: other dataframes to bind \n", + "  `_name_repair`: treatment of problematic column names: \n", + "    - \"minimal\": No name repair or checks, beyond basic existence,\n", + "\n", + "    - \"unique\": Make sure names are unique and not empty,\n", + "\n", + "    - \"check_unique\": (default value), no name repair,\n", + "      but check they are unique, \n", + "\n", + "    - \"universal\": Make the names unique and syntactic\n", + "\n", + "    - a function: apply custom name repair\n", + "\n", + "  `base0_`: Whether the numeric suffix starts from 0 or not. \n", + "    If not specified, will use `datar.base.get_option('index.base.0')`. \n", + "\n", + "##### Returns:\n", + "  The combined dataframe \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -66,8 +128,7 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "loose-citation", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.339085Z", @@ -78,8 +139,156 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
0Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
1C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
2R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
3Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
4Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
5Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
6Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
7Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " name height mass hair_color skin_color eye_color \\\n", "0 Luke Skywalker 172.0 77.0 blond fair blue \n", @@ -100,11 +309,11 @@ "5 57.0 male masculine Stewjon Human \n", "6 41.9 male masculine Tatooine Human \n", "7 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], "source": [ @@ -116,12 +325,160 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
0Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
1C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
2R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
3Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
4Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
5Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
6Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
7Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " name height mass hair_color skin_color eye_color \\\n", "0 Luke Skywalker 172.0 77.0 blond fair blue \n", @@ -142,11 +499,11 @@ "5 57.0 male masculine Stewjon Human \n", "6 41.9 male masculine Tatooine Human \n", "7 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -155,12 +512,272 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
0Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
1C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
2R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
3Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
4Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
5Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
6Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
7Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
8Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
9Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
10Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
11Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
12Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
13C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
14R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
15Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
\n", + "
" + ], "text/plain": [ " name height mass hair_color skin_color eye_color \\\n", "0 Luke Skywalker 172.0 77.0 blond fair blue \n", @@ -197,11 +814,11 @@ "13 112.0 none masculine Tatooine Droid \n", "14 33.0 none masculine Naboo Droid \n", "15 41.9 male masculine Tatooine Human " - ], - "text/html": "
" + ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -210,8 +827,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "entire-overall", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.403386Z", @@ -222,17 +838,54 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
134
\n", + "
" + ], "text/plain": [ " a b\n", "0 1 2\n", "1 3 4" - ], - "text/html": "
" + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -241,8 +894,7 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "primary-convertible", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.522360Z", @@ -253,19 +905,66 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ab
012
135
246
378
\n", + "
" + ], "text/plain": [ " a b\n", "0 1 2\n", "1 3 5\n", "2 4 6\n", "3 7 8" - ], - "text/html": "
" + ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ @@ -277,12 +976,169 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
01Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
11C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
21R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
31Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
42Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
52Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
62Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
72Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " id name height mass hair_color skin_color \\\n", "0 1 Luke Skywalker 172.0 77.0 blond fair \n", @@ -303,11 +1159,11 @@ "5 blue-gray 57.0 male masculine Stewjon Human \n", "6 blue 41.9 male masculine Tatooine Human \n", "7 blue 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" } ], "source": [ @@ -316,12 +1172,169 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
00Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
10C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
20R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
30Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
41Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
51Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
61Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
71Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " id name height mass hair_color skin_color \\\n", "0 0 Luke Skywalker 172.0 77.0 blond fair \n", @@ -342,25 +1355,182 @@ "5 blue-gray 57.0 male masculine Stewjon Human \n", "6 blue 41.9 male masculine Tatooine Human \n", "7 blue 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], "source": [ - "bind_rows([one, two], _id = \"id\", _base0=True) # 0-based id" + "bind_rows([one, two], _id = \"id\", base0_=True) # 0-based id" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
0aLuke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
1aC-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
2aR2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
3aDarth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
4bBiggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
5bObi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
6bAnakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
7bWilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " id name height mass hair_color skin_color eye_color \\\n", "0 a Luke Skywalker 172.0 77.0 blond fair blue \n", @@ -381,11 +1551,11 @@ "5 57.0 male masculine Stewjon Human \n", "6 41.9 male masculine Tatooine Human \n", "7 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ @@ -395,12 +1565,169 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
groupsnameheightmasshair_colorskin_coloreye_colorbirth_yearsexgenderhomeworldspecies
0group 1Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooineHuman
1group 1C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooineDroid
2group 1R2-D296.032.0NaNwhite, bluered33.0nonemasculineNabooDroid
3group 1Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooineHuman
4group 2Biggs Darklighter183.084.0blacklightbrown24.0malemasculineTatooineHuman
5group 2Obi-Wan Kenobi182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
6group 2Anakin Skywalker188.084.0blondfairblue41.9malemasculineTatooineHuman
7group 2Wilhuff Tarkin180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "
" + ], "text/plain": [ " groups name height mass hair_color skin_color \\\n", "0 group 1 Luke Skywalker 172.0 77.0 blond fair \n", @@ -421,11 +1748,11 @@ "5 blue-gray 57.0 male masculine Stewjon Human \n", "6 blue 41.9 male masculine Tatooine Human \n", "7 blue 64.0 male masculine Eriadu Human " - ], - "text/html": "
" + ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], "source": [ @@ -434,8 +1761,7 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "aggressive-pilot", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.538541Z", @@ -446,8 +1772,70 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
00.0NaN
11.0NaN
22.0NaN
3NaN0.0
4NaN1.0
5NaN2.0
6NaN3.0
\n", + "
" + ], "text/plain": [ " x y\n", "0 0.0 NaN\n", @@ -457,11 +1845,11 @@ "4 NaN 1.0\n", "5 NaN 2.0\n", "6 NaN 3.0" - ], - "text/html": "
" + ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -472,8 +1860,7 @@ }, { "cell_type": "code", - "execution_count": 12, - "id": "sexual-memphis", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.571552Z", @@ -484,18 +1871,60 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xy
000.0
111.0
22NaN
\n", + "
" + ], "text/plain": [ " x y\n", "0 0 0.0\n", "1 1 1.0\n", "2 2 NaN" - ], - "text/html": "
" + ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], "source": [ @@ -507,32 +1936,69 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
x
00
11
22
\n", + "
" + ], "text/plain": [ " x\n", "0 0\n", "1 1\n", "2 2" - ], - "text/html": "
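The source change in the next chunk rewrites this cell from a plain call to piping style; the two forms are presumably equivalent under pipda. A sketch:

from datar.all import *

bind_cols(tibble(x=range(3)), tibble())    # 0.2.x spelling of the cell
tibble(x=range(3)) >> bind_cols(tibble())  # 0.3.0 spelling; binding an empty
                                           # frame is a no-op, leaving column x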
" + ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], "source": [ - "bind_cols(tibble(x = range(3)), tibble())" + "tibble(x = range(3)) >> bind_cols(tibble())" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "amateur-interaction", + "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:51:37.605622Z", @@ -543,8 +2009,8 @@ }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ "[2021-05-24 17:52:22][datar][WARNING] New names:\n", "[2021-05-24 17:52:22][datar][WARNING] * 'name' -> 'name__1'\n", @@ -572,8 +2038,151 @@ ] }, { - "output_type": "execute_result", "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
name__1height__2mass__3hair_color__4skin_color__5eye_color__6birth_year__7sex__8gender__9homeworld__10...height__13mass__14hair_color__15skin_color__16eye_color__17birth_year__18sex__19gender__20homeworld__21species__22
0Luke Skywalker172.077.0blondfairblue19.0malemasculineTatooine...183.084.0blacklightbrown24.0malemasculineTatooineHuman
1C-3PO167.075.0NaNgoldyellow112.0nonemasculineTatooine...182.077.0auburn, whitefairblue-gray57.0malemasculineStewjonHuman
2R2-D296.032.0NaNwhite, bluered33.0nonemasculineNaboo...188.084.0blondfairblue41.9malemasculineTatooineHuman
3Darth Vader202.0136.0nonewhiteyellow41.9malemasculineTatooine...180.0NaNauburn, greyfairblue64.0malemasculineEriaduHuman
\n", + "

4 rows × 22 columns

\n", + "
" + ], "text/plain": [ " name__1 height__2 mass__3 hair_color__4 skin_color__5 \\\n", "0 Luke Skywalker 172.0 77.0 blond fair \n", @@ -600,11 +2209,11 @@ "3 male masculine Eriadu Human \n", "\n", "[4 rows x 22 columns]" - ], - "text/html": "
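The "New names" warnings above show datar's name repair at work: when `bind_cols` joins two frames that share column names, every column is made unique with a positional `__<n>` suffix. A hedged sketch (assuming `one` and `two` are the 4-row starwars subsets used throughout this notebook; the cell may equally be written as bind_cols(one, two)):

from datar.all import *

one >> bind_cols(two)
# 4 rows x 22 columns; duplicated names repaired to name__1 ... species__22,
# with one "New names" warning logged per renamed column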
" + ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], "source": [ @@ -613,22 +2222,16 @@ } ], "metadata": { + "interpreter": { + "hash": "c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc" + }, "kernelspec": { - "display_name": "Python 3", - "language": "python", + "display_name": "Python 3.7.8 64-bit ('base': conda)", "name": "python3" }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "" } }, "nbformat": 4, diff --git a/docs/notebooks/chop.ipynb b/docs/notebooks/chop.ipynb index ccbd67a3..f2d33714 100644 --- a/docs/notebooks/chop.ipynb +++ b/docs/notebooks/chop.ipynb @@ -1,25 +1,4 @@ { - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python378jvsc74a57bd0c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc", - "display_name": "Python 3.7.8 64-bit ('base': conda)" - } - }, - "nbformat": 4, - "nbformat_minor": 2, "cells": [ { "cell_type": "code", @@ -27,44 +6,125 @@ "metadata": {}, "outputs": [ { - "output_type": "display_data", + "name": "stderr", + "output_type": "stream", + "text": [ + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"min\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"max\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"sum\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"abs\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"round\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"all\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"any\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"re\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"filter\" has been overriden by datar.\n", + "[2021-06-29 16:34:36][datar][WARNING] Builtin name \"slice\" has been overriden by datar.\n" + ] + }, + { "data": { - "text/plain": "", - "text/html": "
Try this notebook on binder.
" + "text/html": [ + "
Try this notebook on binder.
" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # chop " + "text/markdown": [ + "### # chop " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Makes data frame shorter by converting rows within each group\ninto list-columns. \n\n##### Args:\n  `data`: A data frame \n  `cols`: Columns to chop \n  `_base0`: Whether `cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Data frame with selected columns chopped \n" + "text/markdown": [ + "##### Makes data frame shorter by converting rows within each group\n", + "into list-columns. \n", + "\n", + "##### Args:\n", + "  `data`: A data frame \n", + "  `cols`: Columns to chop \n", + "  `base0_`: Whether `cols` are 0-based \n", + "    if not provided, will use `datar.base.get_option('index.base.0')` \n", + "\n", + "##### Returns:\n", + "  Data frame with selected columns chopped \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # unchop " + "text/markdown": [ + "### # unchop " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Makes df longer by expanding list-columns so that each element\nof the list-column gets its own row in the output. \n\nSee https://tidyr.tidyverse.org/reference/chop.html \n\nRecycling size-1 elements might be different from `tidyr` \n  >>> df = tibble(x=[1, [2,3]], y=[[2,3], 1]) \n  >>> df >> unchop([f.x, f.y]) \n  >>> # tibble(x=[1,2,3], y=[2,3,1]) \n  >>> # instead of following in tidyr \n  >>> # tibble(x=[1,1,2,3], y=[2,3,1,1]) \n\n##### Args:\n  `data`: A data frame. \n  `cols`: Columns to unchop. \n  `keep_empty`: By default, you get one row of output for each element \n    of the list your unchopping/unnesting. \n    This means that if there's a size-0 element \n    (like NULL or an empty data frame), that entire row will be \n    dropped from the output. \n    If you want to preserve all rows, use `keep_empty` = `True` to \n    replace size-0 elements with a single row of missing values. \n\n  `dtypes`: NOT `ptype`. Providing the dtypes for the output columns. \n    Could be a single dtype, which will be applied to all columns, or \n    a dictionary of dtypes with keys for the columns and values the \n    dtypes. \n    For nested data frames, we need to specify `col$a` as key. If `col` \n    is used as key, all columns of the nested data frames will be casted \n    into that dtype. \n\n  `_base0`: Whether `cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  A data frame with selected columns unchopped. \n" + "text/markdown": [ + "##### Makes df longer by expanding list-columns so that each element\n", + "of the list-column gets its own row in the output. 
\n", + "\n", + "See https://tidyr.tidyverse.org/reference/chop.html \n", + "\n", + "Recycling size-1 elements might be different from `tidyr` \n", + "  >>> df = tibble(x=[1, [2,3]], y=[[2,3], 1]) \n", + "  >>> df >> unchop([f.x, f.y]) \n", + "  >>> # tibble(x=[1,2,3], y=[2,3,1]) \n", + "  >>> # instead of following in tidyr \n", + "  >>> # tibble(x=[1,1,2,3], y=[2,3,1,1]) \n", + "\n", + "##### Args:\n", + "  `data`: A data frame. \n", + "  `cols`: Columns to unchop. \n", + "  `keep_empty`: By default, you get one row of output for each element \n", + "    of the list your unchopping/unnesting. \n", + "    This means that if there's a size-0 element \n", + "    (like NULL or an empty data frame), that entire row will be \n", + "    dropped from the output. \n", + "    If you want to preserve all rows, use `keep_empty` = `True` to \n", + "    replace size-0 elements with a single row of missing values. \n", + "\n", + "  `ptype`: Providing the dtypes for the output columns. \n", + "    Could be a single dtype, which will be applied to all columns, or \n", + "    a dictionary of dtypes with keys for the columns and values the \n", + "    dtypes. \n", + "    For nested data frames, we need to specify `col$a` as key. If `col` \n", + "    is used as key, all columns of the nested data frames will be casted \n", + "    into that dtype. \n", + "\n", + "  `base0_`: Whether `cols` are 0-based \n", + "    if not provided, will use `datar.base.get_option('index.base.0')` \n", + "\n", + "##### Returns:\n", + "  A data frame with selected columns unchopped. \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -81,24 +141,51 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x data\n", - "0 1 y z\n", - "0 1 6\n", - "1 2 5\n", - "2 3 4\n", - "1 2 y z\n", - "3 4 3\n", - "4 5 2\n", - "2 3 y z\n", - "5 6 1" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 3 × 2 data frame as the text/plain repr below: x = 1..3 plus a list-column data holding nested frames <DF 3x2>, <DF 2x2>, <DF 1x2>]
" ], - "text/html": "
" + "text/plain": [ + " x data\n", + " \n", + "0 1 \n", + "1 2 \n", + "2 3 " + ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], "source": [ @@ -112,18 +199,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y z\n", - "0 1 [1, 2, 3] [6, 5, 4]\n", - "1 2 [4, 5] [3, 2]\n", - "2 3 [6] [1]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 3 × 3 data frame as the text/plain repr below: list-columns y and z, e.g. y = [1, 2, 3], z = [6, 5, 4] for x = 1]
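This is `chop` collapsing the rows that share an `x` into list cells. A sketch that should reproduce the table above (the input frame is inferred from the outputs shown, since the source cell sits later in the notebook JSON):

from datar.all import *

df = tibble(
    x=[1, 1, 1, 2, 2, 3],
    y=[1, 2, 3, 4, 5, 6],
    z=[6, 5, 4, 3, 2, 1],
)
df >> chop([f.y, f.z])
# x=1 -> y=[1, 2, 3], z=[6, 5, 4]; x=2 -> y=[4, 5], z=[3, 2]; x=3 -> y=[6], z=[1]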
" ], - "text/html": "
" + "text/plain": [ + " x y z\n", + " \n", + "0 1 [1, 2, 3] [6, 5, 4]\n", + "1 2 [4, 5] [3, 2]\n", + "2 3 [6] [1]" + ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -136,21 +261,69 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 2 1.0\n", - "1 3 1.0\n", - "2 3 2.0\n", - "3 4 1.0\n", - "4 4 2.0\n", - "5 4 3.0" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 6 × 2 data frame as the text/plain repr below: x = 2, 3, 3, 4, 4, 4 with float y values]
" ], - "text/html": "
" + "text/plain": [ + " x y\n", + " \n", + "0 2 1.0\n", + "1 3 1.0\n", + "2 3 2.0\n", + "3 4 1.0\n", + "4 4 2.0\n", + "5 4 3.0" + ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -165,25 +338,73 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 2 1\n", - "1 3 1\n", - "2 3 2\n", - "3 4 1\n", - "4 4 2\n", - "5 4 3" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 6 × 2 data frame as the text/plain repr below: y cast to int64 via ptype=int]
" ], - "text/html": "
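The hunk here renames the 0.2.x `dtypes` keyword to tidyr's `ptype`, and a later cell in this notebook shows the old spelling now raising "[TypeError] got an unexpected keyword argument 'dtypes'". A sketch of the new spelling (`df` is assumed from the outputs shown):

from datar.all import *

df = tibble(x=[2, 3, 4], y=[[1], [1, 2], [1, 2, 3]])
df >> unchop(f.y)                              # per the outputs above, y unchops to float
df >> unchop(f.y, keep_empty=True, ptype=int)  # y cast to int64 instead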
" + "text/plain": [ + " x y\n", + " \n", + "0 2 1\n", + "1 3 1\n", + "2 3 2\n", + "3 4 1\n", + "4 4 2\n", + "5 4 3" + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ - "df >> unchop(f.y, keep_empty=True, dtypes=int)" + "df >> unchop(f.y, keep_empty=True, ptype=int)" ] }, { @@ -192,37 +413,62 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1 a\n", - "1 2 1\n", - "2 2 2\n", - "3 2 3" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 4 × 2 data frame as the text/plain repr below: object column y holds "a", 1, 2, 3]
" ], - "text/html": "
" - }, - "metadata": {}, - "execution_count": 6 - }, - { - "output_type": "execute_result", - "data": { "text/plain": [ - "x int64\n", - "y object\n", - "dtype: object" + " x y\n", + " \n", + "0 1 a\n", + "1 2 1\n", + "2 2 2\n", + "3 2 3" ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ "df = tibble(x = f[1:2], y = [\"a\", [1,2,3]])\n", - "df >> unchop(f.y)\n", - "_.dtypes" + "df >> unchop(f.y)" ] }, { @@ -231,10 +477,10 @@ "metadata": {}, "outputs": [ { - "output_type": "stream", "name": "stdout", + "output_type": "stream", "text": [ - "[ValueError] invalid literal for int() with base 10: 'a'\n" + "[TypeError] got an unexpected keyword argument 'dtypes'\n" ] } ], @@ -249,18 +495,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y$x y$y\n", - "0 2 1.0 NaN\n", - "1 3 NaN 1.0\n", - "2 3 NaN 2.0" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 3 × 3 data frame as the text/plain repr below: columns x, y$x, y$y]
" ], - "text/html": "
" + "text/plain": [ + " x y$x y$y\n", + " \n", + "0 2 1.0 NaN\n", + "1 3 NaN 1.0\n", + "2 3 NaN 2.0" + ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], "source": [ @@ -274,24 +558,92 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y$x y$y\n", - "0 1 NaN NaN\n", - "1 2 1.0 NaN\n", - "2 3 NaN 1.0\n", - "3 3 NaN 2.0" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 4 × 3 data frame as the text/plain repr below: keep_empty=True keeps the x = 1 row as NaNs]
" ], - "text/html": "
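Unchopping a list-column of data frames spreads each nested frame into `y$x` / `y$y` columns, and `keep_empty=True` preserves the row whose element is NULL as a row of NaNs. A sketch mirroring tidyr's example (the input frame is an assumption based on the outputs above):

from datar.all import *

df = tibble(x=f[1:3], y=[NULL, tibble(x=1), tibble(y=[1, 2])])
df >> unchop(f.y)                   # 3 rows: the x=1 element is dropped
df >> unchop(f.y, keep_empty=True)  # 4 rows: x=1 kept with NaN in y$x and y$y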
" + "text/plain": [ + " x y$x y$y\n", + " \n", + "0 1 NaN NaN\n", + "1 2 1.0 NaN\n", + "2 3 NaN 1.0\n", + "3 3 NaN 2.0" + ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ "df >> unchop(f.y, keep_empty=True)" ] } - ] + ], + "metadata": { + "interpreter": { + "hash": "c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc" + }, + "kernelspec": { + "display_name": "Python 3.7.8 64-bit ('base': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + }, + "orig_nbformat": 2 + }, + "nbformat": 4, + "nbformat_minor": 2 } \ No newline at end of file diff --git a/docs/notebooks/drop_na.ipynb b/docs/notebooks/drop_na.ipynb index 145dc142..72153c40 100644 --- a/docs/notebooks/drop_na.ipynb +++ b/docs/notebooks/drop_na.ipynb @@ -3,7 +3,6 @@ { "cell_type": "code", "execution_count": 1, - "id": "recent-verification", "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:52:21.073522Z", @@ -14,28 +13,74 @@ }, "outputs": [ { - "output_type": "display_data", + "name": "stderr", + "output_type": "stream", + "text": [ + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"min\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"max\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"sum\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"abs\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"round\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"all\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"any\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"re\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"filter\" has been overriden by datar.\n", + "[2021-06-29 16:46:23][datar][WARNING] Builtin name \"slice\" has been overriden by datar.\n" + ] + }, + { "data": { - "text/plain": "", - "text/html": "
Try this notebook on binder.
" + "text/html": [ + "
Try this notebook on binder.
" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # drop_na " + "text/markdown": [ + "### # drop_na " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Drop rows containing missing values\n\nSee https://tidyr.tidyverse.org/reference/drop_na.html \n\n##### Args:\n  `data`: A data frame. \n  `*columns`: Columns to inspect for missing values. \n  `how`: How to select the rows to drop \n    - all: All columns of `columns` to be `NA`s\n\n    - any: Any columns of `columns` to be `NA`s\n\n    (tidyr doesn't support this argument) \n\n  `_base0`: Whether `*columns` are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Dataframe with rows with NAs dropped and indexes dropped \n" + "text/markdown": [ + "##### Drop rows containing missing values\n", + "\n", + "See https://tidyr.tidyverse.org/reference/drop_na.html \n", + "\n", + "##### Args:\n", + "  `data`: A data frame. \n", + "  `*columns`: Columns to inspect for missing values. \n", + "  `how_`: How to select the rows to drop \n", + "    - all: All columns of `columns` to be `NA`s\n", + "\n", + "    - any: Any columns of `columns` to be `NA`s\n", + "\n", + "    (tidyr doesn't support this argument) \n", + "\n", + "  `base0_`: Whether `*columns` are 0-based if given by indexes \n", + "    If not provided, will use `datar.base.get_option('index.base.0')` \n", + "\n", + "##### Returns:\n", + "  Dataframe with rows with NAs dropped and indexes dropped \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -50,7 +95,6 @@ { "cell_type": "code", "execution_count": 2, - "id": "verified-balance", "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:52:21.924553Z", @@ -61,16 +105,39 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1.0 a" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 1 × 2 data frame as the text/plain repr below: the single complete row (1.0, "a")]
" ], - "text/html": "
" + "text/plain": [ + " x y\n", + " \n", + "0 1.0 a" + ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], "source": [ @@ -81,7 +148,6 @@ { "cell_type": "code", "execution_count": 3, - "id": "otherwise-encoding", "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:52:21.990414Z", @@ -92,17 +158,45 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1.0 a\n", - "1 2.0 NaN" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 2 × 2 data frame as the text/plain repr below: the rows with non-NA x]
" ], - "text/html": "
" + "text/plain": [ + " x y\n", + " \n", + "0 1.0 a\n", + "1 2.0 NaN" + ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -112,7 +206,6 @@ { "cell_type": "code", "execution_count": 4, - "id": "violent-safety", "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:52:22.055763Z", @@ -123,16 +216,39 @@ }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1.0 a" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 1 × 2 data frame as the text/plain repr below: the single complete row (1.0, "a")]
" ], - "text/html": "
" + "text/plain": [ + " x y\n", + " \n", + "0 1.0 a" + ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -146,24 +262,57 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1.0 a\n", - "1 2.0 NaN\n", - "2 NaN b" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 3 × 2 data frame as the text/plain repr below: no row is entirely NA, so all rows are kept]
" ], - "text/html": "
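`how_` (renamed from `how` in this PR, and still not part of tidyr itself) picks between dropping rows containing any NA and rows that are entirely NA. A sketch with this notebook's frame (the tibble call is assumed from the outputs shown):

from datar.all import *

df = tibble(x=[1, 2, NA], y=["a", NA, "b"])
df >> drop_na(how_="all")  # keeps all 3 rows: none is entirely NA
df >> drop_na(how_="any")  # keeps only the complete row (1.0, "a")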
" + "text/plain": [ + " x y\n", + " \n", + "0 1.0 a\n", + "1 2.0 NaN\n", + "2 NaN b" + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ - "# how='any' or how='all'\n", + "# how_='any' or how_='all'\n", "# not supported by tidyr\n", - "df >> drop_na(how='all')" + "df >> drop_na(how_='all')" ] }, { @@ -172,27 +321,52 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y\n", - "0 1.0 a" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+ [HTML render of the same 1 × 2 data frame as the text/plain repr below]
" ], - "text/html": "
" + "text/plain": [ + " x y\n", + " \n", + "0 1.0 a" + ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ - "df >> drop_na(how='any')" + "df >> drop_na(how_='any')" ] } ], "metadata": { + "interpreter": { + "hash": "c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc" + }, "kernelspec": { - "display_name": "Python 3", - "language": "python", + "display_name": "Python 3.7.8 64-bit ('base': conda)", "name": "python3" }, "language_info": { diff --git a/docs/notebooks/expand.ipynb b/docs/notebooks/expand.ipynb index aa2076c0..1ae32b31 100644 --- a/docs/notebooks/expand.ipynb +++ b/docs/notebooks/expand.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Generates all combination of variables found in a dataset.\n\n##### Args:\n  `data`: A data frame \n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with all combination of variables. \n" + "text/markdown": "##### Generates all combination of variables found in a dataset.\n\n##### Args:\n  `data`: A data frame \n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with all combination of variables. 
\n" }, "metadata": {} }, @@ -49,7 +49,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### A helper that only finds combinations already present in the data.\n\n##### Args:\n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with all combinations in data. \n" + "text/markdown": "##### A helper that only finds combinations already present in the data.\n\n##### Args:\n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with all combinations in data. \n" }, "metadata": {} }, @@ -65,7 +65,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### A wrapper around `expand_grid()` that de-duplicates and sorts its inputs\n\nWhen values are not specified by literal `list`, they will be sorted. \n\n##### Args:\n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. 
For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with values deduplicated and sorted. \n" + "text/markdown": "##### A wrapper around `expand_grid()` that de-duplicates and sorts its inputs\n\nWhen values are not specified by literal `list`, they will be sorted. \n\n##### Args:\n  `*args`: and, \n  `**kwargs`: columns to expand. Columns can be atomic lists. \n    - To find all unique combinations of x, y and z, including\n      those not present in the data, supply each variable as a \n      separate argument: `expand(df, x, y, z)`. \n\n    - To find only the combinations that occur in the data, use\n      `nesting`: `expand(df, nesting(x, y, z))`. \n\n    - You can combine the two forms. For example,\n      `expand(df, nesting(school_id, student_id), date)` would \n      produce a row for each present school-student combination \n      for all possible dates. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with values deduplicated and sorted. \n" }, "metadata": {} } diff --git a/docs/notebooks/expand_grid.ipynb b/docs/notebooks/expand_grid.ipynb index e4555d09..58b7d952 100644 --- a/docs/notebooks/expand_grid.ipynb +++ b/docs/notebooks/expand_grid.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Create a tibble from all combinations of inputs\n\n##### Args:\n  `*args`: and \n  `**kwargs`: name-value pairs. \n    For `*args`, names will be inferred from the values and if failed, \n    `_Var0`, `_Var1`, etc will be used. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with one column for each input in `*args` and `**kwargs`. \n  The output will have one row for each combination of the inputs, \n  i.e. the size be equal to the product of the sizes of the inputs. 
\n  This implies that if any input has length 0, the output will have \n  zero rows. \n" + "text/markdown": "##### Create a tibble from all combinations of inputs\n\n##### Args:\n  `*args`: and \n  `**kwargs`: name-value pairs. \n    For `*args`, names will be inferred from the values and if failed, \n    `_Var0`, `_Var1`, etc will be used. \n\n  `_name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether the suffixes of repaired names should be 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')`. \n\n##### Returns:\n  A data frame with one column for each input in `*args` and `**kwargs`. \n  The output will have one row for each combination of the inputs, \n  i.e. the size be equal to the product of the sizes of the inputs. \n  This implies that if any input has length 0, the output will have \n  zero rows. \n" }, "metadata": {} } diff --git a/docs/notebooks/extract.ipynb b/docs/notebooks/extract.ipynb index ef42795d..5f76334f 100644 --- a/docs/notebooks/extract.ipynb +++ b/docs/notebooks/extract.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Given a regular expression with capturing groups, extract() turns each\ngroup into a new column. If the groups don't match, or the input is NA, \nthe output will be NA. \n\nSee https://tidyr.tidyverse.org/reference/extract.html \n\n##### Args:\n  `data`: The dataframe \n  `col`: Column name or position. \n  `into`: Names of new variables to create as character vector. \n    Use None to omit the variable in the output. \n\n  `regex`: a regular expression used to extract the desired values. \n    There should be one group (defined by ()) for each element of into. \n\n  `remove`: If TRUE, remove input column from output data frame. \n  `convert`: The universal type for the extracted columns or a dict for \n    individual ones \n\n  `_base0`: Whether `col` is 0-based when given by index \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Dataframe with extracted columns. \n" + "text/markdown": "##### Given a regular expression with capturing groups, extract() turns each\ngroup into a new column. If the groups don't match, or the input is NA, \nthe output will be NA. \n\nSee https://tidyr.tidyverse.org/reference/extract.html \n\n##### Args:\n  `data`: The dataframe \n  `col`: Column name or position. \n  `into`: Names of new variables to create as character vector. \n    Use None to omit the variable in the output. \n\n  `regex`: a regular expression used to extract the desired values. \n    There should be one group (defined by ()) for each element of into. \n\n  `remove`: If TRUE, remove input column from output data frame. \n  `convert`: The universal type for the extracted columns or a dict for \n    individual ones \n\n  `base0_`: Whether `col` is 0-based when given by index \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Dataframe with extracted columns. 
\n" }, "metadata": {} } diff --git a/docs/notebooks/fill.ipynb b/docs/notebooks/fill.ipynb index 2179ddd1..df8b43fc 100644 --- a/docs/notebooks/fill.ipynb +++ b/docs/notebooks/fill.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Fills missing values in selected columns using the next or\nprevious entry. \n\nSee https://tidyr.tidyverse.org/reference/fill.html \n\n##### Args:\n  `_data`: A dataframe \n  `*columns`: Columns to fill \n  `_direction`: Direction in which to fill missing values. \n    Currently either \"down\" (the default), \"up\", \n    \"downup\" (i.e. first down and then up) or \n    \"updown\" (first up and then down). \n\n  `_base0`: Whether `*columns` are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with NAs being replaced. \n" + "text/markdown": "##### Fills missing values in selected columns using the next or\nprevious entry. \n\nSee https://tidyr.tidyverse.org/reference/fill.html \n\n##### Args:\n  `_data`: A dataframe \n  `*columns`: Columns to fill \n  `_direction`: Direction in which to fill missing values. \n    Currently either \"down\" (the default), \"up\", \n    \"downup\" (i.e. first down and then up) or \n    \"updown\" (first up and then down). \n\n  `base0_`: Whether `*columns` are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with NAs being replaced. \n" }, "metadata": {} } diff --git a/docs/notebooks/group_by.ipynb b/docs/notebooks/group_by.ipynb index 67b1ae51..b484c70d 100644 --- a/docs/notebooks/group_by.ipynb +++ b/docs/notebooks/group_by.ipynb @@ -81,7 +81,7 @@ "  `*args`: variables or computations to group by. \n", "    Note that columns here cannot be selected by indexes. As they are \n", "    treated as computations to be added as new columns. \n", - "    So no `_base0` argument is supported. \n", + "    So no `base0_` argument is supported. \n", "\n", "  `**kwargs`: Extra variables to group the dataframe \n", "\n", @@ -117,7 +117,7 @@ "##### Args:\n", "  `x`: The data frame \n", "  `*cols`: Variables to remove from the grouping variables. \n", - "  `_base0`: If columns are selected with indexes, whether they are 0-based. \n", + "  `base0_`: If columns are selected with indexes, whether they are 0-based. \n", "    If not given, will use `datar.base.get_option('index.base.0')` \n", "\n", "##### Returns:\n", diff --git a/docs/notebooks/mutate.ipynb b/docs/notebooks/mutate.ipynb index 0f8efeec..e7255f13 100644 --- a/docs/notebooks/mutate.ipynb +++ b/docs/notebooks/mutate.ipynb @@ -85,7 +85,7 @@ "    (the default is to add to the right hand side). \n", "    See relocate() for more details. \n", "\n", - "  `_base0`: Whether `_before` and `_after` are 0-based if given by indexes. \n", + "  `base0_`: Whether `_before` and `_after` are 0-based if given by indexes. 
\n", "    If not provided, will use `datar.base.get_option('index.base.0')` \n", "\n", "  `*args`: and \n", @@ -842,7 +842,7 @@ "execution_count": 8, "source": [ "# use 0-based index\n", - "df >> mutate(z=f.x+f.y, _before=0, _base0=True)" + "df >> mutate(z=f.x+f.y, _before=0, base0_=True)" ], "outputs": [ { diff --git a/docs/notebooks/nest.ipynb b/docs/notebooks/nest.ipynb index 26879102..8169774c 100644 --- a/docs/notebooks/nest.ipynb +++ b/docs/notebooks/nest.ipynb @@ -1,25 +1,4 @@ { - "metadata": { - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "orig_nbformat": 2, - "kernelspec": { - "name": "python378jvsc74a57bd0c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc", - "display_name": "Python 3.7.8 64-bit ('base': conda)" - } - }, - "nbformat": 4, - "nbformat_minor": 2, "cells": [ { "cell_type": "code", @@ -27,44 +6,137 @@ "metadata": {}, "outputs": [ { - "output_type": "display_data", + "name": "stderr", + "output_type": "stream", + "text": [ + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"min\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"max\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"sum\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"abs\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"round\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"all\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"any\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"re\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"filter\" has been overriden by datar.\n", + "[2021-06-29 16:36:20][datar][WARNING] Builtin name \"slice\" has been overriden by datar.\n" + ] + }, + { "data": { - "text/plain": "", - "text/html": "
Try this notebook on binder.
" + "text/html": [ + "
Try this notebook on binder.
" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # nest " + "text/markdown": [ + "### # nest " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Nesting creates a list-column of data frames\n\n##### Args:\n  `_data`: A data frame \n  `**cols`: Columns to nest \n  `_names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `_names_sep`. \n\n  `_base0`: Whether `**cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Nested data frame. \n" + "text/markdown": [ + "##### Nesting creates a list-column of data frames\n", + "\n", + "##### Args:\n", + "  `_data`: A data frame \n", + "  `**cols`: Columns to nest \n", + "  `_names_sep`: If `None`, the default, the names will be left as is. \n", + "    Inner names will come from the former outer names \n", + "    If a string, the inner and outer names will be used together. \n", + "    The names of the new outer columns will be formed by pasting \n", + "    together the outer and the inner column names, separated by \n", + "    `_names_sep`. \n", + "\n", + "  `base0_`: Whether `**cols` are 0-based \n", + "    if not provided, will use `datar.base.get_option('index.base.0')` \n", + "\n", + "##### Returns:\n", + "  Nested data frame. \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "### # unnest " + "text/markdown": [ + "### # unnest " + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": "", - "text/markdown": "##### Flattens list-column of data frames back out into regular columns.\n\n##### Args:\n  `data`: A data frame to flatten. \n  `*cols`: Columns to unnest. \n  `keep_empty`: By default, you get one row of output for each element \n    of the list your unchopping/unnesting. \n    This means that if there's a size-0 element \n    (like NULL or an empty data frame), that entire row will be \n    dropped from the output. \n    If you want to preserve all rows, use `keep_empty` = `True` to \n    replace size-0 elements with a single row of missing values. \n\n  `dtypes`: NOT `ptype`. Providing the dtypes for the output columns. \n    Could be a single dtype, which will be applied to all columns, or \n    a dictionary of dtypes with keys for the columns and values the \n    dtypes. \n\n  `names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `names_sep`. 
\n\n  `names_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether `cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Data frame with selected columns unnested. \n" + "text/markdown": [ + "##### Flattens list-column of data frames back out into regular columns.\n", + "\n", + "##### Args:\n", + "  `data`: A data frame to flatten. \n", + "  `*cols`: Columns to unnest. \n", + "  `keep_empty`: By default, you get one row of output for each element \n", + "    of the list your unchopping/unnesting. \n", + "    This means that if there's a size-0 element \n", + "    (like NULL or an empty data frame), that entire row will be \n", + "    dropped from the output. \n", + "    If you want to preserve all rows, use `keep_empty` = `True` to \n", + "    replace size-0 elements with a single row of missing values. \n", + "\n", + "  `ptype`: Providing the dtypes for the output columns. \n", + "    Could be a single dtype, which will be applied to all columns, or \n", + "    a dictionary of dtypes with keys for the columns and values the \n", + "    dtypes. \n", + "\n", + "  `names_sep`: If `None`, the default, the names will be left as is. \n", + "    Inner names will come from the former outer names \n", + "    If a string, the inner and outer names will be used together. \n", + "    The names of the new outer columns will be formed by pasting \n", + "    together the outer and the inner column names, separated by \n", + "    `names_sep`. \n", + "\n", + "  `names_repair`: treatment of problematic column names: \n", + "    - \"minimal\": No name repair or checks, beyond basic existence,\n", + "\n", + "    - \"unique\": Make sure names are unique and not empty,\n", + "\n", + "    - \"check_unique\": (default value), no name repair,\n", + "      but check they are unique, \n", + "\n", + "    - \"universal\": Make the names unique and syntactic\n", + "\n", + "    - a function: apply custom name repair\n", + "\n", + "  `base0_`: Whether `cols` are 0-based \n", + "    if not provided, will use `datar.base.get_option('index.base.0')` \n", + "\n", + "##### Returns:\n", + "  Data frame with selected columns unnested. \n" + ], + "text/plain": [ + "" + ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -81,24 +153,51 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x data\n", - "0 1 y z\n", - "0 1 6\n", - "1 2 5\n", - "2 3 4\n", - "1 2 y z\n", - "3 4 3\n", - "4 5 2\n", - "2 3 y z\n", - "5 6 1" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xdata
<int64><object>
01<DF 3x2>
12<DF 2x2>
23<DF 1x2>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
xdata
01y z\n0 1 6\n1 2 5\n2 3 4
12y z\n3 4 3\n4 5 2
23y z\n5 6 1
\n
" + "text/plain": [ + " x data\n", + " \n", + "0 1 \n", + "1 2 \n", + "2 3 " + ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], "source": [ @@ -112,18 +211,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y z\n", - "0 1 [1, 2, 3] [6, 5, 4]\n", - "1 2 [4, 5] [3, 2]\n", - "2 3 [6] [1]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xyz
<int64><object><object>
01[1, 2, 3][6, 5, 4]
12[4, 5][3, 2]
23[6][1]
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
xyz
01[1, 2, 3][6, 5, 4]
12[4, 5][3, 2]
23[6][1]
\n
" + "text/plain": [ + " x y z\n", + " \n", + "0 1 [1, 2, 3] [6, 5, 4]\n", + "1 2 [4, 5] [3, 2]\n", + "2 3 [6] [1]" + ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], "source": [ @@ -136,24 +273,51 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x data\n", - "0 1 y z\n", - "0 1 6\n", - "1 2 5\n", - "2 3 4\n", - "1 2 y z\n", - "3 4 3\n", - "4 5 2\n", - "2 3 y z\n", - "5 6 1" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xdata
<int64><object>
01<DF 3x2>
12<DF 2x2>
23<DF 1x2>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
xdata
01y z\n0 1 6\n1 2 5\n2 3 4
12y z\n3 4 3\n4 5 2
23y z\n5 6 1
\n
" + "text/plain": [ + " x data\n", + " \n", + "0 1 \n", + "1 2 \n", + "2 3 " + ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], "source": [ @@ -166,24 +330,429 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Species data\n", - "0 setosa Sepal_Length Sepal_Width Petal_Length P...\n", - "1 versicolor Sepal_Length Sepal_Width Petal_Length P...\n", - "2 virginica Sepal_Length Sepal_Width Petal_Length ..." + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Speciesdata
<object><object>
0setosa<DF 50x4>
1versicolor<DF 50x4>
2virginica<DF 50x4>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Speciesdata
0setosaSepal_Length Sepal_Width Petal_Length P...
1versicolorSepal_Length Sepal_Width Petal_Length P...
2virginicaSepal_Length Sepal_Width Petal_Length ...
\n
" + "text/plain": [ + " Species data\n", + " \n", + "0 setosa \n", + "1 versicolor \n", + "2 virginica " + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" }, { - "output_type": "execute_result", "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + "
Sepal_LengthSepal_WidthPetal_LengthPetal_Width
<float64><float64><float64><float64>
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
105.43.71.50.2
114.83.41.60.2
124.83.01.40.1
134.33.01.10.1
145.84.01.20.2
155.74.41.50.4
165.43.91.30.4
175.13.51.40.3
185.73.81.70.3
195.13.81.50.3
205.43.41.70.2
215.13.71.50.4
224.63.61.00.2
235.13.31.70.5
244.83.41.90.2
255.03.01.60.2
265.03.41.60.4
275.23.51.50.2
285.23.41.40.2
294.73.21.60.2
304.83.11.60.2
315.43.41.50.4
325.24.11.50.1
335.54.21.40.2
344.93.11.50.2
355.03.21.20.2
365.53.51.30.2
374.93.61.40.1
384.43.01.30.2
395.13.41.50.2
405.03.51.30.3
414.52.31.30.3
424.43.21.30.2
435.03.51.60.6
445.13.81.90.4
454.83.01.40.3
465.13.81.60.2
474.63.21.40.2
485.33.71.50.2
495.03.31.40.2
" + ], "text/plain": [ " Sepal_Length Sepal_Width Petal_Length Petal_Width\n", + " \n", "0 5.1 3.5 1.4 0.2\n", "1 4.9 3.0 1.4 0.2\n", "2 4.7 3.2 1.3 0.2\n", @@ -234,11 +803,11 @@ "47 4.6 3.2 1.4 0.2\n", "48 5.3 3.7 1.5 0.2\n", "49 5.0 3.3 1.4 0.2" - ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Sepal_LengthSepal_WidthPetal_LengthPetal_Width
05.13.51.40.2
14.93.01.40.2
24.73.21.30.2
34.63.11.50.2
45.03.61.40.2
55.43.91.70.4
64.63.41.40.3
75.03.41.50.2
84.42.91.40.2
94.93.11.50.1
105.43.71.50.2
114.83.41.60.2
124.83.01.40.1
134.33.01.10.1
145.84.01.20.2
155.74.41.50.4
165.43.91.30.4
175.13.51.40.3
185.73.81.70.3
195.13.81.50.3
205.43.41.70.2
215.13.71.50.4
224.63.61.00.2
235.13.31.70.5
244.83.41.90.2
255.03.01.60.2
265.03.41.60.4
275.23.51.50.2
285.23.41.40.2
294.73.21.60.2
304.83.11.60.2
315.43.41.50.4
325.24.11.50.1
335.54.21.40.2
344.93.11.50.2
355.03.21.20.2
365.53.51.30.2
374.93.61.40.1
384.43.01.30.2
395.13.41.50.2
405.03.51.30.3
414.52.31.30.3
424.43.21.30.2
435.03.51.60.6
445.13.81.90.4
454.83.01.40.3
465.13.81.60.2
474.63.21.40.2
485.33.71.50.2
495.03.31.40.2
\n
" + ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], "source": [ @@ -252,18 +821,51 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Species data\n", - "0 setosa Sepal_Length Sepal_Width Petal_Length P...\n", - "1 versicolor Sepal_Length Sepal_Width Petal_Length P...\n", - "2 virginica Sepal_Length Sepal_Width Petal_Length ..." + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Speciesdata
<object><object>
0setosa<DF 50x4>
1versicolor<DF 50x4>
2virginica<DF 50x4>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Speciesdata
0setosaSepal_Length Sepal_Width Petal_Length P...
1versicolorSepal_Length Sepal_Width Petal_Length P...
2virginicaSepal_Length Sepal_Width Petal_Length ...
\n
" + "text/plain": [ + " Species data\n", + " \n", + "0 setosa \n", + "1 versicolor \n", + "2 virginica " + ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], "source": [ @@ -277,29 +879,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Species petal \\\n", - "0 setosa Petal_Length Petal_Width\n", - "0 1.4... \n", - "1 versicolor Petal_Length Petal_Width\n", - "50 4.7... \n", - "2 virginica Petal_Length Petal_Width\n", - "100 6... \n", - "\n", - " sepal \n", - "0 Sepal_Length Sepal_Width\n", - "0 5.1... \n", - "1 Sepal_Length Sepal_Width\n", - "50 7.0... \n", - "2 Sepal_Length Sepal_Width\n", - "100 6... " + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Speciespetalsepal
<object><object><object>
0setosa<DF 50x2><DF 50x2>
1versicolor<DF 50x2><DF 50x2>
2virginica<DF 50x2><DF 50x2>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Speciespetalsepal
0setosaPetal_Length Petal_Width\n0 1.4...Sepal_Length Sepal_Width\n0 5.1...
1versicolorPetal_Length Petal_Width\n50 4.7...Sepal_Length Sepal_Width\n50 7.0...
2virginicaPetal_Length Petal_Width\n100 6...Sepal_Length Sepal_Width\n100 6...
\n
" + "text/plain": [ + " Species petal sepal\n", + " \n", + "0 setosa \n", + "1 versicolor \n", + "2 virginica " + ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" } ], "source": [ @@ -312,29 +941,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " Species width \\\n", - "0 setosa Sepal_Width Petal_Width\n", - "0 3.5 ... \n", - "1 versicolor Sepal_Width Petal_Width\n", - "50 3.2 ... \n", - "2 virginica Sepal_Width Petal_Width\n", - "100 3.3... \n", - "\n", - " length \n", - "0 Sepal_Length Petal_Length\n", - "0 5.... \n", - "1 Sepal_Length Petal_Length\n", - "50 7.... \n", - "2 Sepal_Length Petal_Length\n", - "100 ... " + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Specieswidthlength
<object><object><object>
0setosa<DF 50x2><DF 50x2>
1versicolor<DF 50x2><DF 50x2>
2virginica<DF 50x2><DF 50x2>
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Specieswidthlength
0setosaSepal_Width Petal_Width\n0 3.5 ...Sepal_Length Petal_Length\n0 5....
1versicolorSepal_Width Petal_Width\n50 3.2 ...Sepal_Length Petal_Length\n50 7....
2virginicaSepal_Width Petal_Width\n100 3.3...Sepal_Length Petal_Length\n100 ...
\n
" + "text/plain": [ + " Species width length\n", + " \n", + "0 setosa \n", + "1 versicolor \n", + "2 virginica " + ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], "source": [ @@ -347,73 +1003,150 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " fish data\n", - "0 4842 station seen\n", - "0 Release 1\n", - "1 I80_...\n", - "1 4843 station seen\n", - "11 Release 1\n", - "12 I80_...\n", - "2 4844 station seen\n", - "22 Release 1\n", - "23 I80_...\n", - "3 4845 station seen\n", - "33 Release 1\n", - "34 I80_...\n", - "4 4847 station seen\n", - "38 Release 1\n", - "39 I80_...\n", - "5 4848 station seen\n", - "41 Release 1\n", - "42 I80_...\n", - "6 4849 station seen\n", - "45 Release 1\n", - "46 I80_...\n", - "7 4850 station seen\n", - "47 Release 1\n", - "48 I80_...\n", - "8 4851 station seen\n", - "53 Release 1\n", - "54 I80_...\n", - "9 4854 station seen\n", - "55 Release 1\n", - "56 I80_...\n", - "10 4855 station seen\n", - "57 Release 1\n", - "58 I80_...\n", - "11 4857 station seen\n", - "62 Release 1\n", - "63 I80_...\n", - "12 4858 station seen\n", - "71 Release 1\n", - "72 I80_...\n", - "13 4859 station seen\n", - "82 Release 1\n", - "83 I80_...\n", - "14 4861 station seen\n", - "87 Release 1\n", - "88 I80_...\n", - "15 4862 station seen\n", - "98 Release 1\n", - "99 I...\n", - "16 4863 station seen\n", - "107 Release 1\n", - "108 I...\n", - "17 4864 station seen\n", - "109 Release 1\n", - "110 I...\n", - "18 4865 station seen\n", - "111 Release 1\n", - "112 I...\n", - "[Groups: ['fish'] (n=19)]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
fishdata
<int64><object>
04842<DF 11x2>
14843<DF 11x2>
24844<DF 11x2>
34845<DF 5x2>
44847<DF 3x2>
54848<DF 4x2>
64849<DF 2x2>
74850<DF 6x2>
84851<DF 2x2>
94854<DF 2x2>
104855<DF 5x2>
114857<DF 9x2>
124858<DF 11x2>
134859<DF 5x2>
144861<DF 11x2>
154862<DF 9x2>
164863<DF 2x2>
174864<DF 2x2>
184865<DF 3x2>
\n", + "

Groups: ['fish'] (n=19)

" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
fishdata
04842station seen\n0 Release 1\n1 I80_...
14843station seen\n11 Release 1\n12 I80_...
24844station seen\n22 Release 1\n23 I80_...
34845station seen\n33 Release 1\n34 I80_...
44847station seen\n38 Release 1\n39 I80_...
54848station seen\n41 Release 1\n42 I80_...
64849station seen\n45 Release 1\n46 I80_...
74850station seen\n47 Release 1\n48 I80_...
84851station seen\n53 Release 1\n54 I80_...
94854station seen\n55 Release 1\n56 I80_...
104855station seen\n57 Release 1\n58 I80_...
114857station seen\n62 Release 1\n63 I80_...
124858station seen\n71 Release 1\n72 I80_...
134859station seen\n82 Release 1\n83 I80_...
144861station seen\n87 Release 1\n88 I80_...
154862station seen\n98 Release 1\n99 I...
164863station seen\n107 Release 1\n108 I...
174864station seen\n109 Release 1\n110 I...
184865station seen\n111 Release 1\n112 I...
\n
[Groups: ['fish'] (n=19)]" + "text/plain": [ + " fish data\n", + " \n", + "0 4842 \n", + "1 4843 \n", + "2 4844 \n", + "3 4845 \n", + "4 4847 \n", + "5 4848 \n", + "6 4849 \n", + "7 4850 \n", + "8 4851 \n", + "9 4854 \n", + "10 4855 \n", + "11 4857 \n", + "12 4858 \n", + "13 4859 \n", + "14 4861 \n", + "15 4862 \n", + "16 4863 \n", + "17 4864 \n", + "18 4865 \n", + "\n", + "[Groups: fish (n=19)]" + ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], "source": [ @@ -426,19 +1159,59 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " cyl data models\n", - "0 6 mpg disp hp drat wt qsec vs ... \n", - "1 4 mpg disp hp drat wt qsec vs ... \n", - "2 8 mpg disp hp drat wt qsec vs ... \n", - "[Groups: ['cyl'] (n=3)]" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cyldatamodels
<int64><object><object>
06<DF 7x10><df 7x10>
14<DF 11x10><df 11x10>
28<DF 14x10><df 14x10>
\n", + "

Groups: ['cyl'] (n=3)

" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
cyldatamodels
06mpg disp hp drat wt qsec vs ...<df 7x10>
14mpg disp hp drat wt qsec vs ...<df 11x10>
28mpg disp hp drat wt qsec vs ...<df 14x10>
\n
[Groups: ['cyl'] (n=3)]" + "text/plain": [ + " cyl data models\n", + " \n", + "0 6 \n", + "1 4 \n", + "2 8 \n", + "\n", + "[Groups: cyl (n=3)]" + ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], "source": [ @@ -462,19 +1235,63 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x a b\n", - "0 2 1 2\n", - "1 3 1 3\n", - "2 3 2 2\n", - "3 3 3 1" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xab
<int64><int64><int64>
0212
1313
2322
3331
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
xab
0212
1313
2322
3331
\n
" + "text/plain": [ + " x a b\n", + " \n", + "0 2 1 2\n", + "1 3 1 3\n", + "2 3 2 2\n", + "3 3 3 1" + ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], "source": [ @@ -486,7 +1303,7 @@ " tibble(a = f[1:3], b = f[3:1])\n", " ]\n", ")\n", - "df >> unnest(f.y, dtypes=int)" + "df >> unnest(f.y, ptype=int)" ] }, { @@ -495,20 +1312,70 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x a b\n", - "0 1 NaN NaN\n", - "1 2 1.0 2.0\n", - "2 3 1.0 3.0\n", - "3 3 2.0 2.0\n", - "4 3 3.0 1.0" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xab
<int64><float64><float64>
01NaNNaN
121.02.0
231.03.0
332.02.0
433.01.0
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
xab
01NaNNaN
121.02.0
231.03.0
332.02.0
433.01.0
\n
" + "text/plain": [ + " x a b\n", + " \n", + "0 1 NaN NaN\n", + "1 2 1.0 2.0\n", + "2 3 1.0 3.0\n", + "3 3 2.0 2.0\n", + "4 3 3.0 1.0" + ] }, + "execution_count": 12, "metadata": {}, - "execution_count": 12 + "output_type": "execute_result" } ], "source": [ @@ -521,18 +1388,56 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b c\n", - "0 a 1 11\n", - "1 b 2 11\n", - "2 c 3 22" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
<object><int64><int64>
0a111
1b211
2c322
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
abc
0a111
1b211
2c322
\n
" + "text/plain": [ + " a b c\n", + " \n", + "0 a 1 11\n", + "1 b 2 11\n", + "2 c 3 22" + ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 13 + "output_type": "execute_result" } ], "source": [ @@ -550,25 +1455,99 @@ "metadata": {}, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b c\n", - "0 a 1 11\n", - "1 a 2 11\n", - "2 b 1 11\n", - "3 b 2 11\n", - "4 c 3 22" + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abc
<object><int64><int64>
0a111
1a211
2b111
3b211
4c322
" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
abc
0a111
1a211
2b111
3b211
4c322
\n
" + "text/plain": [ + " a b c\n", + " \n", + "0 a 1 11\n", + "1 a 2 11\n", + "2 b 1 11\n", + "3 b 2 11\n", + "4 c 3 22" + ] }, + "execution_count": 14, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], "source": [ "df >> unnest(f.a) >> unnest(f.b)" ] } - ] + ], + "metadata": { + "interpreter": { + "hash": "c4cc73b080e063fcebb9afb794613be7caf4b26129562cba1382945a18cc49cc" + }, + "kernelspec": { + "display_name": "Python 3.7.8 64-bit ('base': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.8" + }, + "orig_nbformat": 2 + }, + "nbformat": 4, + "nbformat_minor": 2 } \ No newline at end of file diff --git a/docs/notebooks/pack.ipynb b/docs/notebooks/pack.ipynb index 7b3b8c3a..daef6e51 100644 --- a/docs/notebooks/pack.ipynb +++ b/docs/notebooks/pack.ipynb @@ -46,7 +46,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Makes df narrow by collapsing a set of columns into a single df-column.\n\n##### Args:\n  `_data`: A data frame \n  `**cols`: Columns to pack \n  `_names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `_names_sep`. \n\n  `_base0`: Whether `**cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n" + "text/markdown": "##### Makes df narrow by collapsing a set of columns into a single df-column.\n\n##### Args:\n  `_data`: A data frame \n  `**cols`: Columns to pack \n  `_names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `_names_sep`. \n\n  `base0_`: Whether `**cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n" }, "metadata": {} }, @@ -62,7 +62,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Makes df wider by expanding df-columns back out into individual columns.\n\n##### Args:\n  `data`: A data frame \n  `cols`: Columns to unpack \n  `names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `_names_sep`. 
\n\n  `name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether `cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Data frame with given columns unpacked. \n" + "text/markdown": "##### Makes df wider by expanding df-columns back out into individual columns.\n\n##### Args:\n  `data`: A data frame \n  `cols`: Columns to unpack \n  `names_sep`: If `None`, the default, the names will be left as is. \n    Inner names will come from the former outer names \n    If a string, the inner and outer names will be used together. \n    The names of the new outer columns will be formed by pasting \n    together the outer and the inner column names, separated by \n    `_names_sep`. \n\n  `name_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether `cols` are 0-based \n    if not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  Data frame with given columns unpacked. \n" }, "metadata": {} } diff --git a/docs/notebooks/pivot_longer.ipynb b/docs/notebooks/pivot_longer.ipynb index 570f7d4a..67a5a9a2 100644 --- a/docs/notebooks/pivot_longer.ipynb +++ b/docs/notebooks/pivot_longer.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### \"lengthens\" data, increasing the number of rows and\ndecreasing the number of columns. \n\nThe row order is a bit different from `tidyr` and `pandas.DataFrame.melt`. \n  >>> df = tibble(x=f[1:2], y=f[3:4]) \n  >>> pivot_longer(df, f[f.x:f.y]) \n  >>> # name value \n  >>> # 0 x 1 \n  >>> # 1 x 2 \n  >>> # 2 y 3 \n  >>> # 3 y 4 \n\nBut with `tidyr::pivot_longer`, the output will be: \n  >>> # # A tibble: 4 x 2 \n  >>> # name value \n  >>> # \n  >>> # 1 x 1 \n  >>> # 2 y 3 \n  >>> # 3 x 2 \n  >>> # 4 y 4 \n\n##### Args:\n  `_data`: A data frame to pivot. \n  `cols`: Columns to pivot into longer format. \n  `names_to`: A string specifying the name of the column to create from \n    the data stored in the column names of data. \n    Can be a character vector, creating multiple columns, if names_sep \n    or names_pattern is provided. In this case, there are two special \n    values you can take advantage of: \n\n    - `None`/`NA`/`NULL` will discard that component of the name.\n\n    - `.value`/`_value` indicates that component of the name defines\n      the name of the column containing the cell values, \n      overriding values_to. \n\n    - Different as `tidyr`: With `.value`/`_value`, if there are other\n      parts of the names to distinguish the groups, they must be \n      captured. For example, use `r'(\\w)_(\\d)'` to match `'a_1'` and \n      `['.value', NA]` to discard the suffix, instead of use \n      `r'(\\w)_\\d'` to match. 
\n\n  `names_prefix`: A regular expression used to remove matching text from \n    the start of each variable name. \n\n  `names_sep`: and \n  `names_pattern`: If names_to contains multiple values, \n    these arguments control how the column name is broken up. \n    names_sep takes the same specification as separate(), and \n    can either be a numeric vector (specifying positions to break on), \n    or a single string (specifying a regular expression to split on). \n\n  `names_pattern`: takes the same specification as extract(), \n    a regular expression containing matching groups (()). \n\n  `names_ptypes`: and \n  `values_ptypes`: A list of column name-prototype pairs. \n    A prototype (or ptype for short) is a zero-length vector \n    (like integer() or numeric()) that defines the type, class, and \n    attributes of a vector. Use these arguments if you want to confirm \n    that the created columns are the types that you expect. \n    Note that if you want to change (instead of confirm) the types \n    of specific columns, you should use names_transform or \n    values_transform instead. \n\n  `names_transform`: and \n  `values_transform`: A list of column name-function pairs. \n    Use these arguments if you need to change the types of \n    specific columns. For example, \n    names_transform = dict(week = as.integer) would convert a \n    character variable called week to an integer. \n    If not specified, the type of the columns generated from names_to \n    will be character, and the type of the variables generated from \n    values_to will be the common type of the input columns used to \n    generate them. \n\n  `names_repair`: Not supported yet. \n  `values_to`: A string specifying the name of the column to create from \n    the data stored in cell values. If names_to is a character \n    containing the special `.value`/`_value` sentinel, this value \n    will be ignored, and the name of the value column will be derived \n    from part of the existing column names. \n\n  `values_drop_na`: If TRUE, will drop rows that contain only NAs in \n    the value_to column. This effectively converts explicit missing \n    values to implicit missing values, and should generally be used \n    only when missing values in data were created by its structure. \n\n  `names_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `_base0`: Whether `cols` are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The pivoted dataframe. \n" + "text/markdown": "##### \"lengthens\" data, increasing the number of rows and\ndecreasing the number of columns. \n\nThe row order is a bit different from `tidyr` and `pandas.DataFrame.melt`. \n  >>> df = tibble(x=f[1:2], y=f[3:4]) \n  >>> pivot_longer(df, f[f.x:f.y]) \n  >>> # name value \n  >>> # 0 x 1 \n  >>> # 1 x 2 \n  >>> # 2 y 3 \n  >>> # 3 y 4 \n\nBut with `tidyr::pivot_longer`, the output will be: \n  >>> # # A tibble: 4 x 2 \n  >>> # name value \n  >>> # \n  >>> # 1 x 1 \n  >>> # 2 y 3 \n  >>> # 3 x 2 \n  >>> # 4 y 4 \n\n##### Args:\n  `_data`: A data frame to pivot. \n  `cols`: Columns to pivot into longer format. 
\n  `names_to`: A string specifying the name of the column to create from \n    the data stored in the column names of data. \n    Can be a character vector, creating multiple columns, if names_sep \n    or names_pattern is provided. In this case, there are two special \n    values you can take advantage of: \n\n    - `None`/`NA`/`NULL` will discard that component of the name.\n\n    - `.value`/`_value` indicates that component of the name defines\n      the name of the column containing the cell values, \n      overriding values_to. \n\n    - Different from `tidyr`: With `.value`/`_value`, if there are other\n      parts of the names to distinguish the groups, they must be \n      captured. For example, use `r'(\\w)_(\\d)'` to match `'a_1'` and \n      `['.value', NA]` to discard the suffix, instead of using \n      `r'(\\w)_\\d'` to match. \n\n  `names_prefix`: A regular expression used to remove matching text from \n    the start of each variable name. \n\n  `names_sep`: and \n  `names_pattern`: If names_to contains multiple values, \n    these arguments control how the column name is broken up. \n    names_sep takes the same specification as separate(), and \n    can either be a numeric vector (specifying positions to break on), \n    or a single string (specifying a regular expression to split on). \n\n  `names_pattern`: takes the same specification as extract(), \n    a regular expression containing matching groups (()). \n\n  `names_ptypes`: and \n  `values_ptypes`: A list of column name-prototype pairs. \n    A prototype (or ptype for short) is a zero-length vector \n    (like integer() or numeric()) that defines the type, class, and \n    attributes of a vector. Use these arguments if you want to confirm \n    that the created columns are the types that you expect. \n    Note that if you want to change (instead of confirm) the types \n    of specific columns, you should use names_transform or \n    values_transform instead. \n\n  `names_transform`: and \n  `values_transform`: A list of column name-function pairs. \n    Use these arguments if you need to change the types of \n    specific columns. For example, \n    names_transform = dict(week = as.integer) would convert a \n    character variable called week to an integer. \n    If not specified, the type of the columns generated from names_to \n    will be character, and the type of the variables generated from \n    values_to will be the common type of the input columns used to \n    generate them. \n\n  `names_repair`: Not supported yet. \n  `values_to`: A string specifying the name of the column to create from \n    the data stored in cell values. If names_to is a character \n    containing the special `.value`/`_value` sentinel, this value \n    will be ignored, and the name of the value column will be derived \n    from part of the existing column names. \n\n  `values_drop_na`: If TRUE, will drop rows that contain only NAs in \n    the values_to column. This effectively converts explicit missing \n    values to implicit missing values, and should generally be used \n    only when missing values in data were created by its structure. 
\n\n  `names_repair`: treatment of problematic column names: \n    - \"minimal\": No name repair or checks, beyond basic existence,\n\n    - \"unique\": Make sure names are unique and not empty,\n\n    - \"check_unique\": (default value), no name repair,\n      but check they are unique, \n\n    - \"universal\": Make the names unique and syntactic\n\n    - a function: apply custom name repair\n\n  `base0_`: Whether `cols` are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The pivoted dataframe. \n" }, "metadata": {} } diff --git a/docs/notebooks/pivot_wider.ipynb b/docs/notebooks/pivot_wider.ipynb index c3dd008c..62b8376c 100644 --- a/docs/notebooks/pivot_wider.ipynb +++ b/docs/notebooks/pivot_wider.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### \"widens\" data, increasing the number of columns and decreasing\nthe number of rows. \n\n##### Args:\n  `_data`: A data frame to pivot. \n  `id_cols`: A set of columns that uniquely identifies each observation. \n    Defaults to all columns in data except for the columns specified \n    in names_from and values_from. \n\n  `names_from`: and \n  `values_from`: A pair of arguments describing which column \n    (or columns) to get the name of the output column (names_from), \n    and which column (or columns) to get the cell values from \n    (values_from). \n\n  `names_prefix`: String added to the start of every variable name. \n  `names_sep`: If names_from or values_from contains multiple variables, \n    this will be used to join their values together into a single \n    string to use as a column name. \n\n  `names_glue`: Instead of names_sep and names_prefix, you can supply \n    a glue specification that uses the names_from columns \n    (and special _value) to create custom column names. \n\n  `names_sort`: Should the column names be sorted? If FALSE, the default, \n    column names are ordered by first appearance. \n\n  `names_repair`: todo \n  `values_fill`: Optionally, a (scalar) value that specifies what \n    each value should be filled in with when missing. \n\n  `values_fn`: Optionally, a function applied to the value in each cell \n    in the output. You will typically use this when the combination \n    of `id_cols` and value column does not uniquely identify \n    an observation. \n    This can be a dict you want to apply different aggregations to \n    different value columns. \n    If not specified, will be `numpy.mean` \n\n  `_base0`: Whether `id_cols`, `names_from` and `values_from` \n    are 0-based if given by indexes. \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The pivoted dataframe. \n" + "text/markdown": "##### \"widens\" data, increasing the number of columns and decreasing\nthe number of rows. \n\n##### Args:\n  `_data`: A data frame to pivot. \n  `id_cols`: A set of columns that uniquely identifies each observation. \n    Defaults to all columns in data except for the columns specified \n    in names_from and values_from. \n\n  `names_from`: and \n  `values_from`: A pair of arguments describing which column \n    (or columns) to get the name of the output column (names_from), \n    and which column (or columns) to get the cell values from \n    (values_from). \n\n  `names_prefix`: String added to the start of every variable name. 
\n  `names_sep`: If names_from or values_from contains multiple variables, \n    this will be used to join their values together into a single \n    string to use as a column name. \n\n  `names_glue`: Instead of names_sep and names_prefix, you can supply \n    a glue specification that uses the names_from columns \n    (and special _value) to create custom column names. \n\n  `names_sort`: Should the column names be sorted? If FALSE, the default, \n    column names are ordered by first appearance. \n\n  `names_repair`: todo \n  `values_fill`: Optionally, a (scalar) value that specifies what \n    each value should be filled in with when missing. \n\n  `values_fn`: Optionally, a function applied to the value in each cell \n    in the output. You will typically use this when the combination \n    of `id_cols` and value column does not uniquely identify \n    an observation. \n    This can be a dict if you want to apply different aggregations to \n    different value columns. \n    If not specified, will be `numpy.mean` \n\n  `base0_`: Whether `id_cols`, `names_from` and `values_from` \n    are 0-based if given by indexes. \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The pivoted dataframe. \n"
 },
 "metadata": {}
 }
diff --git a/docs/notebooks/pull.ipynb b/docs/notebooks/pull.ipynb
index ce6a600f..4ca6c062 100644
--- a/docs/notebooks/pull.ipynb
+++ b/docs/notebooks/pull.ipynb
@@ -33,7 +33,7 @@
 "output_type": "display_data",
 "data": {
 "text/plain": "<IPython.core.display.Markdown object>",
- "text/markdown": "##### Pull a series or a dataframe from a dataframe\n\n##### Args:\n  `_data`: The dataframe \n  `var`: The column to pull, either the name or the index \n  `name`: The name of the pulled value \n    - If `to` is frame, or the value pulled is data frame, it will be\n      the column names \n\n    - If `to` is series, it will be the series name. If multiple names\n      are given, only the first name will be used. 
\n\n    - If `to` is series, but value pulled is a data frame, then a\n      dictionary of series with the series names as keys or given `name` \n      as keys. \n\n  `to`: Type of data to return. \n    Only works when pulling `a` for name `a$b` \n\n    - series: Return a pandas Series object\n      Group information will be lost \n      If pulled value is a dataframe, it will return a dict of series, \n      with the series names or the `name` provided. \n\n    - array: Return a numpy.ndarray object\n\n    - frame: Return a DataFrame with that column\n\n    - list: Return a Python list\n\n    - dict: Return a dict with `name` as keys and pulled value as values\n      Only a single column is allowed to pull \n\n    - If not provided: `series` when pulled data has only one column.\n      `dict` if `name` provided and has the same length as the pulled \n      single column. Otherwise `frame`. \n\n  `base0_`: Whether `var` is 0-based if given by index \n    If not provided, `datar.base.get_option('index.base.0')` is used. \n\n##### Returns:\n  The data according to `to` \n"
 },
 "metadata": {}
 }
@@ -277,7 +277,7 @@
 ],
 "source": [
 "# 0-based index\n",
- "mtcars >> pull(0, _base0=True)"
+ "mtcars >> pull(0, base0_=True)"
 ]
 },
 {
diff --git a/docs/notebooks/relocate.ipynb b/docs/notebooks/relocate.ipynb
index fa2700cf..fae3ee68 100644
--- a/docs/notebooks/relocate.ipynb
+++ b/docs/notebooks/relocate.ipynb
@@ -32,7 +32,7 @@
 "output_type": "display_data",
 "data": {
 "text/plain": "<IPython.core.display.Markdown object>",
- "text/markdown": "##### change column positions\n\nSee original API \nhttps://dplyr.tidyverse.org/reference/relocate.html \n\n##### Args:\n  `_data`: A data frame \n  `*args`: and \n  `**kwargs`: Columns to rename and move \n  `_before`: and \n  `_after`: Destination. Supplying neither will move columns to \n    the left-hand side; specifying both is an error. \n\n  `base0_`: Whether `_before` and `_after` are 0-based if given by indexes. \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  An object of the same type as .data. The output has the following \n  properties: \n  - Rows are not affected.\n  - The same columns appear in the output, but (usually) in a\n    different place. 
\n\n  - Data frame attributes are preserved.\n  - Groups are not affected\n" }, "metadata": {} } @@ -429,7 +429,7 @@ } ], "source": [ - "df2 >> relocate(f.d, _after=1, _base0=True)" + "df2 >> relocate(f.d, _after=1, base0_=True)" ] } ], diff --git a/docs/notebooks/rename.ipynb b/docs/notebooks/rename.ipynb index c8ef2a8d..58d6d165 100644 --- a/docs/notebooks/rename.ipynb +++ b/docs/notebooks/rename.ipynb @@ -45,7 +45,7 @@ "##### Args:\n", "  `_data`: The dataframe \n", "  `**kwargs`: The new_name = old_name pairs \n", - "  `_base0`: Whether the old_name is 0-based if given by indexes. \n", + "  `base0_`: Whether the old_name is 0-based if given by indexes. \n", "    If not provided, will use `datar.base.get_option('index.base.0')` \n", "\n", "##### Returns:\n", @@ -84,7 +84,7 @@ "    keyword arguments instead. \n", "\n", "  `**kwargs`: keyword arguments for `_fn` \n", - "  `_base0`: Whether the old_name is 0-based if given by indexes. \n", + "  `base0_`: Whether the old_name is 0-based if given by indexes. \n", "    If not provided, will use `datar.base.get_option('index.base.0')` \n", "\n", "##### Returns:\n", @@ -1106,7 +1106,7 @@ } ], "source": [ - "iris >> rename(Sp=4, _base0=True)" + "iris >> rename(Sp=4, base0_=True)" ] }, { diff --git a/docs/notebooks/rowwise.ipynb b/docs/notebooks/rowwise.ipynb index 01262438..618f8dff 100644 --- a/docs/notebooks/rowwise.ipynb +++ b/docs/notebooks/rowwise.ipynb @@ -32,7 +32,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Compute on a data frame a row-at-a-time\n\nSee https://dplyr.tidyverse.org/reference/rowwise.html \n\n##### Args:\n  `_data`: The dataframe \n  `*columns`: Variables to be preserved when calling summarise(). \n    This is typically a set of variables whose combination \n    uniquely identify each row. \n\n  `_base0`: Whether indexes are 0-based if columns are selected by indexes. \n    If not given, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  A row-wise data frame \n" + "text/markdown": "##### Compute on a data frame a row-at-a-time\n\nSee https://dplyr.tidyverse.org/reference/rowwise.html \n\n##### Args:\n  `_data`: The dataframe \n  `*columns`: Variables to be preserved when calling summarise(). \n    This is typically a set of variables whose combination \n    uniquely identify each row. \n\n  `base0_`: Whether indexes are 0-based if columns are selected by indexes. \n    If not given, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  A row-wise data frame \n" }, "metadata": {} } diff --git a/docs/notebooks/select.ipynb b/docs/notebooks/select.ipynb index 59305e13..7824ff5f 100644 --- a/docs/notebooks/select.ipynb +++ b/docs/notebooks/select.ipynb @@ -32,7 +32,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Select (and optionally rename) variables in a data frame\n\nSee original API \nhttps://dplyr.tidyverse.org/reference/select.html \n\nTo exclude columns use `~` instead of `-`. For example, to exclude last \n`column`: `select(df, ~c(-1))`. \n\nTo use column name in slice: `f[f.col1:f.col2]`. If you don't want `col2` \nto be included: `f[f.col1:f.col2:0]` \n\n##### Args:\n  `*columns`: The columns to select \n  `**renamings`: The columns to rename and select in new => old column way. 
\n  `_base0`: Whether the columns are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with select columns \n" + "text/markdown": "##### Select (and optionally rename) variables in a data frame\n\nSee original API \nhttps://dplyr.tidyverse.org/reference/select.html \n\nTo exclude columns use `~` instead of `-`. For example, to exclude last \n`column`: `select(df, ~c(-1))`. \n\nTo use column name in slice: `f[f.col1:f.col2]`. If you don't want `col2` \nto be included: `f[f.col1:f.col2:0]` \n\n##### Args:\n  `*columns`: The columns to select \n  `**renamings`: The columns to rename and select in new => old column way. \n  `base0_`: Whether the columns are 0-based if given by indexes \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with select columns \n" }, "metadata": {} } @@ -617,7 +617,7 @@ } ], "source": [ - "iris >> select(f[1:3], _base0=True) # stop (3) is not included" + "iris >> select(f[1:3], base0_=True) # stop (3) is not included" ] }, { diff --git a/docs/notebooks/separate.ipynb b/docs/notebooks/separate.ipynb index 3392167b..fd0ac24e 100644 --- a/docs/notebooks/separate.ipynb +++ b/docs/notebooks/separate.ipynb @@ -3,61 +3,57 @@ { "cell_type": "code", "execution_count": 1, - "source": [ - "# https://tidyr.tidyverse.org/reference/separate.html\n", - "\n", - "from datar.all import *\n", - "\n", - "%run nb_helpers.py\n", - "nb_header(separate, separate_rows)" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:05.972746Z", + "iopub.status.busy": "2021-04-17T00:55:05.972062Z", + "iopub.status.idle": "2021-04-17T00:55:06.622960Z", + "shell.execute_reply": "2021-04-17T00:55:06.622375Z" + } + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"min\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"max\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"sum\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"abs\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"round\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"all\" has been overriden by datar.\n", - "[2021-06-21 14:45:04][datar][WARNING] Builtin name \"any\" has been overriden by datar.\n", - "[2021-06-21 14:45:05][datar][WARNING] Builtin name \"re\" has been overriden by datar.\n", - "[2021-06-21 14:45:05][datar][WARNING] Builtin name \"filter\" has been overriden by datar.\n", - "[2021-06-21 14:45:05][datar][WARNING] Builtin name \"slice\" has been overriden by datar.\n" + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"min\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"max\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"sum\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"abs\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"round\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"all\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"any\" has been overriden by datar.\n", + "[2021-06-29 
16:33:21][datar][WARNING] Builtin name \"re\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"filter\" has been overriden by datar.\n", + "[2021-06-29 16:33:21][datar][WARNING] Builtin name \"slice\" has been overriden by datar.\n" ] }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/html": [ "
Try this notebook on binder.
" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/markdown": [ "### # separate " + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/markdown": [ "##### Given either a regular expression or a vector of character positions,\n", "turns a single character column into multiple columns. \n", @@ -99,34 +95,34 @@ "\n", "    - \"left\": fill with missing values on the left\n", "\n", - "  `_base0`: Whether `col` is 0-based when given by index and Whether `sep` \n", + "  `base0_`: Whether `col` is 0-based when given by index and Whether `sep` \n", "    is 0-based if given by position \n", "    If not provided, will use `datar.base.get_option('index.base.0')` \n", "\n", "##### Returns:\n", "  Dataframe with separated columns. \n" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/markdown": [ "### # separate_rows " + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" }, { - "output_type": "display_data", "data": { - "text/plain": [ - "" - ], "text/markdown": [ "##### Separates the values and places each one in its own row.\n", "\n", @@ -137,44 +133,43 @@ "  `convert`: The universal type for the extracted columns or a dict for \n", "    individual ones \n", "\n", - "  `_base0`: Whether `columns` is 0-based when given by index \n", + "  `base0_`: Whether `columns` is 0-based when given by index \n", "    If not provided, will use `datar.base.get_option('index.base.0')` \n", "\n", "##### Returns:\n", "  Dataframe with rows separated and repeated. \n" + ], + "text/plain": [ + "" ] }, - "metadata": {} + "metadata": {}, + "output_type": "display_data" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:05.972746Z", - "iopub.status.busy": "2021-04-17T00:55:05.972062Z", - "iopub.status.idle": "2021-04-17T00:55:06.622960Z", - "shell.execute_reply": "2021-04-17T00:55:06.622375Z" - } - } + "source": [ + "# https://tidyr.tidyverse.org/reference/separate.html\n", + "\n", + "from datar.all import *\n", + "\n", + "%run nb_helpers.py\n", + "nb_header(separate, separate_rows)" + ] }, { "cell_type": "code", "execution_count": 2, - "source": [ - "df = tibble(x=c(NA, \"x.y\", \"x.z\", \"y.z\"))\n", - "df >> separate(f.x, c(\"A\", \"B\"))" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.627824Z", + "iopub.status.busy": "2021-04-17T00:55:06.627338Z", + "iopub.status.idle": "2021-04-17T00:55:06.671440Z", + "shell.execute_reply": "2021-04-17T00:55:06.670953Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " A B\n", - " \n", - "0 NaN NaN\n", - "1 x y\n", - "2 x z\n", - "3 y z" - ], "text/html": [ "\n", " \n", @@ -212,39 +207,40 @@ " \n", " \n", "
" + ], + "text/plain": [ + " A B\n", + " \n", + "0 NaN NaN\n", + "1 x y\n", + "2 x z\n", + "3 y z" ] }, + "execution_count": 2, "metadata": {}, - "execution_count": 2 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.627824Z", - "iopub.status.busy": "2021-04-17T00:55:06.627338Z", - "iopub.status.idle": "2021-04-17T00:55:06.671440Z", - "shell.execute_reply": "2021-04-17T00:55:06.670953Z" - } - } + "source": [ + "df = tibble(x=c(NA, \"x.y\", \"x.z\", \"y.z\"))\n", + "df >> separate(f.x, c(\"A\", \"B\"))" + ] }, { "cell_type": "code", "execution_count": 3, - "source": [ - "df >> separate(f.x, c(NA, \"B\"))" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.681197Z", + "iopub.status.busy": "2021-04-17T00:55:06.680500Z", + "iopub.status.idle": "2021-04-17T00:55:06.684538Z", + "shell.execute_reply": "2021-04-17T00:55:06.684126Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " B\n", - " \n", - "0 NaN\n", - "1 y\n", - "2 z\n", - "3 z" - ], "text/html": [ "\n", " \n", @@ -276,48 +272,47 @@ " \n", " \n", "
" + ], + "text/plain": [ + " B\n", + " \n", + "0 NaN\n", + "1 y\n", + "2 z\n", + "3 z" ] }, + "execution_count": 3, "metadata": {}, - "execution_count": 3 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.681197Z", - "iopub.status.busy": "2021-04-17T00:55:06.680500Z", - "iopub.status.idle": "2021-04-17T00:55:06.684538Z", - "shell.execute_reply": "2021-04-17T00:55:06.684126Z" - } - } + "source": [ + "df >> separate(f.x, c(NA, \"B\"))" + ] }, { "cell_type": "code", "execution_count": 4, - "source": [ - "df = tibble(x=c(\"x\", \"x y\", \"x y z\", NA))\n", - "df >> separate(f.x, c(\"a\", \"b\"))" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.689306Z", + "iopub.status.busy": "2021-04-17T00:55:06.688654Z", + "iopub.status.idle": "2021-04-17T00:55:06.710126Z", + "shell.execute_reply": "2021-04-17T00:55:06.709657Z" + } + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "[2021-06-21 14:45:13][datar][WARNING] Expected 2 pieces. Additional pieces discarded in 1 rows ['x y z'].\n", - "[2021-06-21 14:45:13][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['x'].\n" + "[2021-06-29 16:33:22][datar][WARNING] Expected 2 pieces. Additional pieces discarded in 1 rows ['x y z'].\n", + "[2021-06-29 16:33:22][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['x'].\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b\n", - " \n", - "0 x NaN\n", - "1 x y\n", - "2 x y\n", - "3 NaN NaN" - ], "text/html": [ "\n", " \n", @@ -355,39 +350,40 @@ " \n", " \n", "
" + ], + "text/plain": [ + " a b\n", + " \n", + "0 x NaN\n", + "1 x y\n", + "2 x y\n", + "3 NaN NaN" ] }, + "execution_count": 4, "metadata": {}, - "execution_count": 4 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.689306Z", - "iopub.status.busy": "2021-04-17T00:55:06.688654Z", - "iopub.status.idle": "2021-04-17T00:55:06.710126Z", - "shell.execute_reply": "2021-04-17T00:55:06.709657Z" - } - } + "source": [ + "df = tibble(x=c(\"x\", \"x y\", \"x y z\", NA))\n", + "df >> separate(f.x, c(\"a\", \"b\"))" + ] }, { "cell_type": "code", "execution_count": 5, - "source": [ - "df >> separate(f.x, c(\"a\", \"b\"), extra=\"drop\", fill=\"right\")" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.720277Z", + "iopub.status.busy": "2021-04-17T00:55:06.719443Z", + "iopub.status.idle": "2021-04-17T00:55:06.724478Z", + "shell.execute_reply": "2021-04-17T00:55:06.723986Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b\n", - " \n", - "0 x NaN\n", - "1 x y\n", - "2 x y\n", - "3 NaN NaN" - ], "text/html": [ "\n", " \n", @@ -425,39 +421,39 @@ " \n", " \n", "
" + ], + "text/plain": [ + " a b\n", + " \n", + "0 x NaN\n", + "1 x y\n", + "2 x y\n", + "3 NaN NaN" ] }, + "execution_count": 5, "metadata": {}, - "execution_count": 5 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.720277Z", - "iopub.status.busy": "2021-04-17T00:55:06.719443Z", - "iopub.status.idle": "2021-04-17T00:55:06.724478Z", - "shell.execute_reply": "2021-04-17T00:55:06.723986Z" - } - } + "source": [ + "df >> separate(f.x, c(\"a\", \"b\"), extra=\"drop\", fill=\"right\")" + ] }, { "cell_type": "code", "execution_count": 6, - "source": [ - "df >> separate(f.x, c(\"a\", \"b\"), extra=\"merge\", fill=\"left\")" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.734426Z", + "iopub.status.busy": "2021-04-17T00:55:06.733779Z", + "iopub.status.idle": "2021-04-17T00:55:06.737125Z", + "shell.execute_reply": "2021-04-17T00:55:06.736745Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b\n", - " \n", - "0 NaN x\n", - "1 x y\n", - "2 x y z\n", - "3 NaN NaN" - ], "text/html": [ "\n", " \n", @@ -495,46 +491,46 @@ " \n", " \n", "
" + ], + "text/plain": [ + " a b\n", + " \n", + "0 NaN x\n", + "1 x y\n", + "2 x y z\n", + "3 NaN NaN" ] }, + "execution_count": 6, "metadata": {}, - "execution_count": 6 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.734426Z", - "iopub.status.busy": "2021-04-17T00:55:06.733779Z", - "iopub.status.idle": "2021-04-17T00:55:06.737125Z", - "shell.execute_reply": "2021-04-17T00:55:06.736745Z" - } - } + "source": [ + "df >> separate(f.x, c(\"a\", \"b\"), extra=\"merge\", fill=\"left\")" + ] }, { "cell_type": "code", "execution_count": 7, - "source": [ - "df >> separate(f.x, c(\"a\", \"b\", \"c\"))" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.742517Z", + "iopub.status.busy": "2021-04-17T00:55:06.742023Z", + "iopub.status.idle": "2021-04-17T00:55:06.751103Z", + "shell.execute_reply": "2021-04-17T00:55:06.750709Z" + } + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "[2021-06-21 14:45:14][datar][WARNING] Expected 3 pieces. Missing pieces filled with `NA` in 2 rows ['x', 'x y'].\n" + "[2021-06-29 16:33:23][datar][WARNING] Expected 3 pieces. Missing pieces filled with `NA` in 2 rows ['x', 'x y'].\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " a b c\n", - " \n", - "0 x NaN NaN\n", - "1 x y NaN\n", - "2 x y z\n", - "3 NaN NaN NaN" - ], "text/html": [ "\n", " \n", @@ -578,38 +574,39 @@ " \n", " \n", "
" + ], + "text/plain": [ + " a b c\n", + " \n", + "0 x NaN NaN\n", + "1 x y NaN\n", + "2 x y z\n", + "3 NaN NaN NaN" ] }, + "execution_count": 7, "metadata": {}, - "execution_count": 7 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.742517Z", - "iopub.status.busy": "2021-04-17T00:55:06.742023Z", - "iopub.status.idle": "2021-04-17T00:55:06.751103Z", - "shell.execute_reply": "2021-04-17T00:55:06.750709Z" - } - } + "source": [ + "df >> separate(f.x, c(\"a\", \"b\", \"c\"))" + ] }, { "cell_type": "code", "execution_count": 8, - "source": [ - "df = tibble(x=c(\"x: 123\", \"y: error: 7\"))\n", - "df >> separate(f.x, c(\"key\", \"value\"), \": \", extra=\"merge\")" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.755401Z", + "iopub.status.busy": "2021-04-17T00:55:06.754899Z", + "iopub.status.idle": "2021-04-17T00:55:06.773486Z", + "shell.execute_reply": "2021-04-17T00:55:06.772976Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " key value\n", - " \n", - "0 x 123\n", - "1 y error: 7" - ], "text/html": [ "\n", " \n", @@ -637,40 +634,38 @@ " \n", " \n", "
" + ], + "text/plain": [ + " key value\n", + " \n", + "0 x 123\n", + "1 y error: 7" ] }, + "execution_count": 8, "metadata": {}, - "execution_count": 8 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.755401Z", - "iopub.status.busy": "2021-04-17T00:55:06.754899Z", - "iopub.status.idle": "2021-04-17T00:55:06.773486Z", - "shell.execute_reply": "2021-04-17T00:55:06.772976Z" - } - } + "source": [ + "df = tibble(x=c(\"x: 123\", \"y: error: 7\"))\n", + "df >> separate(f.x, c(\"key\", \"value\"), \": \", extra=\"merge\")" + ] }, { "cell_type": "code", "execution_count": 9, - "source": [ - "df = tibble(x=c(NA, \"x?y\", \"x.z\", \"y:z\"))\n", - "df >> separate(f.x, c(\"A\",\"B\"), sep=r\"[.?:]\")" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.778430Z", + "iopub.status.busy": "2021-04-17T00:55:06.777927Z", + "iopub.status.idle": "2021-04-17T00:55:06.796636Z", + "shell.execute_reply": "2021-04-17T00:55:06.796283Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " A B\n", - " \n", - "0 NaN NaN\n", - "1 x y\n", - "2 x z\n", - "3 y z" - ], "text/html": [ "\n", " \n", @@ -708,48 +703,47 @@ " \n", " \n", "
" + ], + "text/plain": [ + " A B\n", + " \n", + "0 NaN NaN\n", + "1 x y\n", + "2 x z\n", + "3 y z" ] }, + "execution_count": 9, "metadata": {}, - "execution_count": 9 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.778430Z", - "iopub.status.busy": "2021-04-17T00:55:06.777927Z", - "iopub.status.idle": "2021-04-17T00:55:06.796636Z", - "shell.execute_reply": "2021-04-17T00:55:06.796283Z" - } - } + "source": [ + "df = tibble(x=c(NA, \"x?y\", \"x.z\", \"y:z\"))\n", + "df >> separate(f.x, c(\"A\",\"B\"), sep=r\"[.?:]\")" + ] }, { "cell_type": "code", "execution_count": 10, - "source": [ - "df = tibble(x=c(\"x:1\", \"x:2\", \"y:4\", \"z\", NA))\n", - "df >> separate(f.x, c(\"key\",\"value\"), \":\")" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.801518Z", + "iopub.status.busy": "2021-04-17T00:55:06.801042Z", + "iopub.status.idle": "2021-04-17T00:55:06.820661Z", + "shell.execute_reply": "2021-04-17T00:55:06.821022Z" + } + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "[2021-06-21 14:45:16][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['z'].\n" + "[2021-06-29 16:33:24][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['z'].\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " key value\n", - " \n", - "0 x 1\n", - "1 x 2\n", - "2 y 4\n", - "3 z NaN\n", - "4 NaN NaN" - ], "text/html": [ "\n", " \n", @@ -792,48 +786,48 @@ " \n", " \n", "
" + ], + "text/plain": [ + " key value\n", + " \n", + "0 x 1\n", + "1 x 2\n", + "2 y 4\n", + "3 z NaN\n", + "4 NaN NaN" ] }, + "execution_count": 10, "metadata": {}, - "execution_count": 10 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.801518Z", - "iopub.status.busy": "2021-04-17T00:55:06.801042Z", - "iopub.status.idle": "2021-04-17T00:55:06.820661Z", - "shell.execute_reply": "2021-04-17T00:55:06.821022Z" - } - } + "source": [ + "df = tibble(x=c(\"x:1\", \"x:2\", \"y:4\", \"z\", NA))\n", + "df >> separate(f.x, c(\"key\",\"value\"), \":\")" + ] }, { "cell_type": "code", "execution_count": 11, - "source": [ - "df >> separate(f.x, c(\"key\",\"value\"), \":\", convert={'value': float}) \n", - "_.dtypes" - ], + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.826799Z", + "iopub.status.busy": "2021-04-17T00:55:06.826255Z", + "iopub.status.idle": "2021-04-17T00:55:06.834973Z", + "shell.execute_reply": "2021-04-17T00:55:06.835369Z" + } + }, "outputs": [ { - "output_type": "stream", "name": "stderr", + "output_type": "stream", "text": [ - "[2021-06-21 14:45:17][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['z'].\n" + "[2021-06-29 16:33:24][datar][WARNING] Expected 2 pieces. Missing pieces filled with `NA` in 1 rows ['z'].\n" ] }, { - "output_type": "execute_result", "data": { - "text/plain": [ - " key value\n", - " \n", - "0 x 1.0\n", - "1 x 2.0\n", - "2 y 4.0\n", - "3 z NaN\n", - "4 NaN NaN" - ], "text/html": [ "\n", " \n", @@ -876,13 +870,22 @@ " \n", " \n", "
" + ], + "text/plain": [ + " key value\n", + " \n", + "0 x 1.0\n", + "1 x 2.0\n", + "2 y 4.0\n", + "3 z NaN\n", + "4 NaN NaN" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" }, { - "output_type": "execute_result", "data": { "text/plain": [ "key object\n", @@ -890,30 +893,19 @@ "dtype: object" ] }, + "execution_count": 11, "metadata": {}, - "execution_count": 11 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.826799Z", - "iopub.status.busy": "2021-04-17T00:55:06.826255Z", - "iopub.status.idle": "2021-04-17T00:55:06.834973Z", - "shell.execute_reply": "2021-04-17T00:55:06.835369Z" - } - } + "source": [ + "df >> separate(f.x, c(\"key\",\"value\"), \":\", convert={'value': float}) \n", + "_.dtypes" + ] }, { "cell_type": "code", "execution_count": 12, - "source": [ - "df = tibble(\n", - " x=[1,2,3],\n", - " y=c(\"a\", \"d,e,f\", \"g,h\"),\n", - " z=c(\"1\", \"2,3,4\", \"5,6\")\n", - ")" - ], - "outputs": [], "metadata": { "execution": { "iopub.execute_input": "2021-04-17T00:55:06.847921Z", @@ -921,28 +913,30 @@ "iopub.status.idle": "2021-04-17T00:55:06.861789Z", "shell.execute_reply": "2021-04-17T00:55:06.861234Z" } - } + }, + "outputs": [], + "source": [ + "df = tibble(\n", + " x=[1,2,3],\n", + " y=c(\"a\", \"d,e,f\", \"g,h\"),\n", + " z=c(\"1\", \"2,3,4\", \"5,6\")\n", + ")" + ] }, { "cell_type": "code", - "execution_count": 14, - "source": [ - "df >> separate_rows(f.y, f.z, convert={'z': int})" - ], + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2021-04-17T00:55:06.867252Z", + "iopub.status.busy": "2021-04-17T00:55:06.866623Z", + "iopub.status.idle": "2021-04-17T00:55:06.903701Z", + "shell.execute_reply": "2021-04-17T00:55:06.903227Z" + } + }, "outputs": [ { - "output_type": "execute_result", "data": { - "text/plain": [ - " x y z\n", - " \n", - "0 1 a 1\n", - "1 2 d 2\n", - "2 2 e 3\n", - "3 2 f 4\n", - "4 3 g 5\n", - "5 3 h 6" - ], "text/html": [ "\n", " \n", @@ -998,27 +992,33 @@ " \n", " \n", "
" + ], + "text/plain": [ + " x y z\n", + " \n", + "0 1 a 1\n", + "1 2 d 2\n", + "2 2 e 3\n", + "3 2 f 4\n", + "4 3 g 5\n", + "5 3 h 6" ] }, + "execution_count": 13, "metadata": {}, - "execution_count": 14 + "output_type": "execute_result" } ], - "metadata": { - "execution": { - "iopub.execute_input": "2021-04-17T00:55:06.867252Z", - "iopub.status.busy": "2021-04-17T00:55:06.866623Z", - "iopub.status.idle": "2021-04-17T00:55:06.903701Z", - "shell.execute_reply": "2021-04-17T00:55:06.903227Z" - } - } + "source": [ + "df >> separate_rows(f.y, f.z, convert={'z': int})" + ] }, { "cell_type": "code", "execution_count": null, - "source": [], + "metadata": {}, "outputs": [], - "metadata": {} + "source": [] } ], "metadata": { diff --git a/docs/notebooks/slice.ipynb b/docs/notebooks/slice.ipynb index 597bff17..ff3236d0 100644 --- a/docs/notebooks/slice.ipynb +++ b/docs/notebooks/slice.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Index rows by their (integer) locations\n\nOriginal APIs https://dplyr.tidyverse.org/reference/slice.html \n\n##### Args:\n  `_data`: The dataframe \n  `rows`: The indexes \n    Ranges can be specified as `f[1:3]` \n    Note that the negatives mean differently than in dplyr. \n    In dplyr, negative numbers meaning exclusive, but here negative \n    numbers are negative indexes like how they act in python indexing. \n    For exclusive indexes, you need to use inversion. For example: \n    `slice(df, ~f[:3])` excludes first 3 rows. You can also do: \n    `slice(df, ~c(f[:3], 6))` to exclude multiple set of rows. \n    To exclude a single row, you can't do this directly: `slice(df, ~1)` \n    since `~1` is directly compiled into a number. You can do this \n\n    `instead`: `slice(df, ~c(1))` \n\n    Exclusive and inclusive expressions are allowed to be mixed, unlike \n    in `dplyr`. They are expanded in the order they are passed in. \n\n  `_preserve`: Relevant when the _data input is grouped. \n    If _preserve = FALSE (the default), the grouping structure is \n    recalculated based on the resulting data, \n    otherwise the grouping is kept as is. \n\n  `_base0`: If rows are selected by indexes, whether they are 0-based. \n    If not provided, `datar.base.get_option('index.base.0')` is used. \n\n##### Returns:\n  The sliced dataframe \n" + "text/markdown": "##### Index rows by their (integer) locations\n\nOriginal APIs https://dplyr.tidyverse.org/reference/slice.html \n\n##### Args:\n  `_data`: The dataframe \n  `rows`: The indexes \n    Ranges can be specified as `f[1:3]` \n    Note that the negatives mean differently than in dplyr. \n    In dplyr, negative numbers meaning exclusive, but here negative \n    numbers are negative indexes like how they act in python indexing. \n    For exclusive indexes, you need to use inversion. For example: \n    `slice(df, ~f[:3])` excludes first 3 rows. You can also do: \n    `slice(df, ~c(f[:3], 6))` to exclude multiple set of rows. \n    To exclude a single row, you can't do this directly: `slice(df, ~1)` \n    since `~1` is directly compiled into a number. You can do this \n\n    `instead`: `slice(df, ~c(1))` \n\n    Exclusive and inclusive expressions are allowed to be mixed, unlike \n    in `dplyr`. They are expanded in the order they are passed in. \n\n  `_preserve`: Relevant when the _data input is grouped. \n    If _preserve = FALSE (the default), the grouping structure is \n    recalculated based on the resulting data, \n    otherwise the grouping is kept as is. 
\n\n  `base0_`: If rows are selected by indexes, whether they are 0-based. \n    If not provided, `datar.base.get_option('index.base.0')` is used. \n\n##### Returns:\n  The sliced dataframe \n" }, "metadata": {} }, diff --git a/docs/notebooks/tibble.ipynb b/docs/notebooks/tibble.ipynb index 4c7305b9..b98b974a 100644 --- a/docs/notebooks/tibble.ipynb +++ b/docs/notebooks/tibble.ipynb @@ -67,7 +67,7 @@ "  `_rows`: Number of rows of a 0-col dataframe when args and kwargs are \n", "    not provided. When args or kwargs are provided, this is ignored. \n", "\n", - "  `_base0`: Whether the suffixes of repaired names should be 0-based. \n", + "  `base0_`: Whether the suffixes of repaired names should be 0-based. \n", "    If not provided, will use `datar.base.get_option('index.base.0')`. \n", "\n", "##### Returns:\n", @@ -114,7 +114,7 @@ "\n", "    - a function: apply custom name repair\n", "\n", - "  `_base0`: Whether the suffixes of repaired names should be 0-based. \n", + "  `base0_`: Whether the suffixes of repaired names should be 0-based. \n", "    If not provided, will use `datar.base.get_option('index.base.0')`. \n", "\n", "##### Returns:\n", @@ -251,7 +251,7 @@ "\n", "    - a function: apply custom name repair\n", "\n", - "  `_base0`: Whether the suffixes of repaired names should be 0-based. \n", + "  `base0_`: Whether the suffixes of repaired names should be 0-based. \n", "    If not provided, will use `datar.base.get_option('index.base.0')`. \n", "\n", "##### Returns:\n", @@ -628,7 +628,7 @@ "execution_count": 7, "source": [ "# 0-based suffixing for name repair\n", - "tibble(x, x, _name_repair=\"unique\", _base0=True)" + "tibble(x, x, _name_repair=\"unique\", base0_=True)" ], "outputs": [ { diff --git a/docs/notebooks/uncount.ipynb b/docs/notebooks/uncount.ipynb index 27fd6116..b6768c13 100644 --- a/docs/notebooks/uncount.ipynb +++ b/docs/notebooks/uncount.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Duplicating rows according to a weighting variable\n\n##### Args:\n  `data`: A data frame \n  `weights`: A vector of weights. Evaluated in the context of data \n  `_remove`: If TRUE, and weights is the name of a column in data, \n    then this column is removed. \n\n  `_id`: Supply a string to create a new variable which gives a \n    unique identifier for each created row (0-based). \n\n  `_base0`: Whether the generated `_id` columns are 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  dataframe with rows repeated. \n" + "text/markdown": "##### Duplicating rows according to a weighting variable\n\n##### Args:\n  `data`: A data frame \n  `weights`: A vector of weights. Evaluated in the context of data \n  `_remove`: If TRUE, and weights is the name of a column in data, \n    then this column is removed. \n\n  `_id`: Supply a string to create a new variable which gives a \n    unique identifier for each created row (0-based). \n\n  `base0_`: Whether the generated `_id` columns are 0-based. \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  dataframe with rows repeated. 
\n" }, "metadata": {} } diff --git a/docs/notebooks/unite.ipynb b/docs/notebooks/unite.ipynb index 71f40ac1..4dc063e8 100644 --- a/docs/notebooks/unite.ipynb +++ b/docs/notebooks/unite.ipynb @@ -33,7 +33,7 @@ "output_type": "display_data", "data": { "text/plain": "", - "text/markdown": "##### Unite multiple columns into one by pasting strings together\n\n##### Args:\n  `data`: A data frame. \n  `col`: The name of the new column, as a string or symbol. \n  `*columns`: Columns to unite \n  `sep`: Separator to use between values. \n  `remove`: If True, remove input columns from output data frame. \n  `na_rm`: If True, missing values will be remove prior to uniting \n    each value. \n\n  `_base0`: Whether `columns` is 0-based when given by index \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with selected columns united \n" + "text/markdown": "##### Unite multiple columns into one by pasting strings together\n\n##### Args:\n  `data`: A data frame. \n  `col`: The name of the new column, as a string or symbol. \n  `*columns`: Columns to unite \n  `sep`: Separator to use between values. \n  `remove`: If True, remove input columns from output data frame. \n  `na_rm`: If True, missing values will be remove prior to uniting \n    each value. \n\n  `base0_`: Whether `columns` is 0-based when given by index \n    If not provided, will use `datar.base.get_option('index.base.0')` \n\n##### Returns:\n  The dataframe with selected columns united \n" }, "metadata": {} } diff --git a/docs/piping_vs_regular.md b/docs/piping_vs_regular.md index 87a28264..e6c7731e 100644 --- a/docs/piping_vs_regular.md +++ b/docs/piping_vs_regular.md @@ -1,5 +1,5 @@ -A verb can be called in a piping form: +A verb can be called using a piping syntax: ```python df >> verb(...) ``` @@ -9,42 +9,33 @@ Or in a regular way: verb(df, ...) ``` -The piping is recommended and is designed specially to enable full features of `datar`. +The piping is recommended and is designed specially to enable full features of `datar` with [`pipda`][1]. -The regular form of verb calling is limited when an argument is calling a function that is registered requiring the data argument. For example: +The regular form of calling a verb has no problems with simple arguments (arguments that don't involve any functions registered by `register_func()/register_verb()`). Functions registered by `register_func(None, ...)` that don't have data argument as the first argument are also perfect to work in this form. -```python -df >> head(n=10) -head(df, n=10) # same -``` +However, there may be problems with verb calls as arguments of a verb, or a function call with data argument as arguments of a verb. In most cases, they are just fine, but there are ambiguous cases when the functions have optional arguments, and the second argument has the same type annotation as the first one. Because we cannot distinguish whether we should call it regularly or let it return a `Function` object to wait for the data to be piped in. -However, -```python -df >> select(everything()) # works -select(df, everything()) # not working -``` -Since `everything` is registered requiring the first argument to be a data frame. With the regular form, we are not able (or need too much effort) to obtain the data frame, but for the piping form, `pipda` is designed to pass the data piped to the verb and every argument of it. +For example: -The functions registered by `register_func` are supposed to be used as arguments of verbs. 
However, they have to be used with the right signature. For example, `everything` signature has `_data` as the first argument, to be called regularly: ```python -everything(df) -# everything() not working, everything of what? -``` +@register_verb(int) +def add(a: int, b: int): + return a + b -When the functions are registered by `register_func(None, ...)`, which does not require the data argument, they are able to be used in regular form: +@register_func(int) +def incr(x: int, y: int = 3): + return x + y -```python -from datar.core import f -from datar.base import abs -from datar.tibble import tibble -from datar.dplyr import mutate - -df = tibble(x=[-1,-2,-3]) -df >> mutate(y=abs(f.x)) -# x y -# 0 -1 1 -# 1 -2 2 -# 2 -3 3 - -mutate(df, abs(f.x)) # works the same way +add(1, incr(2)) ``` + +In such a case, we don't know whether `incr(2)` should be interpreted as `incr(2, y=3)`, or as `incr(y=2)` waiting for `x` to be piped in. + +The above code will still run and give a result of `6`, but a warning will be shown about the ambiguity. + +To avoid this, use the piping syntax: `1 >> add(incr(2))`, resulting in `4`. Or, if you intend `incr(2, y=3)`, specify a value for `y` explicitly: `add(1, incr(2, 3))`, resulting in `6`, without a warning. + +For more details, see also the [caveats][2] of `pipda`. + +[1]: https://github.com/pwwang/pipda +[2]: https://github.com/pwwang/pipda#caveats diff --git a/docs/porting_rules.md b/docs/porting_rules.md index 1d3d10ef..0c6282ae 100644 --- a/docs/porting_rules.md +++ b/docs/porting_rules.md @@ -14,7 +14,7 @@ For example, `is.integer` is ported as `is_integer`. Argument `.drop` in `group_by` is replaced with `_drop`. -- `datar` specific arguments are named with `_` prefix. For example, `_base0`. +- `datar` specific arguments are named with `_` suffix. For example, `base0_`. - camelCase style named functions are ported with snake_case named functions. @@ -22,7 +22,7 @@ ## Extra arguments -In order to keep some python language features, or extend the APIs a little, a few APIs may come with extra arguments. For example, to allow people to work with 0-indexing, `_base0` argument is added to functions that involve indexing. `how` for `drop_na` is added to allow drop rows of a data frame with `any` or `all` values of in that row. +In order to keep some python language features, or to extend the APIs a little, a few APIs come with extra arguments. For example, to let people work with 0-based indexing, a `base0_` argument is added to functions that involve indexing, and `how_` is added to `drop_na` to allow dropping rows of a data frame when `any` or `all` of the values in that row are missing.
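+
+For example, a minimal sketch (`seq_len` with `base0_` behaves as below; the exact `drop_na` signature may differ, so treat the `how_` calls as illustrative):
+
+```python
+from datar.all import tibble, drop_na, seq_len, NA
+
+seq_len(3)               # [1, 2, 3]: 1-based by default, as in R
+seq_len(3, base0_=True)  # [0, 1, 2]: 0-based, as in python
+
+df = tibble(x=[1, NA, NA], y=[1, 2, NA])
+df >> drop_na()            # drop rows containing any missing value
+df >> drop_na(how_='all')  # assuming `how_` as described above: drop rows
+                           # only when all values in the row are missing
+```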
## `tibble` vs `DataFrame` diff --git a/mkdocs.yml b/mkdocs.yml index e3957f99..2fc66b3d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -48,7 +48,7 @@ nav: - 'Piping vs regular calling': 'piping_vs_regular.md' - 'Indexing/Selection': 'indexing.md' - 'Datasets': 'datasets.md' - - 'API': 'mkapi/api/datar' + - 'Advanced usage': 'advanced.md' - 'Examples': 'across': 'notebooks/across.ipynb' 'add_column': 'notebooks/add_column.ipynb' @@ -111,5 +111,5 @@ nav: 'uncount': 'notebooks/uncount.ipynb' 'unite': 'notebooks/unite.ipynb' 'with_groups': 'notebooks/with_groups.ipynb' - - 'TODO': 'TODO.md' + - 'API': 'mkapi/api/datar' - 'Change Log': CHANGELOG.md diff --git a/pyproject.toml b/pyproject.toml index a5b3902e..5f280e13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "datar" -version = "0.2.3" +version = "0.3.0" description = "Port of dplyr and other related R packages in python, using pipda." authors = ["pwwang "] readme = "README.md" diff --git a/setup.py b/setup.py index e9173174..1a8e5dae 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ setup( long_description=readme, name='datar', - version='0.2.3', + version='0.3.0', description='Port of dplyr and other related R packages in python, using pipda.', python_requires='==3.*,>=3.7.1', project_urls={"homepage": "https://github.com/pwwang/datar", diff --git a/tests/test_base_seq.py b/tests/test_base_seq.py index 4c8f3c28..96d54ef4 100644 --- a/tests/test_base_seq.py +++ b/tests/test_base_seq.py @@ -26,7 +26,7 @@ def test_seq_along(): assert_iterable_equal(seq_along([]), []) assert_iterable_equal(seq_along([1,2]), [1,2]) assert_iterable_equal(seq_along(['a', 'b']), [1,2]) - assert_iterable_equal(seq_along(['a', 'b'], _base0=True), [0,1]) + assert_iterable_equal(seq_along(['a', 'b'], base0_=True), [0,1]) def test_seq_len(caplog): assert_iterable_equal(seq_len([3,4]), [1,2,3]) diff --git a/tests/test_base_string.py b/tests/test_base_string.py index f932f56a..13cfbc4b 100644 --- a/tests/test_base_string.py +++ b/tests/test_base_string.py @@ -18,7 +18,7 @@ def test_is_character(): def test_grep(): out = grep('[a-z]', 'a') assert_iterable_equal(out, [0]) - out = grep('[a-z]', 'a', _base0=False) + out = grep('[a-z]', 'a', base0_=False) assert_iterable_equal(out, [1]) out = grep('[a-z]', 'a', invert=True) assert_iterable_equal(out, []) diff --git a/tests/test_base_which.py b/tests/test_base_which.py index 271864b1..b5127936 100644 --- a/tests/test_base_which.py +++ b/tests/test_base_which.py @@ -5,7 +5,7 @@ def test_which(): assert_iterable_equal(which([True, False, True]), [0,2]) - assert_iterable_equal(which([True, False, True], _base0=False), [1,3]) + assert_iterable_equal(which([True, False, True], base0_=False), [1,3]) def test_which_min(): assert which_min([2,1,3]) == 1 diff --git a/tests/test_core_names.py b/tests/test_core_names.py index 854bb8a2..c0dbdf80 100644 --- a/tests/test_core_names.py +++ b/tests/test_core_names.py @@ -35,7 +35,7 @@ def test_minimal(names, expect): ["__0", "x__1", "__2", "y", "x__4", "__5", "__6"]), ]) def test_unique(names, expect): - assert repair_names(names, repair="unique", _base0=True) == expect + assert repair_names(names, repair="unique", base0_=True) == expect def test_unique_algebraic_y(): x = ["__20", "a__1", "b", "", "a__2", "d"] @@ -92,7 +92,7 @@ def test_unique_algebraic_y(): ["_7","_4","_3","_6","_5","_1","_2","_8"]), ]) def test_universal(names, expect): - assert repair_names(names, repair="universal", _base0=True) == expect + assert repair_names(names, 
repair="universal", base0_=True) == expect def test_check_unique(): diff --git a/tests/test_dplyr_bind.py b/tests/test_dplyr_bind.py index 2474909c..f4cb6cb0 100644 --- a/tests/test_dplyr_bind.py +++ b/tests/test_dplyr_bind.py @@ -45,12 +45,12 @@ def test_bind_col_null(): def test_repair_names(): df = tibble(a = 1, b = 2) - bound = bind_cols(df, df, _base0=True) + bound = bind_cols(df, df, base0_=True) assert bound.columns.tolist() == ['a__0', 'b__1', 'a__2', 'b__3'] t1 = tibble(a=1) t2 = tibble(a=2) - bound = bind_cols(t1, t2, _base0=True) + bound = bind_cols(t1, t2, base0_=True) assert bound.columns.tolist() == ['a__0', 'a__1'] def test_incompatible_size_fill_with_NA(): @@ -179,7 +179,7 @@ def test_create_id_col(): out = df1 >> bind_rows(df2, _id='col') assert out.col.tolist() == [1,1,1,2,2] - out = bind_rows([df1, df2], _id='col', _base0=True) + out = bind_rows([df1, df2], _id='col', base0_=True) assert out.col.tolist() == [0,0,0,1,1] out = bind_rows(None, one=df1, two=df2, _id="col") diff --git a/tests/test_dplyr_filter.py b/tests/test_dplyr_filter.py index c6afc8c7..6ab95f9b 100644 --- a/tests/test_dplyr_filter.py +++ b/tests/test_dplyr_filter.py @@ -1,11 +1,12 @@ # tests grabbed from: # https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-filter.r import numpy -from pandas.core.groupby import groupby -from pipda.function import register_func import pytest from datar.all import * -from datar.datasets import mtcars, iris +from datar.datasets import iris, mtcars +from pandas.core.groupby import groupby +from pipda import register_func + def test_handles_passing_args(): df = tibble(x=range(1,5)) diff --git a/tests/test_dplyr_funs.py b/tests/test_dplyr_funs.py index 2d164652..532ac996 100644 --- a/tests/test_dplyr_funs.py +++ b/tests/test_dplyr_funs.py @@ -166,9 +166,9 @@ def test_near(): # ----------------------------------------------------------------- def test_nth_works_with_lists(): x = [1,2,3] - assert nth(x, 0, _base0=True) == 1 # 0-based - assert pandas.isna(nth(x, 3, _base0=True)) - assert nth(x, 3, default=1, _base0=True) == 1 + assert nth(x, 0, base0_=True) == 1 # 0-based + assert pandas.isna(nth(x, 3, base0_=True)) + assert nth(x, 3, default=1, base0_=True) == 1 assert first(x) == 1 assert last(x) == 3 assert first(x, order_by=[3,2,1]) == 3 @@ -182,9 +182,9 @@ def test_nth_negative_index(): def test_nth_index_past_ends_returns_default_value(): x = [1,2,3,4] - assert pandas.isna(nth(x, 4, _base0=True)) - assert pandas.isna(nth(x, -5, _base0=True)) - assert pandas.isna(nth(x, 10, _base0=True)) + assert pandas.isna(nth(x, 4, base0_=True)) + assert pandas.isna(nth(x, -5, base0_=True)) + assert pandas.isna(nth(x, 10, base0_=True)) def test_nth_errors(): with pytest.raises(TypeError): diff --git a/tests/test_dplyr_join.py b/tests/test_dplyr_join.py index d4fee8c2..2608160f 100644 --- a/tests/test_dplyr_join.py +++ b/tests/test_dplyr_join.py @@ -260,18 +260,18 @@ def test_join_by_dict_not_keep(): def test_nest_join_by_multiple(): df1 = tibble(x=[1,2], y=[3,4]) df2 = tibble(x=[1,2], y=[3,4], z=[5,6]) - out = nest_join(df1, df2, by=['x', 'y']) + out = df1 >> nest_join(df2, by=['x', 'y']) assert out.df2.values[0].equals(tibble(z=5)) assert out.df2.values[1].equals(tibble(z=6)) - out = nest_join(df1, df2, copy=True) + out = df1 >> nest_join(df2, copy=True) assert out.df2.values[0].equals(tibble(z=5)) assert out.df2.values[1].equals(tibble(z=6)) def test_join_by_none(): df1 = tibble(x=[1,2,3], y=[3,4,5]) df2 = tibble(x=[2,3,4], z=[5,6,7]) - out = inner_join(df1, df2, 
keep=True) + out = df1 >> inner_join(df2, keep=True) assert_frame_equal(out, tibble( x_x=[2,3], @@ -280,7 +280,7 @@ def test_join_by_none(): z=[5,6] )) - out = inner_join(df1, df2, keep=False) + out = df1 >> inner_join(df2, keep=False) assert_frame_equal(out, tibble( x=[2,3], y=[4,5], diff --git a/tests/test_dplyr_mutate.py b/tests/test_dplyr_mutate.py index f9fff482..66f11b4e 100644 --- a/tests/test_dplyr_mutate.py +++ b/tests/test_dplyr_mutate.py @@ -1,13 +1,14 @@ # tests grabbed from: # https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-mutate.r -from pipda.function import register_func import pytest -from pandas.testing import assert_frame_equal -from pandas.core.frame import DataFrame from datar.all import * -from datar.core.grouped import DataFrameGroupBy, DataFrameRowwise from datar.core.exceptions import ColumnNotExistingError, DataUnrecyclable -from datar.datasets import mtcars, iris +from datar.core.grouped import DataFrameGroupBy, DataFrameRowwise +from datar.datasets import iris, mtcars +from pandas.core.frame import DataFrame +from pandas.testing import assert_frame_equal +from pipda import register_func + def test_empty_mutate_returns_input(): df = tibble(x=1) diff --git a/tests/test_dplyr_rows.py b/tests/test_dplyr_rows.py index a6b18358..ba913211 100644 --- a/tests/test_dplyr_rows.py +++ b/tests/test_dplyr_rows.py @@ -46,35 +46,35 @@ def test_rows_upsert(data): assert_frame_equal(out, exp) def test_rows_delete(data): - out = rows_delete(data, tibble(a=[2,3]), by="a") + out = data >> rows_delete(tibble(a=[2,3]), by="a") assert_frame_equal(out, data.iloc[[0], :]) with pytest.raises(ValueError, match="delete missing"): - rows_delete(data, tibble(a=[2,3,4]), by="a") + data >> rows_delete(tibble(a=[2,3,4]), by="a") - out = rows_delete(data, tibble(a = [2,3], b = "b"), by = "a") + out = data >> rows_delete(tibble(a = [2,3], b = "b"), by = "a") assert_frame_equal(out, data.iloc[[0], :]) with pytest.raises(ValueError, match="delete missing"): - rows_delete(data, tibble(a = [2,3], b = "b"), by = c("a", "b")) + data >> rows_delete(tibble(a = [2,3], b = "b"), by = c("a", "b")) def test_rows_errors(data): # by must be string or strings with pytest.raises(ValueError, match="must be a string"): - rows_delete(data, tibble(a = [2,3]), by=1) + data >> rows_delete(tibble(a = [2,3]), by=1) # Insert with pytest.raises(ValueError): - rows_insert(data, tibble(a = 3, b = "z")) + data >> rows_insert(tibble(a = 3, b = "z")) with pytest.raises(ValueError): - rows_insert(data.iloc[[0,0], ], tibble(a = 3)) + data.iloc[[0,0], ] >> rows_insert(tibble(a = 3)) with pytest.raises(ValueError): - rows_insert(data, tibble(a = 4, b = "z"), by = "e") + data >> rows_insert(tibble(a = 4, b = "z"), by = "e") with pytest.raises(ValueError): - rows_insert(data, tibble(d = 4)) + data >> rows_insert(tibble(d = 4)) # Update with pytest.raises(ValueError): @@ -86,10 +86,10 @@ def test_rows_errors(data): # Delete and truncate with pytest.raises(ValueError): - rows_delete(data, tibble(a = [2,3,4])) + data >> rows_delete(tibble(a = [2,3,4])) with pytest.raises(ValueError): - rows_delete(data, tibble(a = [2,3], b = "b"), by = c("a", "b")) + data >> rows_delete(tibble(a = [2,3], b = "b"), by = c("a", "b")) # works # rows_delete(data, tibble(a = [2,3])) diff --git a/tests/test_dplyr_select.py b/tests/test_dplyr_select.py index cbf83727..3c9000a1 100644 --- a/tests/test_dplyr_select.py +++ b/tests/test_dplyr_select.py @@ -1,12 +1,13 @@ # tests grabbed from: # 
https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-select.r -from pipda.verb import register_verb -from datar.stats.verbs import set_names -from pandas.core.frame import DataFrame import pytest from datar.all import * -from datar.datasets import mtcars from datar.core.exceptions import ColumnNotExistingError +from datar.datasets import mtcars +from datar.stats.verbs import set_names +from pandas.core.frame import DataFrame +from pipda import register_verb + def test_preserves_grouping(): gf = group_by(tibble(g = [1,2,3], x = [3,2,1]), f.g) diff --git a/tests/test_dplyr_sets.py b/tests/test_dplyr_sets.py index 2354f49a..4d89c7f2 100644 --- a/tests/test_dplyr_sets.py +++ b/tests/test_dplyr_sets.py @@ -79,7 +79,7 @@ def test_set_operations_reconstruct_grouping_metadata(): def test_set_operations_keep_the_ordering_of_the_data(): # test_that("set operations keep the ordering of the data (#3839)", { - rev_df = lambda df: df >> get(rev(seq_len(nrow(df), _base0=True))) + rev_df = lambda df: df >> get(rev(seq_len(nrow(df), base0_=True))) df1 = tibble(x = seq(1,4), g = rep([1,2], each = 2)) df2 = tibble(x = seq(3,6), g = rep([2,3], each = 2)) diff --git a/tests/test_dplyr_slice.py b/tests/test_dplyr_slice.py index d96cc8b1..30aebbab 100644 --- a/tests/test_dplyr_slice.py +++ b/tests/test_dplyr_slice.py @@ -245,9 +245,9 @@ def test_slice_any_checks_for_empty_args_kwargs(): # with pytest.raises(ValueError): # slice_tail(df, 5) with pytest.raises(TypeError): - slice_min(df, n=5) + df >> slice_min(n=5) with pytest.raises(TypeError): - slice_max(df, n=5) + df >> slice_max(n=5) # with pytest.raises(ValueError): # slice_sample(df, 5) diff --git a/tests/test_dplyr_summarise.py b/tests/test_dplyr_summarise.py index 20e8fe04..21cf6d97 100644 --- a/tests/test_dplyr_summarise.py +++ b/tests/test_dplyr_summarise.py @@ -1,17 +1,24 @@ # tests grabbed from: # https://github.com/tidyverse/dplyr/blob/master/tests/testthat/test-summarise.r from tokenize import group -from datar.core.grouped import DataFrameRowwise -from pandas.core.frame import DataFrame -from pandas.testing import assert_frame_equal -from pipda.function import register_func -from datar.core.contexts import Context + import pytest from datar.all import * +from datar.core.contexts import Context +from datar.core.exceptions import ( + ColumnNotExistingError, + DataUnrecyclable, + NameNonUniqueError +) +from datar.core.grouped import DataFrameRowwise from datar.datasets import mtcars -from datar.core.exceptions import ColumnNotExistingError, DataUnrecyclable, NameNonUniqueError +from pandas.core.frame import DataFrame +from pandas.testing import assert_frame_equal +from pipda import register_func + from .conftest import assert_iterable_equal + def test_freshly_create_vars(): df = tibble(x=range(1,11)) out = summarise(df, y=mean(f.x), z=f.y+1) @@ -33,7 +40,7 @@ def test_input_recycled(): ) >> group_by(f.a) assert df1.equals(df2) - df1 = gf >> summarise(x = seq_len(f.a, _base0=True), y = 1) + df1 = gf >> summarise(x = seq_len(f.a, base0_=True), y = 1) df2 = tibble(a = c(1, 2, 2), x = c(0, 0, 1), y = 1) >> group_by(f.a) # assert df1.equals(df2) assert_frame_equal(df1, df2) diff --git a/tests/test_tibble_verbs.py b/tests/test_tibble_verbs.py index a28bfdef..ce6a9d27 100644 --- a/tests/test_tibble_verbs.py +++ b/tests/test_tibble_verbs.py @@ -102,8 +102,9 @@ def test_error_if_adding_row_with_unknown_variables(): add_row(tibble(a=3), b="err", c="oops") def test_add_rows_to_nondf(): - with pytest.raises(NotImplementedError): - add_row(1) + # 
with pytest.raises(NotImplementedError): + out = add_row(1) + assert hasattr(out, '_pipda_eval') def test_can_add_multiple_rows(): df = tibble(a=3) @@ -415,7 +416,7 @@ def test_column_to_rownames(caplog): # expect_false(has_name(res, var)) mtcars1 = mtcars.copy() - mtcars1['num'] = rev(seq_len(nrow(mtcars), _base0=True)) + mtcars1['num'] = rev(seq_len(nrow(mtcars), base0_=True)) res0 = rownames_to_column(mtcars1) res = column_to_rownames(res0, var="num") assert caplog.text == '' diff --git a/tests/test_tidyr_chop.py b/tests/test_tidyr_chop.py index 14e28608..bd44304d 100644 --- a/tests/test_tidyr_chop.py +++ b/tests/test_tidyr_chop.py @@ -47,12 +47,12 @@ def test_chop_with_all_column_keys(): def test_unchop_extends_into_rows(): df = tibble(x = [1, 2], y = [NULL, seq(1, 4)]) - out = df >> unchop(f.y, dtypes=int) + out = df >> unchop(f.y, ptype=int) assert_frame_equal(out, tibble(x=[2,2,2,2], y=[1,2,3,4])) def test_can_unchop_multiple_cols(): df = tibble(x=[1,2], y=[[1], [2,3]], z=[[4], [5,6]]) - out = df >> unchop(c(f.y, f.z), dtypes=int) + out = df >> unchop(c(f.y, f.z), ptype=int) assert_frame_equal(out, tibble( x=[1,2,2], y=[1,2,3], @@ -69,7 +69,7 @@ def test_unchopping_null_inputs_are_dropped(): y = [NULL, [1,2], 4, NULL], z = [NULL, [1,2], NULL, 5] ) - out = df >> unchop(c(f.y, f.z), dtypes=float) + out = df >> unchop(c(f.y, f.z), ptype=float) assert_frame_equal(out, tibble( x=[2,2,3,4], y=[1,2,4,NA], @@ -119,7 +119,7 @@ def test_unchop_empty_list(): def test_unchop_recycles_size_1_inputs(): df = tibble(x=[[1], [2,3]], y=[[2,3], [1]]) - out = unchop(df, [f.x, f.y], dtypes=int) + out = unchop(df, [f.x, f.y], ptype=int) exp = tibble(x=[1,2,3], y=[2,3,1]) # exp = tibble(x=[1,1,2,3], y=[2,3,1,1]) assert_frame_equal(out, exp) @@ -130,7 +130,7 @@ def test_unchop_can_specify_dtypes(): # No extra columns added exp = tibble(x=[1,1], y=[1,2]) # exp = tibble(x=[1,1], y=[1,2], z=[NA,NA]) - out = unchop(df, f.y, dtypes=dtypes) + out = unchop(df, f.y, ptype=dtypes) assert_frame_equal(out, exp) # test_that("can specify a ptype with extra columns", { @@ -144,7 +144,7 @@ def test_unchop_can_specify_dtypes(): def test_unchop_can_specify_dtypes_to_force_output_type(): df = tibble(x=[[1,2]]) - out = unchop(df, f.x, dtypes=float) + out = unchop(df, f.x, ptype=float) exp = tibble(x=[1.0,2.0]) assert_frame_equal(out, exp)
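For reference, a minimal sketch of the `ptype` argument (renamed from `dtypes` in the chop tests above); the behavior is inferred from those tests rather than from the full `unchop` API:

```python
from datar.all import tibble, unchop, f

# a list-column with one value per row in `x`
df = tibble(x=[1, 2], y=[[1], [2, 3]])

# `ptype` forces the dtype of the unchopped column(s)
out = df >> unchop(f.y, ptype=float)
# x y
# 0 1 1.0
# 1 2 2.0
# 2 2 3.0
```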