From b1d3786527ba71af47d8643bdff93d3ebd949bfd Mon Sep 17 00:00:00 2001 From: kinegratii Date: Tue, 24 Mar 2020 20:13:22 +0800 Subject: [PATCH 01/12] :sparkles: Add calendars,utils module --- borax/calendars/utils.py | 12 ++++++++++++ tests/test_calendars.py | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 borax/calendars/utils.py create mode 100644 tests/test_calendars.py diff --git a/borax/calendars/utils.py b/borax/calendars/utils.py new file mode 100644 index 0000000..673178c --- /dev/null +++ b/borax/calendars/utils.py @@ -0,0 +1,12 @@ +# coding=utf8 +import calendar +from datetime import date, datetime + + +def get_last_day_of_this_month(year: int, month: int) -> date: + return date(year, month, calendar.monthrange(year, month)[-1]) + + +def get_fist_day_of_year_week(year: int, week: int) -> date: + fmt = '{}-W{}-1'.format(year, week) + return datetime.strptime(fmt, "%Y-W%W-%w").date() diff --git a/tests/test_calendars.py b/tests/test_calendars.py new file mode 100644 index 0000000..4da7615 --- /dev/null +++ b/tests/test_calendars.py @@ -0,0 +1,18 @@ +# coding=utf8 + + +import unittest +from datetime import date + +from borax.calendars.utils import get_last_day_of_this_month, get_fist_day_of_year_week + + +class LastDayTestCase(unittest.TestCase): + def test_last_day(self): + self.assertEqual(date(2019, 3, 31), get_last_day_of_this_month(2019, 3)) + self.assertEqual(date(2019, 2, 28), get_last_day_of_this_month(2019, 2)) + self.assertEqual(date(2020, 2, 29), get_last_day_of_this_month(2020, 2)) + + def test_fist_day_of_week(self): + self.assertEqual(date(2020, 2, 24), get_fist_day_of_year_week(2020, 9)) + self.assertEqual(date(2020, 1, 6), get_fist_day_of_year_week(2020, 1)) From b34691d8ad59582555b74361c6e9b680126c5c50 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Fri, 1 May 2020 13:40:22 +0800 Subject: [PATCH 02/12] :sparkles: Add function format_percentage --- borax/structures/percentage.py | 14 +++++++++++--- docs/changelog.md 
| 9 ++++++++- tests/test_percentage.py | 2 +- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/borax/structures/percentage.py b/borax/structures/percentage.py index f53e445..b801ae8 100644 --- a/borax/structures/percentage.py +++ b/borax/structures/percentage.py @@ -1,19 +1,27 @@ # coding=utf8 +def format_percentage(numerator: int, denominator: int, *, places: int = 2, null_val: str = '-') -> str: + if denominator == 0: + return null_val + percent_fmt = '{0:. f}%'.replace(' ', str(places)) + val = round(numerator / denominator, places + 2) + return percent_fmt.format(val * 100) + + class Percentage: """ Percentage(completed=0, total=100, places=2,) """ def __init__(self, *, total: int = 100, completed: int = 0, places: int = 2, - display_fmt: str = '{completed} / {total}'): + display_fmt: str = '{completed} / {total}', null_val: str = '-'): self.total = total self.completed = completed self._display_fmt = display_fmt self._places = places # string.format will fails here - self._percent_fmt = '{0:. 
f}%'.replace(' ', str(self._places)) + self._null_val = null_val def increase(self, value: int = 1) -> None: self.completed += value @@ -30,7 +38,7 @@ def percent(self) -> float: @property def percent_display(self) -> str: - return self._percent_fmt.format(self.percent * 100) + return format_percentage(self.completed, self.total, places=self._places, null_val=self._null_val) @property def display(self) -> str: diff --git a/docs/changelog.md b/docs/changelog.md index ef03c65..9a19ab1 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,13 @@ # 更新日志 -## v3.1.0 +## v3.2.0 + +- 新增 `borax.calendars.utils` 模块 +- `borax.structures.percentage` 模块 + - 新增 `format_percentage` 函数 + - 当 total 为 0 ,显示为 `'-'` ,而不是 `'0.00%'` + +## v3.1.0 (20200118) > 新增 Python3.8构建 diff --git a/tests/test_percentage.py b/tests/test_percentage.py index 2c5fa05..145320f 100644 --- a/tests/test_percentage.py +++ b/tests/test_percentage.py @@ -29,7 +29,7 @@ def test_zero_total(self): self.assertEqual(0, p.total) self.assertEqual(34, p.completed) self.assertAlmostEqual(0, p.percent) - self.assertEqual('0.00%', p.percent_display) + self.assertEqual('-', p.percent_display) self.assertEqual('34 / 0', p.display) self.assertDictEqual({ 'total': 0, From c10f45f64fff81b8fb05cdaaf0f211c3d3386da9 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Fri, 1 May 2020 22:08:51 +0800 Subject: [PATCH 03/12] :zap: make borax,fetch DeprecationWarning --- borax/fetch.py | 2 +- docs/changelog.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/borax/fetch.py b/borax/fetch.py index 3316167..ae79796 100644 --- a/borax/fetch.py +++ b/borax/fetch.py @@ -6,5 +6,5 @@ warnings.warn( 'This module is deprecated and will be removed in V3.3.Use borax.datasets.fetch instead.', - category=PendingDeprecationWarning + category=DeprecationWarning ) diff --git a/docs/changelog.md b/docs/changelog.md index 9a19ab1..9524996 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -6,6 +6,8 @@ - 
`borax.structures.percentage` 模块 - 新增 `format_percentage` 函数 - 当 total 为 0 ,显示为 `'-'` ,而不是 `'0.00%'` +- `borax.fetch` 模块 + - 本模块被标记为 DeprecationWarning ,将在V3.3移除 ## v3.1.0 (20200118) From 9f94c43ce49cdc92dc8b77b6e01a2ad763957db5 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sat, 2 May 2020 09:02:44 +0800 Subject: [PATCH 04/12] :boom: new join_one API --- borax/datasets/join_.py | 25 ++++++++++++++++++++++++- tests/test_join.py | 34 +++++++++++++++++++++++++++++----- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/borax/datasets/join_.py b/borax/datasets/join_.py index 484cc8b..404e7e9 100644 --- a/borax/datasets/join_.py +++ b/borax/datasets/join_.py @@ -1,7 +1,30 @@ # coding=utf8 +import operator -def join_one(data_list, values, from_, as_, default=None): + +def join_one(ldata, rdata, on, select_as, default=None): + if isinstance(rdata, (list, tuple)): + rdata = dict(rdata) + if not isinstance(rdata, dict): + raise TypeError("Unsupported Type for values param.") + + if isinstance(on, str): + lic = operator.itemgetter(on) + elif callable(on): + lic = on + else: + raise TypeError('str or callable only supported for on param. 
') + + for litem in ldata: + if not (isinstance(on, str) and on not in litem): + lv = lic(litem) + rvv = rdata.get(lv, default) + litem[select_as] = rvv + return ldata + + +def old_join_one(data_list, values, from_, as_, default=None): if isinstance(values, (list, tuple)): values = dict(values) if not isinstance(values, dict): diff --git a/tests/test_join.py b/tests/test_join.py index d6b6b5e..cb0d68b 100644 --- a/tests/test_join.py +++ b/tests/test_join.py @@ -3,13 +3,14 @@ import copy import unittest -from borax.datasets.join_ import join_one, join +from borax.datasets.join_ import join_one, old_join_one, join catalogs_dict = { 1: 'Python', 2: 'Java', 3: '软件工程' } +catalog_choices = [(1, 'Python'), (2, 'Java'), (3, '软件工程')] catalogs_list = [ {'id': 1, 'name': 'Python'}, {'id': 2, 'name': 'Java'}, @@ -25,9 +26,23 @@ class JoinOneTestCase(unittest.TestCase): - def test_join_one(self): + def test_with_dict(self): book_data = copy.deepcopy(books) - catalog_books = join_one(book_data, catalogs_dict, from_='catalog', as_='catalog_name') + catalog_books = old_join_one(book_data, catalogs_dict, from_='catalog', as_='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + catalog_books = join_one(book_data, catalogs_dict, on='catalog', select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + def test_with_choices(self): + book_data = copy.deepcopy(books) + catalog_books = old_join_one(book_data, catalog_choices, from_='catalog', as_='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + catalog_books = join_one(book_data, catalog_choices, on='catalog', select_as='catalog_name') self.assertTrue(all(['catalog_name' in book for book in catalog_books])) 
self.assertEqual('Java', catalog_books[1]['catalog_name']) @@ -38,7 +53,11 @@ def test_join_one_with_default(self): 2: 'Java' } - catalog_books = join_one(book_data, cur_catalogs_dict, from_='catalog', as_='catalog_name') + catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual(None, catalog_books[2]['catalog_name']) + + catalog_books = old_join_one(book_data, cur_catalogs_dict, from_='catalog', as_='catalog_name') self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual(None, catalog_books[2]['catalog_name']) @@ -48,8 +67,13 @@ def test_join_one_with_custom_default(self): 1: 'Python', 2: 'Java' } + catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name', + default='[未知分类]') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('[未知分类]', catalog_books[2]['catalog_name']) - catalog_books = join_one(book_data, cur_catalogs_dict, from_='catalog', as_='catalog_name', default='[未知分类]') + catalog_books = old_join_one(book_data, cur_catalogs_dict, from_='catalog', as_='catalog_name', + default='[未知分类]') self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual('[未知分类]', catalog_books[2]['catalog_name']) From b89e9c6bf957338546083815dc5992b7760dee5d Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sat, 2 May 2020 20:51:24 +0800 Subject: [PATCH 05/12] :boom: new join API --- borax/datasets/dict_datasets.py | 18 ---------- borax/datasets/join_.py | 60 ++++++++++++++++++++++++++++++++- tests/test_join.py | 5 +-- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/borax/datasets/dict_datasets.py b/borax/datasets/dict_datasets.py index 669f724..fcabb79 100644 --- a/borax/datasets/dict_datasets.py +++ b/borax/datasets/dict_datasets.py @@ -1,9 +1,6 @@ # coding=utf8 -from borax.datasets.join_ 
import join_one, join - - class DictDataset: def __init__(self, data, primary_field=None): self._data = [] @@ -18,18 +15,3 @@ def data(self): def __iter__(self): for item in self.data: yield item - - def join(self, values, from_, to_, as_args=None, as_kwargs=None): - join( - self._data, - values=values, - from_=from_, - to_=to_, - as_args=as_args, - as_kwargs=as_kwargs, - ) - return self - - def join_one(self, values, from_, as_): - join_one(self._data, values=values, from_=from_, as_=as_) - return self diff --git a/borax/datasets/join_.py b/borax/datasets/join_.py index 404e7e9..2c4c837 100644 --- a/borax/datasets/join_.py +++ b/borax/datasets/join_.py @@ -24,6 +24,64 @@ def join_one(ldata, rdata, on, select_as, default=None): return ldata +def _sf(val): + """Build a SelectField from val + """ + if isinstance(val, str): + return val, val, None + elif isinstance(val, (list, tuple)): + l = len(val) + if l == 1: + return val[0], val[0], None + elif l == 2: + return val[0], val[1], None + else: + return tuple(val[0:3]) + + +def _parse_on(val): + if isinstance(val, str): + return (val, val), + if isinstance(val, (list, tuple)): + def _ep(_v): + if isinstance(_v, str): + return _v, _v + else: + return _v + + return tuple(map(_ep, val)) + + +def join(ldata, rdata, on, select_as): + if isinstance(on, (list, tuple, str)): + lfields, rfields = zip(*_parse_on(on)) + on_callback = lambda _li, _ri: operator.itemgetter(*lfields)(_li) == operator.itemgetter(*rfields)(_ri) + elif callable(on): + on_callback = on + else: + raise TypeError('str or callable only supported for on param. 
') + + if isinstance(select_as, str): + select_as = select_as, + sf_list = list(map(_sf, select_as)) + + def _pick_data(_item, _sfs): + result = {} + for rk, lk, defv in _sfs: + result[lk] = _item.get(rk, defv) + return result + + for litem in ldata: + for ritem in rdata: + if on_callback(litem, ritem): + _ri = ritem + break + else: + _ri = {} + litem.update(_pick_data(_ri, sf_list)) + return ldata + + def old_join_one(data_list, values, from_, as_, default=None): if isinstance(values, (list, tuple)): values = dict(values) @@ -40,7 +98,7 @@ def old_join_one(data_list, values, from_, as_, default=None): return data_list -def join(data_list, values, from_, to_, as_args=None, as_kwargs=None): +def old_join(data_list, values, from_, to_, as_args=None, as_kwargs=None): as_args = as_args or [] as_kwargs = as_kwargs or {} as_fields = {**{a: a for a in as_args}, **as_kwargs} diff --git a/tests/test_join.py b/tests/test_join.py index cb0d68b..22f0828 100644 --- a/tests/test_join.py +++ b/tests/test_join.py @@ -3,7 +3,7 @@ import copy import unittest -from borax.datasets.join_ import join_one, old_join_one, join +from borax.datasets.join_ import join_one, old_join_one, join, old_join catalogs_dict = { 1: 'Python', @@ -81,6 +81,7 @@ def test_join_one_with_custom_default(self): class JoinTestCase(unittest.TestCase): def test_as_kwargs(self): book_data = copy.deepcopy(books) - catalog_books = join(book_data, catalogs_list, from_='catalog', to_='id', as_kwargs={'name': 'catalog_name'}) + catalog_books = old_join(book_data, catalogs_list, from_='catalog', to_='id', + as_kwargs={'name': 'catalog_name'}) self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual('Java', catalog_books[1]['catalog_name']) From 51e6b2ae0583137f3b71dd3304ac767b081131d4 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sat, 2 May 2020 20:52:24 +0800 Subject: [PATCH 06/12] :pencil: update docs for join_ module. 
--- docs/guides/join.md | 199 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 192 insertions(+), 7 deletions(-) diff --git a/docs/guides/join.md b/docs/guides/join.md index cde505c..775f20b 100644 --- a/docs/guides/join.md +++ b/docs/guides/join.md @@ -2,10 +2,36 @@ > 模块 `borax.datasets.join_` -本模块实现了类似于数据库的 JOIN 数据列表操作,从另一个数据集获取某一个或几个列的值。 +> Changed in V3.2.0 + +## 重要说明 + +从V3.2.0开始,我们重写 `join` 和 `join_one` ,原有的函数分别重命名为 `old_join` 和 `old_join_one` ,主要变化: + +- 使用符合SQL的参数命名,比如 on、select_as 等。 +- 原有比较分散的参数进行合并。 +- 支持回调函数。 + +如果不想使用新版本API,将下面的引入语句 + +```python +from borax.datasets.join_ import join, join_one +``` + +修改为 + +```python +from borax.datasets.join_ import old_join as join, old_join_one as join_one +``` + +如果想将旧API修改新的API,请参见下面的 *使用迁移* 一节。 ## 概述 +本模块实现了类似于数据库的 JOIN 数据列表操作,从另一个数据集获取某一个或几个列的值。 + +> 本模块的 *join_* 函数将会修改传入的列表数据,如需不影响原有数据,可以提前复制一份数据。 + 本模块示例所用的数据描述如下: 图书清单 @@ -40,12 +66,106 @@ catalogs_list = [ ] ``` +## API + +### join_one + +*`join_one(ldata, rdata, on, select_as, default=None)`* + +从右边数据获取一个字段的值,加到左边数据集。 + +各参数定义如下: + +| 参数 | 类型 | 说明 | +| --------- | -------------- | -------------------------------- | +| ldata | List[Dict] | 左边数据集 | +| rdata | Dict / List | 右边数据集 | +| on | str / callable | 使用左边的连接字段,支持回调函数 | +| select_as | str | 右边数据在结果的字段名称 | +| default | Any | 右边数据集找不到时的默认值 | + +备注: + +- `rdata`:该参数标准类型为 `Dict` 。但同时也支持 `List` ,Borax 将使用 `dict(list_obj)` 的方式进行转化。 + +- 当 `on` 参数为函数时,接收 `litem` 参数,表示左边数据集的当前记录,返回连接值。当 `on` 为字符串时,Borax 将其转化为对应的函数。 + +```python +# 以下两种定义方式是等效的 +on = 'catalog' + +def on(litem): + return litem['catalog'] +``` + +在例子中,实现从 `catalog_dict` 将书本类别名称加到 `books` ,可以使用以下的定义方式。 + +```python +join_one(books, catalog_dict, on='catalog', select_as='catalog_name') +``` + + + +### join + +*`join(ldata, rdata, on, select_as, defaults=None)`* + +实现左、右数据集的连接。 + +| 参数 | 类型 | 说明 | +| --------- | ----------------------------------------- | -------------------------------- | +| ldata | List[Dict] | 左边数据集 | +| rdata | 
List[Dict] | 右边数据集 | +| on | str / Tuple[Union[str, tuple]] / Callable | 使用左边的连接字段,支持回调函数 | +| select_as | str | 右边数据在结果的字段名称 | +| default | Any | 右边数据集找不到时的默认值 | + +备注: + +- 当`on` 参数为函数时,定义如下 ,返回是否匹配。 + +```python +def on_callback(litem:dict, ritem:dict) -> bool: + pass +``` + +- 当 `on` 为字段配置时,其类型为 `Tuple[Union[str, Tuple]]`。 该定义了一个元组,元组的每个元素又是由2个字符串组成的元组。通用格式如下: + +```python + ( + (left_key1, right_key1), + (left_key2, right_key2), + # ... + ) +``` + + 表示 `left_item[left_key1] == right_item[right_key1] and left_item[left_key2] == right_item[right_key2]`。 + +当某一个条件左右两边的key相同时,内部的元组可以省略为一个字符串。 + +```python +# 以下两种方式是相同的。 +on = (('x', 'x'), ('y', 'y')) + +on = ('x', 'y') +``` + +当只有一个条件时,还可以继续省略外层的元组,只定义一个字符串即可。以下三种是等效的。 + +```python +on = 'x' +on = ('x',) +on = (('x', 'x'),) +``` -## join_one方法 -*`join_one(data_list, values, from_, as_, default=None)`* + + +### old_join_one + +*`old_join_one(data_list, values, from_, as_, default=None)`* > V3.1 新增default参数。 @@ -54,7 +174,7 @@ catalogs_list = [ 从 `catalogs_dict` 获取类别名称并按照 catalogs.id 分组填充至 `books` 。 ```python -catalog_books = join_one(books, catalogs_dict, from_='catalog', as_='catalog_name') +catalog_books = old_join_one(books, catalogs_dict, from_='catalog', as_='catalog_name') ``` 输出 @@ -71,16 +191,16 @@ catalog_books = join_one(books, catalogs_dict, from_='catalog', as_='catalog_nam -## join方法 +### old_join -*`join(data_list, values, from_, to_, as_args=None, as_kwargs=None):`* +*`old_join(data_list, values, from_, to_, as_args=None, as_kwargs=None):`* 从字典读取多个列的值。 示例1 ```python -catalog_books = join( +catalog_books = old_join( books, catalogs_list, from_='catalog', @@ -101,3 +221,68 @@ catalog_books = join( ] ``` +## 使用示例 + +使用示例 + +```python +points = [ + {'x': 1, 'y': 1, 'val': 34}, + {'x': 2, 'y': 2, 'val': 34}, + {'x': 4, 'y': 4, 'val': 34}, +] + +links = [ + {'x': 1, 'y': 1, 'val': 56, 'link1': 23, 'link2': 102}, + {'x': 2, 'y': 2, 'val': 78, 'link1': 45, 'link2': 345}, + {'x': 3, 'y': 4, 'val': 25, 'link1': 90, 'link2': 456}, +] +``` + +## 使用迁移 + +### 迁移join_one + +`join_one` 
函数迁移比较简单,只需要参数重命名即可。 + +- `from_` 改为 `on` +- `as_` 改为 `select_as` + +如果调用时不使用关键字方式,可以不作任何改变。 + +### 迁移join + +第一步,原有的 `from_` 和 `to_` 合并为 `on` 参数,只要把旧版的两个参数合并为一个元组,传给 `on`。 + +如果`from_` 和 `to_` 是一样的,只要将该字符串传给 `on` 即可。 + +```python +old_join(from_='foo', to_='bar') +# 转化为 +join(on=('foo','bar')) + +old_join(from_='foo', to_='foo') +# 转化为 +join(on='foo') +``` + +第二步, `as_args` 和 `as_kwargs` 合并为 `select_as` ,转化方式如下: + +```python +select_as = as_args + as_kwargs.items() +``` + +例如: + +```python +old_join(as_args=['Xxx', 'Yyy'], as_kwargs={'Zzz':'cZzz', 'Www':'cWww'}) +join(select_as=( + 'Xxx', + 'Yyy', + ('Zzz', 'cZzz'), + ('Www', 'cWww') +)) +``` + + + From a9e411e25f1bcd2e964b38c64a05ebf0976dbd2c Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sun, 3 May 2020 13:43:42 +0800 Subject: [PATCH 07/12] :sparkles: new functions for uitls --- borax/structures/percentage.py | 2 ++ borax/utils.py | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/borax/structures/percentage.py b/borax/structures/percentage.py index b801ae8..9b6037d 100644 --- a/borax/structures/percentage.py +++ b/borax/structures/percentage.py @@ -38,10 +38,12 @@ def percent(self) -> float: @property def percent_display(self) -> str: + """percent format string like '12.34%' """ return format_percentage(self.completed, self.total, places=self._places, null_val=self._null_val) @property def display(self) -> str: + """return a fractor like '34 / 100'""" return self._display_fmt.format(completed=self.completed, total=self.total) def as_dict(self, prefix='') -> dict: diff --git a/borax/utils.py b/borax/utils.py index 7a400d6..9f717aa 100644 --- a/borax/utils.py +++ b/borax/utils.py @@ -90,3 +90,12 @@ def flatten(iterable): yield from flatten(el) else: yield el + + +def force_list(val, sep=','): + if isinstance(val, (list, set, tuple)): + return val + elif isinstance(val, str): + return val.split(sep) + else: + return val, From a2dae6bcfe35379dfa1c006fcb8a00c2ff9e1ee8 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sun, 3 May 2020 
14:54:27 +0800 Subject: [PATCH 08/12] :pencil: Update docs --- borax/datasets/join_.py | 4 ++-- borax/structures/percentage.py | 10 +++++++++- docs/changelog.md | 6 ++++++ docs/guides/join.md | 28 ++++++++++++++++++++++------ docs/guides/percentage.md | 33 ++++++++++++++++++++++++++++----- 5 files changed, 67 insertions(+), 14 deletions(-) diff --git a/borax/datasets/join_.py b/borax/datasets/join_.py index 2c4c837..d0bb2d8 100644 --- a/borax/datasets/join_.py +++ b/borax/datasets/join_.py @@ -39,7 +39,7 @@ def _sf(val): return tuple(val[0:3]) -def _parse_on(val): +def _of(val): if isinstance(val, str): return (val, val), if isinstance(val, (list, tuple)): @@ -54,7 +54,7 @@ def _ep(_v): def join(ldata, rdata, on, select_as): if isinstance(on, (list, tuple, str)): - lfields, rfields = zip(*_parse_on(on)) + lfields, rfields = zip(*_of(on)) on_callback = lambda _li, _ri: operator.itemgetter(*lfields)(_li) == operator.itemgetter(*rfields)(_ri) elif callable(on): on_callback = on diff --git a/borax/structures/percentage.py b/borax/structures/percentage.py index 9b6037d..746a5b7 100644 --- a/borax/structures/percentage.py +++ b/borax/structures/percentage.py @@ -42,10 +42,15 @@ def percent_display(self) -> str: return format_percentage(self.completed, self.total, places=self._places, null_val=self._null_val) @property - def display(self) -> str: + def fraction_display(self): """return a fractor like '34 / 100'""" return self._display_fmt.format(completed=self.completed, total=self.total) + @property + def display(self) -> str: + """old alias name for fraction_display'""" + return self.fraction_display + def as_dict(self, prefix='') -> dict: return { prefix + 'total': self.total, @@ -62,3 +67,6 @@ def generate(self, char_total=100) -> str: '░' * (char_total - char_completed), self.percent * 100 ) + + def __str__(self): + return ''.format(self.display, self.percent_display) diff --git a/docs/changelog.md b/docs/changelog.md index 9524996..bbbe1e4 100644 --- 
a/docs/changelog.md +++ b/docs/changelog.md @@ -2,9 +2,15 @@ ## v3.2.0 +> 本版本重写 `borax.datasets.join_` 模块,接口引入重大变更,详情查看 [join模块](guides/join) 。 + +- `borax.datasets.join_`模块 + - 重写 `join` 和 `join_one` 函数,原有的重命名为 `old_join` 和 `old_join_one` + - 原有的 `old_*` 将在V4.0版本移除。 - 新增 `borax.calendars.utils` 模块 - `borax.structures.percentage` 模块 - 新增 `format_percentage` 函数 + - 类 `Percentage` 新增 `fraction_display` 属性 - 当 total 为 0 ,显示为 `'-'` ,而不是 `'0.00%'` - `borax.fetch` 模块 - 本模块被标记为 DeprecationWarning ,将在V3.3移除 diff --git a/docs/guides/join.md b/docs/guides/join.md index 775f20b..c2cb79d 100644 --- a/docs/guides/join.md +++ b/docs/guides/join.md @@ -28,9 +28,11 @@ from borax.datasets.join_ import old_join as join, old_join_one as join_one ## 概述 -本模块实现了类似于数据库的 JOIN 数据列表操作,从另一个数据集获取某一个或几个列的值。 +本模块实现了类似于数据库的 LEFT JOIN 数据列表操作,从另一个数据集获取某一个或几个列的值,加到当前数据集中。 -> 本模块的 *join_* 函数将会修改传入的列表数据,如需不影响原有数据,可以提前复制一份数据。 +> **关于LEFT JOIN** :LEFT JOIN返回左表的全部行和右表满足ON条件的行,如果左表的行在右表中没有匹配,那么这一行右表中对应数据用NULL代替。 + +本模块的 *join_* 函数将会修改传入的列表数据,如需不影响原有数据,可以提前复制一份数据。 本模块示例所用的数据描述如下: @@ -108,7 +110,7 @@ join_one(books, catalog_dict, on='catalog', select_as='catalog_name') ### join -*`join(ldata, rdata, on, select_as, defaults=None)`* +*`join(ldata, rdata, on, select_as)`* 实现左、右数据集的连接。 @@ -117,12 +119,12 @@ join_one(books, catalog_dict, on='catalog', select_as='catalog_name') | ldata | List[Dict] | 左边数据集 | | rdata | List[Dict] | 右边数据集 | | on | str / Tuple[Union[str, tuple]] / Callable | 使用左边的连接字段,支持回调函数 | -| select_as | str | 右边数据在结果的字段名称 | -| default | Any | 右边数据集找不到时的默认值 | +| select_as | str / List[Tuple] | 右边数据在结果的字段名称 | 备注: -- 当`on` 参数为函数时,定义如下 ,返回是否匹配。 +- 和 `join_one` 相比,没有显式的 `default` 参数,默认值可以在 `select_as` 参数中配置。 +- 当`on` 参数为回调函数时,定义如下 ,返回是否匹配。 ```python def on_callback(litem:dict, ritem:dict) -> bool: @@ -158,9 +160,23 @@ on = ('x',) on = (('x', 'x'),) ``` +- `select_as` 采用配置型参数,标准格式为: + +```python +( + (right_key, left_key, default), + (right_key, left_key), # 省略默认值 + (right_key,), + right_key, + ... 
+) +``` +元组元素的三个值分别表示右边数据字段名称、左边数据字段名称、默认值。和 `on` 参数类似,也可以依次省略后面两个内容。 +如果只选择一个字段,也可以省略外层的元组符号,直接使用字符串即可。 +## 旧版API ### old_join_one diff --git a/docs/guides/percentage.md b/docs/guides/percentage.md index 00f5136..48da30a 100644 --- a/docs/guides/percentage.md +++ b/docs/guides/percentage.md @@ -2,15 +2,33 @@ > 模块: `borax.structures.percentage` -## 创建数据对象 + + +## 模块方法 + +### format_percentage + +> Add in V3.2.0 + +``` +format_percentage(numerator: int, denominator: int, *, places: int = 2, null_val: str = '-') -> str +``` + +返回百分数。 + +## Percentage类 + +### 定义 该模块仅定义一个 `Percentage` 类,表示具体的百分比数据。类 `__init__` 函数定义如下: ```python -def __init__(self, *, total=100, completed=0, places=2, display_fmt='{completed} / {total}'): +def __init__(self, *, total=100, completed=0, places=2, display_fmt='{completed} / {total}', null_val:str='-'): pass ``` +> Changed in V3.2.0: 新增 null_val 参数。 + 各参数意义如下: | 参数 | 数据类型 | 意义 | @@ -20,7 +38,7 @@ def __init__(self, *, total=100, completed=0, places=2, display_fmt='{completed} | places | int | 百分比的小数点,如 place=2,时显示为 34.56% | | display_fmt | string | 显示格式字符串,可用变量:total, completed | -## 数据属性 +### 数据属性 `Percentage` 包含了一系列的数据属性。 @@ -39,9 +57,14 @@ p = Percentage(total=100, completed=34) | completed | int | 完成数目 | `34` | | percent | float | 百分比数值 | `0.34` | | percent_display | string | 百分比字符串 | `'34.00%'` | -| display | string | 进度字符串 | `'34 / 100'`| +| fraction_display | string | 分数字符串 | `'34 / 100'`| +| display | string | 分数字符串 | `'34 / 100'`| + +备注: + +- `fraction_display` 为 V3.2.0 新增。 -## 方法 +### 方法 - **`increase(value=1)`** From f41a3891fb09fcb86216cab3aa7caf78bb6a9b89 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Tue, 5 May 2020 20:29:59 +0800 Subject: [PATCH 09/12] :rotating_light: Fix for flake8 & nose2 --- borax/datasets/join_.py | 10 ++++++---- pyproject.toml | 2 +- tests/test_calendars.py | 2 +- tests/test_percentage.py | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/borax/datasets/join_.py 
b/borax/datasets/join_.py index d0bb2d8..74aff7e 100644 --- a/borax/datasets/join_.py +++ b/borax/datasets/join_.py @@ -30,10 +30,10 @@ def _sf(val): if isinstance(val, str): return val, val, None elif isinstance(val, (list, tuple)): - l = len(val) - if l == 1: + le = len(val) + if le == 1: return val[0], val[0], None - elif l == 2: + elif le == 2: return val[0], val[1], None else: return tuple(val[0:3]) @@ -55,7 +55,9 @@ def _ep(_v): def join(ldata, rdata, on, select_as): if isinstance(on, (list, tuple, str)): lfields, rfields = zip(*_of(on)) - on_callback = lambda _li, _ri: operator.itemgetter(*lfields)(_li) == operator.itemgetter(*rfields)(_ri) + + def on_callback(_li, _ri): + return operator.itemgetter(*lfields)(_li) == operator.itemgetter(*rfields)(_ri) elif callable(on): on_callback = on else: diff --git a/pyproject.toml b/pyproject.toml index ee09d0e..9368e8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "borax" -version = "3.0.1" +version = "3.2.0" description = "A util collections for Python3." 
readme = "long_description.rst" authors = ["kinegratii "] diff --git a/tests/test_calendars.py b/tests/test_calendars.py index 4da7615..0771f23 100644 --- a/tests/test_calendars.py +++ b/tests/test_calendars.py @@ -14,5 +14,5 @@ def test_last_day(self): self.assertEqual(date(2020, 2, 29), get_last_day_of_this_month(2020, 2)) def test_fist_day_of_week(self): - self.assertEqual(date(2020, 2, 24), get_fist_day_of_year_week(2020, 9)) + self.assertEqual(date(2020, 2, 24), get_fist_day_of_year_week(2020, 8)) self.assertEqual(date(2020, 1, 6), get_fist_day_of_year_week(2020, 1)) diff --git a/tests/test_percentage.py b/tests/test_percentage.py index 145320f..239422d 100644 --- a/tests/test_percentage.py +++ b/tests/test_percentage.py @@ -35,7 +35,7 @@ def test_zero_total(self): 'total': 0, 'completed': 34, 'percent': 0, - 'percent_display': '0.00%', + 'percent_display': '-', 'display': '34 / 0' }, p.as_dict()) From 187b4e4214369877a00b973b7847f6d5e1fa04f5 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sat, 9 May 2020 20:45:41 +0800 Subject: [PATCH 10/12] :sparkles: Add OnClause and SelectClause --- borax/datasets/join_.py | 75 +++++++++++++++---------- tests/test_join.py | 18 +----- tests/test_new_join.py | 118 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 166 insertions(+), 45 deletions(-) create mode 100644 tests/test_new_join.py diff --git a/borax/datasets/join_.py b/borax/datasets/join_.py index 74aff7e..877ecca 100644 --- a/borax/datasets/join_.py +++ b/borax/datasets/join_.py @@ -2,6 +2,8 @@ import operator +__all__ = ['join_one', 'join', 'old_join_one', 'old_join'] + def join_one(ldata, rdata, on, select_as, default=None): if isinstance(rdata, (list, tuple)): @@ -24,37 +26,54 @@ def join_one(ldata, rdata, on, select_as, default=None): return ldata -def _sf(val): - """Build a SelectField from val - """ - if isinstance(val, str): - return val, val, None - elif isinstance(val, (list, tuple)): - le = len(val) - if le == 1: - return val[0], val[0], None - 
elif le == 2: - return val[0], val[1], None - else: - return tuple(val[0:3]) +CLAUSE_SINGLE_TYPES = (str, tuple) -def _of(val): - if isinstance(val, str): - return (val, val), - if isinstance(val, (list, tuple)): - def _ep(_v): - if isinstance(_v, str): - return _v, _v - else: - return _v +class OnClause(tuple): + def __new__(self, lkey, rkey=None): + rkey = rkey or lkey + return tuple.__new__(OnClause, (lkey, rkey)) + + @classmethod + def from_val(cls, val): + cm = val.__class__.__name__ + if cm == "OnClause": + return val + elif cm == "str": + return cls(val, val) + elif cm == "tuple": + return cls(*val[:2]) + else: + raise TypeError("Cannot build OnClause from a {} object.".format(cm)) + + +class SelectClause(tuple): + def __new__(self, rkey, lkey=None, default=None): + lkey = lkey or rkey + return tuple().__new__(SelectClause, (rkey, lkey, default)) + + @classmethod + def from_val(cls, val): + cm = val.__class__.__name__ + if cm == "SelectClause": + return val + elif cm == "str": + return cls(val, val, None) + elif cm == "tuple": + return cls(*val[:3]) + else: + raise TypeError("Cannot build SelectClause from a {} object.".format(cm)) + - return tuple(map(_ep, val)) +OC = OnClause +SC = SelectClause def join(ldata, rdata, on, select_as): - if isinstance(on, (list, tuple, str)): - lfields, rfields = zip(*_of(on)) + if isinstance(on, CLAUSE_SINGLE_TYPES): + on = [on] + if isinstance(on, list): + lfields, rfields = zip(*list(map(OnClause.from_val, on))) def on_callback(_li, _ri): return operator.itemgetter(*lfields)(_li) == operator.itemgetter(*rfields)(_ri) @@ -63,9 +82,9 @@ def on_callback(_li, _ri): else: raise TypeError('str or callable only supported for on param. 
') - if isinstance(select_as, str): - select_as = select_as, - sf_list = list(map(_sf, select_as)) + if isinstance(select_as, CLAUSE_SINGLE_TYPES): + select_as = [select_as] + sf_list = list(map(SelectClause.from_val, select_as)) def _pick_data(_item, _sfs): result = {} diff --git a/tests/test_join.py b/tests/test_join.py index 22f0828..5a684f3 100644 --- a/tests/test_join.py +++ b/tests/test_join.py @@ -3,7 +3,7 @@ import copy import unittest -from borax.datasets.join_ import join_one, old_join_one, join, old_join +from borax.datasets.join_ import old_join_one, old_join catalogs_dict = { 1: 'Python', @@ -32,20 +32,12 @@ def test_with_dict(self): self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual('Java', catalog_books[1]['catalog_name']) - catalog_books = join_one(book_data, catalogs_dict, on='catalog', select_as='catalog_name') - self.assertTrue(all(['catalog_name' in book for book in catalog_books])) - self.assertEqual('Java', catalog_books[1]['catalog_name']) - def test_with_choices(self): book_data = copy.deepcopy(books) catalog_books = old_join_one(book_data, catalog_choices, from_='catalog', as_='catalog_name') self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual('Java', catalog_books[1]['catalog_name']) - catalog_books = join_one(book_data, catalog_choices, on='catalog', select_as='catalog_name') - self.assertTrue(all(['catalog_name' in book for book in catalog_books])) - self.assertEqual('Java', catalog_books[1]['catalog_name']) - def test_join_one_with_default(self): book_data = copy.deepcopy(books) cur_catalogs_dict = { @@ -53,10 +45,6 @@ def test_join_one_with_default(self): 2: 'Java' } - catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name') - self.assertTrue(all(['catalog_name' in book for book in catalog_books])) - self.assertEqual(None, catalog_books[2]['catalog_name']) - catalog_books = old_join_one(book_data, cur_catalogs_dict, 
from_='catalog', as_='catalog_name') self.assertTrue(all(['catalog_name' in book for book in catalog_books])) self.assertEqual(None, catalog_books[2]['catalog_name']) @@ -67,10 +55,6 @@ def test_join_one_with_custom_default(self): 1: 'Python', 2: 'Java' } - catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name', - default='[未知分类]') - self.assertTrue(all(['catalog_name' in book for book in catalog_books])) - self.assertEqual('[未知分类]', catalog_books[2]['catalog_name']) catalog_books = old_join_one(book_data, cur_catalogs_dict, from_='catalog', as_='catalog_name', default='[未知分类]') diff --git a/tests/test_new_join.py b/tests/test_new_join.py new file mode 100644 index 0000000..b1e4b38 --- /dev/null +++ b/tests/test_new_join.py @@ -0,0 +1,118 @@ +# coding=utf8 + +import unittest +import copy + +from borax.datasets.join_ import (OnClause, OC, SelectClause, SC, join, join_one) + +catalogs_dict = { + 1: 'Python', + 2: 'Java', + 3: '软件工程' +} +catalog_choices = [(1, 'Python'), (2, 'Java'), (3, '软件工程')] +catalogs_list = [ + {'id': 1, 'name': 'Python'}, + {'id': 2, 'name': 'Java'}, + {'id': 3, 'name': '软件工程'}, +] +books = [ + {'name': 'Python入门教程', 'catalog': 1, 'price': 45}, + {'name': 'Java标准库', 'catalog': 2, 'price': 80}, + {'name': '软件工程(本科教学版)', 'catalog': 3, 'price': 45}, + {'name': 'Django Book', 'catalog': 1, 'price': 45}, + {'name': '系统架构设计教程', 'catalog': 3, 'price': 104}, +] + + +class OnClauseTestCase(unittest.TestCase): + def test_type_hints(self): + c1 = OnClause("foo", "foo") + self.assertEqual("OnClause", c1.__class__.__name__) + self.assertTrue(isinstance(c1, tuple)) + alias_obj = OC("foo") + self.assertEqual("OnClause", alias_obj.__class__.__name__) + self.assertTrue(isinstance(alias_obj, tuple)) + + def test_build(self): + expected = ("foo", "foo") + self.assertEqual(expected, OnClause.from_val("foo")) + self.assertEqual(expected, OnClause.from_val(("foo",))) + self.assertEqual(expected, OnClause.from_val(("foo", 
"foo"))) + self.assertEqual(expected, OnClause.from_val(OnClause("foo"))) + with self.assertRaises(TypeError): + OnClause.from_val(["foo", "bar"]) + + +class SelectClauseTestCase(unittest.TestCase): + def test_type_hints(self): + c1 = SelectClause("foo", "foo") + self.assertEqual("SelectClause", c1.__class__.__name__) + self.assertTrue(isinstance(c1, tuple)) + alias_obj = SC("foo") + self.assertEqual("SelectClause", alias_obj.__class__.__name__) + self.assertTrue(isinstance(alias_obj, tuple)) + + def test_build(self): + expected = ("foo", "foo", None) + self.assertEqual(expected, SelectClause.from_val("foo")) + self.assertEqual(expected, SelectClause.from_val(("foo",))) + self.assertEqual(expected, SelectClause.from_val(("foo", "foo"))) + self.assertEqual(expected, SelectClause.from_val(SelectClause("foo"))) + with self.assertRaises(TypeError): + OnClause.from_val(["foo", "bar"]) + + +class JoinOneTestCase(unittest.TestCase): + def test_with_dict(self): + book_data = copy.deepcopy(books) + catalog_books = join_one(book_data, catalogs_dict, on='catalog', select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + def test_with_choices(self): + book_data = copy.deepcopy(books) + + catalog_books = join_one(book_data, catalog_choices, on='catalog', select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + def test_join_one_with_default(self): + book_data = copy.deepcopy(books) + cur_catalogs_dict = { + 1: 'Python', + 2: 'Java' + } + + catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual(None, catalog_books[2]['catalog_name']) + + def test_join_one_with_custom_default(self): + book_data = copy.deepcopy(books) + 
cur_catalogs_dict = { + 1: 'Python', + 2: 'Java' + } + catalog_books = join_one(book_data, cur_catalogs_dict, on='catalog', select_as='catalog_name', + default='[未知分类]') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('[未知分类]', catalog_books[2]['catalog_name']) + + def test_callback(self): + def _on(_litem): + return _litem['catalog'] + + book_data = copy.deepcopy(books) + catalog_books = join_one(book_data, catalogs_dict, on=_on, select_as='catalog_name') + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) + + +class JoinTestCase(unittest.TestCase): + def test_basic_join(self): + book_data = copy.deepcopy(books) + catalog_books = join(book_data, catalogs_list, on=('catalog', 'id'), + select_as=('name', 'catalog_name')) + self.assertTrue(all(['catalog_name' in book for book in catalog_books])) + self.assertEqual('Java', catalog_books[1]['catalog_name']) From d4eb6140288c3d394ab8ef1040ec00f51a5e3e9f Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sun, 10 May 2020 14:53:50 +0800 Subject: [PATCH 11/12] :pencil: Update join_ docs --- docs/guides/join.md | 113 +++++++++++++++++++++++++++++++------------- 1 file changed, 80 insertions(+), 33 deletions(-) diff --git a/docs/guides/join.md b/docs/guides/join.md index c2cb79d..ff1ff71 100644 --- a/docs/guides/join.md +++ b/docs/guides/join.md @@ -6,7 +6,7 @@ ## 重要说明 -从V3.2.0开始,我们重写 `join` 和 `join_one` ,原有的函数分别重命名为 `old_join` 和 `old_join_one` ,主要变化: +从V3.2.0开始,我们重写 `join` 和 `join_one` ,原有的函数分别重命名为 `old_join` 和 `old_join_one` ,主要变化如下: - 使用符合SQL的参数命名,比如 on、select_as 等。 - 原有比较分散的参数进行合并。 @@ -32,7 +32,7 @@ from borax.datasets.join_ import old_join as join, old_join_one as join_one > **关于LEFT JOIN** :LEFT JOIN返回左表的全部行和右表满足ON条件的行,如果左表的行在右表中没有匹配,那么这一行右表中对应数据用NULL代替。 -本模块的 *join_* 函数将会修改传入的列表数据,如需不影响原有数据,可以提前复制一份数据。 +本模块的 join_ 函数将会修改传入的列表数据,如需不影响原有数据,可以提前复制一份数据。 本模块示例所用的数据描述如下: @@ -68,6 +68,33 @@ 
catalogs_list = [ ] ``` +## 配置类 + +`join_` 模块定义了两个配置类,分别用于定义 `join` 函数的 `on` 和 `select_as` 参数。 + +- `OC` 是 `OnClause` 的类别名,`SC` 是 `SelectClause` 的类别名。 +- OnClause和SelectClause均继承自 tuple 。 +- `from_val` 类方法可以将一个标准类型(包括 str、tuple,不支持list)的对象转化成对应的 Clause 类。 + +### OnClause/OC + +*`OnClause(lkey, rkey=None)`* + +表示 on 表达式的条件,一个 `OnClause` 表示一个等值条件。 + +- lkey:条件的左值字段。 +- rkey:条件的右值字段,如果不提供,默认和 lkey 一致。 + +### SelectClause/SC + +*`SelectClause(rkey, lkey=None, default=None)`* + +表示 select表达式的字段定义,一个 `SelectClause` 表示一个字段。 + +- rkey:右边数据的字段。 +- lkey:加入左边数据时使用的字段名称,如果不提供,默认和 rkey 一致。 +- default:在右边数据找不到时使用该默认值。 + ## API ### join_one @@ -114,67 +141,87 @@ join_one(books, catalog_dict, on='catalog', select_as='catalog_name') 实现左、右数据集的连接。 -| 参数 | 类型 | 说明 | -| --------- | ----------------------------------------- | -------------------------------- | -| ldata | List[Dict] | 左边数据集 | -| rdata | List[Dict] | 右边数据集 | -| on | str / Tuple[Union[str, tuple]] / Callable | 使用左边的连接字段,支持回调函数 | -| select_as | str / List[Tuple] | 右边数据在结果的字段名称 | +| 参数 | 类型 | 说明 | +| --------- | ------------------------- | -------------------------------- | +| ldata | List[Dict] | 左边数据集 | +| rdata | List[Dict] | 右边数据集 | +| on | List[OnClause] / callback | 使用左边的连接字段,支持回调函数 | +| select_as | str / List[SelectClause] | 右边数据在结果的字段名称 | 备注: -- 和 `join` 相比,没有显示的 defaults 参数,默认值可以在 `select_as` 参数中配置。 -- 当`on` 参数为回调函数时,定义如下 ,返回是否匹配。 +- 和 `join` 相比,没有显式的 defaults 参数,默认值可以在 `select_as` 参数中配置。 + +#### on参数 + +on支持以下几种参数方式: + +- 回调函数。定义如下 ,返回是否匹配。 ```python def on_callback(litem:dict, ritem:dict) -> bool: pass ``` -- 当 `on` 为字段配置时,其类型为 `Tuple[Union[str, Tuple]]`。 该定义了一个元组,元组的每个元素又是由2个字符串组成的元组。通用格式如下: +注意:如果在右边数据有多条记录匹配时,只会使用第一次成功匹配的记录。 + +- 标准配置:当 `on` 为字段配置时,其类型为 `List[OnClause]`。 该定义了一个由若干个 OnClause 对象组成的列表。注意这里的列表(list)不能使用元组(tuple)代替。通用格式如下: ```python - ( - (, ), - (, ), + [ + OnClause('lkey1', 'rkey1'), + OnClause('lkey2', 'rkey2'), # ... 
- ) + ] ``` - 表示 `left_item.key1=right.item.key1&left_item.key2=right.item.key2`。 + 表示 `left_item.lkey1=right_item.rkey1&left_item.lkey2=right_item.rkey2`。 -当某一个条件左右两边的key相同时,内部的元组可以省略为一个字符串。 +- 简易配置。当含有以下条件时,可以不显式定义 `OnClause` 对象,由程序自动转化为对应的 `OnClause` 对象。 + + (1) 当某一个等值条件左右两边的key相同。 + + (2) 只有一个等值条件,可以省略外面的 `[]` 列表符号。 ```python -# 以下两种方式是相同的。 -on = (('x', 'x'), ('y', 'y')) +# 以下三种方式是相同的。 +on = [('x', 'x'), ('y', 'y')] +on = ['x', 'y'] +on = [OnClause('x', 'x'), OnClause('y', 'y')] -on = ('x', 'y') +# 以下三种方式是等效的。 +on = 'x' +on = OnClause('x', 'x') +on = [OnClause('x', 'x')] ``` -当只有一个条件时,还可以继续省略外层的元组,只定义一个字符串即可。以下三种是等效的。 +下列的三种方式也是等效的,注意和上面 `on = ['x', 'y']` 的区别。 ```python -on = 'x' -on = ('x',) -on = (('x', 'x'),) +on = ('x', 'y') +on = OnClause('x', 'y') +on = [OnClause('x', 'y')] ``` -- `select_as` 采用配置型参数,标准格式为: +#### select_as参数 + +- 标准配置:其类型为 `List[SelectClause]`。 该定义了一个由若干个 SelectClause 对象组成的列表。注意这里的列表(list)不能使用元组(tuple)代替。通用格式如下: ```python -( - (, , ), - (, , # 省略默认值 - (,), - , - ... 
-) +[ + SelectClause('rkey1', 'lkey1', 'default1'), + SelectClause('rkey2', 'lkey2'), # 省略默认值 + SelectClause('rkey3'), + ] ``` 元组元素的三个值分别表示右边数据字段名称、左边数据字段名称、默认值。和 `on` 参数类似,也可以依次省略后面两个内容。 -如果只选择一个字段,也可以省略外层的元组符号,直接使用字符串即可。 +- 简易配置。当含有以下条件时,可以不显式定义 `SelectClause` 对象,由程序自动转化为对应的 `SelectClause`对象。 + + (1) 当某一个选择条件lkey和default使用默认值时。 + + (2) 只有一个选择条件,可以省略外面的 `[]` 列表符号。 ## 旧版API From 82e894e75c71b8b3929f64fd5fc3d4952cd1ef55 Mon Sep 17 00:00:00 2001 From: kinegratii Date: Sun, 10 May 2020 14:59:21 +0800 Subject: [PATCH 12/12] :bookmark: release v3.2.0 --- borax/__init__.py | 2 +- docs/changelog.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/borax/__init__.py b/borax/__init__.py index 24b3c9b..55d5de3 100644 --- a/borax/__init__.py +++ b/borax/__init__.py @@ -1,4 +1,4 @@ # coding=utf8 -__version__ = '3.1.0' +__version__ = '3.2.0' __author__ = 'kinegratii' diff --git a/docs/changelog.md b/docs/changelog.md index bbbe1e4..5705777 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,6 @@ # 更新日志 -## v3.2.0 +## v3.2.0 (20200510) > 本版本重写 `borax.datasets.join_` 模块,接口引入重大变更,详情查看 [join模块](guides/join) 。