diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 573e80c..0000000 --- a/.flake8 +++ /dev/null @@ -1,4 +0,0 @@ -[flake8] -exclude = .tox,*.egg,build,data,*docs -select = E,W,F -ignore = F401,F403 \ No newline at end of file diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml deleted file mode 100644 index 315f17a..0000000 --- a/.github/workflows/ci-workflow.yml +++ /dev/null @@ -1,41 +0,0 @@ -name: tests - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.6, 3.7, 3.8, 3.9] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - name: Package cbsodata - run: | - pip install --upgrade pip - pip install wheel - python setup.py bdist_wheel sdist - - name: Install cbsodata - run: | - pip install ./dist/cbsodata-*.whl - - name: Lint with flake8 - run: | - pip install flake8 - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --show-source --statistics - # exit-zero treats all errors as warnings - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - name: Test with pytest - run: | - pip install pytest - # remove cbsodata to prevent relative imports (use installed package) - # this is like wrapping stuff in a src folder - rm -r cbsodata/ - pytest diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2ebde81..367ccc9 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -9,4 +9,4 @@ jobs: - uses: ammaraskar/sphinx-action@master with: docs-folder: "docs/" - repo-token: "${{ secrets.GITHUB_TOKEN }}" \ No newline at end of file + repo-token: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml new file mode 100644 index 0000000..dee61b9 --- /dev/null +++ b/.github/workflows/python-lint.yml @@ -0,0 +1,8 @@ +name: Linter +on: [ push, pull_request ] +jobs: + lint-ruff: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: chartboost/ruff-action@v1 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..8014c0c --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,25 @@ +name: Python package and test +on: [push, pull_request] + +jobs: + test: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install package and dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[test] + - name: Test with pytest + run: | + pytest diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..eaaeb08 --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,31 @@ +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml deleted file mode 100644 index 5e039c0..0000000 --- a/.github/workflows/pythonpackage.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: Deploy and release - -on: - push: - tags: - - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 - -jobs: - deploy: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up Python - uses: actions/setup-python@v1 - with: - python-version: '3.x' - - name: Get the version (git tag) - id: get_version - run: | - echo ${GITHUB_REF/refs\/tags\/v/} - echo ::set-output name=VERSION::${GITHUB_REF/refs\/tags\/v/} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel - - name: Build - run: | - python setup.py sdist bdist_wheel - - name: Create Release - id: create_release - uses: actions/create-release@v1.0.0 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ github.ref }} - release_name: Release ${{ github.ref }} - draft: false - prerelease: false - - name: Upload Release Asset (Wheel) - id: upload-release-asset-whl - uses: actions/upload-release-asset@v1.0.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./dist/cbsodata-${{ steps.get_version.outputs.VERSION }}-py3-none-any.whl - asset_name: cbsodata-${{ steps.get_version.outputs.VERSION }}-py3-none-any.whl - asset_content_type: application/x-wheel+zip - - name: Upload Release Asset (Sdist) - id: upload-release-asset-sdist - uses: actions/upload-release-asset@v1.0.1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.create_release.outputs.upload_url }} - asset_path: ./dist/cbsodata-${{ steps.get_version.outputs.VERSION }}.tar.gz - asset_name: cbsodata-${{ steps.get_version.outputs.VERSION }}.tar.gz - asset_content_type: application/zip - - name: Publish package - uses: pypa/gh-action-pypi-publish@master - with: - user: __token__ - password: ${{ secrets.pypi_password }} diff --git a/.gitignore b/.gitignore index 892cfe2..703acd6 100644 --- a/.gitignore +++ b/.gitignore @@ -90,4 +90,4 @@ ENV/ .spyderproject # Rope project settings -.ropeproject \ No newline at end of file +.ropeproject diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..9ec9fdc --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + exclude: "^.*drawio|.*.svg$" + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.0 + hooks: + - id: ruff +- repo: https://github.com/psf/black-pre-commit-mirror + rev: 23.10.0 + hooks: + - id: black diff --git a/LICENSE.txt b/LICENSE.txt index e1918f7..993da5e 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -19,4 +19,4 @@ Copyright (c) 2016 Jonathan de Bruin HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file + OTHER DEALINGS IN THE SOFTWARE. diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 4ccf2b1..0000000 --- a/MANIFEST.in +++ /dev/null @@ -1,3 +0,0 @@ -# Include the license file -include LICENSE.txt - diff --git a/README.md b/README.md new file mode 100644 index 0000000..0db6abe --- /dev/null +++ b/README.md @@ -0,0 +1,216 @@ +# Statistics Netherlands opendata API client for Python + +[![pypi](https://badge.fury.io/py/cbsodata.svg)](https://badge.fury.io/py/cbsodata) +[![tests](https://github.com/J535D165/cbsodata/workflows/tests/badge.svg)](https://github.com/J535D165/cbsodata/actions) + +Retrieve data from the [open data interface of Statistics +Netherlands](http://www.cbs.nl/nl-NL/menu/cijfers/statline/open-data/default.htm) +(Centraal Bureau voor de Statistiek) with *Python*. The data is +identical in content to the tables which can be retrieved and downloaded +from [StatLine](http://statline.cbs.nl/). CBS datasets are accessed via +the [CBS open data +portal](https://opendata.cbs.nl/statline/portal.html). + +The documentation of this package is found at this page and on +[readthedocs.io](http://cbsodata.readthedocs.io/). + +R user? Use +[cbsodataR](https://cran.r-project.org/web/packages/cbsodataR/index.html). + +## Installation + +From PyPi + +``` sh +pip install cbsodata +``` + +## Usage + +Load the package with + +``` python +>>> import cbsodata +``` + +### Tables + +Statistics Netherlands (CBS) has a large amount of public available data +tables (more than 4000 at the moment of writing). Each table is +identified by a unique identifier (`Identifier`). + +``` python +>>> tables = cbsodata.get_table_list() +>>> print(tables[0]) +{'Catalog': 'CBS', + 'ColumnCount': 18, + 'DefaultPresentation': '_la=nl&_si=&_gu=&_ed=LandVanUiteindelijkeZeggenschapUCI&_td=Perioden&graphType=line', + 'DefaultSelection': "$filter=((LandVanUiteindelijkeZeggenschapUCI eq '11111') or (LandVanUiteindelijkeZeggenschapUCI eq '22222')) and (Bedrijfsgrootte eq '10000') and (substringof('JJ',Perioden))&$select=LandVanUiteindelijkeZeggenschapUCI, Bedrijfsgrootte, Perioden, FiscaalJaarloonPerBaan_15", + 'ExplanatoryText': '', + 'Frequency': 'Perjaar', + 'GraphTypes': 'Table,Bar,Line', + 'ID': 0, + 'Identifier': '82010NED', + 'Language': 'nl', + 'MetaDataModified': '2014-02-04T02:00:00', + 'Modified': '2014-02-04T02:00:00', + 'OutputStatus': 'Regulier', + 'Period': '2008 t/m 2011', + 'ReasonDelivery': 'Actualisering', + 'RecordCount': 32, + 'SearchPriority': '2', + 'ShortDescription': '\nDeze tabel bevat informatie over banen en lonen bij bedrijven in Nederland, uitgesplitst naar het land van uiteindelijke zeggenschap van die bedrijven. Hierbij wordt onderscheid gemaakt tussen bedrijven onder Nederlandse zeggenschap en bedrijven onder buitenlandse zeggenschap. In de tabel zijn alleen de bedrijven met werknemers in loondienst meegenomen. De cijfers hebben betrekking op het totale aantal banen bij deze bedrijven en de samenstelling van die banen naar kenmerken van de werknemers (baanstatus, geslacht, leeftijd, herkomst en hoogte van het loon). Ook het gemiddelde fiscale jaarloon per baan is in de tabel te vinden. \n\nGegevens beschikbaar vanaf: 2008 \n\nStatus van de cijfers: \nDe cijfers in deze tabel zijn definitief.\n\nWijzigingen per 4 februari 2014\nDe cijfers van 2011 zijn toegevoegd.\n\nWanneer komen er nieuwe cijfers?\nDe cijfers over 2012 verschijnen in de eerste helft van 2015.\n', + 'ShortTitle': 'Zeggenschap bedrijven; banen, grootte', + 'Source': 'CBS.', + 'Summary': 'Banen en lonen van werknemers bij bedrijven in Nederland\nnaar land van uiteindelijke zeggenschap en bedrijfsgrootte', + 'SummaryAndLinks': 'Banen en lonen van werknemers bij bedrijven in Nederland
naar land van uiteindelijke zeggenschap en bedrijfsgrootte
http://opendata.cbs.nl/ODataApi/OData/82010NED
http://opendata.cbs.nl/ODataFeed/OData/82010NED', + 'Title': 'Zeggenschap bedrijven in Nederland; banen en lonen, bedrijfsgrootte', + 'Updated': '2014-02-04T02:00:00'} +``` + +### Info + +Get information about a table with the `get_info` function. + +``` python +>>> info = cbsodata.get_info('82070ENG') # Returns a dict with info +>>> info['Title'] +'Caribbean Netherlands; employed labour force characteristics 2012' +>>> info['Modified'] +'2013-11-28T15:00:00' +``` + +### Data + +The function you are looking for!! The function `get_data` returns a +list of dicts with the table data. + +``` python +>>> data = cbsodata.get_data('82070ENG') +[{'CaribbeanNetherlands': 'Bonaire', + 'EmployedLabourForceInternatDef_1': 8837, + 'EmployedLabourForceNationalDef_2': 8559, + 'Gender': 'Total male and female', + 'ID': 0, + 'Periods': '2012', + 'PersonalCharacteristics': 'Total personal characteristics'}, + {'CaribbeanNetherlands': 'St. Eustatius', + 'EmployedLabourForceInternatDef_1': 2099, + 'EmployedLabourForceNationalDef_2': 1940, + 'Gender': 'Total male and female', + 'ID': 1, + 'Periods': '2012', + 'PersonalCharacteristics': 'Total personal characteristics'}, + {'CaribbeanNetherlands': 'Saba', + 'EmployedLabourForceInternatDef_1': 1045, + 'EmployedLabourForceNationalDef_2': 971, + 'Gender': 'Total male and female', + 'ID': 2, + 'Periods': '2012', + 'PersonalCharacteristics': 'Total personal characteristics'}, + # ... +] +``` + +The keyword argument `dir` can be used to download the data directly to +your file system. + +``` python +>>> data = cbsodata.get_data('82070ENG', dir="dir_to_save_data") +``` + +### Catalogs (dataderden) + +There are multiple ways to retrieve data from catalogs other than +'opendata.cbs.nl'. The code below shows 3 different ways to retrieve +data from the catalog 'dataderden.cbs.nl' (known from Iv3). + +On module level. + +``` python +cbsodata.options.catalog_url = 'dataderden.cbs.nl' +# list tables +cbsodata.get_table_list() +# get dataset 47003NED +cbsodata.get_data('47003NED') +``` + +With context managers. + +``` python +with cbsodata.catalog('dataderden.cbs.nl'): + # list tables + cbsodata.get_table_list() + # get dataset 47003NED + cbsodata.get_data('47003NED') +``` + +As a function argument. + +``` python +# list tables +cbsodata.get_table_list(catalog_url='dataderden.cbs.nl') +# get dataset 47003NED +cbsodata.get_data('47003NED', catalog_url='dataderden.cbs.nl') +``` + +### Pandas users + +The package works well with Pandas. Convert the result easily into a +pandas DataFrame with the code below. + +``` python +>>> data = pandas.DataFrame(cbsodata.get_data('82070ENG')) +>>> data.head() +``` + +The list of tables can be turned into a pandas DataFrame as well. + +``` python +>>> tables = pandas.DataFrame(cbsodata.get_table_list()) +>>> tables.head() +``` + +## Command Line Interface + +This library ships with a Command Line Interface (CLI). + +``` bash +> cbsodata -h +usage: cbsodata [-h] [--version] [subcommand] + +CBS Open Data: Command Line Interface + +positional arguments: + subcommand the subcommand (one of 'data', 'info', 'list') + +optional arguments: + -h, --help show this help message and exit + --version show the package version +``` + +Download data: + +``` bash +> cbsodata data 82010NED +``` + +Retrieve table information: + +``` bash +> cbsodata info 82010NED +``` + +Retrieve a list with all tables: + +``` bash +> cbsodata list +``` + +### Export data + +Use the flag `-o` to load data to a file (JSON lines). + +``` bash +> cbsodata data 82010NED -o table_82010NED.jl +``` diff --git a/README.rst b/README.rst deleted file mode 100644 index 9c97195..0000000 --- a/README.rst +++ /dev/null @@ -1,232 +0,0 @@ -Statistics Netherlands opendata API client for Python -===================================================== - -|pypi| |tests| - -.. |pypi| image:: https://badge.fury.io/py/cbsodata.svg - :target: https://badge.fury.io/py/cbsodata - -.. |tests| image:: https://github.com/J535D165/cbsodata/workflows/tests/badge.svg - :target: https://github.com/J535D165/cbsodata/actions - -Retrieve data from the `open data interface of Statistics Netherlands -`__ -(Centraal Bureau voor de Statistiek) with *Python*. The data is identical in -content to the tables which can be retrieved and downloaded from `StatLine -`__. CBS datasets are accessed via the `CBS open data -portal `__. - -The documentation of this -package is found at this page and on `readthedocs.io -`__. - -R user? Use `cbsodataR `__. - -Installation ------------- - -From PyPi - -.. code:: sh - - pip install cbsodata - -Usage ------ - -Load the package with - -.. code:: python - - >>> import cbsodata - -Tables -~~~~~~ - -Statistics Netherlands (CBS) has a large amount of public available -data tables (more than 4000 at the moment of writing). Each table is -identified by a unique identifier (``Identifier``). - -.. code:: python - - >>> tables = cbsodata.get_table_list() - >>> print(tables[0]) - {'Catalog': 'CBS', - 'ColumnCount': 18, - 'DefaultPresentation': '_la=nl&_si=&_gu=&_ed=LandVanUiteindelijkeZeggenschapUCI&_td=Perioden&graphType=line', - 'DefaultSelection': "$filter=((LandVanUiteindelijkeZeggenschapUCI eq '11111') or (LandVanUiteindelijkeZeggenschapUCI eq '22222')) and (Bedrijfsgrootte eq '10000') and (substringof('JJ',Perioden))&$select=LandVanUiteindelijkeZeggenschapUCI, Bedrijfsgrootte, Perioden, FiscaalJaarloonPerBaan_15", - 'ExplanatoryText': '', - 'Frequency': 'Perjaar', - 'GraphTypes': 'Table,Bar,Line', - 'ID': 0, - 'Identifier': '82010NED', - 'Language': 'nl', - 'MetaDataModified': '2014-02-04T02:00:00', - 'Modified': '2014-02-04T02:00:00', - 'OutputStatus': 'Regulier', - 'Period': '2008 t/m 2011', - 'ReasonDelivery': 'Actualisering', - 'RecordCount': 32, - 'SearchPriority': '2', - 'ShortDescription': '\nDeze tabel bevat informatie over banen en lonen bij bedrijven in Nederland, uitgesplitst naar het land van uiteindelijke zeggenschap van die bedrijven. Hierbij wordt onderscheid gemaakt tussen bedrijven onder Nederlandse zeggenschap en bedrijven onder buitenlandse zeggenschap. In de tabel zijn alleen de bedrijven met werknemers in loondienst meegenomen. De cijfers hebben betrekking op het totale aantal banen bij deze bedrijven en de samenstelling van die banen naar kenmerken van de werknemers (baanstatus, geslacht, leeftijd, herkomst en hoogte van het loon). Ook het gemiddelde fiscale jaarloon per baan is in de tabel te vinden. \n\nGegevens beschikbaar vanaf: 2008 \n\nStatus van de cijfers: \nDe cijfers in deze tabel zijn definitief.\n\nWijzigingen per 4 februari 2014\nDe cijfers van 2011 zijn toegevoegd.\n\nWanneer komen er nieuwe cijfers?\nDe cijfers over 2012 verschijnen in de eerste helft van 2015.\n', - 'ShortTitle': 'Zeggenschap bedrijven; banen, grootte', - 'Source': 'CBS.', - 'Summary': 'Banen en lonen van werknemers bij bedrijven in Nederland\nnaar land van uiteindelijke zeggenschap en bedrijfsgrootte', - 'SummaryAndLinks': 'Banen en lonen van werknemers bij bedrijven in Nederland
naar land van uiteindelijke zeggenschap en bedrijfsgrootte
http://opendata.cbs.nl/ODataApi/OData/82010NED
http://opendata.cbs.nl/ODataFeed/OData/82010NED', - 'Title': 'Zeggenschap bedrijven in Nederland; banen en lonen, bedrijfsgrootte', - 'Updated': '2014-02-04T02:00:00'} - -Info -~~~~ - -Get information about a table with the ``get_info`` function. - -.. code:: python - - >>> info = cbsodata.get_info('82070ENG') # Returns a dict with info - >>> info['Title'] - 'Caribbean Netherlands; employed labour force characteristics 2012' - >>> info['Modified'] - '2013-11-28T15:00:00' - -Data -~~~~ - -The function you are looking for!! The function ``get_data`` returns a list of -dicts with the table data. - -.. code:: python - - >>> data = cbsodata.get_data('82070ENG') - [{'CaribbeanNetherlands': 'Bonaire', - 'EmployedLabourForceInternatDef_1': 8837, - 'EmployedLabourForceNationalDef_2': 8559, - 'Gender': 'Total male and female', - 'ID': 0, - 'Periods': '2012', - 'PersonalCharacteristics': 'Total personal characteristics'}, - {'CaribbeanNetherlands': 'St. Eustatius', - 'EmployedLabourForceInternatDef_1': 2099, - 'EmployedLabourForceNationalDef_2': 1940, - 'Gender': 'Total male and female', - 'ID': 1, - 'Periods': '2012', - 'PersonalCharacteristics': 'Total personal characteristics'}, - {'CaribbeanNetherlands': 'Saba', - 'EmployedLabourForceInternatDef_1': 1045, - 'EmployedLabourForceNationalDef_2': 971, - 'Gender': 'Total male and female', - 'ID': 2, - 'Periods': '2012', - 'PersonalCharacteristics': 'Total personal characteristics'}, - # ... - ] - -The keyword argument ``dir`` can be used to download the data directly to your -file system. - -.. code:: python - - >>> data = cbsodata.get_data('82070ENG', dir="dir_to_save_data") - -Catalogs (dataderden) -~~~~~~~~~~~~~~~~~~~~~ - -There are multiple ways to retrieve data from catalogs other than -'opendata.cbs.nl'. The code below shows 3 different ways to retrieve data from -the catalog 'dataderden.cbs.nl' (known from Iv3). - -On module level. - -.. code:: python - - cbsodata.options.catalog_url = 'dataderden.cbs.nl' - # list tables - cbsodata.get_table_list() - # get dataset 47003NED - cbsodata.get_data('47003NED') - -With context managers. - -.. code:: python - - with cbsodata.catalog('dataderden.cbs.nl'): - # list tables - cbsodata.get_table_list() - # get dataset 47003NED - cbsodata.get_data('47003NED') - -As a function argument. - -.. code:: python - - # list tables - cbsodata.get_table_list(catalog_url='dataderden.cbs.nl') - # get dataset 47003NED - cbsodata.get_data('47003NED', catalog_url='dataderden.cbs.nl') - -Pandas users -~~~~~~~~~~~~ - -The package works well with Pandas. Convert the result easily into a pandas -DataFrame with the code below. - -.. code:: python - - >>> data = pandas.DataFrame(cbsodata.get_data('82070ENG')) - >>> data.head() - -The list of tables can be turned into a pandas DataFrame as well. - -.. code:: python - - >>> tables = pandas.DataFrame(cbsodata.get_table_list()) - >>> tables.head() - - -Command Line Interface ----------------------- - -This library ships with a Command Line Interface (CLI). - -.. code:: bash - - > cbsodata -h - usage: cbsodata [-h] [--version] [subcommand] - - CBS Open Data: Command Line Interface - - positional arguments: - subcommand the subcommand (one of 'data', 'info', 'list') - - optional arguments: - -h, --help show this help message and exit - --version show the package version - -Download data: - -.. code:: bash - - > cbsodata data 82010NED - -Retrieve table information: - -.. code:: bash - - > cbsodata info 82010NED - -Retrieve a list with all tables: - -.. code:: bash - - > cbsodata list - - -Export data -~~~~~~~~~~~ - -Use the flag ``-o`` to load data to a file (JSON lines). - -.. code:: bash - - > cbsodata data 82010NED -o table_82010NED.jl diff --git a/cbsodata/__init__.py b/cbsodata/__init__.py index fd4d6af..ea7e1dd 100644 --- a/cbsodata/__init__.py +++ b/cbsodata/__init__.py @@ -23,6 +23,27 @@ """Statistics Netherlands opendata API client for Python""" -from cbsodata.cbsodata3 import * +try: + from cbsodata._version import __version__ + from cbsodata._version import __version_tuple__ +except ImportError: + __version__ = "0.0.0" + __version_tuple__ = (0, 0, 0) -__version__ = "1.3.4" +from cbsodata.cbsodata3 import catalog +from cbsodata.cbsodata3 import download_data +from cbsodata.cbsodata3 import get_data +from cbsodata.cbsodata3 import get_info +from cbsodata.cbsodata3 import get_meta +from cbsodata.cbsodata3 import get_table_list +from cbsodata.cbsodata3 import options + +__all__ = [ + "download_data", + "get_data", + "get_info", + "get_meta", + "get_table_list", + "options", + "catalog", +] diff --git a/cbsodata/__main__.py b/cbsodata/__main__.py index 4b73373..354bad7 100644 --- a/cbsodata/__main__.py +++ b/cbsodata/__main__.py @@ -72,8 +72,7 @@ def text_outputter(data_obj, max_rows=None): col_line = "" for i, col in enumerate(columns): if i != (len(columns) - 1): - col_line = col_line + str(col) \ - .upper().ljust(value_max_len[col] + 2) + col_line = col_line + str(col).upper().ljust(value_max_len[col] + 2) else: col_line = col_line + str(col).upper() print(col_line) @@ -96,11 +95,7 @@ def text_outputter(data_obj, max_rows=None): def parse_argument_table_id(parser): - parser.add_argument( - "table_id", - type=str, - help="table identifier" - ) + parser.add_argument("table_id", type=str, help="table identifier") def parse_argument_catalog(parser): @@ -108,43 +103,49 @@ def parse_argument_catalog(parser): "--catalog_url", default=None, type=str, - help="the catalog to download the data from") + help="the catalog to download the data from", + ) def parse_argument_output_format(parser): parser.add_argument( - "--output_format", "-f", + "--output_format", + "-f", default="json", type=str, - help="format to show table ('json', 'text')") + help="format to show table ('json', 'text')", + ) def parse_argument_output(parser): parser.add_argument( - "--output_file", "-o", + "--output_file", + "-o", default=None, type=str, - help="file to store the output (only json support)") + help="file to store the output (only json support)", + ) def parse_argument_max_rows(parser): parser.add_argument( - "--max_rows", "-n", + "--max_rows", + "-n", default=100, type=int, - help="maximum number of rows to output") + help="maximum number of rows to output", + ) def save_list_to_json(data_obj, fp): """Write list with dicts to json""" - with open(fp, 'w+') as f: + with open(fp, "w+") as f: for line in data_obj: f.write(json.dumps(line) + "\n") def main(): - if len(sys.argv) > 1 and sys.argv[1] == "data": parser = argparse.ArgumentParser( prog="cbsodata", @@ -152,7 +153,7 @@ def main(): CBS Open Data: Command Line Interface Get data by table identifier. - """ + """, ) parse_argument_table_id(parser) parse_argument_catalog(parser) @@ -178,7 +179,7 @@ def main(): CBS Open Data: Command Line Interface Get data infomation by table identifier. - """ + """, ) parse_argument_table_id(parser) parse_argument_catalog(parser) @@ -189,13 +190,11 @@ def main(): result = cbsodata.get_info(args.table_id, catalog_url=args.catalog_url) if args.output_file: - with open(args.output_file, 'w') as f: + with open(args.output_file, "w") as f: json.dump(result, f, indent=4) if args.output_format == "text": - text_outputter( - [{"Label": k, "Value": v} for k, v in result.items()] - ) + text_outputter([{"Label": k, "Value": v} for k, v in result.items()]) else: print(json.dumps(result, indent=4)) @@ -206,7 +205,7 @@ def main(): CBS Open Data: Command Line Interface Get list of available tables. - """ + """, ) parse_argument_catalog(parser) parse_argument_output_format(parser) @@ -230,19 +229,17 @@ def main(): prog="cbsodata", description=""" CBS Open Data: Command Line Interface - """ + """, ) parser.add_argument( "subcommand", nargs="?", type=lambda x: isinstance(x, str) and x in AVAILABLE_CMDS, - help="the subcommand (one of '{}')".format( - "', '".join(AVAILABLE_CMDS)) + help="the subcommand (one of '{}')".format("', '".join(AVAILABLE_CMDS)), ) parser.add_argument( - "--version", - action='store_true', - help="show the package version") + "--version", action="store_true", help="show the package version" + ) args = parser.parse_args() @@ -250,5 +247,5 @@ def main(): parser.print_help() -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/cbsodata/cbsodata3.py b/cbsodata/cbsodata3.py index adb027a..d51d242 100644 --- a/cbsodata/cbsodata3.py +++ b/cbsodata/cbsodata3.py @@ -23,19 +23,26 @@ """Statistics Netherlands opendata API client for Python""" -__all__ = ['download_data', 'get_data', 'get_info', 'get_meta', - 'get_table_list', 'options', 'catalog'] +__all__ = [ + "download_data", + "get_data", + "get_info", + "get_meta", + "get_table_list", + "options", + "catalog", +] -import os -import json import copy +import json import logging +import os import warnings from contextlib import contextmanager import requests -from requests import Session, Request - +from requests import Request +from requests import Session CBSOPENDATA = "opendata.cbs.nl" # deprecate in next version API = "ODataApi/odata" @@ -45,11 +52,10 @@ FORMAT = "json" -class OptionsManager(object): +class OptionsManager: """Class for option management""" def __init__(self): - self.use_https = True self.api_version = "3" # Get default proxy settings from environment variables @@ -66,8 +72,7 @@ def __repr__(self): return self.__str__() def __str__(self): - return "catalog_url = {}, use_https = {}".format( - self.catalog_url, self.use_https) + return f"catalog_url = {self.catalog_url}, use_https = {self.use_https}" def __getitem__(self, arg): return getattr(self, arg) @@ -77,8 +82,7 @@ def __setitem__(self, arg, value): def _log_setting_change(self, setting_name, old_value, new_value): logging.info( - "Setting '{}' changed from '{}' to '{}'.".format( - setting_name, old_value, new_value) + f"Setting '{setting_name}' changed from '{old_value}' to '{new_value}'." ) def __setattr__(self, arg, value): @@ -88,7 +92,7 @@ def __setattr__(self, arg, value): old_value = "undefined" self._log_setting_change(arg, old_value, value) - super(OptionsManager, self).__setattr__(arg, value) + super().__setattr__(arg, value) @property def catalog_url(self): @@ -101,15 +105,16 @@ def catalog_url(self, url): @property def proxies(self): - return self.requests.get('proxies', None) + return self.requests.get("proxies", None) @proxies.setter def proxies(self, proxies): warnings.warn( "Deprecated, use options.requests['proxies'] instead", - DeprecationWarning + DeprecationWarning, + stacklevel=2, ) - self.requests['proxies'] = proxies + self.requests["proxies"] = proxies # User options @@ -117,7 +122,6 @@ def proxies(self, proxies): def _get_catalog_url(url): - return options.catalog_url if url is None else url @@ -129,17 +133,20 @@ def _get_table_url(table_id, catalog_url=None): else: _catalog_url = catalog_url - components = {"http": "https://" if options.use_https else "http://", - "baseurl": _catalog_url, - "bulk": BULK, - "table_id": table_id} + components = { + "http": "https://" if options.use_https else "http://", + "baseurl": _catalog_url, + "bulk": BULK, + "table_id": table_id, + } # http://opendata.cbs.nl/ODataApi/OData/37506wwm return "{http}{baseurl}/{bulk}/{table_id}/".format(**components) -def _download_metadata(table_id, metadata_name, select=None, filters=None, - catalog_url=None, **kwargs): +def _download_metadata( + table_id, metadata_name, select=None, filters=None, catalog_url=None, **kwargs +): """Download metadata.""" # http://opendata.cbs.nl/ODataApi/OData/37506wwm/UntypedDataSet?$format=json @@ -149,9 +156,9 @@ def _download_metadata(table_id, metadata_name, select=None, filters=None, params["$format"] = FORMAT if select: - params['$select'] = _select(select) + params["$select"] = _select(select) if filters: - params['$filter'] = _filters(filters) + params["$filter"] = _filters(filters) # additional parameters to requests request_kwargs = options.requests.copy() @@ -160,10 +167,9 @@ def _download_metadata(table_id, metadata_name, select=None, filters=None, try: data = [] - while (url is not None): - + while url is not None: s = Session() - p = Request('GET', url, params=params).prepare() + p = Request("GET", url, params=params).prepare() logging.info("Download " + p.url) @@ -172,10 +178,10 @@ def _download_metadata(table_id, metadata_name, select=None, filters=None, r.encoding = "utf-8" res = json.loads(r.text) - data.extend(res['value']) + data.extend(res["value"]) try: - url = res['odata.nextLink'] + url = res["odata.nextLink"] params = {} except KeyError: url = None @@ -184,8 +190,8 @@ def _download_metadata(table_id, metadata_name, select=None, filters=None, except requests.HTTPError as http_err: raise requests.HTTPError( - "Downloading table '{}' failed. {}".format(table_id, str(http_err)) - ) + f"Downloading table '{table_id}' failed. {str(http_err)}" + ) from http_err def _save_data(data, dir, metadata_name): @@ -194,9 +200,9 @@ def _save_data(data, dir, metadata_name): if not os.path.exists(dir): os.makedirs(dir) - fp = os.path.join(dir, metadata_name + '.json') + fp = os.path.join(dir, metadata_name + ".json") - with open(fp, 'w') as output_file: + with open(fp, "w") as output_file: json.dump(data, output_file, indent=2) @@ -232,13 +238,20 @@ def _select(select): """ if isinstance(select, list): - select = ','.join(select) + select = ",".join(select) return select -def download_data(table_id, dir=None, typed=False, select=None, filters=None, - catalog_url=None, **kwargs): +def download_data( + table_id, + dir=None, + typed=False, + select=None, + filters=None, + catalog_url=None, + **kwargs, +): """Download the CBS data and metadata. Parameters @@ -274,7 +287,7 @@ def download_data(table_id, dir=None, typed=False, select=None, filters=None, ) # The names of the tables with metadata - metadata_table_names = [table['name'] for table in metadata_tables] + metadata_table_names = [table["name"] for table in metadata_tables] # Download only the typed or untyped data typed_or_not_str = "TypedDataSet" if typed else "UntypedDataSet" @@ -283,17 +296,20 @@ def download_data(table_id, dir=None, typed=False, select=None, filters=None, data = {} for table_name in metadata_table_names: - # download table if table_name in ["TypedDataSet", "UntypedDataSet"]: - metadata = _download_metadata(table_id, table_name, - select=select, filters=filters, - catalog_url=_catalog_url, - **kwargs) + metadata = _download_metadata( + table_id, + table_name, + select=select, + filters=filters, + catalog_url=_catalog_url, + **kwargs, + ) else: - metadata = _download_metadata(table_id, table_name, - catalog_url=_catalog_url, - **kwargs) + metadata = _download_metadata( + table_id, table_name, catalog_url=_catalog_url, **kwargs + ) data[table_name] = metadata @@ -332,17 +348,19 @@ def get_table_list(select=None, filters=None, catalog_url=None, **kwargs): _catalog_url = _get_catalog_url(catalog_url) - components = {"http": "https://" if options.use_https else "http://", - "baseurl": _catalog_url, - "catalog": CATALOG} + components = { + "http": "https://" if options.use_https else "http://", + "baseurl": _catalog_url, + "catalog": CATALOG, + } url = "{http}{baseurl}/{catalog}/Tables?$format=json".format(**components) params = {} if select: - params['$select'] = _select(select) + params["$select"] = _select(select) if filters: - params['$filter'] = _filters(filters) + params["$filter"] = _filters(filters) # additional parameters to requests request_kwargs = options.requests.copy() @@ -350,7 +368,7 @@ def get_table_list(select=None, filters=None, catalog_url=None, **kwargs): try: s = Session() - p = Request('GET', url, params=params).prepare() + p = Request("GET", url, params=params).prepare() logging.info("Download " + p.url) @@ -358,12 +376,12 @@ def get_table_list(select=None, filters=None, catalog_url=None, **kwargs): r.raise_for_status() res = r.json() - return res['value'] + return res["value"] except requests.HTTPError as http_err: raise requests.HTTPError( - "Downloading table list failed. {}".format(str(http_err)) - ) + f"Downloading table list failed. {str(http_err)}" + ) from http_err def get_info(table_id, catalog_url=None, **kwargs): @@ -386,10 +404,7 @@ def get_info(table_id, catalog_url=None, **kwargs): """ info_list = _download_metadata( - table_id, - "TableInfos", - catalog_url=_get_catalog_url(catalog_url), - **kwargs + table_id, "TableInfos", catalog_url=_get_catalog_url(catalog_url), **kwargs ) if len(info_list) > 0: @@ -420,15 +435,19 @@ def get_meta(table_id, name, catalog_url=None, **kwargs): """ return _download_metadata( - table_id, - name, - catalog_url=_get_catalog_url(catalog_url), - **kwargs + table_id, name, catalog_url=_get_catalog_url(catalog_url), **kwargs ) -def get_data(table_id, dir=None, typed=False, select=None, filters=None, - catalog_url=None, **kwargs): +def get_data( + table_id, + dir=None, + typed=False, + select=None, + filters=None, + catalog_url=None, + **kwargs, +): """Get the CBS data table. Parameters @@ -465,7 +484,7 @@ def get_data(table_id, dir=None, typed=False, select=None, filters=None, select=select, filters=filters, catalog_url=_catalog_url, - **kwargs + **kwargs, ) if "TypedDataSet" in metadata.keys(): @@ -474,22 +493,23 @@ def get_data(table_id, dir=None, typed=False, select=None, filters=None, data = metadata["UntypedDataSet"] exclude = [ - "TableInfos", "TypedDataSet", "UntypedDataSet", - "DataProperties", "CategoryGroups" + "TableInfos", + "TypedDataSet", + "UntypedDataSet", + "DataProperties", + "CategoryGroups", ] norm_cols = list(set(metadata.keys()) - set(exclude)) for norm_col in norm_cols: - metadata[norm_col] = {r['Key']: r for r in metadata[norm_col]} + metadata[norm_col] = {r["Key"]: r for r in metadata[norm_col]} for i in range(0, len(data)): - for norm_col in norm_cols: - try: v = data[i][norm_col] - data[i][norm_col] = metadata[norm_col][v]['Title'] + data[i][norm_col] = metadata[norm_col][v]["Title"] except KeyError: pass diff --git a/docs/conf.py b/docs/conf.py index 4b2cdfa..d6b8b70 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- # # CBSOData documentation build configuration file, created by # sphinx-quickstart on Sun Dec 11 20:22:38 2016. @@ -18,9 +17,9 @@ # import os import sys -sys.path.insert(0, os.path.abspath('..')) -import cbsodata +sys.path.insert(0, os.path.abspath("..")) + # -- General configuration ------------------------------------------------ @@ -31,40 +30,37 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.napoleon' -] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. # # source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'CBSOData' -copyright = u'2016, Jonathan de Bruin' -author = u'Jonathan de Bruin' +project = "CBSOData" +copyright = "2016, Jonathan de Bruin" +author = "Jonathan de Bruin" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '1.3' +version = "1.3" # The full version, including alpha/beta/rc tags. -release = '1.3' +release = "1.3" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -85,7 +81,7 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The reST default role (used for this markup: `text`) to use for all # documents. @@ -107,7 +103,7 @@ # show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] @@ -124,7 +120,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_rtd_theme' +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -158,7 +154,7 @@ # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied @@ -238,34 +234,36 @@ # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'CBSODatadoc' +htmlhelp_basename = "CBSODatadoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + # + # 'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + # + # 'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + # + # 'preamble': '', + # Latex figure (float) alignment + # + # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'CBSOData.tex', u'CBSOData Documentation', - u'Jonathan de Bruin', 'manual'), + ( + master_doc, + "CBSOData.tex", + "CBSOData Documentation", + "Jonathan de Bruin", + "manual", + ), ] # The name of an image file (relative to this directory) to place at the top of @@ -305,10 +303,7 @@ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'CBSOData', u'CBSOData Documentation', - [author], 1) -] +man_pages = [(master_doc, "CBSOData", "CBSOData Documentation", [author], 1)] # If true, show URL addresses after external links. # @@ -321,9 +316,15 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'CBSOData', u'CBSOData Documentation', - author, 'CBSOData', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "CBSOData", + "CBSOData Documentation", + author, + "CBSOData", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. diff --git a/docs/index.rst b/docs/index.rst index c275acd..5dcbb37 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,4 +8,3 @@ Contents: readme_link reference - diff --git a/docs/readme_link.rst b/docs/readme_link.rst index 6b2b3ec..72a3355 100644 --- a/docs/readme_link.rst +++ b/docs/readme_link.rst @@ -1 +1 @@ -.. include:: ../README.rst \ No newline at end of file +.. include:: ../README.rst diff --git a/docs/reference.rst b/docs/reference.rst index c56241a..dd3088e 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -2,4 +2,4 @@ Reference ========= .. automodule:: cbsodata - :members: \ No newline at end of file + :members: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..fa73d8f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,44 @@ +[project] +name = "cbsodata" +description = "Statistics Netherlands opendata API client for Python" +authors = [ + { name = "Jonathan de Bruin", email = "jonathandebruinos@gmail.com" } +] +readme = "README.md" +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12" +] +license = {text = "MIT"} +dependencies = ["requests"] +dynamic = ["version"] +requires-python = ">=3.8" + +[project.scripts] +cbsodata = "cbsodata.__main__:main" +cbs = "cbsodata.__main__:main" + +[project.optional-dependencies] +lint = ["ruff"] +test = ["pytest"] + +[build-system] +build-backend = 'setuptools.build_meta' +requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] + +[tool.setuptools] +packages = ["cbsodata"] + +[tool.setuptools_scm] +write_to = "cbsodata/_version.py" + +[tool.ruff] +select = ["E", "F", "UP", "I", "B"] + +[tool.ruff.isort] +force-single-line = true diff --git a/setup.py b/setup.py deleted file mode 100644 index d7db6f5..0000000 --- a/setup.py +++ /dev/null @@ -1,49 +0,0 @@ -from setuptools import setup - -from os import path -from codecs import open - -here = path.abspath(path.dirname(__file__)) - -# Use readme as long description -with open(path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - -# Extract version from cbsodata.py -for line in open(path.join('cbsodata', '__init__.py')): - if line.startswith('__version__'): - exec(line) - break - -setup( - name='cbsodata', - version=__version__, # noqa - description='Statistics Netherlands opendata API client for Python', - long_description=long_description, - url='https://github.com/J535D165/cbsodata', - author='Jonathan de Bruin', - author_email='jonathandebruinos@gmail.com', - license='MIT', - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7' - ], - keywords='cbs statistics odata netherlands dutch', - install_requires=['requests'], - entry_points={ - # two entry points: cbs and cbsodata - 'console_scripts': [ - 'cbsodata=cbsodata.__main__:main', - 'cbs=cbsodata.__main__:main', - ], - - }, - py_modules=['cbsodata.cbsodata3', 'cbsodata.__main__'], - tests_require=[ - 'pytest' - ], -) diff --git a/tests/test_cbsodata.py b/tests/test_cbsodata.py index c9a515f..b2eebde 100644 --- a/tests/test_cbsodata.py +++ b/tests/test_cbsodata.py @@ -1,45 +1,37 @@ import os import shutil -import requests - -import cbsodata - # testing deps import pytest +import requests +import cbsodata -datasets = [ - '82010NED', - '80884ENG' -] +datasets = ["82010NED", "80884ENG"] -datasets_derden = [ - '47003NED', - '47005NED' -] +datasets_derden = ["47003NED", "47005NED"] -TEST_ENV = 'test_env' +TEST_ENV = "test_env" def setup_module(module): - print('\nsetup_module()') + print("\nsetup_module()") if not os.path.exists(TEST_ENV): os.makedirs(TEST_ENV) def teardown_module(module): - print('teardown_module()') + print("teardown_module()") shutil.rmtree(TEST_ENV) + # Tests @pytest.mark.parametrize("table_id", datasets) def test_info(table_id): - # testing info = cbsodata.get_info(table_id) @@ -48,139 +40,121 @@ def test_info(table_id): @pytest.mark.parametrize("table_id", datasets) def test_download(table_id): - cbsodata.download_data(table_id) -@pytest.mark.parametrize("table_id", ['00000AAA']) +@pytest.mark.parametrize("table_id", ["00000AAA"]) def test_http_error(table_id): - try: cbsodata.get_data(table_id) except requests.HTTPError: assert True else: - assert False + raise AssertionError() def test_http_error_table_list(): - try: - cbsodata.get_table_list(catalog_url='test.cbs.nl') + cbsodata.get_table_list(catalog_url="test.cbs.nl") except requests.ConnectionError: assert True else: - assert False + raise AssertionError() @pytest.mark.parametrize("table_id", datasets) def test_http_https_download(table_id): - - cbsodata.options['use_https'] = True + cbsodata.options["use_https"] = True cbsodata.download_data(table_id) - cbsodata.options['use_https'] = False + cbsodata.options["use_https"] = False cbsodata.download_data(table_id) - cbsodata.options['use_https'] = True + cbsodata.options["use_https"] = True @pytest.mark.parametrize("table_id", datasets) def test_download_and_store(table_id): + cbsodata.download_data(table_id, dir=os.path.join(TEST_ENV, table_id)) - cbsodata.download_data( - table_id, - dir=os.path.join(TEST_ENV, table_id) - ) - - assert os.path.exists( - os.path.join(TEST_ENV, table_id, 'TableInfos.json') - ) + assert os.path.exists(os.path.join(TEST_ENV, table_id, "TableInfos.json")) @pytest.mark.parametrize("table_id", datasets) def test_get_data(table_id): - cbsodata.get_data(table_id) @pytest.mark.parametrize("table_id", datasets) def test_info_values(table_id): - info = cbsodata.get_info(table_id) # Check response is dict (not a list) assert isinstance(info, dict) # Check required keys are available - assert 'Description' in info.keys() - assert 'ID' in info.keys() - assert 'Identifier' in info.keys() + assert "Description" in info.keys() + assert "ID" in info.keys() + assert "Identifier" in info.keys() def test_table_list(): - assert len(cbsodata.get_table_list()) > 100 def test_filters(): - - default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection'] + default_sel_filt = cbsodata.get_info("82070ENG")["DefaultSelection"] filters_and_selections = default_sel_filt.split("&") for fs in filters_and_selections: - if fs.startswith('$filter='): + if fs.startswith("$filter="): filt = fs[8:] - cbsodata.get_data('82070ENG', filters=filt) + cbsodata.get_data("82070ENG", filters=filt) def test_select(): - - default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection'] + default_sel_filt = cbsodata.get_info("82070ENG")["DefaultSelection"] filters_and_selections = default_sel_filt.split("&") for fs in filters_and_selections: - if fs.startswith('$select='): + if fs.startswith("$select="): select = fs[8:] - cbsodata.get_data('82070ENG', select=select) + cbsodata.get_data("82070ENG", select=select) def test_select_list(): - - default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection'] + default_sel_filt = cbsodata.get_info("82070ENG")["DefaultSelection"] filters_and_selections = default_sel_filt.split("&") for fs in filters_and_selections: - if fs.startswith('$select='): + if fs.startswith("$select="): select = fs[8:] - cbsodata.get_data('82070ENG', select=select.split(', ')) + cbsodata.get_data("82070ENG", select=select.split(", ")) def test_select_subset(): - - default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection'] + default_sel_filt = cbsodata.get_info("82070ENG")["DefaultSelection"] filters_and_selections = default_sel_filt.split("&") for fs in filters_and_selections: - if fs.startswith('$select='): + if fs.startswith("$select="): select = fs[8:] - select_list = select.split(', ') - cbsodata.get_data('82070ENG', select=select_list[0:2]) + select_list = select.split(", ") + cbsodata.get_data("82070ENG", select=select_list[0:2]) def test_select_n_cols(): - - default_sel_filt = cbsodata.get_info('82070ENG')['DefaultSelection'] + default_sel_filt = cbsodata.get_info("82070ENG")["DefaultSelection"] filters_and_selections = default_sel_filt.split("&") for fs in filters_and_selections: - if fs.startswith('$select='): + if fs.startswith("$select="): select = fs[8:] - select_list = select.split(', ') - data = cbsodata.get_data('82070ENG', select=select_list[0:2]) + select_list = select.split(", ") + data = cbsodata.get_data("82070ENG", select=select_list[0:2]) assert len(data[0].keys()) == 2 assert len(data[5].keys()) == 2 @@ -189,56 +163,45 @@ def test_select_n_cols(): @pytest.mark.parametrize("table_id", datasets_derden) def test_get_table_list_derden(table_id): - # option 1 print("global") - cbsodata.options.catalog_url = 'dataderden.cbs.nl' + cbsodata.options.catalog_url = "dataderden.cbs.nl" data_option1 = cbsodata.get_table_list() - cbsodata.options.catalog_url = 'opendata.cbs.nl' + cbsodata.options.catalog_url = "opendata.cbs.nl" # option 2 print("context") - with cbsodata.catalog('dataderden.cbs.nl'): + with cbsodata.catalog("dataderden.cbs.nl"): data_option2 = cbsodata.get_table_list() # option 3 print("argument") - data_option3 = cbsodata.get_table_list( - catalog_url='dataderden.cbs.nl' - ) + data_option3 = cbsodata.get_table_list(catalog_url="dataderden.cbs.nl") assert len(data_option1[0].keys()) > 0 for key in data_option1[0].keys(): - - assert data_option1[0][key] == \ - data_option2[0][key] == data_option3[0][key] + assert data_option1[0][key] == data_option2[0][key] == data_option3[0][key] @pytest.mark.parametrize("table_id", datasets_derden) def test_get_data_derden(table_id): - # option 1 print("global") - cbsodata.options.catalog_url = 'dataderden.cbs.nl' + cbsodata.options.catalog_url = "dataderden.cbs.nl" data_option1 = cbsodata.get_data(table_id) - cbsodata.options.catalog_url = 'opendata.cbs.nl' + cbsodata.options.catalog_url = "opendata.cbs.nl" # option 2 print("context") - with cbsodata.catalog('dataderden.cbs.nl'): + with cbsodata.catalog("dataderden.cbs.nl"): data_option2 = cbsodata.get_data(table_id) # option 3 print("argument") - data_option3 = cbsodata.get_data( - table_id, - catalog_url='dataderden.cbs.nl' - ) + data_option3 = cbsodata.get_data(table_id, catalog_url="dataderden.cbs.nl") assert len(data_option1[0].keys()) > 0 for key in data_option1[0].keys(): - - assert data_option1[0][key] == \ - data_option2[0][key] == data_option3[0][key] + assert data_option1[0][key] == data_option2[0][key] == data_option3[0][key]