Create a plugin for additional filetypes (#40)

* plugin for additional file types * remove parquet and feather from xiplot * add plugin to publish workflow * Apply suggestions from code review Co-authored-by: Juniper Tyree <50025784+juntyr@users.noreply.github.com> * Add footnote about plugin to data_files.md * Add installation instructions to plugin README.md * unit tests for filetypes plugin * Do not force install test_plugin during testing Add it to requirements-dev.txt instead. And issue a warning during testing if not installed. --------- Co-authored-by: Juniper Tyree <50025784+juntyr@users.noreply.github.com>
edahelsinki · Aug 22, 2023 · dd23768 · dd23768
1 parent c601e79
commit dd23768
Show file tree

Hide file tree

Showing 16 changed files with 254 additions and 169 deletions.
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
@@ -10,23 +10,42 @@ on:
 
 jobs:
   deploy:
-
     runs-on: ubuntu-latest
-
     steps:
-    - uses: actions/checkout@v3
-    - name: Set up Python
-      uses: actions/setup-python@v3
-      with:
-        python-version: '3.x'
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        python -m pip install build
-    - name: Build package
-      run: python -m build
-    - name: Publish package
-      uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
-      with:
-        user: __token__
-        password: ${{ secrets.PYPI_API_TOKEN }}
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build
+      - name: Build package
+        run: python -m build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_API_TOKEN }}
+  plugin:
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.x"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install build
+      - name: Build plugin
+        run: python -m build plugin_xiplot_filetypes
+      - name: Publish plugin
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: plugin_xiplot_filetypes/dist
+          skip-existing: true
diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml
@@ -35,7 +35,6 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements.txt -r requirements-dev.txt
-          python -m pip install test_plugin/
 
       - name: Build package
         run: |

diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml
@@ -13,37 +13,36 @@ jobs:
   black:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.7'
-    - name: Install black
-      run: pip install black
-    - name: Run the black formatter
-      run: black --diff --check .
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.7"
+      - name: Install black
+        run: pip install black
+      - name: Run the black formatter
+        run: black --diff --check .
   isort:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.7'
-    - name: Install isort
-      run: pip install isort
-    - name: Run the isort linter
-      run: isort --diff --check .
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.7"
+      - name: Install isort
+        run: pip install isort
+      - name: Run the isort linter
+        run: isort --diff --check .
   flake8:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v2
-    - name: Set up Python
-      uses: actions/setup-python@v2
-      with:
-        python-version: '3.7'
-    - name: Install flake8
-      run: pip install pyproject-flake8
-    - name: Run the flake8 linter
-      # black uses a line length of 88
-      run: pflake8 --max-line-length 88 .
+      - uses: actions/checkout@v2
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: "3.7"
+      - name: Install flake8
+        run: pip install pyproject-flake8
+      - name: Run the flake8 linter
+        run: pflake8 .
diff --git a/.gitignore b/.gitignore
@@ -135,3 +135,4 @@ Untitled.ipynb
 data/
 uploads/
 node_modules
+plugins/*.whl
diff --git a/docs/user_guide/data_files.md b/docs/user_guide/data_files.md
@@ -46,7 +46,9 @@ The plots and the data file will be downloaded as a .tar file. It can be restore
 
 ## Dataframe loading and saving
 
-`xiplot` uses `pandas` to load dataframes from `.csv`, `.json`, `.ft` (feather format) and `.parquet` files. If you are running `xiplot` locally, you can simply copy your datasets into the `data/` folder. If you are accessing `xiplot` remotely or you do not want to pollute the `data/` folder, you can upload the dataset into memory directly inside `xiplot`: navigate to the data tab and either click the upload button to the right or drag the file into it.
+`xiplot` uses `pandas` to load dataframes from `.csv`, `.json`, `.feather`[^1], and `.parquet`[^1] files. If you are running `xiplot` locally, you can simply copy your datasets into the `data/` folder. If you are accessing `xiplot` remotely or you do not want to pollute the `data/` folder, you can upload the dataset into memory directly inside `xiplot`: navigate to the data tab and either click the upload button to the right or drag the file into it.
+
+[^1]: Feather and Parquet support is provided by the [xiplot_filetypes](../../plugin_xiplot_filetypes) plugin (which is installed by default in the non-WASM version).
 
 To save the currently loaded dataset, you can navigate to the data tab and press the "Download only the data file" button. The downloaded file will contain the unmodified content of the original dataset.
 
@@ -66,7 +68,7 @@ The combined plots-and-data file has a filename `DATASET.tar`, where `DATASET` i
 - `aux.EXT`
 - `meta.json`
 
-The `data.EXT` file contains the dataset dataframe that will be loaded using `read_csv`, `read_json`, `read_pickle`, or `read_feather`. Note that when reading a json file, `xiplot` tries reading the dataset both in columns (`pd.read_json(file, typ="frame", orient="columns")`) and split (`pd.read_json(file, typ="frame", orient="split")`) mode.
+The `data.EXT` file contains the dataset dataframe that will be loaded using `read_csv`, `read_json`, `read_feather`, or `read_parquet`. Note that when reading a json file, `xiplot` tries reading the dataset both in columns (`pd.read_json(file, typ="frame", orient="columns")`) and split (`pd.read_json(file, typ="frame", orient="split")`) mode.
 
 The `aux.EXT` file contains auxiliary columns for the dataset. It is stored in the same file format as the dataset dataframe. It must either be empty, or have the same number of rows as the dataset dataframe.
 
@@ -373,4 +375,4 @@ and where `PLOT_TYPES` is the list of plot type names registered with `xiplot`,
         }
     }
 }
-```
+```
diff --git a/plugin_xiplot_filetypes/README.md b/plugin_xiplot_filetypes/README.md
@@ -0,0 +1,12 @@
+# [&chi;iplot](https://github.com/edahelsinki/xiplot) plugin for additional file types
+
+This plugin adds support for additional file types (besides `csv` and `json`) to [&chi;iplot](https://github.com/edahelsinki/xiplot).
+Currently, this plugin adds support for `feather` and `parquet`.
+Note that in the [WASM version](https://edahelsinki.fi/xiplot) only support for `parquet` is added.
+
+## Installation
+
+In non-WASM [&chi;iplot](https://github.com/edahelsinki/xiplot) this plugin should be automatically installed.
+Otherwise, you can use `pip install xiplot_filetypes` in the same Python environment.
+
+In the [WASM version](https://edahelsinki.fi/xiplot) you can install the plugin by going to the "Plugin" tab and selecting `xiplot_filetypes`.
diff --git a/plugin_xiplot_filetypes/pyproject.toml b/plugin_xiplot_filetypes/pyproject.toml
@@ -0,0 +1,39 @@
+
+[project]
+name = "xiplot_filetypes"
+version = "1.0"
+authors = [{ name = "Anton Björklund", email = "anton.bjorklund@helsinki.fi" }]
+description = "Xiplot plugin for additional file types"
+license = { file = "../LICENCE-MIT" }
+readme = "README.md"
+
+requires-python = ">=3.7"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
+dependencies = [
+    "pandas",
+    "pyarrow >= 11.0.0; platform_system!='Emscripten'",
+    "fastparquet; platform_system=='Emscripten'",
+]
+
+[build-system]
+requires = ["setuptools>=42", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[tool.setuptools]
+packages = ["xiplot_filetypes"]
+
+[project.urls]
+homepage = "https://github.com/edahelsinki/xiplot"
+repository = "https://github.com/edahelsinki/xiplot.git"
+
+[project.entry-points."xiplot.plugin.read"]
+parquet-read = "xiplot_filetypes:read_parquet"
+feather-read = "xiplot_filetypes:read_feather"
+
+[project.entry-points."xiplot.plugin.write"]
+parquet-write = "xiplot_filetypes:write_parquet"
+feather-write = "xiplot_filetypes:write_feather"
diff --git a/plugin_xiplot_filetypes/xiplot_filetypes/__init__.py b/plugin_xiplot_filetypes/xiplot_filetypes/__init__.py
@@ -0,0 +1,32 @@
+from io import BytesIO
+
+import pandas as pd
+
+
+def read_parquet():
+    return pd.read_parquet, ".parquet"
+
+
+def write_parquet():
+    return pd.DataFrame.to_parquet, ".parquet", "application/octet-stream"
+
+
+def read_feather():
+    try:
+        df = pd.DataFrame()
+        ft = BytesIO()
+        df.reset_index().to_feather(ft)
+        pd.read_feather(ft)
+    except ImportError:
+        return
+
+    return pd.read_feather, ".feather"
+
+
+def write_feather():
+    try:
+        pd.DataFrame().reset_index().to_feather(BytesIO())
+    except ImportError:
+        return
+
+    return pd.DataFrame.to_feather, ".feather", "application/octet-stream"
diff --git a/plugins/.gitignore b/plugins/.gitignore
diff --git a/pyproject.toml b/pyproject.toml
@@ -28,8 +28,8 @@ dependencies = [
     "kaleido ~= 0.2.1; platform_system!='Emscripten'",
     "pandas >= 1.4.0, < 2.0.0",
     "plotly >= 5.9.0",
-    "pyarrow >= 11.0.0; platform_system!='Emscripten'",
     "scikit-learn >= 1.0; platform_system!='Emscripten'",
+    "xiplot_filetypes == 1.0; platform_system!='Emscripten'",
 ]
 
 [project.optional-dependencies]
@@ -55,11 +55,8 @@ xiplot = "xiplot:cli"
 requires = ["setuptools>=42", "wheel"]
 build-backend = "setuptools.build_meta"
 
-[tool.setuptools.packages.find]
-where = ["."]
-include = ["xiplot*"]
-exclude = ["tests", "test_plugin"]
-namespaces = true
+[tool.setuptools]
+packages = ["xiplot"]
 
 [tool.black]
 target-version = ['py37']
@@ -72,3 +69,5 @@ profile = "black"
 line_length = 79
 
 [tool.flake8]
+max-line-length = 88
+exclude = "build/*"
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -5,3 +5,4 @@ isort
 pyproject-flake8
 webdriver-manager
 selenium
+./test_plugin
diff --git a/requirements.txt b/requirements.txt
@@ -2,11 +2,10 @@ dash==2.6.2
 dash-extensions==0.1.4
 dash-mantine-components==0.10.2
 dash-uploader~=0.6.0
-feather-format>=0.4.1
 jsonschema~=4.6.0
 pandas>=1.4.0,<2.0.0
 plotly>=5.9.0
 scikit-learn>=1.0
 kaleido~=0.2.1
-pyarrow>=11.0.0
 packaging<22 # Needed for dash-uploader==0.6.0
+./plugin_xiplot_filetypes
diff --git a/tests/test_filetypes.py b/tests/test_filetypes.py
@@ -0,0 +1,40 @@
+from io import BytesIO
+
+import pandas as pd
+
+try:
+    from xiplot_filetypes import (
+        read_feather,
+        read_parquet,
+        write_feather,
+        write_parquet,
+    )
+except ImportError:
+    import sys
+    from pathlib import Path
+
+    sys.path.insert(
+        0, str(Path(__file__).parent.parent / "plugin_xiplot_filetypes")
+    )
+    from xiplot_filetypes import (
+        read_feather,
+        read_parquet,
+        write_feather,
+        write_parquet,
+    )
+
+
+def test_feather():
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    io = BytesIO()
+    write_feather()[0](df, io)
+    df2 = read_feather()[0](io)
+    assert df.equals(df2)
+
+
+def test_parquet():
+    df = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    io = BytesIO()
+    write_parquet()[0](df, io)
+    df2 = read_parquet()[0](io)
+    assert df.equals(df2)