add scRNA data (#27)

meyer-lab · Jun 20, 2024 · 4dcee75
2 parents 3be767a + 753edba
commit 4dcee75
Show file tree

Hide file tree

Showing 17 changed files with 96,941 additions and 413 deletions.
diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
@@ -0,0 +1,15 @@
+name: Code Quality
+
+on: [push]
+
+jobs:
+    build:
+        runs-on: self-hosted
+        steps:
+        - uses: actions/checkout@v4
+        - name: Install dependencies
+          run: poetry install --no-interaction
+        - name: Run ruff check
+          run: poetry run ruff check .
+        - name: Run ruff format check
+          run: poetry run ruff format --check .
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -8,7 +8,7 @@ jobs:
     steps:
     - uses: actions/checkout@v4
     - name: Install dependencies
-      run: poetry install --no-root
+      run: poetry install --no-interaction
     - name: Test with pytest
       run: make coverage.xml
     - name: Upload coverage to Codecov

diff --git a/.gitignore b/.gitignore
@@ -131,3 +131,7 @@ dmypy.json
 .idea/
 
 .DS_Store
+
+.ruff_cache
+
+poetry.lock
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -6,15 +6,35 @@ authors = ["Cyrillus Tan <cyztan@gmail.com>"]
 license = "MIT"
 
 [tool.poetry.dependencies]
-python = ">=3.11,<3.13"
-numpy = "^1.23"
-pandas = "^1.5.0"
-xarray = "^2022.3.0"
+python = "^3.12"
+pandas = "^2.0.0"
+xarray = "^2024.6.0"
+anndata = "^0.10.7"
+numpy = "^1.26"
 
 [tool.poetry.dev-dependencies]
-pytest = "^6.2"
-pytest-cov = "^3.0.0"
+pytest = "^8.2"
+pytest-cov = "^5.0.0"
+ruff = "^0.4.9"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+    # Unused arguments
+    "ARG",
+]
diff --git a/setup.py b/setup.py
@@ -1,9 +1,11 @@
-from setuptools import setup, find_packages
+from setuptools import find_packages, setup
 
-setup(name='tensordata',
-      version='0.0.7',
-      description='A common repository for tensor structured datasets.',
-      url='https://github.com/meyer-lab/tensordata',
-      license='MIT',
-      packages=find_packages(exclude=['doc']),
-      install_requires=['numpy', 'tensorly'])
+setup(
+    name="tensordata",
+    version="0.0.7",
+    description="A common repository for tensor structured datasets.",
+    url="https://github.com/meyer-lab/tensordata",
+    license="MIT",
+    packages=find_packages(exclude=["doc"]),
+    install_requires=["numpy", "tensorly"],
+)
diff --git a/tensordata/__init__.py b/tensordata/__init__.py
@@ -1,8 +1,9 @@
-__version__ = '0.0.7'
+__version__ = "0.0.7"
+
 
 class Bunch(dict):
-    """ A Bunch, exposing dict keys as a keys() method.
-    Definition from scikit-learn. """
+    """A Bunch, exposing dict keys as a keys() method.
+    Definition from scikit-learn."""
 
     def __init__(self, **kwargs):
         super().__init__(kwargs)
@@ -16,18 +17,19 @@ def __dir__(self):
     def __getattr__(self, key):
         try:
             return self[key]
-        except KeyError:
-            raise AttributeError(key)
+        except KeyError as e:
+            raise AttributeError(key) from e
 
     def __setstate__(self, state):
         pass
 
 
 def xr_to_bunch(data):
     import xarray as xr
+
     assert isinstance(data, xr.DataArray)
     return Bunch(
-        tensor = data.to_numpy(),
-        mode = list(data.coords.dims),
-        axes = [data.coords[dim].values for dim in data.coords.dims],
-    )
+        tensor=data.to_numpy(),
+        mode=list(data.coords.dims),
+        axes=[data.coords[dim].values for dim in data.coords.dims],
+    )
diff --git a/tensordata/jones.py b/tensordata/jones.py
@@ -18,7 +18,8 @@
 
 
 def process_RA_Tensor():
-    """Structures all Rheumatoid Arthritis Synovial Fibroblast data into a usable tensor"""
+    """Structures all Rheumatoid Arthritis Synovial Fibroblast data into a
+    usable tensor"""
     RA_df = pd.DataFrame()
     donor_list = ["1869", "1931", "2159", "2586", "2645", "2708", "2759"]
     rep_list = [1, 2]
@@ -59,9 +60,9 @@ def process_RA_Tensor():
             # Background (Spike and Otherwise) Subtraction
             for stimulant in stimulants:
                 if stimulant in avg_data.columns:
-                    avg_data.loc[(avg_data["Stimulant"] == stimulant)][
-                        stimulant
-                    ] == np.nan
+                    avg_data.loc[
+                        (avg_data["Stimulant"] == stimulant), stimulant
+                    ] = np.nan
                 spike_row = (
                     avg_data.loc[
                         (avg_data["Stimulant"] == stimulant)
@@ -94,9 +95,7 @@ def process_RA_Tensor():
                         (avg_data["Stimulant"] == stimulant)
                         & (avg_data["Inhibitor"] == inh),
                         cytokines,
-                    ] = (
-                        basal_spike_df.max().to_frame().transpose().values
-                    )
+                    ] = basal_spike_df.max().to_frame().transpose().values
             avg_data[cytokines] = np.log(avg_data[cytokines].values)
             avg_data[cytokines] -= avg_data.loc[
                 avg_data.Stimulant == "Buffer", cytokines

diff --git a/tensordata/kaplonekVaccineSA.py b/tensordata/kaplonekVaccineSA.py
@@ -27,7 +27,7 @@ def data():
     antigens = list(dict.fromkeys(antigens))
 
     luminex_da = xr.DataArray(
-        np.full((len(df), len(antigens), len(detections)), np.NaN),
+        np.full((len(df), len(antigens), len(detections)), np.nan),
         coords={
             "Subject": df.index,
             "Antigen": antigens,

diff --git a/tensordata/scRNA/__init__.py b/tensordata/scRNA/__init__.py
@@ -0,0 +1 @@
+from tensordata.scRNA.main import *  # noqa