
[WIP] SamTov measure memory scaling #476

Open · wants to merge 52 commits into base: main

Commits (52):
7909e35  remove GPU keyword (PythonFZ, Jan 21, 2022)
99ff3d4  Add GPU check and include it in the memory_manager (PythonFZ, Jan 21, 2022)
5abee31  Merge branch 'main' into gpu_batching (SamTov, Jan 21, 2022)
d1add32  Merge branch 'main' into gpu_batching (PythonFZ, Jan 24, 2022)
ab1a4ce  Merge branch 'main' into gpu_batching (PythonFZ, Jan 24, 2022)
2d85c89  start memory measurement modules. (SamTov, Jan 24, 2022)
4cd6d00  Intiial commit to scaling function updates. (SamTov, Jan 25, 2022)
eb6283b  Merge branch 'main' into SamTov_Measure_Memory_Scaling (SamTov, Jan 25, 2022)
bee4387  run black and isort (SamTov, Jan 25, 2022)
00f5cd2  Merge remote-tracking branch 'origin/SamTov_Measure_Memory_Scaling' i… (SamTov, Jan 25, 2022)
83d24f6  remove file call in CI. (SamTov, Jan 25, 2022)
81c867c  Fix additional flake8 import complaint (SamTov, Jan 25, 2022)
505acff  add config memory testing and include an override for batching. (SamTov, Jan 25, 2022)
e320755  remove config argument. (SamTov, Jan 25, 2022)
d898394  resolve flake8 complaint. (SamTov, Jan 25, 2022)
c974a78  CI profiling (PythonFZ, Feb 1, 2022)
25d0fe3  CI profiling (PythonFZ, Feb 1, 2022)
2b97375  update sqlite (PythonFZ, Feb 1, 2022)
7f51983  typo (PythonFZ, Feb 1, 2022)
4b23a73  patch ubuntu version (PythonFZ, Feb 1, 2022)
4729e28  try conda for newer sqlite version (PythonFZ, Feb 1, 2022)
43ed753  try conda for newer sqlite version (PythonFZ, Feb 1, 2022)
a83e48e  update sqlite version (PythonFZ, Feb 1, 2022)
a8c8af8  bugfix (PythonFZ, Feb 1, 2022)
883474d  add a plot (PythonFZ, Feb 1, 2022)
8d77a09  plot everything (PythonFZ, Feb 1, 2022)
0601c84  plot everything (PythonFZ, Feb 1, 2022)
aedc169  run ADF memory test (PythonFZ, Feb 1, 2022)
b9ca27f  run ADF memory test (PythonFZ, Feb 1, 2022)
492370f  Update test_memory.py (PythonFZ, Feb 1, 2022)
719de34  Update test_memory.py (PythonFZ, Feb 1, 2022)
3542778  reduce size even further (PythonFZ, Feb 1, 2022)
1ebdc2f  Update test_memory.py (PythonFZ, Feb 1, 2022)
e12368c  Update test_memory.py (PythonFZ, Feb 1, 2022)
869c047  remove print (PythonFZ, Feb 2, 2022)
4bf249c  Merge branch 'main' into SamTov_Measure_Memory_Scaling (PythonFZ, Feb 2, 2022)
1713f8a  clean up a bit (PythonFZ, Feb 2, 2022)
7898a6b  fix black / flake8 (PythonFZ, Feb 2, 2022)
8743d23  add plot function (PythonFZ, Feb 2, 2022)
327e538  add update to not spam to PR (PythonFZ, Feb 2, 2022)
427fd13  only run on push (PythonFZ, Feb 2, 2022)
f49303e  add package (PythonFZ, Feb 2, 2022)
3b8ad22  small code cleanup + update (PythonFZ, Feb 2, 2022)
8f14b22  Update lint.yaml (PythonFZ, Feb 2, 2022)
963580e  add diffusion + fix plots (PythonFZ, Feb 2, 2022)
de1ef0d  Merge remote-tracking branch 'origin/SamTov_Measure_Memory_Scaling' i… (PythonFZ, Feb 2, 2022)
b5cf14c  add continue-on-error to still gather the plot at the end. (PythonFZ, Feb 2, 2022)
d66ca6e  add GK diffusion (PythonFZ, Feb 2, 2022)
91e06b9  deselect memory by default (PythonFZ, Feb 2, 2022)
1eddad1  enable memory management (PythonFZ, Feb 2, 2022)
a262669  add einstein data range test (PythonFZ, Feb 2, 2022)
a756c6b  run with / without fixture (PythonFZ, Feb 2, 2022)
16 changes: 0 additions & 16 deletions .coveragerc

This file was deleted.

15 changes: 0 additions & 15 deletions .github/workflows/black.yaml

This file was deleted.

28 changes: 0 additions & 28 deletions .github/workflows/flake8.yml

This file was deleted.

17 changes: 0 additions & 17 deletions .github/workflows/isort.yaml

This file was deleted.

60 changes: 60 additions & 0 deletions .github/workflows/lint.yaml
@@ -0,0 +1,60 @@
name: Check coding style

on:
  push:
  pull_request:

jobs:
  black:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Black Check
        uses: psf/black@stable
        with:
          version: "22.1.0"

  isort:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install isort
        run: |
          pip install isort==5.10.1
      - name: run isort
        run: |
          isort --check-only --quiet .

  flake8:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install flake8
        run: |
          pip install flake8==4.0.1
      - name: run flake8
        run: |
          flake8 . --count --show-source --statistics

  pylint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install package
        run: |
          pip install .
      - name: Install pylint
        run: |
          pip install pylint
      - name: run pylint
        run: |
          pylint mdsuite
45 changes: 45 additions & 0 deletions .github/workflows/pytest-memory.yml
@@ -0,0 +1,45 @@
name: pytest-memory profiling

on:
  push:

jobs:
  memory-profiling:
    runs-on: "ubuntu-latest"

    steps:
      - uses: actions/checkout@v2
      - uses: iterative/setup-cml@v1
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r dev-requirements.txt
          pip install pytest-monitor tabulate
      - name: Install package
        run: |
          pip install .
      - name: Pytest
        continue-on-error: true
        env:
          CUDA_VISIBLE_DEVICES: -1
          TF_CPP_MIN_LOG_LEVEL: 3
        # this might be really dumb, but we have to suppress libcudart error
        run: |
          pytest -m "memory" ./CI/
      - name: Write CML report
        env:
          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Post reports as comments in GitHub PRs
          echo "# Memory Scaling" >> report.md
          python ./CI/memory_scaling/plot_results.py ./.pymon plot.png > table.md
          cml publish ./plot.png --md >> report.md
          echo "# Raw data" >> report.md
          echo "Activate in workflow file" >> report.md
          # cat table.md >> report.md
          # cml-send-comment --pr --update report.md
          cml-send-comment report.md
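The workflow above relies on pytest-monitor writing its measurements to a SQLite file (`./.pymon` here) containing a `TEST_METRICS` table, which `plot_results.py` then reads. A minimal sketch of that read step is below; the helper name `load_metrics` is my own, not part of the PR, but the `ITEM_VARIANT` and `MEM_USAGE` columns are the ones the PR's plotting script uses:

```python
import sqlite3

import pandas as pd


def load_metrics(db_path: str) -> pd.DataFrame:
    """Read the TEST_METRICS table from a pytest-monitor SQLite file and
    split each parametrized test id into a test name and a numeric
    parameter, mirroring what plot_results.py does."""
    con = sqlite3.connect(db_path)
    try:
        df = pd.read_sql_query("SELECT * FROM TEST_METRICS", con)
    finally:
        con.close()
    # "test_rdf[100]" -> test name "test_rdf", parameter 100.0
    df["test"] = df["ITEM_VARIANT"].str.split("[").str[0]
    df["n"] = df["ITEM_VARIANT"].str.extract(r"(\d+)", expand=False).astype(float)
    return df
```

After a local run such as `pytest -m "memory" ./CI/`, `load_metrics("./.pymon")` yields a frame that can be grouped by `test` and plotted against `n`.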
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -4,7 +4,7 @@ fail_fast: true

repos:
  - repo: https://github.com/psf/black
-    rev: 21.12b0
+    rev: 22.1.0
    hooks:
      - id: black
@@ -30,6 +30,7 @@
from zinchub import DataHub

import mdsuite as mds
+from mdsuite.utils.testing import assertDeepAlmostEqual


@pytest.fixture(scope="session")

@@ -63,6 +64,5 @@ def test_project(traj_file, true_values, tmp_path):
        "NaCl", simulation_data=traj_file, timestep=0.002, temperature=1400
    )

-    # computation = project.run.EinsteinHelfandIonicConductivity(plot=False)
-
-    # assertDeepAlmostEqual(computation["NaCl"].data_dict, true_values, decimal=-6)
+    computation = project.run.EinsteinHelfandIonicConductivity(plot=False)
+    assertDeepAlmostEqual(computation["NaCl"].data_dict, true_values, decimal=-6)
46 changes: 46 additions & 0 deletions CI/memory_scaling/plot_results.py
@@ -0,0 +1,46 @@
import argparse
import sqlite3

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def get_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("db")
    parser.add_argument("plot")
    return parser.parse_args()


if __name__ == "__main__":
    parsed = get_parser()
    con = sqlite3.connect(parsed.db)
    df = pd.read_sql_query("SELECT * FROM TEST_METRICS", con)

    # get the name of the test without the number of points
    df["test"] = df["ITEM_VARIANT"].str.split("[").str[0]
    # set the index to the number of points
    df = df.set_index("ITEM_VARIANT")
    df.index = df.index.str.extract(r"(\d+)", expand=False).astype(float)

    # group by memory usage
    grouped = df[["MEM_USAGE", "test"]].groupby("test")

    ncols = 2
    nrows = int(np.ceil(grouped.ngroups / ncols))

    fig, axes = plt.subplots(nrows=nrows, ncols=ncols)

    for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
        grouped.get_group(key).plot(ax=ax)
        ax.legend()
        ax.set_title(key)
        ax.set_ylabel("MEM_USAGE / MB")
    plt.tight_layout()
    fig.savefig(parsed.plot)
    print(
        df[
            ["KERNEL_TIME", "CPU_USAGE", "MEM_USAGE", "test", "TOTAL_TIME", "USER_TIME"]
        ].to_markdown()
    )
113 changes: 113 additions & 0 deletions CI/memory_scaling/test_memory.py
@@ -0,0 +1,113 @@
import numpy as np
import pytest

import mdsuite
from mdsuite.database.mdsuite_properties import mdsuite_properties
from mdsuite.database.simulation_database import (
    SpeciesInfo,
    TrajectoryChunkData,
    TrajectoryMetadata,
)
from mdsuite.file_io import script_input

mdsuite.config.memory_fraction = 1.0
mdsuite.config.memory_scaling_test = True


def get_project(tmp_path, n_configs, n_parts) -> mdsuite.Project:
    """Build an MDSuite Project with dummy data

    This creates a project with randomly generated position and velocity
    data for the given number of configurations and a variable number of
    particles set by the parametrization.
    """
    n_dims = 3
    time_step = 0.1
    sp_name = "species"
    positions = np.random.rand(*(n_configs, n_parts, n_dims))
    velocities = np.random.rand(*(n_configs, n_parts, n_dims))

    species_list = [
        SpeciesInfo(
            name=sp_name,
            n_particles=n_parts,
            mass=1234,
            properties=[mdsuite_properties.positions, mdsuite_properties.velocities],
        )
    ]

    metadata = TrajectoryMetadata(
        species_list=species_list,
        n_configurations=n_configs,
        sample_rate=1,
        box_l=3 * [1.1],
    )
    data = TrajectoryChunkData(species_list=species_list, chunk_size=n_configs)
    data.add_data(positions, 0, sp_name, "Positions")
    data.add_data(velocities, 0, sp_name, "Velocities")

    proc = script_input.ScriptInput(data=data, metadata=metadata, name="test_name")

    project = mdsuite.Project(name="test_proj", storage_path=tmp_path)
    project.add_experiment(name="test_experiment", timestep=time_step)
    exp = project.experiments["test_experiment"]
    exp.add_data(proc)

    return project


@pytest.mark.parametrize("n_parts", [x for x in range(100, 10000, 200)])
@pytest.mark.memory
def test_rdf(tmp_path, n_parts):
    project = get_project(tmp_path, n_configs=15, n_parts=n_parts)
    _ = project.run.RadialDistributionFunction(number_of_configurations=10, plot=False)


@pytest.fixture(params=[x for x in range(100, 10000, 200)])
def rdf_project(tmp_path, request):
    project = get_project(tmp_path, n_configs=15, n_parts=request.param)
    return project


@pytest.mark.memory
def test_rdf_w_fixt(rdf_project):
    _ = rdf_project.run.RadialDistributionFunction(
        number_of_configurations=10, plot=False
    )


# @pytest.mark.parametrize("n_parts", [x for x in range(10, 300, 10)])
# @pytest.mark.memory
# def test_adf(tmp_path, n_parts):
#     project = get_project(tmp_path, n_configs=5, n_parts=n_parts)
#     _ = project.run.AngularDistributionFunction(number_of_configurations=2, plot=False)
#
#
# @pytest.mark.parametrize("n_parts", [x for x in range(100, 12000, 200)])
# @pytest.mark.memory
# def test_rdf(tmp_path, n_parts):
#     project = get_project(tmp_path, n_configs=15, n_parts=n_parts)
#     _ = project.run.RadialDistributionFunction(number_of_configurations=10, plot=False)
#
#
# @pytest.mark.parametrize("n_configs", [x for x in range(100, 12000, 200)])
# @pytest.mark.memory
# def test_einstein_diffusion(tmp_path, n_configs):
#     # TODO What do we want to actually loop over
#     project = get_project(tmp_path, n_configs=n_configs, n_parts=100)
#     _ = project.run.EinsteinDiffusionCoefficients(plot=False)
#
#
# @pytest.mark.parametrize("data_range", [x for x in range(10, 10000, 200)])
# @pytest.mark.memory
# def test_einstein_diffusion_data_range(tmp_path, data_range):
#     project = get_project(tmp_path, n_configs=12000, n_parts=100)
#     _ = project.run.EinsteinDiffusionCoefficients(plot=False, data_range=data_range)
#
#
# @pytest.mark.parametrize("n_configs", [x for x in range(500, 12000, 200)])
# @pytest.mark.memory
# def test_gk_diffusion(tmp_path, n_configs):
#     # TODO What do we want to actually loop over
#     project = get_project(tmp_path, n_configs=n_configs, n_parts=100)
#     _ = project.run.GreenKuboDiffusionCoefficients(plot=False)
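The parametrized tests above sweep `n_parts` (and, in the commented-out variants, `n_configs` or `data_range`) so that memory use can be measured against problem size. To condense such a sweep into one scaling number, one option, sketched below as my own illustration rather than anything in the PR, is to assume an approximate power law mem ~ a * n**b and fit a line in log-log space:

```python
import numpy as np


def scaling_exponent(sizes, mem_usage):
    """Estimate the exponent b in mem ~ a * n**b from measured
    (size, memory) points via a least-squares line fit in log-log space."""
    log_n = np.log(np.asarray(sizes, dtype=float))
    log_m = np.log(np.asarray(mem_usage, dtype=float))
    b, _log_a = np.polyfit(log_n, log_m, 1)
    return b
```

For an analysis whose memory grows quadratically in the number of particles the estimate should come out near 2; real measurements include constant overhead, so the fit is only indicative at small sizes.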
5 changes: 3 additions & 2 deletions CI/unit_tests/memory_manager/test_memory_manager.py
@@ -23,6 +23,7 @@

import numpy as np

+import mdsuite
from mdsuite.memory_management.memory_manager import MemoryManager

@@ -146,7 +147,6 @@ def test_get_batch_size(self):
        # Test correct returns for 1 batch
        self.memory_manager.database = TestDatabase(data_size=500, rows=10, columns=10)
        self.memory_manager.data_path = ["Test/Path"]
-        self.memory_manager.memory_fraction = 0.5
        self.memory_manager.machine_properties["memory"] = 50000
        batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size(
            system=False

@@ -188,7 +188,8 @@ def test_get_optimal_batch_size(self):
        the same value that is passed to it.
        """
        data = self.memory_manager._get_optimal_batch_size(10)
-        self.assertEqual(data, data)  # Todo: no shit, sherlock
+        self.assertEqual(data, 10)  # Todo: no shit, sherlock
+        mdsuite.config.memory_scaling_test = True

    def test_compute_atomwise_minibatch(self):
        """