diff --git a/.coveragerc b/.coveragerc
deleted file mode 100644
index 801ce14a..00000000
--- a/.coveragerc
+++ /dev/null
@@ -1,16 +0,0 @@
-[run]
-branch = True
-relative_files = True
-
-[report]
-# Regexes for lines to exclude from consideration
-omit =
-# omit anything in a Temp directory anywhere
-    */Temp/*
-    */AppData/*
-    */CI/*
-
-ignore_errors = True
-
-[html]
-directory = coverage_html_report
diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml
deleted file mode 100644
index 882bb16e..00000000
--- a/.github/workflows/black.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-name: Check black coding style
-
-on:
-  push:
-  pull_request:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: Black Check
-        uses: psf/black@stable
-        with:
-          version: "21.12b0"
diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml
deleted file mode 100644
index 46e57afa..00000000
--- a/.github/workflows/flake8.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-name: flake8 Test
-
-on:
-  push:
-  pull_request:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version:
-          - 3.8
-
-    steps:
-      - uses: actions/checkout@v2
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          pip install flake8==4.0.1
-      - name: Lint with flake8
-        run: |
-          # stop the build if there are Python syntax errors or undefined names
-          flake8 . --count --show-source --statistics
diff --git a/.github/workflows/isort.yaml b/.github/workflows/isort.yaml
deleted file mode 100644
index 5031d560..00000000
--- a/.github/workflows/isort.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-name: Check isort
-
-on:
-  push:
-  pull_request:
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v2
-      - name: Install isort
-        run: |
-          pip install isort==5.10.1
-      - name: run isort
-        run: |
-          isort --check-only --quiet .
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 00000000..2e6ccbd6
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,60 @@
+name: Check coding style
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  black:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - name: Black Check
+        uses: psf/black@stable
+        with:
+          version: "22.1.0"
+
+  isort:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install isort
+        run: |
+          pip install isort==5.10.1
+      - name: run isort
+        run: |
+          isort --check-only --quiet .
+
+  flake8:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install flake8
+        run: |
+          pip install flake8==4.0.1
+      - name: run flake8
+        run: |
+          flake8 . --count --show-source --statistics
+
+  pylint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: '3.x'
+      - name: Install package
+        run: |
+          pip install .
+      - name: Install pylint
+        run: |
+          pip install pylint
+      - name: run pylint
+        run: |
+          pylint mdsuite
diff --git a/.github/workflows/pytest-memory.yml b/.github/workflows/pytest-memory.yml
new file mode 100644
index 00000000..a21da825
--- /dev/null
+++ b/.github/workflows/pytest-memory.yml
@@ -0,0 +1,45 @@
+name: pytest-memory profiling
+
+on:
+  push:
+
+jobs:
+  memory-profiling:
+    runs-on: "ubuntu-latest"
+
+    steps:
+      - uses: actions/checkout@v2
+      - uses: iterative/setup-cml@v1
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r dev-requirements.txt
+          pip install pytest-monitor tabulate
+      - name: Install package
+        run: |
+          pip install .
+      - name: Pytest
+        continue-on-error: true
+        env:
+          CUDA_VISIBLE_DEVICES: -1
+          TF_CPP_MIN_LOG_LEVEL: 3
+          # this might be really dumb, but we have to suppress libcudart error
+        run: |
+          pytest -m "memory" ./CI/
+      - name: Write CML report
+        env:
+          REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Post reports as comments in GitHub PRs
+          echo "# Memory Scaling" >> report.md
+          python ./CI/memory_scaling/plot_results.py ./.pymon plot.png > table.md
+          cml publish ./plot.png --md >> report.md
+          echo "# Raw data" >> report.md
+          echo "Activate in workflow file" >> report.md
+          # cat table.md >> report.md
+          # cml-send-comment --pr --update report.md
+          cml-send-comment report.md
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c54f01a8..289a0562 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ fail_fast: true
 
 repos:
   - repo: https://github.com/psf/black
-    rev: 21.12b0
+    rev: 22.1.0
     hooks:
       - id: black
 
diff --git a/CI/integration_tests/calculators/test_einstein_helfand_ionic_conductivity.py b/CI/integration_tests/calculators/test_einstein_helfand_ionic_conductivity.py
index 88f1e5c5..8f3c507f 100644
--- a/CI/integration_tests/calculators/test_einstein_helfand_ionic_conductivity.py
+++ b/CI/integration_tests/calculators/test_einstein_helfand_ionic_conductivity.py
@@ -30,6 +30,7 @@ from zinchub import DataHub
 
 import mdsuite as mds
+from mdsuite.utils.testing import assertDeepAlmostEqual
 
 
 @pytest.fixture(scope="session")
@@ -63,6 +64,5 @@ def test_project(traj_file, true_values, tmp_path):
         "NaCl", simulation_data=traj_file, timestep=0.002, temperature=1400
     )
 
-    # computation = project.run.EinsteinHelfandIonicConductivity(plot=False)
-
-    # assertDeepAlmostEqual(computation["NaCl"].data_dict, true_values, decimal=-6)
+    computation = project.run.EinsteinHelfandIonicConductivity(plot=False)
+    assertDeepAlmostEqual(computation["NaCl"].data_dict, true_values, decimal=-6)
diff --git a/CI/memory_scaling/plot_results.py b/CI/memory_scaling/plot_results.py
new file mode 100644
index 00000000..7424b2f9
--- /dev/null
+++ b/CI/memory_scaling/plot_results.py
@@ -0,0 +1,46 @@
+import argparse
+import sqlite3
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+
+def get_parser():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("db")
+    parser.add_argument("plot")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    parsed = get_parser()
+    con = sqlite3.connect(parsed.db)
+    df = pd.read_sql_query("SELECT * FROM TEST_METRICS", con)
+
+    # get the name of the test without the number of points
+    df["test"] = df["ITEM_VARIANT"].str.split("[").str[0]
+    # set the index to the number of points
+    df = df.set_index("ITEM_VARIANT")
+    df.index = df.index.str.extract(r"(\d+)", expand=False).astype(float)
+
+    # group by memory usage
+    grouped = df[["MEM_USAGE", "test"]].groupby("test")
+
+    ncols = 2
+    nrows = int(np.ceil(grouped.ngroups / ncols))
+
+    fig, axes = plt.subplots(nrows=nrows, ncols=ncols)
+
+    for (key, ax) in zip(grouped.groups.keys(), axes.flatten()):
+        grouped.get_group(key).plot(ax=ax)
+        ax.legend()
+        ax.set_title(key)
+        ax.set_ylabel("MEM_USAGE / MB")
+    plt.tight_layout()
+    fig.savefig(parsed.plot)
+    print(
+        df[
+            ["KERNEL_TIME", "CPU_USAGE", "MEM_USAGE", "test", "TOTAL_TIME", "USER_TIME"]
+        ].to_markdown()
+    )
diff --git a/CI/memory_scaling/test_memory.py b/CI/memory_scaling/test_memory.py
new file mode 100644
index 00000000..cce64d3f
--- /dev/null
+++ b/CI/memory_scaling/test_memory.py
@@ -0,0 +1,113 @@
+import numpy as np
+import pytest
+
+import mdsuite
+from mdsuite.database.mdsuite_properties import mdsuite_properties
+from mdsuite.database.simulation_database import (
+    SpeciesInfo,
+    TrajectoryChunkData,
+    TrajectoryMetadata,
+)
+from mdsuite.file_io import script_input
+
+mdsuite.config.memory_fraction = 1.0
+mdsuite.config.memory_scaling_test = True
+
+
+def get_project(tmp_path, n_configs, n_parts) -> mdsuite.Project:
+    """Build a MDSuite Project with dummy data
+
+    This creates a project with data for velocities and positions
+    generated randomly for the given number of configurations and a
+    variable number of particles as set by the fixture definition.
+    """
+    n_dims = 3
+    time_step = 0.1
+    sp_name = "species"
+    positions = np.random.rand(*(n_configs, n_parts, n_dims))
+    velocities = np.random.rand(*(n_configs, n_parts, n_dims))
+
+    species_list = [
+        SpeciesInfo(
+            name=sp_name,
+            n_particles=n_parts,
+            mass=1234,
+            properties=[mdsuite_properties.positions, mdsuite_properties.velocities],
+        )
+    ]
+
+    metadata = TrajectoryMetadata(
+        species_list=species_list,
+        n_configurations=n_configs,
+        sample_rate=1,
+        box_l=3 * [1.1],
+    )
+    data = TrajectoryChunkData(species_list=species_list, chunk_size=n_configs)
+    data.add_data(positions, 0, sp_name, "Positions")
+    data.add_data(velocities, 0, sp_name, "Velocities")
+
+    proc = script_input.ScriptInput(data=data, metadata=metadata, name="test_name")
+
+    project = mdsuite.Project(name="test_proj", storage_path=tmp_path)
+    project.add_experiment(name="test_experiment", timestep=time_step)
+    exp = project.experiments["test_experiment"]
+    exp.add_data(proc)
+
+    return project
+
+
+@pytest.mark.parametrize("n_parts", [x for x in range(100, 10000, 200)])
+@pytest.mark.memory
+def test_rdf(tmp_path, n_parts):
+    project = get_project(tmp_path, n_configs=15, n_parts=n_parts)
+    _ = project.run.RadialDistributionFunction(number_of_configurations=10, plot=False)
+
+
+@pytest.fixture(params=[x for x in range(100, 10000, 200)])
+def rdf_project(tmp_path, request):
+    project = get_project(tmp_path, n_configs=15, n_parts=request.param)
+    return project
+
+
+@pytest.mark.memory
+def test_rdf_w_fixt(rdf_project):
+    _ = rdf_project.run.RadialDistributionFunction(
+        number_of_configurations=10, plot=False
+    )
+
+
+# @pytest.mark.parametrize("n_parts", [x for x in range(10, 300, 10)])
+# @pytest.mark.memory
+# def test_adf(tmp_path, n_parts):
+#     project = get_project(tmp_path, n_configs=5, n_parts=n_parts)
+#     _ = project.run.AngularDistributionFunction(number_of_configurations=2, plot=False)
+#
+#
+# @pytest.mark.parametrize("n_parts", [x for x in range(100, 12000, 200)])
+# @pytest.mark.memory
+# def test_rdf(tmp_path, n_parts):
+#     project = get_project(tmp_path, n_configs=15, n_parts=n_parts)
+#     _ = project.run.RadialDistributionFunction(number_of_configurations=10, plot=False)
+#
+#
+# @pytest.mark.parametrize("n_configs", [x for x in range(100, 12000, 200)])
+# @pytest.mark.memory
+# def test_einstein_diffusion(tmp_path, n_configs):
+#     # TODO What do we want to actually loop over
+#     project = get_project(tmp_path, n_configs=n_configs, n_parts=100)
+#     _ = project.run.EinsteinDiffusionCoefficients(plot=False)
+#
+#
+# @pytest.mark.parametrize("data_range", [x for x in range(10, 10000, 200)])
+# @pytest.mark.memory
+# def test_einstein_diffusion_data_range(tmp_path, data_range):
+#     project = get_project(tmp_path, n_configs=12000, n_parts=100)
+#     _ = project.run.EinsteinDiffusionCoefficients(plot=False, data_range=data_range)
+#
+#
+# @pytest.mark.parametrize("n_configs", [x for x in range(500, 12000, 200)])
+# @pytest.mark.memory
+# def test_gk_diffusion(tmp_path, n_configs):
+#     # TODO What do we want to actually loop over
+#     project = get_project(tmp_path, n_configs=n_configs, n_parts=100)
+#     _ = project.run.GreenKuboDiffusionCoefficients(plot=False)
diff --git a/CI/unit_tests/memory_manager/test_memory_manager.py b/CI/unit_tests/memory_manager/test_memory_manager.py
index 964e1a5b..f13d254f 100644
--- a/CI/unit_tests/memory_manager/test_memory_manager.py
+++ b/CI/unit_tests/memory_manager/test_memory_manager.py
@@ -23,6 +23,7 @@
 
 import numpy as np
 
+import mdsuite
 from mdsuite.memory_management.memory_manager import MemoryManager
 
 
@@ -146,7 +147,6 @@ def test_get_batch_size(self):
         # Test correct returns for 1 batch
         self.memory_manager.database = TestDatabase(data_size=500, rows=10, columns=10)
         self.memory_manager.data_path = ["Test/Path"]
-        self.memory_manager.memory_fraction = 0.5
         self.memory_manager.machine_properties["memory"] = 50000
         batch_size, number_of_batches, remainder = self.memory_manager.get_batch_size(
             system=False
@@ -188,7 +188,8 @@ def test_get_optimal_batch_size(self):
         the same value that is passed to it.
         """
         data = self.memory_manager._get_optimal_batch_size(10)
-        self.assertEqual(data, data)  # Todo: no shit, sherlock
+        self.assertEqual(data, 10)  # Todo: no shit, sherlock
+        mdsuite.config.memory_scaling_test = True
 
     def test_compute_atomwise_minibatch(self):
         """
diff --git a/CI/unit_tests/utils/test_meta_functions.py b/CI/unit_tests/utils/test_meta_functions.py
index 9b6c8e52..da167981 100644
--- a/CI/unit_tests/utils/test_meta_functions.py
+++ b/CI/unit_tests/utils/test_meta_functions.py
@@ -179,7 +179,7 @@ def func(x: np.ndarray):
         -------
         x**2
         """
-        return x ** 2
+        return x**2
 
     x_dat = np.linspace(-10, 10, 1000)
     data = [x_dat, func(x_dat)]
diff --git a/mdsuite/calculators/angular_distribution_function.py b/mdsuite/calculators/angular_distribution_function.py
index 41cb9e75..94a5a6f1 100644
--- a/mdsuite/calculators/angular_distribution_function.py
+++ b/mdsuite/calculators/angular_distribution_function.py
@@ -421,7 +421,7 @@ def _build_histograms(self, positions, species_indices, angles):
 
         # Get the indices required.
         angle_vals, pre_factor = get_angles(r_ij_mat, tmp)
-        pre_factor = 1 / pre_factor ** self.norm_power
+        pre_factor = 1 / pre_factor**self.norm_power
         histogram, _ = np.histogram(
             angle_vals,
             bins=self.args.number_of_bins,
diff --git a/mdsuite/calculators/coordination_number_calculation.py b/mdsuite/calculators/coordination_number_calculation.py
index c081e77d..ac68d2d0 100644
--- a/mdsuite/calculators/coordination_number_calculation.py
+++ b/mdsuite/calculators/coordination_number_calculation.py
@@ -279,15 +279,12 @@ def _get_coordination_numbers(self):
                 self.integral_data[self.indices[0][1]],
             ]
         )
-        first_shell_error = (
-            np.std(
-                [
-                    self.integral_data[self.indices[0][0]],
-                    self.integral_data[self.indices[0][1]],
-                ]
-            )
-            / np.sqrt(2)
-        )
+        first_shell_error = np.std(
+            [
+                self.integral_data[self.indices[0][0]],
+                self.integral_data[self.indices[0][1]],
+            ]
+        ) / np.sqrt(2)
 
         # # TODO what about second shell?
         # second_shell = (
@@ -368,8 +365,8 @@ def plot_data(self, data):
                 x_data=val[self.result_series_keys[0]],
                 y_data=val[self.result_series_keys[1]],
                 title=(
-                    fr"{selected_species}: {val[self.result_keys[0]]: 0.3E} +-"
-                    fr" {val[self.result_keys[1]]: 0.3E}"
+                    rf"{selected_species}: {val[self.result_keys[0]]: 0.3E} +-"
+                    rf" {val[self.result_keys[1]]: 0.3E}"
                 ),
                 layouts=[model_1, model_2],
             )
diff --git a/mdsuite/calculators/einstein_helfand_ionic_conductivity.py b/mdsuite/calculators/einstein_helfand_ionic_conductivity.py
index dc84c572..4ac202c0 100644
--- a/mdsuite/calculators/einstein_helfand_ionic_conductivity.py
+++ b/mdsuite/calculators/einstein_helfand_ionic_conductivity.py
@@ -145,7 +145,7 @@ def _calculate_prefactor(self):
         """
 
         # Calculate the prefactor
-        numerator = (self.experiment.units["length"] ** 2) * (elementary_charge ** 2)
+        numerator = (self.experiment.units["length"] ** 2) * (elementary_charge**2)
         denominator = (
             6
             * self.experiment.units["time"]
diff --git a/mdsuite/calculators/green_kubo_ionic_conductivity.py b/mdsuite/calculators/green_kubo_ionic_conductivity.py
index 4590ddfb..04e6c6e0 100644
--- a/mdsuite/calculators/green_kubo_ionic_conductivity.py
+++ b/mdsuite/calculators/green_kubo_ionic_conductivity.py
@@ -170,7 +170,7 @@ def _calculate_prefactor(self):
         """
 
         # Calculate the prefactor
-        numerator = (elementary_charge ** 2) * (self.experiment.units["length"] ** 2)
+        numerator = (elementary_charge**2) * (self.experiment.units["length"] ** 2)
         denominator = (
             3
             * boltzmann_constant
@@ -203,15 +203,18 @@ def ensemble_operation(self, ensemble):
         -------
         MSD of the tensor_values.
""" - jacf = self.args.data_range * tf.reduce_sum( - tfp.stats.auto_correlation( - tf.gather(ensemble, self.args.tau_values, axis=1), - normalize=False, - axis=1, - center=False, - ), - axis=-1, - )[0, :] + jacf = ( + self.args.data_range + * tf.reduce_sum( + tfp.stats.auto_correlation( + tf.gather(ensemble, self.args.tau_values, axis=1), + normalize=False, + axis=1, + center=False, + ), + axis=-1, + )[0, :] + ) self.jacf += jacf self.sigma.append( np.trapz( diff --git a/mdsuite/calculators/green_kubo_thermal_conductivity.py b/mdsuite/calculators/green_kubo_thermal_conductivity.py index 053473be..02d8bcb6 100644 --- a/mdsuite/calculators/green_kubo_thermal_conductivity.py +++ b/mdsuite/calculators/green_kubo_thermal_conductivity.py @@ -165,7 +165,7 @@ def _calculate_prefactor(self): denominator = ( 3 * (self.args.data_range - 1) - * self.experiment.temperature ** 2 + * self.experiment.temperature**2 * self.experiment.units["boltzman"] * self.experiment.volume ) diff --git a/mdsuite/calculators/nernst_einstein_ionic_conductivity.py b/mdsuite/calculators/nernst_einstein_ionic_conductivity.py index 3bed9827..9068639d 100644 --- a/mdsuite/calculators/nernst_einstein_ionic_conductivity.py +++ b/mdsuite/calculators/nernst_einstein_ionic_conductivity.py @@ -182,7 +182,7 @@ def _nernst_einstein(self, diffusion_information: list): """ # evaluate the prefactor - numerator = self.experiment.number_of_atoms * (elementary_charge ** 2) + numerator = self.experiment.number_of_atoms * (elementary_charge**2) denominator = ( boltzmann_constant * self.experiment.temperature @@ -227,7 +227,7 @@ def _corrected_nernst_einstein( """ # evaluate the prefactor - numerator = self.experiment.number_of_atoms * (elementary_charge ** 2) + numerator = self.experiment.number_of_atoms * (elementary_charge**2) denominator = ( boltzmann_constant * self.experiment.temperature diff --git a/mdsuite/calculators/potential_of_mean_force.py b/mdsuite/calculators/potential_of_mean_force.py index ed5c432b..c68841c1 100644 --- a/mdsuite/calculators/potential_of_mean_force.py +++ b/mdsuite/calculators/potential_of_mean_force.py @@ -275,8 +275,8 @@ def plot_data(self, data): x_data=val[self.result_series_keys[0]], y_data=val[self.result_series_keys[1]], title=( - fr"{selected_species}: {val[self.result_keys[0]]: 0.3E} +-" - fr" {val[self.result_keys[1]]: 0.3E}" + rf"{selected_species}: {val[self.result_keys[0]]: 0.3E} +-" + rf" {val[self.result_keys[1]]: 0.3E}" ), layouts=[model], ) diff --git a/mdsuite/calculators/radial_distribution_function.py b/mdsuite/calculators/radial_distribution_function.py index b2f19974..122d713e 100644 --- a/mdsuite/calculators/radial_distribution_function.py +++ b/mdsuite/calculators/radial_distribution_function.py @@ -455,7 +455,7 @@ def run_minibatch_loop(self, atoms, stop, n_atoms, minibatch_start, positions_te ) exec_time = timer() - start_time atom_pairs_per_second = ( - tf.cast(tf.shape(indices)[1], dtype=self.dtype) / exec_time / 10 ** 6 + tf.cast(tf.shape(indices)[1], dtype=self.dtype) / exec_time / 10**6 ) atom_pairs_per_second *= tf.cast(batch_size, dtype=self.dtype) log.debug( @@ -748,7 +748,7 @@ def _spherical_symmetry(data: np.array) -> np.array: function_values : np.array result of the operation """ - return 4 * np.pi * (data ** 2) + return 4 * np.pi * (data**2) def _correction_1(data: np.array) -> np.array: """ @@ -777,13 +777,13 @@ def _correction_2(data: np.array) -> np.array: result of the operation """ - arctan_1 = np.arctan(np.sqrt(4 * (data ** 2) - 2)) + arctan_1 = np.arctan(np.sqrt(4 * 
(data**2) - 2)) arctan_2 = ( 8 * data * np.arctan( - (2 * data * (4 * (data ** 2) - 3)) - / (np.sqrt(4 * (data ** 2) - 2) * (4 * (data ** 2) + 1)) + (2 * data * (4 * (data**2) - 3)) + / (np.sqrt(4 * (data**2) - 2) * (4 * (data**2) + 1)) ) ) diff --git a/mdsuite/calculators/spatial_distribution_function.py b/mdsuite/calculators/spatial_distribution_function.py index 5b5bc384..54598a5b 100644 --- a/mdsuite/calculators/spatial_distribution_function.py +++ b/mdsuite/calculators/spatial_distribution_function.py @@ -249,7 +249,7 @@ def run_calculator(self): subjects=["System"], ) - coordinates = tf.reshape(self._get_unit_sphere(), [self.args.n_bins ** 2, 3]) + coordinates = tf.reshape(self._get_unit_sphere(), [self.args.n_bins**2, 3]) colour_map = tf.reshape(sdf_values, [-1]) self._run_visualization(coordinates, colour_map) diff --git a/mdsuite/calculators/structure_factor.py b/mdsuite/calculators/structure_factor.py index ba1ed880..c6a6f378 100644 --- a/mdsuite/calculators/structure_factor.py +++ b/mdsuite/calculators/structure_factor.py @@ -251,7 +251,7 @@ def average_atomic_form_factor(self, scattering_scalar): atomic_form_facs = self.atomic_form_factors(scattering_scalar) for el in self.experiment.species: sum1 += self.molar_fractions[el] * atomic_form_facs[el]["atomic_form_factor"] - average_atomic_factor = sum1 ** 2 + average_atomic_factor = sum1**2 return average_atomic_factor def partial_structure_factor(self, scattering_scalar, elements): @@ -266,7 +266,7 @@ def partial_structure_factor(self, scattering_scalar, elements): integrand[counter] = 0 continue integrand[counter] = ( - radius ** 2 + radius**2 * np.sin(scattering_scalar * radius) / (scattering_scalar * radius) * (self.rdf[counter] - 1) diff --git a/mdsuite/experiment/experiment.py b/mdsuite/experiment/experiment.py index 83a740be..719accfa 100644 --- a/mdsuite/experiment/experiment.py +++ b/mdsuite/experiment/experiment.py @@ -607,7 +607,7 @@ def _store_metadata(self, metadata: TrajectoryMetadata, update_with_pubchempy=Fa ---------- metadata: TrajectoryMetadata update_with_pubchempy: bool - Load data from pubchempy and add it to fill missing infomration + Load data from pubchempy and add it to fill missing information. """ # new trajectory: store all metadata and construct a new database self.temperature = metadata.temperature diff --git a/mdsuite/memory_management/memory_manager.py b/mdsuite/memory_management/memory_manager.py index a04225df..eeee1a04 100644 --- a/mdsuite/memory_management/memory_manager.py +++ b/mdsuite/memory_management/memory_manager.py @@ -23,14 +23,15 @@ Summary ------- +Module to manage the memory use of MDSuite operations. """ import logging from typing import Tuple import numpy as np -import tensorflow as tf from mdsuite.database.simulation_database import Database +from mdsuite.utils import config from mdsuite.utils.meta_functions import get_machine_properties, gpu_available from mdsuite.utils.scale_functions import ( linear_scale_function, @@ -58,11 +59,20 @@ class MemoryManager: Attributes ---------- data_path : list + Path to reference the data in the hdf5 database. database : Database + Database to look through. parallel : bool + If true, batch sizes should take into account the use of multiple machines + with shared memory. TODO: This is outdated. memory_fraction : float + Amount of memory to use TODO: In a perfect scaling, this can be 100 % of the + free memory. scale_function : dict + Function to use to describe how the memory scaling changes with changing + data size. 
     gpu : bool
+        If true, a gpu is available.
     """
 
     def __init__(
@@ -93,7 +103,8 @@ def __init__(
         scale_function : dict
             Scaling function to compute the memory scaling of a calculator.
         gpu : bool
-            If true, gpu should be used.
+            If true, a GPU has been detected and the available memory will be
+            calculated from the GPU.
         offset : int
             If data is being loaded from a non-zero point in the database the
             offset is used to take this into account. For example, expanding a
@@ -104,7 +115,6 @@ def __init__(
         self.data_path = data_path
         self.parallel = parallel
         self.database = database
-        self.memory_fraction = memory_fraction
        self.offset = offset
 
         self.machine_properties = get_machine_properties()
@@ -115,9 +125,6 @@ def __init__(
                 memory = self.machine_properties["gpu"][item]["memory"]
 
             self.machine_properties["memory"] = memory * 1e6
-            tf.device("gpu")
-        else:
-            tf.device("cpu")
 
         self.batch_size = None
         self.n_batches = None
@@ -209,13 +216,15 @@ def get_batch_size(self, system: bool = False) -> tuple:
             )
         maximum_loaded_configurations = int(
             np.clip(
-                (self.memory_fraction * self.machine_properties["memory"])
+                (config.memory_fraction * self.machine_properties["memory"])
                 / per_configuration_memory,
                 1,
                 n_configs - self.offset,
             )
         )
-        batch_size = self._get_optimal_batch_size(maximum_loaded_configurations)
+        batch_size = self._get_optimal_batch_size(
+            maximum_loaded_configurations, n_configs
+        )
         number_of_batches, remainder = divmod((n_configs - self.offset), batch_size)
         self.batch_size = batch_size
         self.n_batches = number_of_batches
@@ -241,15 +250,19 @@ def hdf5_load_time(n: int):
         return np.log(n)
 
     @staticmethod
-    def _get_optimal_batch_size(naive_size):
+    def _get_optimal_batch_size(naive_size, n_configs: int):
         """
         Use the open/close and read speeds of the hdf5 database_path as well
         as the operation being performed to get an optimal batch size.
 
+        This is where the memory scaling test will be enforced.
+
         Parameters
         ----------
         naive_size : int
             Naive batch size to be optimized
+        n_configs : int
+            Total number of configurations in the database.
 
         Returns
         -------
@@ -257,7 +270,10 @@ def _get_optimal_batch_size(naive_size, n_configs: int):
             An optimized batch size
         """
         # db_io_time = self.database.get_load_time()
-        return naive_size
+        if config.memory_scaling_test:
+            return n_configs
+        else:
+            return naive_size
 
     def _compute_atomwise_minibatch(self, data_range: int):
         """
@@ -310,7 +326,7 @@ def _compute_atomwise_minibatch(self, data_range: int):
             )
             batch_size = int(
                 np.clip(
-                    self.memory_fraction
+                    config.memory_fraction
                     * self.machine_properties["memory"]
                     / per_atom_memory,
                     1,
@@ -323,7 +339,7 @@ def _compute_atomwise_minibatch(self, data_range: int):
             atom_batch_memory = fraction * per_atom_memory
             batch_size = int(
                 np.clip(
-                    self.memory_fraction
+                    config.memory_fraction
                     * self.machine_properties["memory"]
                     / atom_batch_memory,
                     1,
diff --git a/mdsuite/utils/config.py b/mdsuite/utils/config.py
index 269304e5..03356789 100644
--- a/mdsuite/utils/config.py
+++ b/mdsuite/utils/config.py
@@ -23,6 +23,8 @@
 Summary
 -------
+A set of configuration parameters for the MDSuite framework. Includes information
+regarding memory fraction, scaling test state, jupyter use and so on.
 """
 from dataclasses import dataclass
 
@@ -36,10 +38,22 @@ class Config:
     bokeh_sizing_mode: str
         The way bokeh scales plots.
         see bokeh / sizing_mode for more information
+    jupyter : bool
+        If true, jupyter is being used.
+    GPU: bool
+        TODO I think this is outdated.
+    memory_scaling_test : bool
+        If true, a scaling test is being performed and the batch size is set
+        to the full number of configurations, i.e. everything is loaded in a
+        single batch. Should typically be accompanied by memory_fraction = 1.
+    memory_fraction: float
+        The portion of the available memory to be used.
     """
 
     jupyter: bool = False
     GPU: bool = False
+    memory_scaling_test: bool = False
+    memory_fraction: float = 0.5
     bokeh_sizing_mode: str = "stretch_both"
 
 
diff --git a/mdsuite/utils/meta_functions.py b/mdsuite/utils/meta_functions.py
index 96f17a48..dbdb2c5e 100644
--- a/mdsuite/utils/meta_functions.py
+++ b/mdsuite/utils/meta_functions.py
@@ -418,7 +418,7 @@ def golden_section_search(
 
     # Define the golden ratio identities
     phi_a = 1 / golden_ratio
-    phi_b = 1 / (golden_ratio ** 2)
+    phi_b = 1 / (golden_ratio**2)
 
     (a, b) = (min(a, b), max(a, b))  # check for a simple error
diff --git a/pyproject.toml b/pyproject.toml
index 38806598..b526df7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,3 +5,34 @@ experimental_string_processing = true
 [tool.isort]
 profile = 'black'
 multi_line_output = 3
+
+[tool.pytest.ini_options]
+addopts = '-m "not memory"'
+markers = [
+    "memory: marks tests that are used for memory profiling",
+]
+
+[tool.pylint.messages_control]
+
+max-line-length = 90
+disable = [
+    "logging-fstring-interpolation",
+    "too-many-arguments",
+    # seems to fail for some cases
+    "no-else-return",
+    # allow for open TODOs
+    "fixme",
+]
+
+[tool.coverage.run]
+branch = true
+relative_files = true
+# file path patterns to exclude from coverage measurement:
+# anything in a Temp, AppData or CI directory anywhere
+omit = ["*/Temp/*", "*/AppData/*", "*/CI/*"]
+
+[tool.coverage.report]
+ignore_errors = true
+
+[tool.coverage.html]
+directory = "coverage_html_report"
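
The `memory` marker registered in pyproject.toml is deselected by default through `addopts = '-m "not memory"'`, so the profiling tests in CI/memory_scaling/ only run when selected explicitly, which is what the pytest-memory workflow does with `pytest -m "memory" ./CI/`. A minimal local driver might look like the sketch below (not part of the changeset); it relies on the usual pytest behaviour that a `-m` given on the command line overrides the one injected by addopts, and it requires pytest-monitor to be installed so that the `.pymon` database is produced.

# Sketch of a local run of the memory-profiling suite (not part of the PR).
# Requires: pip install pytest-monitor tabulate
import pytest

# A later -m overrides the default '-m "not memory"' from addopts in
# pyproject.toml, exactly as in the pytest-memory.yml workflow above.
exit_code = pytest.main(["-m", "memory", "./CI/"])
print("pytest finished with exit code", exit_code)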
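
plot_results.py reads the `TEST_METRICS` table that pytest-monitor writes into `./.pymon`. For a quick look at the raw numbers without pandas or matplotlib, the same table can be queried directly; the column names in this sketch are taken from the script itself, and the `.pymon` file is assumed to come from a run like the one above.

# Sketch: inspect the pytest-monitor database without pandas.
import sqlite3

con = sqlite3.connect(".pymon")
rows = con.execute(
    "SELECT ITEM_VARIANT, MEM_USAGE, TOTAL_TIME FROM TEST_METRICS "
    "ORDER BY MEM_USAGE DESC LIMIT 10"
).fetchall()
for item_variant, mem_usage, total_time in rows:
    # MEM_USAGE is in MB, cf. the axis label in plot_results.py.
    print(f"{item_variant}: {mem_usage:.1f} MB, {total_time:.2f} s")
con.close()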
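
The `config.memory_scaling_test` flag exists because batching would otherwise hide the quantity being measured: the memory manager normally clips the batch size to what fits into `config.memory_fraction` of the machine's memory, so the peak memory of a profiled test would plateau instead of growing with the system size. With the flag set, `_get_optimal_batch_size` returns the total number of configurations, i.e. the whole trajectory is loaded as a single batch. A stand-alone mirror of that dispatch, with names following the diff:

# Stand-alone sketch of the dispatch added to
# MemoryManager._get_optimal_batch_size above.


def optimal_batch_size(naive_size: int, n_configs: int, scaling_test: bool) -> int:
    """Return the batch size the memory manager would pick."""
    if scaling_test:
        # Scaling test: one batch spanning the full trajectory, so the
        # measured peak memory reflects the true data size.
        return n_configs
    # Normal operation: the memory-limited naive estimate wins.
    return naive_size


assert optimal_batch_size(128, 10_000, scaling_test=False) == 128
assert optimal_batch_size(128, 10_000, scaling_test=True) == 10_000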
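
The re-enabled Einstein-Helfand integration test compares the nested `data_dict` of a computation against reference values with `decimal=-6`. The real helper lives in `mdsuite.utils.testing` and is not shown in this diff; the sketch below is an illustrative stand-in that assumes it follows numpy's `assert_almost_equal` convention, under which `decimal=-6` tolerates absolute deviations up to 1.5e6.

# Illustrative stand-in for mdsuite.utils.testing.assertDeepAlmostEqual;
# the actual implementation is not part of this diff.
import numpy as np


def assert_deep_almost_equal(actual, desired, decimal=6):
    """Recursively compare nested dicts/sequences of numbers.

    Leaves are checked with numpy's rule
    abs(actual - desired) < 1.5 * 10**(-decimal).
    """
    if isinstance(desired, dict):
        assert actual.keys() == desired.keys()
        for key in desired:
            assert_deep_almost_equal(actual[key], desired[key], decimal)
    elif isinstance(desired, (list, tuple)):
        assert len(actual) == len(desired)
        for a, d in zip(actual, desired):
            assert_deep_almost_equal(a, d, decimal)
    else:
        np.testing.assert_almost_equal(actual, desired, decimal=decimal)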
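
Most of the one-line hunks in the calculator modules are mechanical fallout from bumping black from 21.12b0 to 22.1.0 (in both the lint workflow and .pre-commit-config.yaml): the 2022 stable style removes the spaces around `**` when both operands are simple. The diff itself shows where that rule stops, as in this runnable illustration:

# Formatting produced by black 22.1.0, as seen in the hunks above.
golden_ratio = (1 + 5**0.5) / 2

# Both operands simple (names, numeric literals, attribute chains):
# the power operator is hugged.
phi_b = 1 / (golden_ratio**2)

# A subscripted operand is not "simple", so the spaces remain, matching
# self.experiment.units["length"] ** 2 in the conductivity prefactors.
units = {"length": 1e-10}
numerator = units["length"] ** 2

The `fr"..."` to `rf"..."` string-prefix reordering in the plotting code appears to belong to the same formatting cleanup pass.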