From aad6c9dff0738ae940011da674897c1b2de149ce Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 21 Nov 2024 15:17:24 -0700 Subject: [PATCH 01/11] break run.py into diagnostics & timeseries; rename build to webpage --- cupid/{run.py => cupid_diagnostics.py} | 87 ------------ cupid/cupid_timeseries.py | 186 +++++++++++++++++++++++++ cupid/{build.py => cupid_webpage.py} | 2 +- 3 files changed, 187 insertions(+), 88 deletions(-) rename cupid/{run.py => cupid_diagnostics.py} (67%) create mode 100755 cupid/cupid_timeseries.py rename cupid/{build.py => cupid_webpage.py} (99%) diff --git a/cupid/run.py b/cupid/cupid_diagnostics.py similarity index 67% rename from cupid/run.py rename to cupid/cupid_diagnostics.py index 8ed31a0..913f9eb 100755 --- a/cupid/run.py +++ b/cupid/cupid_diagnostics.py @@ -11,7 +11,6 @@ Options: -s, --serial Do not use LocalCluster objects - -ts, --time-series Run time series generation scripts prior to diagnostics -atm, --atmosphere Run atmosphere component diagnostics -ocn, --ocean Run ocean component diagnostics -lnd, --land Run land component diagnostics @@ -29,7 +28,6 @@ import intake import ploomber -import cupid.timeseries import cupid.util CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) @@ -40,7 +38,6 @@ @click.command(context_settings=CONTEXT_SETTINGS) @click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects") -@click.option("--time-series", "-ts", is_flag=True, help="Run time series generation scripts prior to diagnostics") # Options to turn components on or off @click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component diagnostics") @click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component diagnostics") @@ -52,7 +49,6 @@ def run( config_path, serial=False, - time_series=False, all=False, atmosphere=False, ocean=False, @@ -106,89 +102,6 @@ def run( #################################################################### - if time_series: - timeseries_params = control["timeseries"] - - # general timeseries arguments for all components - num_procs = timeseries_params["num_procs"] - - for component, comp_bool in component_options.items(): - if comp_bool: - - # set time series input and output directory: - # ----- - if isinstance(timeseries_params["case_name"], list): - ts_input_dirs = [] - for cname in timeseries_params["case_name"]: - ts_input_dirs.append(global_params["CESM_output_dir"]+"/"+cname+f"/{component}/hist/") - else: - ts_input_dirs = [ - global_params["CESM_output_dir"] + "/" + - timeseries_params["case_name"] + f"/{component}/hist/", - ] - - if "ts_output_dir" in timeseries_params: - if isinstance(timeseries_params["ts_output_dir"], list): - ts_output_dirs = [] - for ts_outdir in timeseries_params["ts_output_dir"]: - ts_output_dirs.append([ - os.path.join( - ts_outdir, - f"{component}", "proc", "tseries", - ), - ]) - else: - ts_output_dirs = [ - os.path.join( - timeseries_params["ts_output_dir"], - f"{component}", "proc", "tseries", - ), - ] - else: - if isinstance(timeseries_params["case_name"], list): - ts_output_dirs = [] - for cname in timeseries_params["case_name"]: - ts_output_dirs.append( - os.path.join( - global_params["CESM_output_dir"], - cname, - f"{component}", "proc", "tseries", - ), - ) - else: - ts_output_dirs = [ - os.path.join( - global_params["CESM_output_dir"], - timeseries_params["case_name"], - f"{component}", "proc", "tseries", - ), - ] - # ----- - - # fmt: off - # pylint: disable=line-too-long - cupid.timeseries.create_time_series( - component, - 
timeseries_params[component]["vars"], - timeseries_params[component]["derive_vars"], - timeseries_params["case_name"], - timeseries_params[component]["hist_str"], - ts_input_dirs, - ts_output_dirs, - # Note that timeseries output will eventually go in - # /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/ - timeseries_params["ts_done"], - timeseries_params["overwrite_ts"], - timeseries_params[component]["start_years"], - timeseries_params[component]["end_years"], - timeseries_params[component]["level"], - num_procs, - serial, - logger, - ) - # fmt: on - # pylint: enable=line-too-long - # Grab paths run_dir = os.path.realpath(os.path.expanduser(control["data_sources"]["run_dir"])) diff --git a/cupid/cupid_timeseries.py b/cupid/cupid_timeseries.py new file mode 100755 index 0000000..d94d77e --- /dev/null +++ b/cupid/cupid_timeseries.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python +""" +Main script for running timeseries specified in the configuration file. + +This script sets up and runs timeseries according to the configurations +provided in the specified YAML configuration file. + +Usage: cupid-timeseries [OPTIONS] + + Main engine to set up running timeseries. + +Options: + -s, --serial Do not use LocalCluster objects + -ts, --time-series Run time series generation scripts prior to diagnostics + -atm, --atmosphere Run atmosphere component diagnostics #TODO: should we set this up to run timeseries for just atm? + -ocn, --ocean Run ocean component diagnostics + -lnd, --land Run land component diagnostics + -ice, --seaice Run sea ice component diagnostics + -glc, --landice Run land ice component diagnostics + -rof, --river-runoff Run river runoff component diagnostics + -config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml) + -h, --help Show this message and exit. +""" +from __future__ import annotations + +import os + +import click + +import cupid.timeseries +import cupid.util + +CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"]) + +# fmt: off +# pylint: disable=line-too-long + + +@click.command(context_settings=CONTEXT_SETTINGS) +@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects") +# Options to turn components on or off +@click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component diagnostics") +@click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component diagnostics") +@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics") +@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics") +@click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics") +@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics") +@click.argument("config_path", default="config.yml") +def run_timeseries( + config_path, + serial=False, + atmosphere=False, + ocean=False, + land=False, + seaice=False, + landice=False, + river_runoff=False, +): + """ + Main engine to set up running all the notebooks. 
+ + Args: + CONFIG_PATH: str, path to configuration file (default config.yml) + + Returns: + None + + """ + # fmt: on + # pylint: enable=line-too-long + # Get control structure + control = cupid.util.get_control_dict(config_path) + cupid.util.setup_book(config_path) + logger = cupid.util.setup_logging(config_path) + + component_options = { + "atm": atmosphere, + "ocn": ocean, + "lnd": land, + "ice": seaice, + "glc": landice, + "rof": river_runoff, + } + + # Automatically run all if no components specified + + if True not in [atmosphere, ocean, land, seaice, landice, river_runoff]: + # all = True + for key in component_options.keys(): + component_options[key] = True + + ##################################################################### + # Managing global parameters + + global_params = dict() + + if "global_params" in control: + global_params = control["global_params"] + + global_params["serial"] = serial + + #################################################################### + + timeseries_params = control["timeseries"] + + # general timeseries arguments for all components + num_procs = timeseries_params["num_procs"] + + for component, comp_bool in component_options.items(): + if comp_bool: + + # set time series input and output directory: + # ----- + if isinstance(timeseries_params["case_name"], list): + ts_input_dirs = [] + for cname in timeseries_params["case_name"]: + ts_input_dirs.append(global_params["CESM_output_dir"]+"/"+cname+f"/{component}/hist/") + else: + ts_input_dirs = [ + global_params["CESM_output_dir"] + "/" + + timeseries_params["case_name"] + f"/{component}/hist/", + ] + + if "ts_output_dir" in timeseries_params: + if isinstance(timeseries_params["ts_output_dir"], list): + ts_output_dirs = [] + for ts_outdir in timeseries_params["ts_output_dir"]: + ts_output_dirs.append([ + os.path.join( + ts_outdir, + f"{component}", "proc", "tseries", + ), + ]) + else: + ts_output_dirs = [ + os.path.join( + timeseries_params["ts_output_dir"], + f"{component}", "proc", "tseries", + ), + ] + else: + if isinstance(timeseries_params["case_name"], list): + ts_output_dirs = [] + for cname in timeseries_params["case_name"]: + ts_output_dirs.append( + os.path.join( + global_params["CESM_output_dir"], + cname, + f"{component}", "proc", "tseries", + ), + ) + else: + ts_output_dirs = [ + os.path.join( + global_params["CESM_output_dir"], + timeseries_params["case_name"], + f"{component}", "proc", "tseries", + ), + ] + # ----- + + # fmt: off + # pylint: disable=line-too-long + cupid.timeseries.create_time_series( + component, + timeseries_params[component]["vars"], + timeseries_params[component]["derive_vars"], + timeseries_params["case_name"], + timeseries_params[component]["hist_str"], + ts_input_dirs, + ts_output_dirs, + # Note that timeseries output will eventually go in + # /glade/derecho/scratch/${USER}/archive/${CASE}/${component}/proc/tseries/ + timeseries_params["ts_done"], + timeseries_params["overwrite_ts"], + timeseries_params[component]["start_years"], + timeseries_params[component]["end_years"], + timeseries_params[component]["level"], + num_procs, + serial, + logger, + ) + # fmt: on + # pylint: enable=line-too-long + + return None diff --git a/cupid/build.py b/cupid/cupid_webpage.py similarity index 99% rename from cupid/build.py rename to cupid/cupid_webpage.py index f5a4bf3..cd60de5 100755 --- a/cupid/build.py +++ b/cupid/cupid_webpage.py @@ -27,7 +27,7 @@ @click.argument("config_path", default="config.yml") def build(config_path): """ - Build a Jupyter book based on the TOC in 
CONFIG_PATH. Called by `cupid-build`. + Build a Jupyter book based on the TOC in CONFIG_PATH. Called by `cupid-webpage`. Args: CONFIG_PATH: str, path to configuration file (default config.yml) From 6cf4f6c5c1d7fbce79d5ef5460e4f171321e1232 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 21 Nov 2024 15:29:37 -0700 Subject: [PATCH 02/11] include if name == main --- cupid/clear.py | 4 ++++ cupid/cupid_diagnostics.py | 8 ++++++-- cupid/cupid_timeseries.py | 4 ++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/cupid/clear.py b/cupid/clear.py index 8cb6b4f..d46c236 100755 --- a/cupid/clear.py +++ b/cupid/clear.py @@ -57,3 +57,7 @@ def clear(config_path): # Delete the "computed_notebooks" folder and all the contents inside of it shutil.rmtree(run_dir) logger.info(f"All contents in {run_dir} have been cleared.") + + +if __name__ == "__main__": + clear() diff --git a/cupid/cupid_diagnostics.py b/cupid/cupid_diagnostics.py index 913f9eb..4590ff0 100755 --- a/cupid/cupid_diagnostics.py +++ b/cupid/cupid_diagnostics.py @@ -5,7 +5,7 @@ This script sets up and runs all the specified notebooks and scripts according to the configurations provided in the specified YAML configuration file. -Usage: cupid-run [OPTIONS] +Usage: cupid-diagnostics [OPTIONS] Main engine to set up running all the notebooks. @@ -46,7 +46,7 @@ @click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics") @click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics") @click.argument("config_path", default="config.yml") -def run( +def run_diagnostics( config_path, serial=False, all=False, @@ -239,3 +239,7 @@ def run( dag.build() return None + + +if __name__ == "__main__": + run_diagnostics() diff --git a/cupid/cupid_timeseries.py b/cupid/cupid_timeseries.py index d94d77e..6176661 100755 --- a/cupid/cupid_timeseries.py +++ b/cupid/cupid_timeseries.py @@ -184,3 +184,7 @@ def run_timeseries( # pylint: enable=line-too-long return None + + +if __name__ == "__main__": + run_timeseries() From 1ac2e4676ed7a3a2f351902de7d563152028defe Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 21 Nov 2024 15:38:32 -0700 Subject: [PATCH 03/11] clean up instructions and actual call to run tools --- NCARtips.md | 10 +++++----- README.md | 18 +++++++++--------- ...cupid_diagnostics.py => run_diagnostics.py} | 0 .../{cupid_timeseries.py => run_timeseries.py} | 0 docs/addingnotebookstocollection.md | 2 +- environments/README | 2 +- pyproject.toml | 5 +++-- 7 files changed, 19 insertions(+), 18 deletions(-) rename cupid/{cupid_diagnostics.py => run_diagnostics.py} (100%) rename cupid/{cupid_timeseries.py => run_timeseries.py} (100%) diff --git a/NCARtips.md b/NCARtips.md index 822f9bf..d764a68 100644 --- a/NCARtips.md +++ b/NCARtips.md @@ -8,25 +8,25 @@ There are two ways to request multiple cores on either casper or derecho. Both cases are requesting 12 cores and 120 GB of memory. 
-The recommended approach releases the cores immediately after `cupid-run` finishes: +The recommended approach releases the cores immediately after `cupid-diagnostics` finishes: ``` [login-node] $ conda activate cupid-dev -(cupid-dev) [login-node] $ qcmd -l select=1:ncpus=12:mem=120GB -- cupid-run +(cupid-dev) [login-node] $ qcmd -l select=1:ncpus=12:mem=120GB -- cupid-diagnostics ``` -Alternatively, you can start an interactive session and remain on the compute nodes after `cupid-run` completes: +Alternatively, you can start an interactive session and remain on the compute nodes after `cupid-diagnostics` completes: ``` [login-node] $ qinteractive -l select=1:ncpus=12:mem=120GB [compute-node] $ conda activate cupid-dev -(cupid-dev) [compute-node] $ cupid-run +(cupid-dev) [compute-node] $ cupid-diagnostics ``` Notes: 1. If you chose to run on derecho, specify the `develop` queue by adding the option `-q develop` to either `qcmd` or `qinteractive` (the `develop` queue is a shared resource and you are charged by the core hour rather than the node hour). -1. `cupid-build` is not computationally expensive, and can be run on a login node for either machine. +1. `cupid-webpage` is not computationally expensive, and can be run on a login node for either machine. ## Looking at Output diff --git a/README.md b/README.md index 8be2e31..c9e36cf 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Then `cd` into the `CUPiD` directory and build the necessary conda environments $ cd CUPiD $ mamba env create -f environments/dev-environment.yml $ conda activate cupid-dev -$ which cupid-run +$ which cupid-diagnostics $ mamba env create -f environments/cupid-analysis.yml ``` @@ -38,7 +38,7 @@ If you do not have `mamba` installed, you can still use `conda`... it will just (To see what version of conda you have installed, run `conda --version`.) 1. If the subdirectories in `externals/` are all empty, run `git submodule update --init` to clone the submodules. 1. For existing users who cloned `CUPiD` prior to the switch from manage externals to git submodule, we recommend removing `externals/` before checking out main, running `git submodule update --init`, and removing `manage_externals` (if it is still present after `git submodule update --init`). -1. If `which cupid-run` returned the error `which: no cupid-run in ($PATH)`, then please run the following: +1. 
If `which cupid-diagnostics` returned the error `which: no cupid-diagnostics in ($PATH)`, then please run the following: ``` bash $ conda activate cupid-dev @@ -59,8 +59,8 @@ To test the package out, try to run `examples/key-metrics`: $ conda activate cupid-dev $ cd examples/key_metrics $ # machine-dependent: request multiple compute cores -$ cupid-run -$ cupid-build # Will build HTML from Jupyter Book +$ cupid-diagnostics +$ cupid-webpage # Will build HTML from Jupyter Book ``` After the last step is finished, you can use Jupyter to view generated notebooks in `${CUPID_ROOT}/examples/key-metrics/computed_notebooks` @@ -74,7 +74,7 @@ Notes: (cupid-analysis) $ python -m ipykernel install --user --name=cupid-analysis ``` -Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-run` and `cupid-build` commands, you can run the following command: +Furthermore, to clear the `computed_notebooks` folder which was generated by the `cupid-diagnostics` and `cupid-webpage` commands, you can run the following command: ``` bash $ cupid-clear @@ -87,8 +87,8 @@ This will clear the `computed_notebooks` folder which is at the location pointed Most of CUPiD's configuration is done via the `config.yml` file, but there are a few command line options as well: ```bash -(cupid-dev) $ cupid-run -h -Usage: cupid-run [OPTIONS] CONFIG_PATH +(cupid-dev) $ cupid-diagnostics -h +Usage: cupid-diagnostics [OPTIONS] CONFIG_PATH Main engine to set up running all the notebooks. @@ -122,8 +122,8 @@ client #### Specifying components -If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-run -ocn -ice`. +If no component flags are provided, all component diagnostics listed in `config.yml` will be executed by default. Multiple flags can be used together to select a group of components, for example: `cupid-diagnostics -ocn -ice`. ### Timeseries File Generation -CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-run -ts`. +CUPiD also has the capability to generate single variable timeseries files from history files for all components. To run timeseries, edit the `config.yml` file's timeseries section to fit your preferences, and then run `cupid-timeseries`. diff --git a/cupid/cupid_diagnostics.py b/cupid/run_diagnostics.py similarity index 100% rename from cupid/cupid_diagnostics.py rename to cupid/run_diagnostics.py diff --git a/cupid/cupid_timeseries.py b/cupid/run_timeseries.py similarity index 100% rename from cupid/cupid_timeseries.py rename to cupid/run_timeseries.py diff --git a/docs/addingnotebookstocollection.md b/docs/addingnotebookstocollection.md index 1369a08..a380b56 100644 --- a/docs/addingnotebookstocollection.md +++ b/docs/addingnotebookstocollection.md @@ -40,7 +40,7 @@ Generally, a good fit for a diagnostic notebook is one that reads in CESM output 7. Update your parameters. Parameters that are specific to just this notebook should go under `parameter_groups` in the notebook's entry under `compute_notebooks`. Global parameters that you want passed in to every notebook in the collection should go under `global_params`. When `CUPiD` executes your notebook, all of these parameters will get put in a new cell below the cell tagged `parameters` that you added in step 3. 
This means they will supercede the values of the parameters that you put in the cell above---the names, notation, etc. should match to make sure your notebook is able to find the variables it needs. -8. Your collection can now be run with `cupid-run`, and then the website can be built with `cupid-build`. +8. Your collection can now be run with `cupid-diagnostics`, and then the website can be built with `cupid-webpage`. 9. If you're happy with your notebook and want to add it to the CUPiD repository, there are a few formatting items that we would like contributors to follow: * Title your notebook something descriptive. A recommended format is `___.ipynb`; for instance, this might look like `Global_PSL_NMSE_compare_obs_lens.ipynb` or `Greenland_SMB_visual_compare_obs.ipynb`. diff --git a/environments/README b/environments/README index 60310aa..9d104dd 100644 --- a/environments/README +++ b/environments/README @@ -1,6 +1,6 @@ This directory contains three conda environment files: -1. dev-environment.yml: this creates (cupid-dev), which provides cupid-run and cupid-build, +1. dev-environment.yml: this creates (cupid-dev), which provides cupid-timeseries, cupid-diagnostics and cupid-webpage, and lets users update the main CUPiD code in cupid/ 2. cupid-analysis.yml: this creates (cupid-analysis), the environment all scripts and notebooks diff --git a/pyproject.toml b/pyproject.toml index 3e2db81..45252e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ documentation = "https://nbscuid.readthedocs.io" [project.scripts] -cupid-run = "cupid.run:run" -cupid-build = "cupid.build:build" +cupid-timeseries = "cupid.run_timeseries:run_timeseries" +cupid-diagnostics = "cupid.run_diagnostics:run_diagnostics" +cupid-webpage = "cupid.cupid_webpage:cupid_webpage" cupid-clear = "cupid.clear:clear" From c72af02c28c731dd1742376651f77d98039af514 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 21 Nov 2024 15:55:42 -0700 Subject: [PATCH 04/11] rof comment out & webpage call --- cupid/run_timeseries.py | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cupid/run_timeseries.py b/cupid/run_timeseries.py index 6176661..b3f69ba 100755 --- a/cupid/run_timeseries.py +++ b/cupid/run_timeseries.py @@ -17,7 +17,7 @@ -lnd, --land Run land component diagnostics -ice, --seaice Run sea ice component diagnostics -glc, --landice Run land ice component diagnostics - -rof, --river-runoff Run river runoff component diagnostics + #-rof, --river-runoff Run river runoff component diagnostics -config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml) -h, --help Show this message and exit. 
""" @@ -44,7 +44,7 @@ @click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics") @click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics") @click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics") -@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics") +# @click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics") @click.argument("config_path", default="config.yml") def run_timeseries( config_path, @@ -79,7 +79,7 @@ def run_timeseries( "lnd": land, "ice": seaice, "glc": landice, - "rof": river_runoff, + # "rof": river_runoff, } # Automatically run all if no components specified diff --git a/pyproject.toml b/pyproject.toml index 45252e9..82112af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,5 +46,5 @@ documentation = "https://nbscuid.readthedocs.io" [project.scripts] cupid-timeseries = "cupid.run_timeseries:run_timeseries" cupid-diagnostics = "cupid.run_diagnostics:run_diagnostics" -cupid-webpage = "cupid.cupid_webpage:cupid_webpage" +cupid-webpage = "cupid.cupid_webpage:build" cupid-clear = "cupid.clear:clear" From 515c241811c16c4a60038f313f4613d987ba0131 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Thu, 21 Nov 2024 16:01:02 -0700 Subject: [PATCH 05/11] update dev env name to infrastructure --- NCARtips.md | 8 ++++---- README.md | 12 ++++++------ docs/ContributorsGuide.md | 2 +- docs/addingnotebookstocollection.md | 2 +- ...{dev-environment.yml => cupid-infrastructure.yml} | 2 +- examples/external_diag_packages/config.yml | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) rename environments/{dev-environment.yml => cupid-infrastructure.yml} (91%) diff --git a/NCARtips.md b/NCARtips.md index d764a68..9ca0105 100644 --- a/NCARtips.md +++ b/NCARtips.md @@ -11,16 +11,16 @@ Both cases are requesting 12 cores and 120 GB of memory. The recommended approach releases the cores immediately after `cupid-diagnostics` finishes: ``` -[login-node] $ conda activate cupid-dev -(cupid-dev) [login-node] $ qcmd -l select=1:ncpus=12:mem=120GB -- cupid-diagnostics +[login-node] $ conda activate cupid-infrastructure +(cupid-infrastructure) [login-node] $ qcmd -l select=1:ncpus=12:mem=120GB -- cupid-diagnostics ``` Alternatively, you can start an interactive session and remain on the compute nodes after `cupid-diagnostics` completes: ``` [login-node] $ qinteractive -l select=1:ncpus=12:mem=120GB -[compute-node] $ conda activate cupid-dev -(cupid-dev) [compute-node] $ cupid-diagnostics +[compute-node] $ conda activate cupid-infrastructure +(cupid-infrastructure) [compute-node] $ cupid-diagnostics ``` Notes: diff --git a/README.md b/README.md index c9e36cf..b1714c3 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,8 @@ Then `cd` into the `CUPiD` directory and build the necessary conda environments ``` bash $ cd CUPiD -$ mamba env create -f environments/dev-environment.yml -$ conda activate cupid-dev +$ mamba env create -f environments/cupid-infrastructure.yml +$ conda activate cupid-infrastructure $ which cupid-diagnostics $ mamba env create -f environments/cupid-analysis.yml ``` @@ -41,11 +41,11 @@ If you do not have `mamba` installed, you can still use `conda`... it will just 1. If `which cupid-diagnostics` returned the error `which: no cupid-diagnostics in ($PATH)`, then please run the following: ``` bash - $ conda activate cupid-dev + $ conda activate cupid-infrastructure $ pip install -e . 
# installs cupid ``` -1. In the `cupid-dev` environment, run `pre-commit install` to configure `git` to automatically run `pre-commit` checks when you try to commit changes from the `cupid-dev` environment; the commit will only proceed if all checks pass. Note that CUPiD uses `pre-commit` to ensure code formatting guidelines are followed, and pull requests will not be accepted if they fail the `pre-commit`-based Github Action. +1. In the `cupid-infrastructure` environment, run `pre-commit install` to configure `git` to automatically run `pre-commit` checks when you try to commit changes from the `cupid-infrastructure` environment; the commit will only proceed if all checks pass. Note that CUPiD uses `pre-commit` to ensure code formatting guidelines are followed, and pull requests will not be accepted if they fail the `pre-commit`-based Github Action. 1. If you plan on contributing code to CUPiD, whether developing CUPiD itself or providing notebooks for CUPiD to run, please see the [Contributor's Guide](https://ncar.github.io/CUPiD/contributors_guide.html). @@ -56,7 +56,7 @@ CUPiD currently provides an example for generating diagnostics. To test the package out, try to run `examples/key-metrics`: ``` bash -$ conda activate cupid-dev +$ conda activate cupid-infrastructure $ cd examples/key_metrics $ # machine-dependent: request multiple compute cores $ cupid-diagnostics @@ -87,7 +87,7 @@ This will clear the `computed_notebooks` folder which is at the location pointed Most of CUPiD's configuration is done via the `config.yml` file, but there are a few command line options as well: ```bash -(cupid-dev) $ cupid-diagnostics -h +(cupid-infrastructure) $ cupid-diagnostics -h Usage: cupid-diagnostics [OPTIONS] CONFIG_PATH Main engine to set up running all the notebooks. diff --git a/docs/ContributorsGuide.md b/docs/ContributorsGuide.md index 188f623..36946f2 100644 --- a/docs/ContributorsGuide.md +++ b/docs/ContributorsGuide.md @@ -11,7 +11,7 @@ In order to contribute code to this repository, we recommend that you get starte 4. [Install CUPiD](https://ncar.github.io/CUPiD/index.html#installing), relevant environments, and setup `pre-commit`. 5. Make your edits and add your name to our `contributors.md` file to make sure we recognize your contributions 6. Merge in recent changes from master -7. Ensure that `pre-commit` checks all pass from the `cupid-dev` environment +7. Ensure that `pre-commit` checks all pass from the `cupid-infrastructure` environment 8. IF updating `github.io` pages, test with the steps listed below, otherwise proceed to #9: - Create the environment necessary for building documentation with `$ conda env create -f environments/docs.yml` - Activate the docs environment: `$ conda activate cupid-docs` diff --git a/docs/addingnotebookstocollection.md b/docs/addingnotebookstocollection.md index a380b56..6eff1cb 100644 --- a/docs/addingnotebookstocollection.md +++ b/docs/addingnotebookstocollection.md @@ -45,4 +45,4 @@ Generally, a good fit for a diagnostic notebook is one that reads in CESM output 9. If you're happy with your notebook and want to add it to the CUPiD repository, there are a few formatting items that we would like contributors to follow: * Title your notebook something descriptive. A recommended format is `___.ipynb`; for instance, this might look like `Global_PSL_NMSE_compare_obs_lens.ipynb` or `Greenland_SMB_visual_compare_obs.ipynb`. 
* Add a [cell tag](https://jupyterbook.org/en/stable/content/metadata.html#jupyter-cell-tags) `hide-input` for cells which output plots, and add the tag `hide-cell` for cells that do not contain plots (this will hide both the input and output). Do this through JupyterHub when editing your notebook: click `View --> Cell Toolbar --> Tags` and add either `hide-input` or `hide-cell`. This makes it easier to glance at the plots once the webpage is built and not need to scroll through code cells. - * Set up `pre-commit` in the `cupid-dev` environment to ensure that your code is properly formatted and linted. Running `pre-commit install` will configure `git` to automatically run the `pre-commit` checks when you try to commit changes; the commit will only proceed if all the checks pass. + * Set up `pre-commit` in the `cupid-infrastructure` environment to ensure that your code is properly formatted and linted. Running `pre-commit install` will configure `git` to automatically run the `pre-commit` checks when you try to commit changes; the commit will only proceed if all the checks pass. diff --git a/environments/dev-environment.yml b/environments/cupid-infrastructure.yml similarity index 91% rename from environments/dev-environment.yml rename to environments/cupid-infrastructure.yml index 30aec30..f10fb9a 100644 --- a/environments/dev-environment.yml +++ b/environments/cupid-infrastructure.yml @@ -1,4 +1,4 @@ -name: cupid-dev +name: cupid-infrastructure dependencies: - python=3.11.4 - black diff --git a/examples/external_diag_packages/config.yml b/examples/external_diag_packages/config.yml index 56a9ed4..d183bd1 100644 --- a/examples/external_diag_packages/config.yml +++ b/examples/external_diag_packages/config.yml @@ -111,7 +111,7 @@ compute_notebooks: atm: link_to_ADF: - kernel_name: cupid-dev + kernel_name: cupid-analysis parameter_groups: none: adf_root: ../../examples/external_diag_packages/ADF_output/ From d3f48af046c84d5d2cd54ca50f739c64d1bd9674 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Fri, 22 Nov 2024 12:21:15 -0700 Subject: [PATCH 06/11] add rof to timeseries options --- cupid/run_timeseries.py | 28 +++++++++++++--------------- examples/key_metrics/config.yml | 8 ++++++++ 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/cupid/run_timeseries.py b/cupid/run_timeseries.py index b3f69ba..6164162 100755 --- a/cupid/run_timeseries.py +++ b/cupid/run_timeseries.py @@ -11,13 +11,12 @@ Options: -s, --serial Do not use LocalCluster objects - -ts, --time-series Run time series generation scripts prior to diagnostics - -atm, --atmosphere Run atmosphere component diagnostics #TODO: should we set this up to run timeseries for just atm? - -ocn, --ocean Run ocean component diagnostics - -lnd, --land Run land component diagnostics - -ice, --seaice Run sea ice component diagnostics - -glc, --landice Run land ice component diagnostics - #-rof, --river-runoff Run river runoff component diagnostics + -atm, --atmosphere Run atmosphere component timeseries + -ocn, --ocean Run ocean component timeseries + -lnd, --land Run land component timeseries + -ice, --seaice Run sea ice component timeseries + -glc, --landice Run land ice component timeseries + -rof, --river-runoff Run river runoff component timeseries -config_path Path to the YAML configuration file containing specifications for notebooks (default: config.yml) -h, --help Show this message and exit. 
""" @@ -39,12 +38,12 @@ @click.command(context_settings=CONTEXT_SETTINGS) @click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects") # Options to turn components on or off -@click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component diagnostics") -@click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component diagnostics") -@click.option("--land", "-lnd", is_flag=True, help="Run land component diagnostics") -@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component diagnostics") -@click.option("--landice", "-glc", is_flag=True, help="Run land ice component diagnostics") -# @click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component diagnostics") +@click.option("--atmosphere", "-atm", is_flag=True, help="Run atmosphere component timeseries") +@click.option("--ocean", "-ocn", is_flag=True, help="Run ocean component timeseries") +@click.option("--land", "-lnd", is_flag=True, help="Run land component timeseries") +@click.option("--seaice", "-ice", is_flag=True, help="Run sea ice component timeseries") +@click.option("--landice", "-glc", is_flag=True, help="Run land ice component timeseries") +@click.option("--river-runoff", "-rof", is_flag=True, help="Run river runoff component timeseries") @click.argument("config_path", default="config.yml") def run_timeseries( config_path, @@ -79,13 +78,12 @@ def run_timeseries( "lnd": land, "ice": seaice, "glc": landice, - # "rof": river_runoff, + "rof": river_runoff, } # Automatically run all if no components specified if True not in [atmosphere, ocean, land, seaice, landice, river_runoff]: - # all = True for key in component_options.keys(): component_options[key] = True diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml index 7efb2a7..868b75e 100644 --- a/examples/key_metrics/config.yml +++ b/examples/key_metrics/config.yml @@ -95,6 +95,14 @@ timeseries: end_years: [100,100] level: 'lev' + rof: + vars: [] + derive_vars: [] + hist_str: 'h' + start_years: [1,1] + end_years: [100,100] + level: 'lev' + compute_notebooks: # This is where all the notebooks you want run and their From 3724a50fd042237bacd0a20345c650518b8de9a1 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Fri, 22 Nov 2024 12:44:24 -0700 Subject: [PATCH 07/11] timeseries in series updates --- cupid/timeseries.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cupid/timeseries.py b/cupid/timeseries.py index 4f61dba..1efa271 100644 --- a/cupid/timeseries.py +++ b/cupid/timeseries.py @@ -340,7 +340,16 @@ def create_time_series( ) if serial: - call_ncrcat(list_of_commands) + try: + call_ncrcat(list_of_commands[0]) + # TODO: list_of_commands fails with "cupid-timeseries -s", but works if list is indexed + except IndexError: + print( + "If no commands in the following list, then no timeseries were requested.", + ) + print(list_of_commands) + print("No timeseries are being created") + continue else: # if not serial # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=num_procs) as mpool: From 5e0d6e19b981d8f841dccee119f69f667e6b50e6 Mon Sep 17 00:00:00 2001 From: Teagan King Date: Fri, 22 Nov 2024 12:57:38 -0700 Subject: [PATCH 08/11] update rof timeseries hist str --- examples/key_metrics/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/key_metrics/config.yml b/examples/key_metrics/config.yml index 868b75e..42cd866 100644 --- a/examples/key_metrics/config.yml +++ 
b/examples/key_metrics/config.yml @@ -98,7 +98,7 @@ timeseries: rof: vars: [] derive_vars: [] - hist_str: 'h' + hist_str: 'h0' start_years: [1,1] end_years: [100,100] level: 'lev' From 51e9817b2cbd1a5106973db6122d52603a50d4fd Mon Sep 17 00:00:00 2001 From: Teagan King Date: Fri, 22 Nov 2024 13:53:00 -0700 Subject: [PATCH 09/11] fix timeseries serial --- cupid/timeseries.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cupid/timeseries.py b/cupid/timeseries.py index 1efa271..db14e6d 100644 --- a/cupid/timeseries.py +++ b/cupid/timeseries.py @@ -340,16 +340,8 @@ def create_time_series( ) if serial: - try: - call_ncrcat(list_of_commands[0]) - # TODO: list_of_commands fails with "cupid-timeseries -s", but works if list is indexed - except IndexError: - print( - "If no commands in the following list, then no timeseries were requested.", - ) - print(list_of_commands) - print("No timeseries are being created") - continue + for cmd in list_of_commands: + call_ncrcat(cmd) else: # if not serial # Now run the "ncrcat" subprocesses in parallel: with mp.Pool(processes=num_procs) as mpool: From 09bf145d43bf654927670c955fc0d1fca2776291 Mon Sep 17 00:00:00 2001 From: Teagan King <98482480+TeaganKing@users.noreply.github.com> Date: Fri, 22 Nov 2024 14:21:36 -0700 Subject: [PATCH 10/11] Update external diag config to use infrastructure env --- examples/external_diag_packages/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/external_diag_packages/config.yml b/examples/external_diag_packages/config.yml index d183bd1..20061fb 100644 --- a/examples/external_diag_packages/config.yml +++ b/examples/external_diag_packages/config.yml @@ -26,7 +26,7 @@ computation_config: ### It must already be installed on your machine. You can also ### specify a different environment than the default for any ### notebook in NOTEBOOK CONFIG - default_kernel_name: cupid-analysis + default_kernel_name: cupid-infrastructure # log level sets the level of how verbose logging will be. # options include: debug, info, warning, error From 6aba58d411777b9b1fb4dad9df6096c482ee3a93 Mon Sep 17 00:00:00 2001 From: Teagan King <98482480+TeaganKing@users.noreply.github.com> Date: Fri, 22 Nov 2024 14:23:18 -0700 Subject: [PATCH 11/11] Update config.yml to use infrastructure --- examples/external_diag_packages/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/external_diag_packages/config.yml b/examples/external_diag_packages/config.yml index 20061fb..5ab4d2f 100644 --- a/examples/external_diag_packages/config.yml +++ b/examples/external_diag_packages/config.yml @@ -26,7 +26,7 @@ computation_config: ### It must already be installed on your machine. You can also ### specify a different environment than the default for any ### notebook in NOTEBOOK CONFIG - default_kernel_name: cupid-infrastructure + default_kernel_name: cupid-analysis # log level sets the level of how verbose logging will be. # options include: debug, info, warning, error @@ -111,7 +111,7 @@ compute_notebooks: atm: link_to_ADF: - kernel_name: cupid-analysis + kernel_name: cupid-infrastructure parameter_groups: none: adf_root: ../../examples/external_diag_packages/ADF_output/
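
Two illustrative sketches follow; neither is part of the patch series nor of CUPiD source, and all module, function, and option names in them are stand-ins.

This series splits the old `cupid-run` entry point into `cupid-timeseries`, `cupid-diagnostics`, and `cupid-webpage` by pointing each `[project.scripts]` entry at a click-decorated function and adding an `if __name__ == "__main__":` guard to each module (PATCH 02 and PATCH 03). A minimal sketch of that wiring, under those assumptions:

```python
# sketch_cli.py -- illustrative only; mirrors the console-script pattern used by cupid/run_timeseries.py
import click


@click.command()
@click.option("--serial", "-s", is_flag=True, help="Do not use LocalCluster objects")
@click.argument("config_path", default="config.yml")
def run_timeseries(config_path, serial=False):
    """Stand-in for the function that the `cupid-timeseries` console script points at."""
    click.echo(f"would generate timeseries from {config_path} (serial={serial})")


if __name__ == "__main__":
    # The guard lets the module be run directly during development
    # (`python sketch_cli.py config.yml`), while `pip install -e .`
    # exposes the same function as a console script via [project.scripts].
    run_timeseries()
```

PATCH 07 first worked around a serial-mode failure by indexing element 0 of the ncrcat command list inside a try/except, and PATCH 09 replaces that with a plain loop so an empty list is simply a no-op and every queued command runs. The sketch below shows that serial-versus-parallel dispatch pattern; `run_command` and `dispatch` are illustrative names, not CUPiD identifiers.

```python
# sketch_dispatch.py -- illustrative only; mirrors the serial/parallel logic in cupid/timeseries.py
from __future__ import annotations

import multiprocessing as mp
import subprocess


def run_command(cmd: list[str]) -> int:
    """Run one command (standing in for an ncrcat invocation) and return its exit code."""
    return subprocess.run(cmd, check=False).returncode


def dispatch(commands: list[list[str]], serial: bool, num_procs: int) -> list[int]:
    """Run every queued command, one at a time in serial mode or via a worker pool otherwise."""
    if serial:
        # Iterating over the full list (instead of indexing element 0) makes an
        # empty queue a harmless no-op, which is the behavior PATCH 09 restores.
        return [run_command(cmd) for cmd in commands]
    with mp.Pool(processes=num_procs) as pool:
        return pool.map(run_command, commands)


if __name__ == "__main__":
    # Trivial usage example with harmless shell commands.
    print(dispatch([["echo", "hello"], ["echo", "world"]], serial=True, num_procs=2))
```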