From 597c38d8e6b0921e596c134abba89638819ca190 Mon Sep 17 00:00:00 2001 From: EdwardSnyder-NOAA <96196752+EdwardSnyder-NOAA@users.noreply.github.com> Date: Fri, 25 Oct 2024 10:12:11 -0500 Subject: [PATCH] Update Land DA container to Spack-Stack 1.6.0 (#147) * update to ss160 * update python and cmake vars * updated info * update file to work with new setup container script * added singularity files * fixed typo * fix typos * removed extra dir * update run container script * init commit * added logic to use staged data for the singularity container * remove wflow singularity lua file * update docs to reflect new container workflow * update container name to v2.0.0. release * updated container info * added changes to work with the new spack-stack * init commit * removed out of date analysis file * added fix data path for container * update to new spack stack * updated workflow process * update sub var name * update land da data link * update land da data path * fixed typo --------- Co-authored-by: Parallel Works app-run user --- .../BackgroundInfo/TechnicalOverview.rst | 8 +- .../BuildingRunningTesting/Container.rst | 219 ++++-------------- modulefiles/build_singularity_intel.lua | 56 +++-- .../tasks/singularity/task.analysis.lua | 2 + .../tasks/singularity/task.forecast.lua | 2 + .../tasks/singularity/task.plot_stats.lua | 2 + .../tasks/singularity/task.post_anal.lua | 2 + .../tasks/singularity/task.pre_anal.lua | 2 + .../tasks/singularity/task.prep_obs.lua | 2 + parm/parm_xml_singularity.yaml | 7 + parm/run_container_executable.sh | 13 +- sorc/test/ci/Dockerfile | 32 ++- ush/hofx_analysis_stats.py | 2 + ush/plot_forecast_restart.py | 2 + 14 files changed, 137 insertions(+), 214 deletions(-) create mode 100644 modulefiles/tasks/singularity/task.analysis.lua create mode 100644 modulefiles/tasks/singularity/task.forecast.lua create mode 100644 modulefiles/tasks/singularity/task.plot_stats.lua create mode 100644 modulefiles/tasks/singularity/task.post_anal.lua create 
mode 100644 modulefiles/tasks/singularity/task.pre_anal.lua create mode 100644 modulefiles/tasks/singularity/task.prep_obs.lua create mode 100644 parm/parm_xml_singularity.yaml diff --git a/doc/source/BackgroundInfo/TechnicalOverview.rst b/doc/source/BackgroundInfo/TechnicalOverview.rst index 742df050..a23ffa76 100644 --- a/doc/source/BackgroundInfo/TechnicalOverview.rst +++ b/doc/source/BackgroundInfo/TechnicalOverview.rst @@ -83,9 +83,9 @@ Preconfigured (Level 1) systems for Land DA already have the required external l - /work/noaa/epic/role-epic/spack-stack/hercules/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core - /work2/noaa/epic/UFS_Land-DA_Dev/jedi_v7_hercules * - Container - - intel-oneapi-compilers/2021.8.0 - - intel-oneapi-mpi/2021.8.0 - - /opt/spack-stack/ (inside the container) + - intel-oneapi-compilers/2021.10.0 + - intel-oneapi-mpi/2021.9.0 + - /opt/spack-stack/spack-stack-1.6.0/envs/unified-env/install/modulefiles/Core (inside the container) - /opt/jedi-bundle (inside the container) Level 2-4 Systems @@ -215,4 +215,4 @@ Unlike the standalone Noah-MP land driver, the Noah-MP :term:`NUOPC cap` is able Unified Workflow (UW) Tools ============================ -The Unified Workflow (UW) is a set of tools intended to unify the workflow for various UFS applications under one framework. The UW toolkit currently includes rocoto, template, and configuration (config) tools, which are being incorporated into the Land DA workflow. Additional tools are under development. More details about UW tools can be found in the `uwtools `_ GitHub repository and in the :uw:`UW Documentation <>`. \ No newline at end of file +The Unified Workflow (UW) is a set of tools intended to unify the workflow for various UFS applications under one framework. The UW toolkit currently includes rocoto, template, and configuration (config) tools, which are being incorporated into the Land DA workflow. Additional tools are under development. 
More details about UW tools can be found in the `uwtools `_ GitHub repository and in the :uw:`UW Documentation <>`. diff --git a/doc/source/BuildingRunningTesting/Container.rst b/doc/source/BuildingRunningTesting/Container.rst index 6949e8d0..31015eb5 100644 --- a/doc/source/BuildingRunningTesting/Container.rst +++ b/doc/source/BuildingRunningTesting/Container.rst @@ -6,10 +6,7 @@ Containerized Land DA Workflow These instructions will help users build and run a basic case for the Unified Forecast System (:term:`UFS`) Land Data Assimilation (DA) System using a `Singularity/Apptainer `_ container. The Land DA :term:`container` packages together the Land DA System with its dependencies (e.g., :term:`spack-stack`, :term:`JEDI`) and provides a uniform environment in which to build and run the Land DA System. Normally, the details of building and running Earth systems models will vary based on the computing platform because there are many possible combinations of operating systems, compilers, :term:`MPIs `, and package versions available. Installation via Singularity/Apptainer container reduces this variability and allows for a smoother experience building and running Land DA. This approach is recommended for users not running Land DA on a supported :ref:`Level 1 ` system (i.e., Hera, Orion). -This chapter provides instructions for building and running basic Land DA cases in a container. Users can choose between two options: - - * A Jan. 3-4, 2000 00z sample case using :term:`GSWP3` data with the UFS Noah-MP land component - * A Dec. 21-22, 2019 00z sample case using :term:`ERA5` data with the UFS Land Driver +This chapter provides instructions for building and running a basic Land DA case for the UFS Land DA System in a container, using a Jan. 3-4, 2000 00z sample case with :term:`GSWP3` data and the UFS Noah-MP land component. ..
attention:: @@ -93,7 +90,7 @@ where ``/path/to/landda`` is the path to this top-level directory (e.g., ``/User NOAA RDHPCS Systems ---------------------- -On many NOAA :term:`RDHPCS`, a container named ``ubuntu20.04-intel-landda-release-public-v1.2.0.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `. +On many NOAA :term:`RDHPCS`, a container named ``ubuntu22.04-intel-landda-release-public-v2.0.0.img`` has already been built, and users may access the container at the locations in :numref:`Table %s `. .. _PreBuiltContainers: @@ -119,30 +116,30 @@ Users can simply set an environment variable to point to the container: .. code-block:: console - export img=path/to/ubuntu20.04-intel-landda-release-public-v1.2.0.img + export img=path/to/ubuntu22.04-intel-landda-release-public-v2.0.0.img If users prefer, they may copy the container to their local working directory. For example, on Jet: .. code-block:: console - cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu20.04-intel-landda-release-public-v1.2.0.img . + cp /mnt/lfs4/HFIP/hfv3gfs/role.epic/containers/ubuntu22.04-intel-landda-release-public-v2.0.0.img . Other Systems ---------------- -On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu20.04-intel-landda-release-public-v1.2.0.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. However, the container in the data bucket is the ``release/v1.2.0`` container rather than the updated ``develop`` branch container. +On other systems, users can build the Singularity container from a public Docker :term:`container` image or download the ``ubuntu22.04-intel-landda-release-public-v2.0.0.img`` container from the `Land DA Data Bucket `_. Downloading may be faster depending on the download speed on the user's system. 
However, the container in the data bucket is the ``release/v2.0.0`` container rather than the updated ``develop`` branch container. To download from the data bucket, users can run: .. code-block:: console - wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v1.2.0/ubuntu20.04-intel-landda-release-public-v1.2.0.img + wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v2.0.0/ubuntu22.04-intel-landda-release-public-v2.0.0.img To build the container from a Docker image, users can run: .. code-block:: console - singularity build --force ubuntu20.04-intel-landda-release-public-v1.2.0.img docker://noaaepic/ubuntu20.04-intel-landda:release-public-v1.2.0 + singularity build --force ubuntu22.04-intel-landda-release-public-v2.0.0.img docker://noaaepic/ubuntu22.04-intel21.10-landda:ue160-fms2024.01-release This process may take several hours depending on the system. @@ -162,16 +159,16 @@ Users on any system may download and untar the data from the `Land DA Data Bucke .. code-block:: console cd $LANDDAROOT - wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/current_land_da_release_data/v1.2.0/Landdav1.2.0_input_data.tar.gz - tar xvfz Landdav1.2.0_input_data.tar.gz + wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/develop-20241024/inputs.tar.gz + tar xvfz inputs.tar.gz If users choose to add data in a location other than ``$LANDDAROOT``, they can set the input data directory by running: .. code-block:: console - export LANDDA_INPUTS=/path/to/input/data + export LANDDA_INPUTS=/path/to/inputs -where ``/path/to/input/data`` is replaced by the absolute path to the location of their Land DA input data. +where ``/path/to`` is replaced by the absolute path to the location of their Land DA input data. .. _RunContainer: @@ -193,42 +190,34 @@ Save the location of the container in an environment variable. .. 
code-block:: console - export img=path/to/ubuntu20.04-intel-landda-release-public-v1.2.0.img - -Set the ``USE_SINGULARITY`` environment variable to "yes". - -.. code-block:: console - - export USE_SINGULARITY=yes - -This variable tells the workflow to use the containerized version of all the executables (including python) when running a cycle. + export img=/path/to/ubuntu22.04-intel-landda-release-public-v2.0.0.img Users may convert a container ``.img`` file to a writable sandbox. This step is optional on most systems: .. code-block:: console - singularity build --sandbox ubuntu20.04-intel-landda-release-public-v1.2.0 $img + singularity build --sandbox ubuntu22.04-intel-landda-release-public-v2.0.0 $img When making a writable sandbox on NOAA :term:`RDHPCS`, the following warnings commonly appear and can be ignored: .. code-block:: console INFO: Starting build... - INFO: Verifying bootstrap image ubuntu20.04-intel-landda-release-public-v1.2.0.img + INFO: Verifying bootstrap image ubuntu22.04-intel-landda-release-public-v2.0.0.img WARNING: integrity: signature not found for object group 1 WARNING: Bootstrap image could not be verified, but build will continue. -From within the ``$LANDDAROOT`` directory, copy the ``land-DA_workflow`` directory out of the container. +From within the ``$LANDDAROOT`` directory, copy the ``setup_container.sh`` script out of the container. .. code-block:: console - singularity exec -H $PWD $img cp -r /opt/land-DA_workflow . + singularity exec -H $PWD $img cp -r /opt/land-DA_workflow/setup_container.sh . -There should now be a ``land-DA_workflow`` directory in the ``$LANDDAROOT`` directory. Navigate into the ``land-DA_workflow`` directory. If for some reason, this is unsuccessful, users may try a version of the following command instead: +The ``setup_container.sh`` script should now be in the ``$LANDDAROOT`` directory. If for some reason, the previous command was unsuccessful, users may try a version of the following command instead: .. 
code-block:: console - singularity exec -B /<local_base_dir>:/<container_dir> $img cp -r /opt/land-DA_workflow . + singularity exec -B /<local_base_dir>:/<container_dir> $img cp -r /opt/land-DA_workflow/setup_container.sh . where ``<local_base_dir>`` and ``<container_dir>`` are replaced with a top-level directory on the local system and in the container, respectively. Additional directories can be bound by adding another ``-B /<local_base_dir>:/<container_dir>`` argument before the container location (``$img``). Note that if previous steps included a ``sudo`` command, ``sudo`` may be required in front of this command. @@ -240,90 +229,50 @@ where ``<local_base_dir>`` and ``<container_dir>`` are replaced with a top-level Sometimes binding directories with different names can cause problems. In general, it is recommended that the local base directory and the container directory have the same name. For example, if the host system's top-level directory is ``/user1234``, the user may want to convert the ``.img`` file to a writable sandbox and create a ``user1234`` directory in the sandbox to bind to. -Navigate to the ``land-DA_workflow`` directory after it has been successfully copied into ``$LANDDAROOT``. +Run the ``setup_container.sh`` script with the proper arguments. Ensure the ``LANDDA_INPUTS`` variable is set before running this script. .. code-block:: console - cd land-DA_workflow + ./setup_container.sh -c=<compiler> -m=<mpi> -i=$img -When using a Singularity container, Intel compilers and Intel :term:`MPI` (preferably 2020 versions or newer) need to be available on the host system to properly launch MPI jobs. The Level 1 systems that have Intel compilers and Intel MPI available are: Hera, Jet, NOAA Cloud, and Orion. Generally, this is accomplished by loading a module with a recent Intel compiler and then loading the corresponding Intel MPI. For example, users can modify the following commands to load their system's compiler/MPI combination: - -.. code-block:: console +where: - module load intel/2022.1.2 impi/2022.1.2 - -..
note:: - - :term:`Spack-stack` uses lua modules, which require Lmod to be initialized for the ``module load`` command to work. If for some reason, Lmod is not initialized, users can source the ``init/bash`` file on their system before running the command above. For example, users can modify and run the following command: - - .. code-block:: console - - source /path/to/init/bash + * ``-c`` is the compiler on the user's local machine (e.g., ``intel/2022.1.2``) + * ``-m`` is the :term:`MPI` on the user's local machine (e.g., ``impi/2022.1.2``) + * ``-i`` is the full path to the container image ( e.g., ``$LANDDAROOT/ubuntu22.04-intel-landda-release-public-v2.0.0.img``). - Then they should be able to load the appropriate modules. - -The remaining Level 1 systems that do not have Intel MPI available will need to load a different Intel compiler and MPI combination. Refer to :numref:`Table %s ` for which Intel compiler and MPI to load for these systems. - -.. _NonIMPICompilers: - -.. table:: Intel compilers and MPIs for non-Intel MPI Level 1 systems - - +-----------------+-------------------------------------------------------------------------+ - | Machine | Intel compiler and MPI combinations | - +=================+=========================================================================+ - | Derecho | module load intel-oneapi/2023.2.1 cray-mpich/8.1.25 | - +-----------------+-------------------------------------------------------------------------+ - | Gaea | module load intel-classic/2023.1.0 cray-mpich/8.1.25 | - +-----------------+-------------------------------------------------------------------------+ - | Hercules | module load intel-oneapi-compilers/2022.2.1 intel-oneapi-mpi/2021.7.1 | - +-----------------+-------------------------------------------------------------------------+ - -For Derecho and Gaea, an additional script is needed to help set up the ``land-DA_workflow`` scripts so that the container can run there. - -.. 
code-block:: console - - ./setup_container.sh -p= - -where ```` is ``derecho`` or ``gaea``. +When using a Singularity container, Intel compilers and Intel :term:`MPI` (preferably 2020 versions or newer) need to be available on the host system to properly launch MPI jobs. Generally, this is accomplished by loading a module with a recent Intel compiler and then loading the corresponding Intel MPI. .. _ConfigureExptC: Configure the Experiment =========================== -Modify Machine Settings ------------------------- +The user should now see the ``land-DA_workflow`` and ``jedi-bundle`` directories in the ``$LANDDAROOT`` directory. -Users on a system with a Slurm job scheduler will need to make some minor changes to the ``submit_cycle.sh`` file. Open the file and change the account and queue (qos) to match the desired account and qos on the system. Users may also need to add the following line to the script to specify the partition. For example, on Jet, users should set: +Because of a conda conflict between the container and the host system, it is best to load rocoto separately instead of using workflow files found in the ``modulefiles`` directory. .. code-block:: console - #SBATCH --partition=xjet + module load rocoto -When using the GSWP3 forcing option, users will need to update line 7 to say ``#SBATCH --cpus-per-task=4``. Users can perform this change manually in a code editor or run: +The ``setup_container.sh`` script creates the ``parm_xml.yaml`` from the ``parm_xml_singularity.yaml`` file. Update any relevant variables in this file (e.g., ``ACCOUNT`` or ``cycledef/spec``) before creating the Rocoto XML file. .. code-block:: console - sed -i 's/--cpus-per-task=1/--cpus-per-task=4/g' submit_cycle.sh + cd $LANDDAROOT/land-DA_workflow/parm + vi parm_xml.yaml Save and close the file. -Modify Experiment Settings ---------------------------- - -The Land DA System uses a script-based workflow that is launched using the ``do_submit_cycle.sh`` script.
That script requires an input file that details all the specifics of a given experiment. EPIC has provided two sample ``settings_*`` files as examples: ``settings_DA_cycle_era5`` and ``settings_DA_cycle_gswp3``. - -.. attention:: - - Note that the GSWP3 option will only run as-is on Hera and Orion. Users on other systems may need to make significant changes to configuration files, which is not a supported option for the |latestr| release. It is recommended that users on other systems use the UFS land driver ERA5 sample experiment set in ``settings_DA_cycle_era5``. - -First, update the ``$BASELINE`` environment variable in the selected ``settings_DA_*`` file to say ``singularity.internal`` instead of ``hera.internal``: +Once everything looks good, run the uwtools scripts to create the Rocoto XML file: .. code-block:: console - export BASELINE=singularity.internal + ../sorc/conda/envs/land_da/bin/uw template render --input-file templates/template.land_analysis.yaml --values-file parm_xml.yaml --output-file land_analysis.yaml + ../sorc/conda/envs/land_da/bin/uw rocoto realize --input-file land_analysis.yaml --output-file land_analysis.xml -When using the GSWP3 forcing option, users must also update the ``MACHINE_ID`` to ``orion`` in ``settings_DA_cycle_gswp3`` if running on Orion. +A successful run of this command will output a “0 errors found” message. .. _RunExptC: @@ -334,107 +283,33 @@ To start the experiment, run: .. code-block:: console - ./do_submit_cycle.sh settings_DA_cycle_era5 + rocotorun -w land_analysis.xml -d land_analysis.db -The ``do_submit_cycle.sh`` script will read the ``settings_DA_cycle_*`` file and the ``release.environment`` file, which contain sensible experiment default values to simplify the process of running the workflow for the first time. Advanced users will wish to modify the parameters in ``do_submit_cycle.sh`` to fit their particular needs. 
After reading the defaults and other variables from the settings files, ``do_submit_cycle.sh`` creates a working directory (named ``workdir`` by default) and an output directory called ``landda_expts`` in the parent directory of ``land-DA_workflow`` and then submits a job (``submit_cycle.sh``) to the queue that will run through the workflow. If all succeeds, users will see ``log`` and ``err`` files created in ``land-DA_workflow`` along with a ``cycle.log`` file, which will show where the cycle has ended. +See the :ref:`Workflow Overview ` section to learn more about the workflow process. -.. _CheckProgress: +.. _TrackProgress: -Check Progress +Track Progress ---------------- -To check on the experiment status, users on a system with a Slurm job scheduler may run: +To check on the job status, users on a system with a Slurm job scheduler may run: .. code-block:: console squeue -u $USER -To view progress, users can open the ``log*`` and ``err*`` files once they have been generated: - -.. code-block:: console - - tail -f log* err* - -Users will need to type ``Ctrl+C`` to exit the files. For examples of what the log and error files should look like in a successful experiment, reference :ref:`ERA5 Experiment Logs ` or :ref:`GSWP3 Experiment Logs ` below. - -.. attention:: - - If the log file contains a NetCDF error (e.g., ``ModuleNotFoundError: No module named 'netCDF4'``), run: - - .. code-block:: console - - python -m pip install netCDF4 - - Then, resubmit the job (``sbatch submit_cycle.sh``). - -Next, check for the background and analysis files in the test directory. - -.. code-block:: console - - ls -l ../landda_expts/DA__test/mem000/restarts/`` - -where: - - * ```` is either ``era5`` or ``gswp3``, and - * ```` is either ``vector`` or ``tile`` depending on whether ERA5 or GSWP3 forcing data were used, respectively. - -The experiment should populate the ``landda_expts`` directory with data in the following locations: - -.. 
code-block:: console - - landda_expts/DA_GHCN_test/DA/ - # AND - landda_expts/DA_GHCN_test/mem000/restarts/vector/ - # OR - landda_expts/DA_GHCN_test/mem000/restarts/tile/ - -Depending on the experiment, either the ``vector`` or the ``tile`` directory will have data, but not both. - - -.. _era5-log-output: - -ERA5 Experiment Logs -===================== - -For the ERA5 experiment, the ``log*`` file for a successful experiment will contain a message like: +To view the experiment status, run: .. code-block:: console - Creating: .//ufs_land_restart.2019-12-22_00-00-00.nc - Searching for forcing at time: 2019-12-22 01:00:00 - -The ``err*`` file for a successful experiment will end with something similar to: + rocotostat -w land_analysis.xml -d land_analysis.db -.. code-block:: console +See the :ref:`Track Experiment Status ` section to learn more about the ``rocotostat`` output. - + THISDATE=2019122200 - + date_count=1 - + '[' 1 -lt 1 ']' - + '[' 2019122200 -lt 2019122200 ']' +.. _CheckExptOutput: -.. _gswp3-log-output: +Check Experiment Output +------------------------- -GSWP3 Experiment Logs -======================= - -For the GSWP3 experiment, the ``log*`` file for a successful experiment will end with a list of resource statistics. For example: - -.. code-block:: console - - Number of times filesystem performed OUTPUT = 250544 - Number of Voluntary Context Switches = 3252 - Number of InVoluntary Context Switches = 183 - *****************END OF RESOURCE STATISTICS************************* - -The ``err*`` file for a successful experiment will end with something similar to: - -.. code-block:: console +Since this experiment in the container is the same experiment explained in the previous document section, it is suggested that users should see the :ref:`experiment output structure ` as well as the :ref:`plotting results ` to learn more about the expected experiment outputs. 
- + echo 'do_landDA: calling apply snow increment' - + [[ '' =~ hera\.internal ]] - + /apps/intel-2022.1.2/intel-2022.1.2/mpi/2021.5.1/bin/mpiexec -n 6 /path/to/land-DA_workflow/build/bin/apply_incr.exe /path/to/landda_expts/DA_GSWP3_test/DA/logs//apply_incr.log - + [[ 0 != 0 ]] - + '[' YES == YES ']' - + '[' YES == YES ']' - + cp /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile1.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile2.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile3.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile4.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile5.nc /path/to/workdir/mem000/jedi/20000103.000000.xainc.sfc_data.tile6.nc /path/to/landda_expts/DA_GSWP3_test/DA/jedi_incr/ - + [[ YES == \N\O ]] diff --git a/modulefiles/build_singularity_intel.lua b/modulefiles/build_singularity_intel.lua index 80843f2e..37d362c5 100644 --- a/modulefiles/build_singularity_intel.lua +++ b/modulefiles/build_singularity_intel.lua @@ -1,25 +1,25 @@ help([[ -loads UFS Model prerequisites for Hera/Intel +loads UFS Model prerequisites for Singularity container ]]) setenv("EPICHOME", "/opt") -prepend_path("MODULEPATH", pathJoin(os.getenv("EPICHOME"),"spack-stack/spack-stack-1.3.0/envs/unified-dev/install/modulefiles/Core")) +prepend_path("MODULEPATH", pathJoin(os.getenv("EPICHOME"),"spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")) -stack_intel_ver=os.getenv("stack_intel_ver") or "2021.8.0" +stack_intel_ver=os.getenv("stack_intel_ver") or "2021.10.0" load(pathJoin("stack-intel", stack_intel_ver)) -load("intel-oneapi-mpi/2021.8.0") -stack_intel_oneapi_mpi_ver=os.getenv("stack_intel_oneapi_mpi_ver") or "2021.8.0" +load("intel-oneapi-mpi/2021.9.0") +stack_intel_oneapi_mpi_ver=os.getenv("stack_intel_oneapi_mpi_ver") or "2021.9.0" load(pathJoin("stack-intel-oneapi-mpi", stack_intel_oneapi_mpi_ver)) -stack_python_ver=os.getenv("stack_python_ver") or "3.8.10" 
-load(pathJoin("stack-python", stack_python_ver)) +--stack_python_ver=os.getenv("stack_python_ver") or "3.10.13" +--load(pathJoin("stack-python", stack_python_ver)) cmake_ver=os.getenv("cmake_ver") or "3.23.1" load(pathJoin("cmake", cmake_ver)) -ecbuild_ver=os.getenv("ecbuild_ver") or "3.6.5" +ecbuild_ver=os.getenv("ecbuild_ver") or "3.7.2" load(pathJoin("ecbuild", ecbuild_ver)) jasper_ver=os.getenv("jasper_ver") or "2.0.32" @@ -37,16 +37,16 @@ load(pathJoin("hdf5", hdf5_ver)) netcdf_c_ver=os.getenv("netcdf_ver") or "4.9.2" load(pathJoin("netcdf-c", netcdf_c_ver)) -netcdf_fortran_ver=os.getenv("netcdf_fortran_ver") or "4.6.0" +netcdf_fortran_ver=os.getenv("netcdf_fortran_ver") or "4.6.1" load(pathJoin("netcdf-fortran", netcdf_fortran_ver)) -pio_ver=os.getenv("pio_ver") or "2.5.9" +pio_ver=os.getenv("pio_ver") or "2.5.10" load(pathJoin("parallelio", pio_ver)) -esmf_ver=os.getenv("esmf_ver") or "8.3.0b09" +esmf_ver=os.getenv("esmf_ver") or "8.6.0" load(pathJoin("esmf", esmf_ver)) -fms_ver=os.getenv("fms_ver") or "2022.04" +fms_ver=os.getenv("fms_ver") or "2024.01" load(pathJoin("fms",fms_ver)) bacio_ver=os.getenv("bacio_ver") or "2.4.1" @@ -55,39 +55,47 @@ load(pathJoin("bacio", bacio_ver)) crtm_ver=os.getenv("crtm_ver") or "2.4.0" load(pathJoin("crtm", crtm_ver)) -g2_ver=os.getenv("g2_ver") or "3.4.5" +g2_ver=os.getenv("g2_ver") or "3.5.1" load(pathJoin("g2", g2_ver)) -g2tmpl_ver=os.getenv("g2tmpl_ver") or "1.10.2" +g2tmpl_ver=os.getenv("g2tmpl_ver") or "1.13.0" load(pathJoin("g2tmpl", g2tmpl_ver)) -ip_ver=os.getenv("ip_ver") or "3.3.3" +ip_ver=os.getenv("ip_ver") or "4.3.0" load(pathJoin("ip", ip_ver)) -sp_ver=os.getenv("sp_ver") or "2.3.3" +sp_ver=os.getenv("sp_ver") or "2.5.0" load(pathJoin("sp", sp_ver)) -w3emc_ver=os.getenv("w3emc_ver") or "2.9.2" +w3emc_ver=os.getenv("w3emc_ver") or "2.10.0" load(pathJoin("w3emc", w3emc_ver)) -gftl_shared_ver=os.getenv("gftl_shared_ver") or "1.5.0" +gftl_shared_ver=os.getenv("gftl_shared_ver") or "1.6.1" 
load(pathJoin("gftl-shared", gftl_shared_ver)) -mapl_ver=os.getenv("mapl_ver") or "2.22.0-esmf-8.3.0b09" +mapl_ver=os.getenv("mapl_ver") or "2.40.3-esmf-8.6.0" load(pathJoin("mapl", mapl_ver)) +scotch_ver=os.getenv("scotch_ver") or "7.0.4" +load(pathJoin("scotch", scotch_ver)) + load("py-cftime/1.0.3.4") -load("py-cython/0.29.32") +load("py-cython/0.29.36") load("py-f90nml/1.4.3") -load("py-jinja2/3.1.2") -load("py-netcdf4/1.5.3") +load("py-jinja2/3.0.3") +load("py-netcdf4/1.5.8") load("py-numpy/1.22.3") -load("py-pandas/1.4.0") +load("py-pandas/1.5.3") load("py-python-dateutil/2.8.2") load("py-pyyaml/6.0") -load("atlas") +setenv("CFLAGS","-diag-disable=10448") +setenv("FFLAGS","-diag-disable=10448") +prepend_path("PATH","/opt/intel/oneapi/compiler/2024.0/bin:/opt/intel/oneapi/compiler/2023.2.3/linux/bin/intel64") +--setenv("CMAKE_C_COMPILER","mpiicc") +--setenv("CMAKE_CXX_COMPILER","mpicxx") +--setenv("CMAKE_Fortran_COMPILER","mpif90") setenv("CC", "mpiicc") setenv("CXX", "mpiicpc") setenv("FC", "mpiifort") diff --git a/modulefiles/tasks/singularity/task.analysis.lua b/modulefiles/tasks/singularity/task.analysis.lua new file mode 100644 index 00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.analysis.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/modulefiles/tasks/singularity/task.forecast.lua b/modulefiles/tasks/singularity/task.forecast.lua new file mode 100644 index 00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.forecast.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/modulefiles/tasks/singularity/task.plot_stats.lua b/modulefiles/tasks/singularity/task.plot_stats.lua new file mode 100644 index 00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.plot_stats.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/modulefiles/tasks/singularity/task.post_anal.lua b/modulefiles/tasks/singularity/task.post_anal.lua new file mode 100644 index 
00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.post_anal.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/modulefiles/tasks/singularity/task.pre_anal.lua b/modulefiles/tasks/singularity/task.pre_anal.lua new file mode 100644 index 00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.pre_anal.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/modulefiles/tasks/singularity/task.prep_obs.lua b/modulefiles/tasks/singularity/task.prep_obs.lua new file mode 100644 index 00000000..517c4e82 --- /dev/null +++ b/modulefiles/tasks/singularity/task.prep_obs.lua @@ -0,0 +1,2 @@ +load("COMPILER") +load("MPI") diff --git a/parm/parm_xml_singularity.yaml b/parm/parm_xml_singularity.yaml new file mode 100644 index 00000000..10956221 --- /dev/null +++ b/parm/parm_xml_singularity.yaml @@ -0,0 +1,7 @@ +machine: singularity +account: epic +# exp_basedir: /path/to/parent/directory/of/land-DA_workflow +exp_basedir: SINGULARITY_WORKING_DIR +jedi_install: SINGULARITY_WORKING_DIR +warmstart_dir: SINGULARITY_WORKING_DIR/land-DA_workflow/fix/DATA_RESTART +we2e_test: 'NO' diff --git a/parm/run_container_executable.sh b/parm/run_container_executable.sh index 232a2e89..039ab1d7 100755 --- a/parm/run_container_executable.sh +++ b/parm/run_container_executable.sh @@ -2,18 +2,21 @@ export SINGULARITYENV_FI_PROVIDER=tcp export SINGULARITY_SHELL=/bin/bash +SINGULARITYBIN=`which singularity` BINDDIR="/"`pwd | awk -F"/" '{print $2}'` -CONTAINERLOC=${EPICCONTAINERS:-${HOME}} -img=${img:-${CONTAINERLOC}/ubuntu20.04-intel-ue-landda.img} +img=IMAGE CONTAINERBASE="/"`echo $img | xargs realpath | awk -F"/" '{print $2}'` cmd=$(basename "$0") arg="$@" -if [ ! -z "$FIXlandda" ]; then - INPUTBASE="/"`echo $FIXlandda | xargs realpath | awk -F"/" '{print $2}'` +if [ ! 
-z "$LANDDAROOT" ]; then + INPUTBASE="/"`echo $LANDDAROOT | xargs realpath | awk -F"/" '{print $2}'` INPUTBIND="-B $INPUTBASE:$INPUTBASE" else INPUTBIND="" fi -echo running: ${SINGULARITYBIN} exec $img $cmd $arg +# Remove echo for ndate command as it messes with the PTIME variable +if [ $cmd != "ndate" ]; then + echo running: ${SINGULARITYBIN} exec -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg +fi ${SINGULARITYBIN} exec -B $BINDDIR:$BINDDIR -B $CONTAINERBASE:$CONTAINERBASE $INPUTBIND $img $cmd $arg diff --git a/sorc/test/ci/Dockerfile b/sorc/test/ci/Dockerfile index 793ab2ad..7c0e5ca6 100644 --- a/sorc/test/ci/Dockerfile +++ b/sorc/test/ci/Dockerfile @@ -1,20 +1,34 @@ -From noaaepic/ubuntu20.04-intel-landda:develop +From noaaepic/ubuntu22.04-intel21.10-landda:ue160-fms202401-dev CMD ["/bin/bash"] ENV HOME=/opt WORKDIR $HOME -#remove org land-offline_workflow -RUN rm -rf $HOME/land-offline_workflow -COPY . $HOME/land-offline_workflow +# Get Land DA data +RUN wget https://noaa-ufs-land-da-pds.s3.amazonaws.com/develop-20241024/inputs.tar.gz && \ + tar -xvzf inputs.tar.gz && mv inputs/* /opt/land-DA_workflow/fix/ # set env vars ENV FIXlandda=$HOME/land-DA_workflow/fix -ENV EPICHOME=/opt -ENV JEDI_INSTALL=${EPICHOME}/jedi_skylabv7.0 -ENV TEST_BASEDIR=${EPICHOME}/test_base/restarts/vector"} +ENV JEDI_INSTALL=$HOME +ENV FIXdir=$FIXlandda +ENV JEDI_EXECDIR=/opt/jedi-bundle/install/bin + +# Fix UFS WM RT File paths +#RUN ln -s /opt/land-DA_workflow/install/bin/ufs_model /opt/land-DA_workflow/build/ufs_model.fd/src/ufs_model.fd-build/ +RUN sed -i '18 i PLATFORM=jet' /opt/land-DA_workflow/sorc/test/run_ufs_datm_lnd.sh +RUN mv /opt/land-DA_workflow/fix/DATM_input_data /opt/land-DA_workflow/fix/DATM_GSWP3_input_data && \ + ln -s /opt/land-DA_workflow/fix/DATM_GSWP3_input_data/gswp3/* /opt/land-DA_workflow/fix/DATM_GSWP3_input_data/ +RUN mkdir -p /opt/land-DA_workflow/fix/FV3_input_data/INPUT && \ + ln -s 
/opt/land-DA_workflow/fix/FV3_fix_tiled/C96/* /opt/land-DA_workflow/fix/FV3_input_data/INPUT +RUN mkdir -p /opt/land-DA_workflow/fix/NOAHMP_IC/CLMNCEP && \ + mv /opt/land-DA_workflow/fix/NOAHMP_IC/*.nc /opt/land-DA_workflow/fix/NOAHMP_IC/CLMNCEP/ #build & unit testing -WORKDIR $HOME/land-offline_workflow -RUN source /opt/spack-stack/.bashenv; mkdir build; cd build; pwd; ecbuild ..; make -j2; ctest -V --stop-on-failure +WORKDIR $HOME/land-DA_workflow +#RUN source /opt/spack-stack/.bashenv; mkdir build; cd build; pwd; ecbuild ..; make -j2; ctest -V --stop-on-failure +RUN source /opt/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/.bashenv-fms && \ + module load bacio cmake crtm ecbuild esmf fms gftl-shared g2 g2tmpl hdf5 ip jasper libpng mapl netcdf-c netcdf-fortran parallelio && \ + module load prod_util py-netcdf4 py-numpy py-pyyaml py-jinja2 py-xarray sp ufs-pyenv w3emc zlib scotch && \ + cd build; pwd; ctest -V --stop-on-failure diff --git a/ush/hofx_analysis_stats.py b/ush/hofx_analysis_stats.py index f1d4f7dc..df7a30dd 100755 --- a/ush/hofx_analysis_stats.py +++ b/ush/hofx_analysis_stats.py @@ -68,6 +68,8 @@ def plot_scatter(): cartopy.config['data_dir']='/scratch2/NAGAPE/epic/UFS_Land-DA_Dev/inputs/NaturalEarth' elif yaml_data['machine']=='orion' or yaml_data['machine']=='hercules': cartopy.config['data_dir']='/work/noaa/epic/UFS_Land-DA_Dev/inputs/NaturalEarth' + elif yaml_data['machine']=='singularity': + cartopy.config['data_dir']='SINGULARITY_WORKING_DIR/land-DA_workflow/fix/NaturalEarth' field_mean=float("{:.2f}".format(np.mean(np.absolute(field)))) field_std=float("{:.2f}".format(np.std(np.absolute(field)))) diff --git a/ush/plot_forecast_restart.py b/ush/plot_forecast_restart.py index 9f0e91b9..2e4c8d05 100755 --- a/ush/plot_forecast_restart.py +++ b/ush/plot_forecast_restart.py @@ -50,6 +50,8 @@ def main(): cartopy.config['data_dir']='/scratch2/NAGAPE/epic/UFS_Land-DA_Dev/inputs/NaturalEarth' elif yaml_data['machine']=='orion' or 
yaml_data['machine']=='hercules': cartopy.config['data_dir']='/work/noaa/epic/UFS_Land-DA_Dev/inputs/NaturalEarth' + elif yaml_data['machine']=='singularity': + cartopy.config['data_dir']='SINGULARITY_WORKING_DIR/land-DA_workflow/fix/NaturalEarth' #var_list=["snwdph","smc"] var_list=["snwdph"]