Skip to content

Commit

Permalink
Merge pull request #2989 from boutproject/next-gitlab-ci
Browse files Browse the repository at this point in the history
Add LC gitlab CI for GPU build/run tests
  • Loading branch information
bendudson authored Oct 14, 2024
2 parents 837296e + 28b88a0 commit a16cc83
Show file tree
Hide file tree
Showing 5 changed files with 398 additions and 0 deletions.
98 changes: 98 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
###############################################################################
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# DESCRIPTION:
###############################################################################
# General GitLab pipelines configurations for supercomputers and Linux clusters
# at Lawrence Livermore National Laboratory (LLNL).
# This entire pipeline is LLNL-specific
#
# Important note: This file is a template provided by llnl/radiuss-shared-ci.
# It remains to set the variable values, change the reference to the
# radiuss-shared-ci repo, and opt in or out of optional features. The project
# can then extend it with additional stages.
#
# In addition, each project should copy over and complete:
# - .gitlab/custom-jobs-and-variables.yml
# - .gitlab/subscribed-pipelines.yml
#
# The jobs should be specified in a file local to the project,
# - .gitlab/jobs/${CI_MACHINE}.yml
# or generated (see LLNL/Umpire for an example).
###############################################################################

# We define the following GitLab pipeline variables:
variables:
  ##### LC GITLAB CONFIGURATION
  # Use an LLNL service user to run CI, so that pipelines do not run as an
  # actual user.
  LLNL_SERVICE_USER: ""
  # Use the service user workspace. Solves permission issues and stores
  # everything at the same location regardless of who triggers a pipeline.
  CUSTOM_CI_BUILDS_DIR: "/usr/workspace/BOUT-GPU/gitlab-runner"
  # Tells GitLab to recursively update the submodules when cloning the project.
  # GIT_SUBMODULE_STRATEGY: recursive

  ##### PROJECT VARIABLES
  # We build the projects in the CI clone directory.
  # Used in script/gitlab/build_and_test.sh script.
  # TODO: add a clean-up mechanism.
  BUILD_ROOT: ${CI_PROJECT_DIR}

  ##### SHARED_CI CONFIGURATION
  # Required information about the GitHub repository. The status-reporting
  # request in .gitlab/subscribed-pipelines.yml builds the URL path
  # "repos/${GITHUB_PROJECT_ORG}/${GITHUB_PROJECT_NAME}", so ORG must be the
  # GitHub organization ("boutproject") and NAME the repository ("BOUT-dev").
  GITHUB_PROJECT_NAME: "BOUT-dev"
  GITHUB_PROJECT_ORG: "boutproject"
  # Set the build-and-test command.
  # Nested variables are allowed and useful to customize the job command. We
  # prevent variable expansion so that you can define them at job level.
  JOB_CMD:
    value: "tests/gitlab/ci-tests.sh"
    expand: false
  # Override the pattern describing branches that will skip the "draft PR filter
  # test". Add protected branches here. See default value in
  # preliminary-ignore-draft-pr.yml.
  # ALWAYS_RUN_PATTERN: ""

# We organize the build-and-test stage with sub-pipelines. Each sub-pipeline
# corresponds to a test batch on a given machine.

# High level stages
# Pipeline stages, in execution order: the machine availability checks run
# in "prerequisites", the triggered sub-pipelines in "build-and-test".
stages:
  - prerequisites
  - build-and-test

# Template for jobs triggering a build-and-test sub-pipeline:
# Hidden template job: triggers a child pipeline assembled from the project's
# shared variables, the radiuss-shared-ci pipeline for ${CI_MACHINE}, and the
# project's job list for that machine.
.build-and-test:
  stage: build-and-test
  trigger:
    include:
      - local: ".gitlab/custom-jobs-and-variables.yml"
      - project: "radiuss/radiuss-shared-ci"
        ref: "v2024.07.0"
        file: "pipelines/${CI_MACHINE}.yml"
      # Add your jobs
      # you can use a local file
      - local: ".gitlab/jobs/${CI_MACHINE}.yml"
      # or a file generated in the previous steps
      # - artifact: '${CI_MACHINE}-jobs.yml'
      #   job: 'generate-job-file'
      # (See Umpire CI setup for an example).
    # The triggering job mirrors the status of the child pipeline.
    strategy: depend
    forward:
      pipeline_variables: true

include:
  # Sets ID tokens for every job using `default:`
  - project: "lc-templates/id_tokens"
    file: "id_tokens.yml"
  # [Optional] checks preliminary to running the actual CI test
  - project: "radiuss/radiuss-shared-ci"
    ref: "v2024.07.0"
    file: "utilities/preliminary-ignore-draft-pr.yml"
  # Pipelines subscribed to by the project
  - local: ".gitlab/subscribed-pipelines.yml"
62 changes: 62 additions & 0 deletions .gitlab/custom-jobs-and-variables.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
###############################################################################
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# We define the following GitLab pipeline variables:
variables:
  # Some pipelines create a single allocation shared among jobs in order to
  # save time and resources. The allocation must be uniquely named so that it
  # can be retrieved reliably and does not collide with other allocations.
  ALLOC_NAME: '${CI_PROJECT_NAME}_ci_${CI_PIPELINE_ID}'

  # Ruby
  # Arguments for top level allocation
  RUBY_SHARED_ALLOC: '--mpi=none --exclusive --reservation=ci --time=20 --nodes=1'
  # Arguments for job level allocation
  RUBY_JOB_ALLOC: '--mpi=none --reservation=ci --nodes=1'
  # Add variables that should apply to all the jobs on a machine:
  # RUBY_MY_VAR: "..."

  # Poodle
  # Arguments for top level allocation
  POODLE_SHARED_ALLOC: '--exclusive --partition=pdebug --time=10 --nodes=1'
  # Arguments for job level allocation
  POODLE_JOB_ALLOC: '--nodes=1'
  # Add variables that should apply to all the jobs on a machine:
  # POODLE_MY_VAR: "..."

  # Corona
  # Arguments for top level allocation
  # OPTIONAL: "-o per-resource.count=2" allows 2 jobs to run on each node.
  CORONA_SHARED_ALLOC: '--exclusive --time-limit=15m --nodes=1'
  # Arguments for job level allocation
  CORONA_JOB_ALLOC: '--nodes=1 --begin-time=+5s'
  # Add variables that should apply to all the jobs on a machine:
  # CORONA_MY_VAR: "..."

  # Tioga
  # Arguments for top level allocation
  # OPTIONAL: "-o per-resource.count=2" allows 2 jobs to run on each node.
  TIOGA_SHARED_ALLOC: '--queue=pci --exclusive --time-limit=15m --nodes=1'
  # Arguments for job level allocation
  TIOGA_JOB_ALLOC: '--nodes=1 --begin-time=+5s'
  # Add variables that should apply to all the jobs on a machine:
  # TIOGA_MY_VAR: "..."

  # Lassen uses a different job scheduler (Spectrum LSF) that does not allow
  # pre-allocation the same way Slurm does. Arguments for job level allocation
  LASSEN_JOB_ALLOC: '1 -W 30 -q pci'
  # Add variables that should apply to all the jobs on a machine:
  # LASSEN_MY_VAR: "..."


# Configuration shared by build and test jobs specific to this project.
# Not all configuration can be shared. Here projects can fine tune the
# CI behavior.
# See Umpire for an example (export junit test reports).
# Hidden job holding project-wide job customizations. It only carries a
# placeholder variable so that the template is not empty.
.custom_job:
  variables:
    JOB_TEMPLATE_CANNOT_BE_EMPTY: 'True'
59 changes: 59 additions & 0 deletions .gitlab/jobs/lassen.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
###############################################################################
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# We require projects to define their job command using a variable (JOB_CMD).
# In customization/gitlab-ci.yml, we encourage defining this variable as
# non-expandable, so that projects can use nested variables to configure the
# job command. The caveat is that the reproducer here cannot capture the
# definition of these variables in a generic fashion. By overriding the
# following section, projects can specify the variables to define in the
# reproducer to exactly reproduce the CI build.
# Hidden job whose script section is overridden here; it currently only
# prints a banner naming the machine.
.lassen_reproducer_vars:
  script:
    - 'echo -e "Running on Lassen\n"'

# With GitLab CI, included files cannot be empty.
# TODO: remove when you have at least one job defined.
variables:
  INCLUDED_FILE_CANNOT_BE_EMPTY: 'True'

###############
# Explanations:
###############
# RADIUSS Shared CI provides a pipeline for each machine, where a template job
# is provided. Each of your jobs must extend this template to be added to the
# list of jobs running on the associated machine.
#
# The job template then expects you to define the "JOB_CMD" variable with the
# one line command used to trigger the build and test of your project.
#
# We suggest that you set your command in such a way that you can then
# customize it per job with variables. E.g.:
# "./path/to/my_ci_script ${A_VARIABLE}"

## Adding jobs defined by the project.
## Note: placing the extends section first allows you to override part of the
## shared implementation if needed (and if you know what you are doing).
#<job-name (typically build target description)>:
# extends: .job_on_lassen
# variables:
# <A_VARIABLE>: "<with job specific value>"

# Hidden base job for all Lassen jobs of this project: refreshes the shared
# BOUT-configs checkout, loads the Lassen environment, and removes the build
# directories once the job has finished.
.base-job:
  extends: .job_on_lassen
  before_script:
    # Update BOUT-configs in the shared directory.
    - pushd /usr/workspace/BOUT-GPU/BOUT-configs
    - git pull
    - popd
    # Create the environment.
    - source /usr/workspace/BOUT-GPU/BOUT-configs/lassen/setup-env.sh
  after_script:
    # Quote the paths so they are not word-split or glob-expanded, and use
    # ":?" so the command aborts instead of running with an empty path if a
    # variable is unexpectedly unset.
    - rm -rf -- "${CI_BUILDS_DIR:?}" "${CI_PROJECT_DIR:?}"

# Concrete Lassen job; all of its configuration comes from .base-job.
build-test-cuda-minimal:
  extends: [.base-job]
91 changes: 91 additions & 0 deletions .gitlab/subscribed-pipelines.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
###############################################################################
# Copyright (c) 2022-23, Lawrence Livermore National Security, LLC and RADIUSS
# project contributors. See the COPYRIGHT file for details.
#
# SPDX-License-Identifier: (MIT)
###############################################################################

# The template job to test whether a machine is up.
# Expects CI_MACHINE defined to machine name.
# Hidden template job run in the "prerequisites" stage. It reads the
# total_nodes_up entry for ${CI_MACHINE} from the login-node status file; when
# that entry is 0 it posts a "failure" commit status to GitHub and exits 1.
.machine-check:
  stage: prerequisites
  tags:
    - shell
    - oslic
  variables:
    # No repository checkout is needed for this check.
    GIT_STRATEGY: none
  script:
    - |
      if [[ $(jq '.[env.CI_MACHINE].total_nodes_up' /usr/global/tools/lorenz/data/loginnodeStatus) == 0 ]]
      then
        echo -e "\e[31mNo node available on ${CI_MACHINE}\e[0m"
        curl --url "https://api.github.com/repos/${GITHUB_PROJECT_ORG}/${GITHUB_PROJECT_NAME}/statuses/${CI_COMMIT_SHA}" \
             --header 'Content-Type: application/json' \
             --header "authorization: Bearer ${GITHUB_TOKEN}" \
             --data "{ \"state\": \"failure\", \"target_url\": \"${CI_PIPELINE_URL}\", \"description\": \"GitLab ${CI_MACHINE} down\", \"context\": \"ci/gitlab/${CI_MACHINE}\" }"
        exit 1
      fi
###
# Trigger a build-and-test pipeline for a machine.
# Comment out the jobs for machines you don’t need.
###

# RUBY
#ruby-up-check:
# variables:
# CI_MACHINE: "ruby"
# extends: [.machine-check]
#
#ruby-build-and-test:
# variables:
# CI_MACHINE: "ruby"
# needs: [ruby-up-check]
# extends: [.build-and-test]

## POODLE
#poodle-up-check:
# variables:
# CI_MACHINE: "poodle"
# extends: [.machine-check]
#
#poodle-build-and-test:
# variables:
# CI_MACHINE: "poodle"
# needs: [poodle-up-check]
# extends: [.build-and-test]
#
## CORONA
#corona-up-check:
# variables:
# CI_MACHINE: "corona"
# extends: [.machine-check]
#
#corona-build-and-test:
# variables:
# CI_MACHINE: "corona"
# needs: [corona-up-check]
# extends: [.build-and-test]
#
## TIOGA
#tioga-up-check:
# variables:
# CI_MACHINE: "tioga"
# extends: [.machine-check]
#
#tioga-build-and-test:
# variables:
# CI_MACHINE: "tioga"
# needs: [tioga-up-check]
# extends: [.build-and-test]

# LASSEN
# Availability check for Lassen, built on the .machine-check template.
lassen-up-check:
  extends: .machine-check
  variables:
    CI_MACHINE: 'lassen'

# Triggers the Lassen build-and-test sub-pipeline once lassen-up-check has
# succeeded.
lassen-build-and-test:
  extends: .build-and-test
  needs:
    - lassen-up-check
  variables:
    CI_MACHINE: 'lassen'

Loading

0 comments on commit a16cc83

Please sign in to comment.