Add GitHub workflow for continuous benchmarking.

The workflow runs on-demand by commenting `/benchmark` on any pull request. It runs a few iterations of both the base branch and the PR and posts the results back to the pull request as a comment. Additionally, detailed plots are available as images in the workflow artifacts. Unfortunately, GitHub doesn't have a way for actions to upload images in comments. At the moment the workflow runs two benchmarks, one at a 10k population size and 10k time steps, and a second one at 1M population size and 1k time steps. The workflow currently takes on the order of 1 hour to run. We can easily tweak the set of benchmarks in the future if we want to run more scenarios, or conversely if we want to reduce the total time. There is no long-term tracking of benchmark results yet. Doing so is tricky as the performance of the underlying CI is unlikely to be stable long-term.
mrc-ide · May 21, 2024 · 3fd40f1 · 3fd40f1
1 parent 397a7b7
commit 3fd40f1
Show file tree

Hide file tree

Showing 6 changed files with 225 additions and 0 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@ codecov.yml
 ^data-raw$
 ^doc$
 ^Meta$
+^touchstone$
diff --git a/.github/workflows/touchstone.yaml b/.github/workflows/touchstone.yaml
@@ -0,0 +1,144 @@
+# On-demand benchmarking of pull requests with touchstone. The jobs below only
+# do real work when a comment starting with `/benchmark` is posted on a pull
+# request by an owner, member or collaborator.
+name: Continuous benchmarking
+
+# One concurrency group per issue/PR number: a newer run for the same PR
+# cancels the one still in progress.
+# NOTE(review): the workflow is triggered by every comment and the
+# `/benchmark` filter is only applied at job level, so presumably an unrelated
+# comment on the same PR also joins this group and cancels a running
+# benchmark - confirm, and consider a job-level concurrency group if so.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+# Comments on pull requests are delivered as `issue_comment` events; the
+# `prepare` job checks that the issue is actually a pull request.
+on:
+  issue_comment:
+    types: ['created', 'edited']
+
+# Default token permissions for all jobs. `statuses: write` is needed to set
+# the commit status and `pull-requests: write` to post the results comment.
+# The `build` job overrides this with a narrower set.
+permissions:
+  contents: read
+  statuses: write
+  pull-requests: write
+
+# Link to this workflow run. Used as the commit status target URL and read by
+# touchstone/footer.R to link to the artifacts.
+env:
+  WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  # Gatekeeper job: decides whether the comment should trigger a benchmark,
+  # looks up the PR's head commit and marks it as "pending".
+  prepare:
+    # The other jobs all depend on this one succeeding. They'll implicitly get
+    # skipped as well if this condition is not met.
+    #
+    # The comment must be on a pull request (not a plain issue), start with
+    # `/benchmark`, and be written by an owner, member or collaborator.
+    if: >
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/benchmark') && (
+        github.event.comment.author_association == 'OWNER' ||
+        github.event.comment.author_association == 'MEMBER' ||
+        github.event.comment.author_association == 'COLLABORATOR'
+      )
+
+    runs-on: ubuntu-latest
+
+    outputs:
+      # The HEAD's sha is exported so we can update the status when the workflow
+      # completes.
+      head_sha: ${{ steps.metadata.outputs.result }}
+
+    steps:
+      # The issue_comment payload does not carry the PR's head commit, so it
+      # is fetched through the API. The script's return value becomes the
+      # step's `result` output (as a plain string).
+      - id: metadata
+        name: Fetch PR metadata
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            const pr = (await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number
+            })).data;
+
+            return pr.head.sha;
+
+      - name: Set commit status as in progress
+        uses: actions/github-script@v7
+        env:
+          HEAD_SHA: ${{ steps.metadata.outputs.result }}
+        with:
+          script: |
+            // Await the request so that an API failure fails this step
+            // instead of surfacing as an unhandled promise rejection.
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: "pending",
+              target_url: process.env.WORKFLOW_URL,
+              description: 'Benchmarking in progress...',
+              context: 'touchstone'
+            });
+
+  # Runs the benchmarks (touchstone/script.R) through the touchstone `receive`
+  # action and uploads the raw timing records.
+  build:
+    needs: prepare
+
+    # This job runs potentially untrusted code from the PR (albeit gated by a
+    # comment from a collaborator). We restrict the scope of the token as much
+    # as we can. We also need to be careful not to use any repository secrets
+    # as inputs to the job. The rest of the workflow only runs code from the
+    # master branch so isn't vulnerable to outsiders.
+    permissions:
+      contents: read
+
+    runs-on: ubuntu-24.04
+    env:
+      # Dated package-manager snapshot for the runner's Ubuntu release (noble).
+      RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/2024-05-15"
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      # The job-scoped token (read-only, see `permissions` above).
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: lorenzwalthert/touchstone/actions/receive@main
+
+      # https://github.com/lorenzwalthert/touchstone/pull/138
+      #
+      # The `overwrite` input only exists from upload-artifact v4 onwards, so
+      # this step must use v4. The `data` artifact is not downloaded anywhere
+      # in this workflow, so its version is independent of the
+      # download-artifact version used by the `comment` job.
+      - name: Upload raw results
+        uses: actions/upload-artifact@v4
+        with:
+          name: data
+          path: touchstone/records/
+          overwrite: true
+
+  # Posts the benchmark results on the PR and sets the final commit status.
+  comment:
+    needs: ['prepare', 'build']
+
+    # Run even when `build` failed so the commit status does not stay
+    # "pending" forever, but only if `prepare` accepted the comment.
+    if: always() && needs.prepare.result == 'success'
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download benchmarking results
+        if: needs.build.result == 'success'
+        # Version number must match the one used by touchstone when uploading
+        uses: actions/download-artifact@v2
+        with:
+          name: pr
+
+      - name: Comment on PR
+        if: needs.build.result == 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            // info.txt comes from the `pr` artifact downloaded above.
+            var fs = require('fs');
+            var body = fs.readFileSync('./info.txt').toString();
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Update commit status
+        uses: actions/github-script@v7
+        env:
+          RESULT: ${{ needs.build.result }}
+          HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
+
+        with:
+          script: |
+            // A job result can also be "cancelled" or "skipped", which are
+            // not valid commit status states. Anything other than "success"
+            // is reported as a failure.
+            const succeeded = process.env.RESULT === "success";
+            const description = succeeded ? 'Benchmarking succeeded!'
+                                          : 'Benchmarking failed!';
+
+            // Await the request so that an API failure fails this step.
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: succeeded ? "success" : "failure",
+              target_url: process.env.WORKFLOW_URL,
+              description: description,
+              context: 'touchstone'
+            });
diff --git a/touchstone/.gitignore b/touchstone/.gitignore
@@ -0,0 +1,5 @@
+*
+!script.R
+!.gitignore
+!header.R
+!footer.R
diff --git a/touchstone/footer.R b/touchstone/footer.R
@@ -0,0 +1,17 @@
+# You can modify the PR comment footer here. You can use GitHub markdown, e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# See `?touchstone::pr_comment`
+
+# Link target for the "documentation" reference in the footer text.
+documentation <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html"
+
+# This is exported by the workflow itself (the `env:` section of
+# .github/workflows/touchstone.yaml). `Sys.getenv()` yields an empty string
+# if the variable is not set, which would leave the link below without a
+# target.
+workflow <- Sys.getenv("WORKFLOW_URL")
+
+# The glue() result is the last value of the file, i.e. the footer text.
+# `{documentation}` and `{workflow}` are substituted from the variables above.
+glue::glue(
+  "\n\nFurther explanation regarding interpretation and",
+  " methodology can be found in the [documentation]({documentation}).",
+  "\nPlots and raw data are available as artifacts of",
+  " [the workflow run]({workflow})."
+)
diff --git a/touchstone/header.R b/touchstone/header.R
@@ -0,0 +1,14 @@
+# You can modify the PR comment header here. You can use GitHub markdown, e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# Available variables for glue substitution:
+# * ci: confidence interval
+# * branches: BASE and HEAD branches benchmarked against each other.
+# See `?touchstone::pr_comment`
+
+# The glue() result is the last value of the file, i.e. the header text.
+# `git rev-parse HEAD` prints the SHA of the commit currently checked out -
+# presumably the PR head when touchstone evaluates this file (TODO confirm).
+glue::glue(
+  "This is how benchmark results would change (along with a",
+  " {100 * ci}% confidence interval in relative change) if ",
+  "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n"
+)
diff --git a/touchstone/script.R b/touchstone/script.R
@@ -0,0 +1,44 @@
+# Benchmark definitions run by touchstone for the branches under comparison.
+# Each `benchmark_run()` call times one named expression `n` times; the seed
+# is fixed inside every expression so all iterations simulate the same thing.
+library(magrittr)
+
+touchstone::branch_install()
+
+# 10k humans simulated for 10k time steps.
+touchstone::benchmark_run(
+  small_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population = 1e4)
+    )
+    malariasimulation::run_simulation(10000, params)
+  },
+  n = 10
+)
+
+# 1M humans simulated for 1k time steps. Fewer iterations than the small
+# benchmark.
+touchstone::benchmark_run(
+  large_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population = 1e6)
+    )
+    malariasimulation::run_simulation(1000, params)
+  },
+  n = 4
+)
+
+touchstone::benchmark_analyze()
+
+# Overwrite the plots generated by touchstone with something more sensible:
+# one PNG per benchmark, showing a box plot of elapsed time per branch with
+# the individual measurements jittered on top.
+touchstone::benchmark_ls() %>%
+  dplyr::reframe(touchstone::benchmark_read(name, branch)) %>%
+  dplyr::mutate(branch = as.factor(branch), name = as.factor(name)) %>%
+  dplyr::group_by(name) %>%
+  dplyr::group_walk(function(data, key) {
+    timing_plot <-
+      ggplot2::ggplot(
+        data,
+        ggplot2::aes(y = branch, x = elapsed, color = branch)
+      ) +
+      ggplot2::geom_boxplot() +
+      ggplot2::geom_jitter(height = 0.2) +
+      ggplot2::guides(color = "none") +
+      ggplot2::labs(x = "Elapsed time", y = "Branch") +
+      bench::scale_x_bench_time(base = NULL) +
+      ggplot2::ggtitle(key$name)
+
+    plot_path <- fs::path(touchstone::dir_touchstone(), "plots", key$name) %>%
+      fs::path_ext_set("png")
+
+    # Pass the plot explicitly: it is never printed here, so relying on
+    # ggsave()'s `last_plot()` default would depend on ggplot2's internal
+    # bookkeeping rather than on the object built above.
+    ggplot2::ggsave(plot_path, plot = timing_plot, height = 3)
+  })
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,3 +12,4 @@ codecov.yml @@
     ^data-raw$
     ^doc$
     ^Meta$
+    ^touchstone$