From 3fd40f1385df711b760932aa4434b35293e6a223 Mon Sep 17 00:00:00 2001
From: Paul Lietar <pl2113@ic.ac.uk>
Date: Wed, 15 May 2024 14:27:12 +0100
Subject: [PATCH] Add GitHub workflow for continuous benchmarking.

The workflow runs on-demand by commenting `/benchmark` on any pull
request. It runs a few iterations of both the base branch and the PR and
posts the results back to the pull request as a comment. Additionally,
detailed plots are available as images in the workflow artifacts.
Unfortunately GitHub doesn't have a way for actions to upload images in
comments.

At the moment the workflow runs two benchmarks: one at a 10k population
size and 10k time steps, and a second one at a 1M population size and 1k
time steps.

The workflow currently takes on the order of 1 hour to run. We can
easily tweak the set of benchmarks in the future if we want to run more
scenarios, or conversely if we want to reduce the total time.

There is no long-term tracking of benchmark results yet. Doing so is
tricky as the performance of the underlying CI is unlikely to be stable
long-term.
---
 .Rbuildignore                     |   1 +
 .github/workflows/touchstone.yaml | 144 ++++++++++++++++++++++++++++++
 touchstone/.gitignore             |   5 ++
 touchstone/footer.R               |  17 ++++
 touchstone/header.R               |  14 +++
 touchstone/script.R               |  44 +++++++++
 6 files changed, 225 insertions(+)
 create mode 100644 .github/workflows/touchstone.yaml
 create mode 100644 touchstone/.gitignore
 create mode 100644 touchstone/footer.R
 create mode 100644 touchstone/header.R
 create mode 100644 touchstone/script.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 3e91f6fd..5963c998 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,3 +12,4 @@ codecov.yml
 ^data-raw$
 ^doc$
 ^Meta$
+^touchstone$
diff --git a/.github/workflows/touchstone.yaml b/.github/workflows/touchstone.yaml
new file mode 100644
index 00000000..e44ae041
--- /dev/null
+++ b/.github/workflows/touchstone.yaml
@@ -0,0 +1,144 @@
+name: Continuous benchmarking
+
+concurrency:  # Only '/benchmark' comments share the per-PR group; other comments get a unique group and cannot cancel a run.
+  group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, '/benchmark') && 'benchmark' || github.run_id }}
+  cancel-in-progress: true
+
+on:
+  issue_comment:
+    types: ['created', 'edited']
+
+permissions:
+  contents: read
+  statuses: write
+  pull-requests: write
+
+env:
+  WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  prepare:
+    # The other jobs all depend on this one succeeding. They'll implicitly get
+    # skipped as well if this condition is not met.
+    if: >
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/benchmark') && (
+        github.event.comment.author_association == 'OWNER' ||
+        github.event.comment.author_association == 'MEMBER' ||
+        github.event.comment.author_association == 'COLLABORATOR'
+      )
+
+    runs-on: ubuntu-latest
+
+    outputs:
+      # The HEAD's sha is exported so we can update the status when the workflow
+      # completes.
+      head_sha: ${{ steps.metadata.outputs.result }}
+
+    steps:
+      - id: metadata
+        name: Fetch PR metadata
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            let pr = (await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number
+            })).data;
+
+            return pr.head.sha;
+
+      - name: Set commit status as in progress
+        uses: actions/github-script@v7
+        env:
+          HEAD_SHA: ${{ steps.metadata.outputs.result }}
+        with:
+          script: |
+            await github.rest.repos.createCommitStatus({  // awaited so a failed request fails this step
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: "pending",
+              target_url: process.env.WORKFLOW_URL,
+              description: 'Benchmarking in progress...',
+              context: 'touchstone'
+            });
+
+  build:
+    needs: prepare
+
+    # This job runs potentially untrusted code from the PR (albeit gated by a
+    # comment from a collaborator). We restrict the scope of the token as much
+    # as we can. We also need to be careful not to use any repository secrets
+    # as inputs to the job. The rest of the workflow only runs code from the
+    # master branch so isn't vulnerable to outsiders.
+    permissions:
+      contents: read
+
+    runs-on: ubuntu-24.04
+    env:
+      RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/2024-05-15"
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: lorenzwalthert/touchstone/actions/receive@main
+
+      # https://github.com/lorenzwalthert/touchstone/pull/138
+      - name: Upload raw results
+        uses: actions/upload-artifact@v4  # the `overwrite` input below only exists in v4
+        with:
+          name: data
+          path: touchstone/records/
+          overwrite: true
+
+  comment:
+    needs: ['prepare', 'build']
+
+    if: always() && needs.prepare.result == 'success'
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download benchmarking results
+        if: needs.build.result == 'success'
+        # Version number must match the one used by touchstone when uploading
+        uses: actions/download-artifact@v2
+        with:
+          name: pr
+
+      - name: Comment on PR
+        if: needs.build.result == 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            var fs = require('fs');
+            var body = fs.readFileSync('./info.txt').toString();
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Update commit status
+        uses: actions/github-script@v7
+        env:
+          RESULT: ${{ needs.build.result }}
+          HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
+
+        with:
+          script: |
+            // Commit statuses only accept error/failure/pending/success, so any
+            // non-success job result (failure, cancelled, skipped) is reported as failure.
+            const succeeded = process.env.RESULT == "success";
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: succeeded ? 'success' : 'failure',
+              target_url: process.env.WORKFLOW_URL,
+              description: succeeded ? 'Benchmarking succeeded!' : 'Benchmarking failed!',
+              context: 'touchstone'
+            });
diff --git a/touchstone/.gitignore b/touchstone/.gitignore
new file mode 100644
index 00000000..30a0fd77
--- /dev/null
+++ b/touchstone/.gitignore
@@ -0,0 +1,5 @@
+*
+!script.R
+!.gitignore
+!header.R
+!footer.R
diff --git a/touchstone/footer.R b/touchstone/footer.R
new file mode 100644
index 00000000..8ddd815d
--- /dev/null
+++ b/touchstone/footer.R
@@ -0,0 +1,17 @@
+# PR comment footer. GitHub Markdown is allowed, e.g. emojis like :tada:.
+# This file is parsed and evaluated within the context of `benchmark_analyze`
+# and should return the comment text as the last value, which here is the
+# result of the `glue::glue()` call at the end of the file.
+# See `?touchstone::pr_comment`
+
+documentation <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html"
+
+# WORKFLOW_URL is exported by the GitHub workflow itself; empty string if unset.
+workflow <- Sys.getenv("WORKFLOW_URL")
+
+glue::glue(
+  "\n\nFurther explanation regarding interpretation and",
+  " methodology can be found in the [documentation]({documentation}).",
+  "\nPlots and raw data are available as artifacts of",
+  " [the workflow run]({workflow})."
+)
diff --git a/touchstone/header.R b/touchstone/header.R
new file mode 100644
index 00000000..1ad5311c
--- /dev/null
+++ b/touchstone/header.R
@@ -0,0 +1,14 @@
+# PR comment header. GitHub Markdown is allowed, e.g. emojis like :tada:.
+# This file is parsed and evaluated within the context of `benchmark_analyze`
+# and should return the comment text as the last value.
+# Available variables for glue substitution:
+# * ci: confidence interval
+# * branches: BASE and HEAD branches benchmarked against each other.
+# The commit to be merged is reported as the output of `git rev-parse HEAD`.
+# See `?touchstone::pr_comment`
+
+glue::glue(
+  "This is how benchmark results would change (along with a",
+  " {100 * ci}% confidence interval in relative change) if ",
+  "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n"
+)
diff --git a/touchstone/script.R b/touchstone/script.R
new file mode 100644
index 00000000..0ab84bae
--- /dev/null
+++ b/touchstone/script.R
@@ -0,0 +1,44 @@
+library(magrittr) # provides the %>% pipe used by the plotting code below
+
+touchstone::branch_install()
+# Small scenario: population of 1e4 simulated for 10000 time steps (n = 10).
+touchstone::benchmark_run(
+  small_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population=1e4))
+    malariasimulation::run_simulation(10000, params)
+  },
+  n = 10
+)
+# Large scenario: population of 1e6 simulated for 1000 time steps (n = 4).
+touchstone::benchmark_run(
+  large_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population=1e6))
+    malariasimulation::run_simulation(1000, params)
+  },
+  n = 4
+)
+
+touchstone::benchmark_analyze()
+
+# Overwrite the plots generated by touchstone with something more sensible.
+touchstone::benchmark_ls() %>%
+  dplyr::reframe(touchstone::benchmark_read(name, branch)) %>%
+  dplyr::mutate(branch = as.factor(branch), name = as.factor(name)) %>%
+  dplyr::group_by(name) %>%
+  dplyr::group_walk(function(data, key) {
+    plot <- ggplot2::ggplot(data, ggplot2::aes(y = branch, x = elapsed, color = branch)) +
+      # Every timing is drawn by geom_jitter, so hide the boxplot's outlier points.
+      ggplot2::geom_boxplot(outlier.shape = NA) +
+      # Jitter along the branch axis only; width = 0 keeps elapsed times exact.
+      ggplot2::geom_jitter(height = 0.2, width = 0) +
+      ggplot2::guides(color = "none") +
+      ggplot2::labs(x = "Elapsed time", y = "Branch") +
+      bench::scale_x_bench_time(base = NULL) +
+      ggplot2::ggtitle(key$name)
+    path <- fs::path(touchstone::dir_touchstone(), "plots", key$name)
+    ggplot2::ggsave(fs::path_ext_set(path, "png"), plot = plot, height = 3)
+  })