From 3fd40f1385df711b760932aa4434b35293e6a223 Mon Sep 17 00:00:00 2001 From: Paul Lietar Date: Wed, 15 May 2024 14:27:12 +0100 Subject: [PATCH] Add GitHub workflow for continuous benchmarking. The workflow runs on-demand by commenting `/benchmark` on any pull request. It runs a few iterations of both the base branch and the PR and posts the results back to the pull request as a comment. Additionally, detailed plots are available as images in the workflow artifacts. Unfortunately GitHub doesn't have a way for actions to upload images in comments. At the moment the workflow runs two benchmarks, one at a 10k population size and 10k time steps, and a second one at 1M population size and 1k time steps. The workflow currently takes on the order of 1 hour to run. We can easily tweak the set of benchmarks in the future if we want to run more scenarios, or conversely if we want to reduce the total time. There is no long-term tracking of benchmark results yet. Doing so is tricky as the performance of the underlying CI is unlikely to be stable long-term. 
---
 .Rbuildignore                     |   1 +
 .github/workflows/touchstone.yaml | 144 ++++++++++++++++++++++++++++++
 touchstone/.gitignore             |   5 ++
 touchstone/footer.R               |  17 ++++
 touchstone/header.R               |  14 +++
 touchstone/script.R               |  44 +++++++++
 6 files changed, 225 insertions(+)
 create mode 100644 .github/workflows/touchstone.yaml
 create mode 100644 touchstone/.gitignore
 create mode 100644 touchstone/footer.R
 create mode 100644 touchstone/header.R
 create mode 100644 touchstone/script.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 3e91f6fd..5963c998 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -12,3 +12,4 @@ codecov.yml
 ^data-raw$
 ^doc$
 ^Meta$
+^touchstone$
diff --git a/.github/workflows/touchstone.yaml b/.github/workflows/touchstone.yaml
new file mode 100644
index 00000000..e44ae041
--- /dev/null
+++ b/.github/workflows/touchstone.yaml
@@ -0,0 +1,144 @@
+name: Continuous benchmarking
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.issue.number }}
+  cancel-in-progress: true
+
+on:
+  issue_comment:
+    types: ['created', 'edited']
+
+permissions:
+  contents: read
+  statuses: write
+  pull-requests: write
+
+env:
+  WORKFLOW_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+
+jobs:
+  prepare:
+    # The other jobs all depend on this one succeeding. They'll implicitly get
+    # skipped as well if this condition is not met.
+    if: >
+      github.event.issue.pull_request &&
+      startsWith(github.event.comment.body, '/benchmark') && (
+        github.event.comment.author_association == 'OWNER' ||
+        github.event.comment.author_association == 'MEMBER' ||
+        github.event.comment.author_association == 'COLLABORATOR'
+      )
+
+    runs-on: ubuntu-latest
+
+    outputs:
+      # The HEAD's sha is exported so we can update the status when the workflow
+      # completes.
+      head_sha: ${{ steps.metadata.outputs.result }}
+
+    steps:
+      - id: metadata
+        name: Fetch PR metadata
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            let pr = (await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number
+            })).data;
+
+            return pr.head.sha;
+
+      - name: Set commit status as in progress
+        uses: actions/github-script@v7
+        env:
+          HEAD_SHA: ${{ steps.metadata.outputs.result }}
+        with:
+          script: |
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: "pending",
+              target_url: process.env.WORKFLOW_URL,
+              description: 'Benchmarking in progress...',
+              context: 'touchstone'
+            });
+
+  build:
+    needs: prepare
+
+    # This job runs potentially untrusted code from the PR (albeit gated by a
+    # comment from a collaborator). We restrict the scope of the token as much
+    # as we can. We also need to be careful not to use any repository secrets
+    # as inputs to the job. The rest of the workflow only runs code from the
+    # master branch so isn't vulnerable to outsiders.
+    permissions:
+      contents: read
+
+    runs-on: ubuntu-24.04
+    env:
+      RSPM: "https://packagemanager.posit.co/cran/__linux__/noble/2024-05-15"
+      R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    steps:
+      - uses: lorenzwalthert/touchstone/actions/receive@main
+
+      # https://github.com/lorenzwalthert/touchstone/pull/138
+      - name: Upload raw results
+        uses: actions/upload-artifact@v2
+        with:
+          name: data
+          path: touchstone/records/
+          overwrite: true
+
+  comment:
+    needs: ['prepare', 'build']
+
+    if: always() && needs.prepare.result == 'success'
+
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download benchmarking results
+        if: needs.build.result == 'success'
+        # Version number must match the one used by touchstone when uploading
+        uses: actions/download-artifact@v2
+        with:
+          name: pr
+
+      - name: Comment on PR
+        if: needs.build.result == 'success'
+        uses: actions/github-script@v7
+        with:
+          script: |
+            var fs = require('fs');
+            var body = fs.readFileSync('./info.txt').toString();
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: context.issue.number,
+              body: body
+            });
+
+      - name: Update commit status
+        uses: actions/github-script@v7
+        env:
+          RESULT: ${{ needs.build.result }}
+          HEAD_SHA: ${{ needs.prepare.outputs.head_sha }}
+        with:
+          script: |
+            // A commit status only accepts error/failure/pending/success, whereas
+            // a job result may also be 'cancelled' or 'skipped'; map those to failure.
+            let succeeded = process.env.RESULT == "success";
+            let description = succeeded ? 'Benchmarking succeeded!' : 'Benchmarking failed!';
+            await github.rest.repos.createCommitStatus({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              sha: process.env.HEAD_SHA,
+              state: succeeded ? 'success' : 'failure',
+              target_url: process.env.WORKFLOW_URL,
+              description: description,
+              context: 'touchstone'
+            });
diff --git a/touchstone/.gitignore b/touchstone/.gitignore
new file mode 100644
index 00000000..30a0fd77
--- /dev/null
+++ b/touchstone/.gitignore
@@ -0,0 +1,5 @@
+*
+!script.R
+!.gitignore
+!header.R
+!footer.R
diff --git a/touchstone/footer.R b/touchstone/footer.R
new file mode 100644
index 00000000..8ddd815d
--- /dev/null
+++ b/touchstone/footer.R
@@ -0,0 +1,17 @@
+# You can modify the PR comment footer here. You can use github markdown e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# See `?touchstone::pr_comment`
+
+documentation <- "https://lorenzwalthert.github.io/touchstone/articles/inference.html"
+
+# This is exported by the workflow itself
+workflow <- Sys.getenv("WORKFLOW_URL")
+
+glue::glue(
+  "\n\nFurther explanation regarding interpretation and",
+  " methodology can be found in the [documentation]({documentation}).",
+  "\nPlots and raw data are available as artifacts of",
+  " [the workflow run]({workflow})."
+)
diff --git a/touchstone/header.R b/touchstone/header.R
new file mode 100644
index 00000000..1ad5311c
--- /dev/null
+++ b/touchstone/header.R
@@ -0,0 +1,14 @@
+# You can modify the PR comment header here. You can use github markdown e.g.
+# emojis like :tada:.
+# This file will be parsed and evaluated within the context of
+# `benchmark_analyze` and should return the comment text as the last value.
+# Available variables for glue substitution:
+# * ci: confidence interval
+# * branches: BASE and HEAD branches benchmarked against each other.
+# See `?touchstone::pr_comment`
+
+glue::glue(
+  "This is how benchmark results would change (along with a",
+  " {100 * ci}% confidence interval in relative change) if ",
+  "{system2('git', c('rev-parse', 'HEAD'), stdout = TRUE)} is merged into {branches[1]}:\n"
+)
diff --git a/touchstone/script.R b/touchstone/script.R
new file mode 100644
index 00000000..0ab84bae
--- /dev/null
+++ b/touchstone/script.R
@@ -0,0 +1,44 @@
+library(magrittr)
+
+touchstone::branch_install()
+
+touchstone::benchmark_run(
+  small_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population = 1e4))
+    malariasimulation::run_simulation(10000, params)
+  },
+  n = 10
+)
+
+touchstone::benchmark_run(
+  large_population = {
+    set.seed(123)
+    params <- malariasimulation::get_parameters(
+      overrides = list(human_population = 1e6))
+    malariasimulation::run_simulation(1000, params)
+  },
+  n = 4
+)
+
+touchstone::benchmark_analyze()
+
+# Overwrite the plots generated by touchstone with something more sensible.
+touchstone::benchmark_ls() %>%
+  dplyr::reframe(touchstone::benchmark_read(name, branch)) %>%
+  dplyr::mutate(branch = as.factor(branch), name = as.factor(name)) %>%
+  dplyr::group_by(name) %>%
+  dplyr::group_walk(function(data, key) {
+    p <- ggplot2::ggplot(data, ggplot2::aes(y = branch, x = elapsed, color = branch)) +
+      ggplot2::geom_boxplot() +
+      ggplot2::geom_jitter(height = 0.2) +
+      ggplot2::guides(color = "none") +
+      ggplot2::labs(x = "Elapsed time", y = "Branch") +
+      bench::scale_x_bench_time(base = NULL) +
+      ggplot2::ggtitle(key$name)
+
+    fs::path(touchstone::dir_touchstone(), "plots", key$name) %>%
+      fs::path_ext_set("png") %>%
+      ggplot2::ggsave(plot = p, height = 3)
+  })