chore(gitflow): build release ci yml; cog init

esteinig · Nov 1, 2023 · 5202a57 · 5202a57
1 parent 9a0f478
commit 5202a57
Show file tree

Hide file tree

Showing 3 changed files with 118 additions and 72 deletions.
diff --git a/.github/workflows/build_release.yml b/.github/workflows/build_release.yml
@@ -0,0 +1,54 @@
+# release ci: build linux binaries, attach each as .tar.xz plus the SHA256 of the raw binary to the release
+name: build and release
+
+on:
+  workflow_dispatch:  # manual runs must be dispatched from a tag ref, see the matrix comment below
+  release:
+    types: [created]
+
+permissions:
+  contents: write  # the final step uploads assets to the release
+
+jobs:
+  build:
+    name: ${{ matrix.platform.os_name }} with rust ${{ matrix.toolchain }}
+    runs-on: ${{ matrix.platform.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        # asset names use github.ref_name: on release events the ref is the release tag, whereas
+        # the github.event.release payload is absent (empty tag) on workflow_dispatch runs
+        platform:
+          - os_name: Linux-aarch64
+            os: ubuntu-20.04  # NOTE(review): confirm this hosted runner image is still offered
+            target: aarch64-unknown-linux-musl
+            bin: cerebro-${{ github.ref_name }}-linux-arm64  # semver release tag
+          - os_name: Linux-x86_64
+            os: ubuntu-20.04
+            target: x86_64-unknown-linux-gnu
+            bin: cerebro-${{ github.ref_name }}-linux-amd64  # semver release tag
+        toolchain:
+          - stable
+    steps:
+      - uses: actions/checkout@v3
+      - name: Build binary
+        uses: houseabsolute/actions-rust-cross@v0
+        with:
+          command: "build"
+          target: ${{ matrix.platform.target }}
+          toolchain: ${{ matrix.toolchain }}
+          args: "--locked --release"
+          strip: true
+      - name: Rename binary
+        run: mv target/${{ matrix.platform.target }}/release/cerebro target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}
+      - name: Generate SHA-256 of uncompressed binary
+        run: shasum -a 256 target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }} | cut -d ' ' -f 1 > target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.sha256
+      # -C stores the binary at the archive root instead of under its target/<triple>/release/ build path
+      - name: Compress binary and package files
+        run: tar -cJf target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.tar.xz -C target/${{ matrix.platform.target }}/release ${{ matrix.platform.bin }}
+      - name: Release binary and SHA-256 checksum to GitHub
+        uses: softprops/action-gh-release@v1
+        with:
+          files: |
+            target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.tar.xz
+            target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.sha256
diff --git a/README.md b/README.md
@@ -50,66 +50,53 @@ Requirements for local execution:
 - `Nextflow >= v23.10.04`
 
 ```bash
-# pull latest from github, show help menu, use mamba local env
+# pull latest from github, show help menu, use mamba envs
 nextflow run -r latest esteinig/cerebro -profile mamba --help
 
 # provision with latest cipher kmer db build, may take some time
-nextflow run -r latest esteinig/cerebro -profile mamba \
-    --cipher_download \
-    --cipher_revision latest \
-    --cipher_modules full \
+nextflow run -r latest esteinig/cerebro -profile mamba -entry cipher \
+    --revision latest \
+    --representation full \
     --outdir cipher_db/
 
-# example 1 full classifier run: standard qc and align, assembly, kmer classifiers
-# mamba envs and cipher db build using a large resource profile
-
-# full tax profile on input read dir (pe illumina)
-nextflow run -r latest esteinig/cerebro -profile mamba,large \
-    --db cipher_db/ \
-    --outdir full_test_run/ \
-    --fastq "fastq/*_{R1,R2}.fq.gz"
-
-# example 2 k-mer profiling run: standard qc and kmer tax classifiers 
-# mamba envs and cipher kmer db build directory using a large resource profile
+# default qc and tax profile on input read dir (pe illumina)
+nextflow run esteinig/cerebro -r latest -profile mamba \
+    --fastq "fastq/*_{R1,R2}.fq.gz" \
+    --databases cipher_db/
 
 # kmer tax profile on input read dir (pe illumina)
-nextflow run -r latest esteinig/cerebro -profile mamba,large,kmer \
-    --db cipher_db/ \
-    --outdir kmer_test_run/ \
-    --fastq "fastq/*_{R1,R2}.fq.gz"
+nextflow run esteinig/cerebro -r latest -profile mamba,kmer \
+    --fastq "fastq/*_{R1,R2}.fq.gz" \
+    --databases cipher_db/
 
-# example 3 sample sheet production: uses cerebro pipeline client to create input sample sheet
+# production: cerebro client to create input sample sheet
 cerebro pipeline sample-sheet 
     --input fastq/ \
     --output sample_sheet.csv \
-    --glob "*_{R1,R2}.fq.gz" \
-    --run-id prod-test
-
-# tax profile on dample sheet input (pe illumina)
-# production enables extra checks for sample and data auditing
-nextflow run -r latest esteinig/cerebro -profile mamba,large \
-    --db cipher_db/ \
-    --outdir prod_test_run/ \
-    --sample_sheet sample_sheet.csv \
-    --production true 
+    --run-id production_test \
+    --glob "*_{R1,R2}.fq.gz"
 
-# with api upload on successful completion 
-# see api interaction for login to get api token
-nextflow run -r latest esteinig/cerebro -profile mamba,large \
-    --db cipher_db/ \
-    --outdir prod_test_run/ \
+# production: tax profile on sample sheet input (pe illumina)
+nextflow run esteinig/cerebro -r latest -profile mamba \
+    --production true \
     --sample_sheet sample_sheet.csv \
+    --databases cipher_db/
+
+# production: db upload on successful completion 
+nextflow run esteinig/cerebro -r latest -profile mamba \
     --production true \
+    --sample_sheet sample_sheet.csv \
+    --databases cipher_db/ \
     --cerebro.api.enabled true \
     --cerebro.api.url $CEREBRO_API_URL \ 
     --cerebro.api.token $CEREBRO_API_TOKEN \
     --cerebro.api.upload true
 ```
 
-#### Nextflow command-line configuration 
+#### Nextflow command-line configurations 
 
 ```bash
-# main profiles
+# runtime profiles
 nextflow run -r latest esteinig/cerebro \
     # local conda,mamba env created for each process
     -profile conda,mamba
@@ -118,25 +105,30 @@ nextflow run -r latest esteinig/cerebro \
     # process-configured resource profiles
     -profile small,medium,large,galactic
     # specific protocol configs
-    -profile cns_assay,panviral,aneuploidy
-    # data provisioning
-    -profile cipher_db
-    # workflow testing
-    -profile cipher_test
+    -profile cns@v1                                # central nervous system meta-gp protocol
+    -profile panviral@v1                           # panviral enrichment protocol rat sequencing
+    -profile kraken@v1                             # kraken2 nature protocols pathogen detection 
+    -profile aneuploidy@v1                         # wgs copy number variation for consented patients
+    # taxonomy + database provisions
+    -profile cipher-db@v1
+    # workflow integration tests
+    -profile cipher-cns@v1
     # workflow dev
-    -profile io_mode,qc_mode,dev_mode
+    -profile io,dev
 
 # nested module params in `nextflow.config`
 nextflow run -r latest esteinig/cerebro -profile mamba \
-    # all reference db + idx + tax
+
+    # input/output and workflow provision
     --db "cipher_db/" \
-    # paired read input
+    # pe read input
     --fastq "fastq/*_{R1,R2}.fq.gz" \
-    # production input tracked files
+    # production input
     --production \
     --sample_sheet "sample_sheet.csv" \
     # output directory
-    --outdir "module_test" \
+    --outdir "test_run" \
+
     # qc read processing module
     --qc.enabled \
     --qc.deduplication.enabled \
@@ -146,6 +138,7 @@ nextflow run -r latest esteinig/cerebro -profile mamba \
     --qc.controls.phage.enabled \
     --qc.host.depletion.enabled \
     --qc.background.mask.enabled \
+
     # taxon profiling with references and taxonomy files in --db
     --taxa.enabled \
     --taxa.kmer.enabled \
@@ -161,35 +154,30 @@ nextflow run -r latest esteinig/cerebro -profile mamba \
     --taxa.alignment.bowtie2.enabled false \      
     --taxa.assembly.enabled \                     # metaspades + align lca - ncbi nt/nr
     --taxa.assembly.blastn.enabled \
-    --taxa.assembly.diamond.enabled
-    # post workflow processing and api interaction
-    --cerebro.quality.enabled \                   # create run summary qc table
-    --cerebro.sample.enabled \                    # create run aggregated cerebro sample json
-    # require: --sample_sheet and --production
+    --taxa.assembly.diamond.enabled \
+
+    # api: requires --sample_sheet and --production
     --cerebro.api.enabled true \  
     --cerebro.api.url $CEREBRO_API_URL \ 
     --cerebro.api.token $CEREBRO_API_TOKEN \
-    --cerebro.api.status.enabled \                # status report logging of pipeline updates
-    --cerebro.api.status.slack.enabled \          # status report logging to configured slack channel
-    --cerebro.api.report.enabled \                # create run report (sample, qc tracking)
-    --cerebro.api.report.slack.enabled \          # post run report to configured slack channel
-    --cerebro.api.upload true \
+
+    # api: live workflow status and sample tracking
+    --cerebro.api.run.status.enabled \                # status report logging of pipeline updates
+    --cerebro.api.run.status.slack.enabled \          # status report logging to slack channel
+    --cerebro.api.run.report.enabled \                # create run report for sample tracking + qc
+    --cerebro.api.run.report.slack.enabled \          # post run report to slack channel
+
+    # api: upload to team collection
+    --cerebro.api.upload.enabled \
     --cerebro.api.upload.team "VIDRL" \
     --cerebro.api.upload.database "PRODUCTION" \
     --cerebro.api.upload.collection "MGP-CNS-20231012"
 
-
-# label-specific resource config with nested params
-# labels are defined in `lib/configs/resources.config` and
-# applied to process definitions in `lib/processes/*.nf`
-
-# cpus, memory, time, conda, container (docker)
-nextflow run -r latest esteinig/cerebro -profile mamba \
-    --resources.kraken2uniq.cpus 64 \
-    --resources.kraken2uniq.memory "256 GB" \
-    --resources,minimap2_align.cpus 32 \
+    # resource labels for each process
+    --resources.minimap2_align.cpus 32 \
     --resources.minimap2_align.memory "32 GB" \
-    --resources.minimap2_align.conda "envs/minimap2.dropin.yml"
+    --resources.minimap2_align.conda "envs/minimap2.replacement.yml" \
+    --resources.minimap2_align.container "biocontainers/minimap2:latest"
 ```
 
 #### Example pipeline configurations

diff --git a/cog.toml b/cog.toml
@@ -1,17 +1,21 @@
+skip_untracked = false
 from_latest_tag = false
 ignore_merge_commits = false
 generate_mono_repository_global_tag = true
+
 branch_whitelist = [
   "main",
   "release/**"
 ]
-skip_untracked = false
+
 pre_bump_hooks = [
     "cargo build --release",
-    "echo 'bumping from {{latest}} to {{version}}'",
-    "cargo bump {{version}}",
+    "echo 'bumping from {{latest}} to {{version}}'",
+    "cargo set-version {{version}}"
 ]
+
 post_bump_hooks = []
+
 pre_package_bump_hooks = []
 post_package_bump_hooks = []