diff --git a/.github/workflows/build_release.yml b/.github/workflows/build_release.yml
new file mode 100644
index 00000000..214810bd
--- /dev/null
+++ b/.github/workflows/build_release.yml
@@ -0,0 +1,70 @@
+# release ci: build linux binaries and attach to release with SHA256
+#
+# Runs when a GitHub release is created or on manual dispatch. Each matrix
+# entry builds one Linux target, names the binary after the release tag,
+# writes its SHA-256, packs it as .tar.xz and uploads both files.
+
+name: build and release
+
+on:
+  workflow_dispatch:
+  release:
+    types: [ created ]
+
+# write access to repository contents so release assets can be uploaded
+permissions:
+  contents: write
+
+jobs:
+
+  build:
+    name: ${{ matrix.platform.os_name }} with rust ${{ matrix.toolchain }}
+    runs-on: ${{ matrix.platform.os }}
+    strategy:
+      # keep building the remaining targets when one of them fails
+      fail-fast: false
+      matrix:
+        # `bin` is versioned with the semver release tag; a manual dispatch has
+        # no release payload, so it falls back to the name of the dispatched ref
+        # NOTE(review): confirm `ubuntu-20.04` is still offered as a hosted
+        # runner image before relying on this workflow
+        platform:
+          - os_name: Linux-aarch64
+            os: ubuntu-20.04
+            target: aarch64-unknown-linux-musl
+            bin: cerebro-${{ github.event.release.tag_name || github.ref_name }}-linux-arm64
+          - os_name: Linux-x86_64
+            os: ubuntu-20.04
+            target: x86_64-unknown-linux-gnu
+            bin: cerebro-${{ github.event.release.tag_name || github.ref_name }}-linux-amd64
+        toolchain:
+          - stable
+    steps:
+      - uses: actions/checkout@v3
+      - name: Build binary
+        uses: houseabsolute/actions-rust-cross@v0
+        with:
+          command: "build"
+          target: ${{ matrix.platform.target }}
+          toolchain: ${{ matrix.toolchain }}
+          args: "--locked --release"
+          strip: true
+      - name: Rename binary (linux and macos)
+        run: mv target/${{ matrix.platform.target }}/release/cerebro target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}
+        if: matrix.platform.os_name != 'Windows-x86_64'
+      - name: Generate SHA-256 of uncompressed binary
+        run: shasum -a 256 target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }} | cut -d ' ' -f 1 > target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.sha256
+      # pack from inside the release directory so the archive holds only the
+      # binary instead of the nested target/<triple>/release/ path
+      - name: Compress binary and package files
+        working-directory: target/${{ matrix.platform.target }}/release
+        run: tar -cJf ${{ matrix.platform.bin }}.tar.xz ${{ matrix.platform.bin }}
+      # the release action needs a tag to attach assets to; skip the upload
+      # when the workflow was dispatched manually on a branch
+      - name: Release binary and SHA-256 checksum to GitHub
+        if: startsWith(github.ref, 'refs/tags/')
+        uses: softprops/action-gh-release@v1
+        with:
+          files: |
+            target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.tar.xz
+            target/${{ matrix.platform.target }}/release/${{ matrix.platform.bin }}.sha256
diff --git a/README.md b/README.md index 0da42a84..a227d7dc 100644 --- a/README.md +++ b/README.md @@ -50,66 +50,53 @@ Requirements for local execution: - `Nextflow >= v23.10.04` ```bash -# pull latest from github, show help menu, use mamba local env +# pull latest from github, show help menu, use mamba envs nextflow run -r latest esteinig/cerebro -profile mamba --help # provision with latest cipher kmer db build, may take some time -nextflow run -r latest esteinig/cerebro -profile mamba \ - --cipher_download \ - --cipher_revision latest \ - --cipher_modules full \ +nextflow run -r latest esteinig/cerebro -profile mamba -entry cipher \ + --revision latest \ + --representation full \ --outdir cipher_db/ -# example 1 full classifier run: standard qc and align, assembly, kmer classifiers -# mamba envs and cipher db build using a large resource profile - -# full tax profile on input read dir (pe illumina) -nextflow run -r latest esteinig/cerebro -profile mamba,large \ - --db cipher_db/ \ - --outdir full_test_run/ \ - --fastq "fastq/*_{R1,R2}.fq.gz" - -# example 2 k-mer profiling run: standard qc and kmer tax classifiers -# mamba envs and cipher kmer db build directory using a large resource profile +# default qc and tax profile on input read dir (pe illumina) +nextflow run esteinig/cerebro -r latest -profile mamba \ + --fastq "fastq/*_{R1,R2}.fq.gz" \ + --databases cipher_db/ # kmer tax profile on input read dir (pe illumina) -nextflow run -r latest esteinig/cerebro -profile mamba,large,kmer \ - --db cipher_db/ \ - --outdir kmer_test_run/ \ - --fastq "fastq/*_{R1,R2}.fq.gz" +nextflow run esteinig/cerebro -r latest -profile mamba,kmer \ + --fastq "fastq/*_{R1,R2}.fq.gz" \ + --databases cipher_db/ -# example 3 sample sheet production: uses cerebro pipeline client to create input sample sheet +# production: cerebro client to 
create input sample sheet cerebro pipeline sample-sheet --input fastq/ \ --output sample_sheet.csv \ - --glob "*_{R1,R2}.fq.gz" \ - --run-id prod-test - -# tax profile on dample sheet input (pe illumina) -# production enables extra checks for sample and data auditing -nextflow run -r latest esteinig/cerebro -profile mamba,large \ - --db cipher_db/ \ - --outdir prod_test_run/ \ - --sample_sheet sample_sheet.csv \ - --production true + --run-id production_test \ + --glob "*_{R1,R2}.fq.gz" -# with api upload on successful completion -# see api interaction for login to get api token -nextflow run -r latest esteinig/cerebro -profile mamba,large \ - --db cipher_db/ \ - --outdir prod_test_run/ \ +# production: tax profile on sample sheet input (pe illumina) +nextflow run esteinig/cerebro -r latest -profile mamba \ + --production true \ --sample_sheet sample_sheet.csv \ + --databases cipher_db/ + +# production: db upload on successful completion +nextflow run esteinig/cerebro -r latest -profile mamba \ --production true \ + --sample_sheet sample_sheet.csv \ + --databases cipher_db/ \ --cerebro.api.enabled true \ --cerebro.api.url $CEREBRO_API_URL \ --cerebro.api.token $CEREBRO_API_TOKEN \ --cerebro.api.upload true ``` -#### Nextflow command-line configuration +#### Nextflow command-line configurations ```bash -# main profiles +# runtime profiles nextflow run -r latest esteinig/cerebro \ # local conda,mamba env created for each process -profile conda,mamba @@ -118,25 +105,30 @@ nextflow run -r latest esteinig/cerebro \ # process-configured resource profiles -profile small,medium,large,galactic # specific protocol configs - -profile cns_assay,panviral,aneuploidy - # data provisioning - -profile cipher_db - # workflow testing - -profile cipher_test + -profile cns@v1 # central nervous system meta-gp protocol + -profile panviral@v1 # panviral enrichment protocol rat sequencing + -profile kraken@v1 # kraken2 nature protocols pathogen detection + -profile aneuploidy@v1 # wgs copy 
number variation for consented patients + # taxonomy + database provisions + -profile cipher-db@v1 + # workflow integration tests + -profile cipher-cns@v1 # workflow dev - -profile io_mode,qc_mode,dev_mode + -profile io,dev # nested module params in `nextflow.config` nextflow run -r latest esteinig/cerebro -profile mamba \ - # all reference db + idx + tax + + # input/output and workflow provision --db "cipher_db/" \ - # paired read input + # pe read input --fastq "fastq/*_{R1,R2}.fq.gz" \ - # production input tracked files + # production input --production \ --sample_sheet "sample_sheet.csv" \ # output directory - --outdir "module_test" \ + --outdir "test_run" \ + # qc read processing module --qc.enabled \ --qc.deduplication.enabled \ @@ -146,6 +138,7 @@ nextflow run -r latest esteinig/cerebro -profile mamba \ --qc.controls.phage.enabled \ --qc.host.depletion.enabled \ --qc.background.mask.enabled \ + # taxon profiling with references and taxonomy files in --db --taxa.enabled \ --taxa.kmer.enabled \ @@ -161,35 +154,30 @@ nextflow run -r latest esteinig/cerebro -profile mamba \ --taxa.alignment.bowtie2.enabled false \ --taxa.assembly.enabled \ # metaspades + align lca - ncbi nt/nr --taxa.assembly.blastn.enabled \ - --taxa.assembly.diamond.enabled - # post workflow processing and api interaction - --cerebro.quality.enabled \ # create run summary qc table - --cerebro.sample.enabled \ # create run aggregated cerebro sample json - # require: --sample_sheet and --production + --taxa.assembly.diamond.enabled \ + + # api: requires --sample_sheet and --production --cerebro.api.enabled true \ --cerebro.api.url $CEREBRO_API_URL \ --cerebro.api.token $CEREBRO_API_TOKEN \ - --cerebro.api.status.enabled \ # status report logging of pipeline updates - --cerebro.api.status.slack.enabled \ # status report logging to configured slack channel - --cerebro.api.report.enabled \ # create run report (sample, qc tracking) - --cerebro.api.report.slack.enabled \ # post run report to 
configured slack channel - --cerebro.api.upload true \ + + # api: live workflow status and sample tracking + --cerebro.api.run.status.enabled \ # status report logging of pipeline updates + --cerebro.api.run.status.slack.enabled \ # status report logging to slack channel + --cerebro.api.run.report.enabled \ # create run report for sample tracking + qc + --cerebro.api.run.report.slack.enabled \ # post run report to slack channel + + # api: upload to team collection + --cerebro.api.upload.enabled \ --cerebro.api.upload.team "VIDRL" \ --cerebro.api.upload.database "PRODUCTION" \ --cerebro.api.upload.collection "MGP-CNS-20231012" - -# label-specific resource config with nested params -# labels are defined in `lib/configs/resources.config` and -# applied to process definitions in `lib/processes/*.nf` - -# cpus, memory, time, conda, container (docker) -nextflow run -r latest esteinig/cerebro -profile mamba \ - --resources.kraken2uniq.cpus 64 \ - --resources.kraken2uniq.memory "256 GB" \ - --resources,minimap2_align.cpus 32 \ + # resource labels for each process + --resources.minimap2_align.cpus 32 \ --resources.minimap2_align.memory "32 GB" \ - --resources.minimap2_align.conda "envs/minimap2.dropin.yml" + --resources.minimap2_align.conda "envs/minimap2.replacement.yml" \ + --resources.minimap2_align.container "biocontainers/minimap2:latest" ``` #### Example pipeline configurations diff --git a/cog.toml b/cog.toml index fa2b8f5f..387ff7d0 100644 --- a/cog.toml +++ b/cog.toml @@ -1,17 +1,21 @@ +skip_untracked = false from_latest_tag = false ignore_merge_commits = false generate_mono_repository_global_tag = true + branch_whitelist = [ "main", "release/**" ] -skip_untracked = false + pre_bump_hooks = [ "cargo build --release", - "echo 'bumping from {{latest}} to {{version}}'", - "cargo bump {{version}}", + "echo 'bumping from {{latest}} to {{version}}'", + "cargo set-version {{version}}" ] + post_bump_hooks = [] + pre_package_bump_hooks = [] post_package_bump_hooks = []