diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 4ecfbfe3..b290e090 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -10,15 +10,7 @@
         "vscode": {
             // Set *default* container specific settings.json values on container create.
             "settings": {
-                "python.defaultInterpreterPath": "/opt/conda/bin/python",
-                "python.linting.enabled": true,
-                "python.linting.pylintEnabled": true,
-                "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
-                "python.formatting.yapfPath": "/opt/conda/bin/yapf",
-                "python.linting.flake8Path": "/opt/conda/bin/flake8",
-                "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
-                "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
-                "python.linting.pylintPath": "/opt/conda/bin/pylint"
+                "python.defaultInterpreterPath": "/opt/conda/bin/python"
             },

             // Add the IDs of extensions you want installed when the container is created.

diff --git a/.editorconfig b/.editorconfig
index b6b31907..72dda289 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -18,7 +18,16 @@ end_of_line = unset
 insert_final_newline = unset
 trim_trailing_whitespace = unset
 indent_style = unset
-indent_size = unset
+
+[/subworkflows/nf-core/**]
+charset = unset
+end_of_line = unset
+insert_final_newline = unset
+trim_trailing_whitespace = unset
+indent_style = unset

 [/assets/email*]
 indent_size = unset
+
+# ignore python and markdown
+[*.{py,md}]
+indent_style = unset

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 212cbdbe..600f7055 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -9,9 +9,8 @@ Please use the pre-filled template to save time.

 However, don't be put off by this template - other more general issues and suggestions are welcome!
 Contributions to the code are even more welcome ;)

-:::info
-If you need help using or modifying nf-core/crisprseq then the best place to ask is on the nf-core Slack [#crisprseq](https://nfcore.slack.com/channels/crisprseq) channel ([join our Slack here](https://nf-co.re/join/slack)).
-:::
+> [!NOTE]
+> If you need help using or modifying nf-core/crisprseq then the best place to ask is on the nf-core Slack [#crisprseq](https://nfcore.slack.com/channels/crisprseq) channel ([join our Slack here](https://nf-co.re/join/slack)).

 ## Contribution workflow

@@ -27,6 +26,12 @@ If you're not used to this workflow with git, you can start with some [docs from

 ## Tests

+You can test your changes locally by running the pipeline. To receive warnings about process selectors and other `debug` information, it is recommended to use the debug profile. Execute all the tests with the following command:
+
+```bash
+nf-test test --profile debug,test,docker --verbose
+```
+
 When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests.
 Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

@@ -87,7 +92,7 @@ Once there, use `nf-core schema build` to add to `nextflow_schema.json`.

 Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline.
 An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.

-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block.
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.

 ### Naming schemes

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 22f5beff..1ae44009 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,6 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/cris
 - [ ] If necessary, also make a PR on the nf-core/crisprseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
+- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index df8badba..49aa3c61 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -8,12 +8,12 @@ on:
     types: [published]
   workflow_dispatch:
 jobs:
-  run-tower:
+  run-platform:
     name: Run AWS full tests
     if: github.repository == 'nf-core/crisprseq'
     runs-on: ubuntu-latest
     steps:
-      - name: Launch workflow via tower
+      - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
         with:
@@ -27,11 +27,11 @@
             "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}",
             "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/crisprseq/results-${{ github.sha }}/targeted_test"
           }
-          profiles: test_full
+          profiles: test_targeted_full

-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
-          name: Tower debug log file
+          name: Seqera Platform debug log file
           path: |
-            tower_action_*.log
-            tower_action_*.json
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json

diff --git a/.github/workflows/awsfulltest_screening.yml b/.github/workflows/awsfulltest_screening.yml
index 8c91e9cf..3e1ecf90 100644
--- a/.github/workflows/awsfulltest_screening.yml
+++ b/.github/workflows/awsfulltest_screening.yml
@@ -8,12 +8,12 @@ on:
     types: [published]
   workflow_dispatch:
 jobs:
-  run-tower:
+  run-platform:
     name: Run AWS full tests
     if: github.repository == 'nf-core/crisprseq'
     runs-on: ubuntu-latest
     steps:
-      - name: Launch workflow via tower
+      - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -31,5 +31,5 @@
         with:
           name: Tower debug log file
           path: |
-            tower_action_*.log
-            tower_action_*.json
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json

diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index 0d650d8e..53bbbc4f 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -5,13 +5,13 @@ name: nf-core AWS test
 on:
   workflow_dispatch:
 jobs:
-  run-tower:
+  run-platform:
     name: Run AWS tests
     if: github.repository == 'nf-core/crisprseq'
     runs-on: ubuntu-latest
     steps:
-      # Launch workflow using Tower CLI tool action
-      - name: Launch workflow via tower
+      # Launch workflow using Seqera Platform CLI tool action
+      - name: Launch workflow via Seqera Platform
         uses: seqeralabs/action-tower-launch@v2
         with:
           workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }}
@@ -25,9 +25,9 @@
           }
           profiles: test

-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
-          name: Tower debug log file
+          name: Seqera Platform debug log file
           path: |
-            tower_action_*.log
-            tower_action_*.json
+            seqera_platform_action_*.log
+            seqera_platform_action_*.json

diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index 4880de34..856008b0 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -19,7 +19,7 @@ jobs:
       # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
       - name: Post PR comment
         if: failure()
-        uses: mshick/add-pr-comment@v1
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
         with:
           message: |
             ## This PR is against the `master` branch :x:

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 133e6739..2c34c41c 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,17 +29,23 @@ jobs:
         ANALYSIS:
           - "test_screening"
           - "test_screening_paired"
+          - "test_screening_rra"
          - "test_targeted"
          - "test_umis"
+          - "test_screening_count_table"
+
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

       - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
+        uses: nf-core/setup-nextflow@v2
         with:
           version: "${{ matrix.NXF_VER }}"

+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
       - name: Run pipeline with test data (${{ matrix.ANALYSIS }})
         run: |
           nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.ANALYSIS }},docker --outdir ./results_${{ matrix.ANALYSIS }}

diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml
index 694e90ec..0b6b1f27 100644
--- a/.github/workflows/clean-up.yml
+++ b/.github/workflows/clean-up.yml
@@ -10,7 +10,7 @@ jobs:
       issues: write
       pull-requests: write
     steps:
-      - uses: actions/stale@v7
+      - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9
         with:
           stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days."
           stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful."

diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml
new file mode 100644
index 00000000..2d20d644
--- /dev/null
+++ b/.github/workflows/download_pipeline.yml
@@ -0,0 +1,86 @@
+name: Test successful pipeline download with 'nf-core download'
+
+# Run the workflow when:
+#  - dispatched manually
+#  - when a PR is opened or reopened to master branch
+#  - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev.
+on:
+  workflow_dispatch:
+    inputs:
+      testbranch:
+        description: "The specific branch you wish to utilize for the test execution of nf-core download."
+        required: true
+        default: "dev"
+  pull_request:
+    types:
+      - opened
+      - edited
+      - synchronize
+    branches:
+      - master
+  pull_request_target:
+    branches:
+      - master
+
+env:
+  NXF_ANSI_LOG: false
+
+jobs:
+  download:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Install Nextflow
+        uses: nf-core/setup-nextflow@v2
+
+      - name: Disk space cleanup
+        uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
+
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"
+          architecture: "x64"
+      - uses: eWaterCycle/setup-singularity@931d4e31109e875b13309ae1d07c70ca8fbc8537 # v7
+        with:
+          singularity-version: 3.8.3
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install git+https://github.com/nf-core/tools.git@dev
+
+      - name: Get the repository name and current branch set as environment variable
+        run: |
+          echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> ${GITHUB_ENV}
+          echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> ${GITHUB_ENV}
+          echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> ${GITHUB_ENV}
+
+      - name: Download the pipeline
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+        run: |
+          nf-core download ${{ env.REPO_LOWERCASE }} \
+            --revision ${{ env.REPO_BRANCH }} \
+            --outdir ./${{ env.REPOTITLE_LOWERCASE }} \
+            --compress "none" \
+            --container-system 'singularity' \
+            --container-library "quay.io" -l "docker.io" -l "ghcr.io" \
+            --container-cache-utilisation 'amend' \
+            --download-configuration
+
+      - name: Inspect download
+        run: tree ./${{ env.REPOTITLE_LOWERCASE }}
+
+      - name: Run the downloaded pipeline (stub)
+        id: stub_run_pipeline
+        continue-on-error: true
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+      - name: Run the downloaded pipeline (stub run not supported)
+        id: run_pipeline
+        if: ${{ steps.stub_run_pipeline.outcome == 'failure' }}
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ env.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ env.REPO_BRANCH }}) -profile test,singularity --outdir ./results

diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index d5db7392..a9ddbe9d 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -4,7 +4,7 @@ on:
     types: [created]
 jobs:
-  deploy:
+  fix-linting:
     # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
     if: >
       contains(github.event.comment.html_url, '/pull/') &&
@@ -13,10 +13,17 @@
     runs-on: ubuntu-latest
     steps:
       # Use the @nf-core-bot token to check out so we can push later
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4
         with:
           token: ${{ secrets.nf_core_bot_auth_token }}

+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
       # Action runs on the issue comment, so we don't get the PR by default
       # Use the gh cli to check out the PR
       - name: Checkout Pull Request
@@ -24,32 +31,59 @@
         env:
           GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }}

-      - uses: actions/setup-node@v3
+      # Install and run pre-commit
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
+        with:
+          python-version: "3.12"

-      - name: Install Prettier
-        run: npm install -g prettier @prettier/plugin-php
+      - name: Install pre-commit
+        run: pip install pre-commit

-      # Check that we actually need to fix something
-      - name: Run 'prettier --check'
-        id: prettier_status
-        run: |
-          if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "result=pass" >> $GITHUB_OUTPUT
-          else
-            echo "result=fail" >> $GITHUB_OUTPUT
-          fi
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true

-      - name: Run 'prettier --write'
-        if: steps.prettier_status.outputs.result == 'fail'
-        run: prettier --write ${GITHUB_WORKSPACE}
+      # indication that the linting has finished
+      - name: React if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"

       - name: Commit & push changes
-        if: steps.prettier_status.outputs.result == 'fail'
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
         run: |
           git config user.email "core@nf-co.re"
           git config user.name "nf-core-bot"
           git config push.default upstream
           git add .
           git status
-          git commit -m "[automated] Fix linting with Prettier"
+          git commit -m "[automated] Fix code linting"
           git push
+
+      - name: React if linting errors were fixed
+        id: react-if-fixed
+        if: steps.commit-and-push.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: hooray
+
+      - name: React if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: confused
+
+      - name: Comment if linting errors were not fixed
+        if: steps.commit-and-push.outcome == 'failure'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          issue-number: ${{ github.event.issue.number }}
+          body: |
+            @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually.
+            See [CI log](https://github.com/nf-core/crisprseq/actions/runs/${{ github.run_id }}) for more details.
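The reworked `fix-linting.yml` above replaces the Prettier-only autofix with a full pre-commit run. As a rough sketch (assuming the `.pre-commit-config.yaml` shown later in this diff), a contributor can reproduce the bot's fix locally before pushing:

```bash
# Install pre-commit and run every configured hook over the whole tree,
# mirroring the "Run pre-commit" step of fix-linting.yml.
pip install pre-commit
pre-commit run --all-files

# Hooks that rewrite files exit non-zero; stage and commit whatever they fixed,
# as the nf-core-bot does in the "Commit & push changes" step.
git add .
git commit -m "[automated] Fix code linting"
```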
diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml
index b8bdd214..1fcafe88 100644
--- a/.github/workflows/linting.yml
+++ b/.github/workflows/linting.yml
@@ -11,74 +11,34 @@ on:
     types: [published]

 jobs:
-  EditorConfig:
+  pre-commit:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

-      - uses: actions/setup-node@v3
-
-      - name: Install editorconfig-checker
-        run: npm install -g editorconfig-checker
-
-      - name: Run ECLint check
-        run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile')
-
-  Prettier:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-
-      - uses: actions/setup-node@v3
-
-      - name: Install Prettier
-        run: npm install -g prettier
-
-      - name: Run Prettier --check
-        run: prettier --check ${GITHUB_WORKSPACE}
-
-  PythonBlack:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-
-      - name: Check code lints with Black
-        uses: psf/black@stable
-
-      # If the above check failed, post a comment on the PR explaining the failure
-      - name: Post PR comment
-        if: failure()
-        uses: mshick/add-pr-comment@v1
+      - name: Set up Python 3.12
+        uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
         with:
-          message: |
-            ## Python linting (`black`) is failing
-
-            To keep the code consistent with lots of contributors, we run automated code consistency checks.
-            To fix this CI test, please run:
-
-            * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black`
-            * Fix formatting errors in your pipeline: `black .`
-
-            Once you push these changes the test should pass, and you can hide this comment :+1:
+          python-version: "3.12"

-            We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help!
+      - name: Install pre-commit
+        run: pip install pre-commit

-            Thanks again for your contribution!
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          allow-repeats: false
+      - name: Run pre-commit
+        run: pre-commit run --all-files

   nf-core:
     runs-on: ubuntu-latest
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v3
+        uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4

       - name: Install Nextflow
-        uses: nf-core/setup-nextflow@v1
+        uses: nf-core/setup-nextflow@v2

-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
         with:
-          python-version: "3.11"
+          python-version: "3.12"
           architecture: "x64"

       - name: Install dependencies
@@ -99,7 +59,7 @@ jobs:
       - name: Upload linting log file artifact
         if: ${{ always() }}
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4
         with:
           name: linting-logs
           path: |

diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml
index 0bbcd30f..40acc23f 100644
--- a/.github/workflows/linting_comment.yml
+++ b/.github/workflows/linting_comment.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Download lint results
-        uses: dawidd6/action-download-artifact@v2
+        uses: dawidd6/action-download-artifact@09f2f74827fd3a8607589e5ad7f9398816f540fe # v3
         with:
           workflow: linting.yml
           workflow_conclusion: completed
@@ -21,7 +21,7 @@
         run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT

       - name: Post PR comment
-        uses: marocchino/sticky-pull-request-comment@v2
+        uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2
         with:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           number: ${{ steps.pr_number.outputs.pr_number }}

diff --git a/.github/workflows/release-announcments.yml b/.github/workflows/release-announcements.yml
similarity index 80%
rename from .github/workflows/release-announcments.yml
rename to .github/workflows/release-announcements.yml
index 6ad33927..03ecfcf7 100644
--- a/.github/workflows/release-announcments.yml
+++ b/.github/workflows/release-announcements.yml
@@ -9,6 +9,11 @@ jobs:
   toot:
     runs-on: ubuntu-latest
     steps:
+      - name: get topics and convert to hashtags
+        id: get_topics
+        run: |
+          echo "topics=$(curl -s https://nf-co.re/pipelines.json | jq -r '.remote_workflows[] | select(.full_name == "${{ github.repository }}") | .topics[]' | awk '{print "#"$0}' | tr '\n' ' ')" >> $GITHUB_OUTPUT
+
       - uses: rzr/fediverse-action@master
         with:
           access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }}
@@ -20,11 +25,13 @@

             Please see the changelog: ${{ github.event.release.html_url }}

+            ${{ steps.get_topics.outputs.topics }} #nfcore #openscience #nextflow #bioinformatics
+
   send-tweet:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/setup-python@v4
+      - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5
         with:
           python-version: "3.10"
       - name: Install dependencies
@@ -56,7 +63,7 @@ jobs:
   bsky-post:
     runs-on: ubuntu-latest
     steps:
-      - uses: zentered/bluesky-post-action@v0.0.2
+      - uses: zentered/bluesky-post-action@80dbe0a7697de18c15ad22f4619919ceb5ccf597 # v0.1.0
         with:
           post: |
             Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}!
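The new `get_topics` step in `release-announcements.yml` packs the registry lookup into a single shell line. Unrolled, the same pipeline reads as follows (a sketch with `${{ github.repository }}` substituted by this repository's name; the `pipelines.json` shape is the one the workflow assumes):

```bash
# Fetch the nf-core pipeline registry, keep this repository's entry,
# print each of its topics as "#topic", and join them onto one line so
# they can be appended as hashtags to the release announcement.
curl -s https://nf-co.re/pipelines.json \
  | jq -r '.remote_workflows[] | select(.full_name == "nf-core/crisprseq") | .topics[]' \
  | awk '{print "#"$0}' \
  | tr '\n' ' '
```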
diff --git a/.gitpod.yml b/.gitpod.yml
index 25488dcc..105a1821 100644
--- a/.gitpod.yml
+++ b/.gitpod.yml
@@ -4,16 +4,17 @@ tasks:
     command: |
       pre-commit install --install-hooks
       nextflow self-update
+  - name: unset JAVA_TOOL_OPTIONS
+    command: |
+      unset JAVA_TOOL_OPTIONS

 vscode:
   extensions: # based on nf-core.nf-core-extensionpack
-    - codezombiech.gitignore # Language support for .gitignore files
-    # - cssho.vscode-svgviewer # SVG viewer
     - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
-    - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
     - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
     - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
     - mechatroner.rainbow-csv # Highlight columns in csv files in different colors
-    # - nextflow.nextflow # Nextflow syntax highlighting
+    # - nextflow.nextflow # Nextflow syntax highlighting
     - oderwat.indent-rainbow # Highlight indentation level
     - streetsidesoftware.code-spell-checker # Spelling checker for source code
+    - charliermarsh.ruff # Code linter Ruff

diff --git a/.nf-core.yml b/.nf-core.yml
index 085dbd0a..edba5fb7 100644
--- a/.nf-core.yml
+++ b/.nf-core.yml
@@ -1,6 +1,9 @@ repository_type: pipeline
 lint:
   files_exist:
-    - conf/test.config # We skip these linting as we have splitted tests between targeted and screening
+    # We skip the linting of these files as we have split tests between targeted and screening
+    - conf/test.config
+    - conf/test_full.config
   files_unchanged:
-    - lib/NfcoreTemplate.groovy # Introduced a change ahead of the nf-core/tools release
+    - .github/PULL_REQUEST_TEMPLATE.md
+nf_core_version: "2.14.1"

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b7aeeb5b..e87f1624 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -8,6 +8,14 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: "v2.7.1"
+    rev: "v3.1.0"
     hooks:
       - id: prettier
+        additional_dependencies:
+          - prettier@3.2.5
+
+  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
+    rev: "2.7.3"
+    hooks:
+      - id: editorconfig-checker
+        alias: ec

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ed770355..7a7b8e0a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,11 +3,34 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [v2.2.0 - Romarin Curie]
+
+### Added
+
+- Template update to 2.11.1 ([#105](https://github.com/nf-core/crisprseq/pull/105))
+- Added a csv input option for crisprcleanr ([#105](https://github.com/nf-core/crisprseq/pull/105))
+- Added a contrasts parameter so the pipeline automatically creates design matrices and runs MAGeCK MLE ([#109](https://github.com/nf-core/crisprseq/pull/109))
+- Added bowtie2 and three-prime and five-prime adapter trimming ([#103](https://github.com/nf-core/crisprseq/pull/103) and [#123](https://github.com/nf-core/crisprseq/pull/123))
+- Added `--day0_label` and `FluteMLE` for MLE data [#126](https://github.com/nf-core/crisprseq/pull/126)
+- Template update to 2.13.1 ([#124](https://github.com/nf-core/crisprseq/pull/124))
+- Metro map added in the docs ([#128](https://github.com/nf-core/crisprseq/pull/128))
+- Added MAGeCK count table in the MultiQC report ([#131](https://github.com/nf-core/crisprseq/pull/131))
+- Added additional plots to Tower output ([#130](https://github.com/nf-core/crisprseq/pull/130))
+
+### Fixed
+
+- Adapt cutadapt module to work with single-end and paired-end reads again ([#121](https://github.com/nf-core/crisprseq/pull/121))
+- Fix premature completion of the pipeline when paired-end reads were merged ([#145](https://github.com/nf-core/crisprseq/pull/145))
+- Create an empty \*-QC-indels.csv file if no alignments are found ([#138](https://github.com/nf-core/crisprseq/pull/138))
+- Fix `--reference_fasta` and `--protospacer` parameters ([#144](https://github.com/nf-core/crisprseq/pull/144))
+
 ## [v2.1.1 - Jamon Salas - patch](https://github.com/nf-core/crisprseq/releases/tag/2.1.1) - [14.12.2023]

 ### Added

 - Update all modules to the last version in nf-core/modules ([#92](https://github.com/nf-core/crisprseq/pull/92))
+- More documentation for screening analysis. ([#99](https://github.com/nf-core/crisprseq/pull/99))
+- Contrasts are now given under a different flag and MAGeCK MLE and BAGEL2 are automatically run instead of MAGeCK RRA. ([#99](https://github.com/nf-core/crisprseq/pull/99))
 - Added cutadapt for screening analysis ([#95](https://github.com/nf-core/crisprseq/pull/95))

 ### Fixed

diff --git a/CITATIONS.md b/CITATIONS.md
index 6e78897d..887c4621 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -43,8 +43,12 @@

 > Martin, M. (2011). Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, 17(1), pp. 10-12. doi:https://doi.org/10.14806/ej.17.1.200

 - [Samtools](https://doi.org/10.1093/bioinformatics/btp352)
+  > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PMID: 19505943; PMCID: PMC2723002.
+- [MAGeCKFlute](https://doi.org/10.1038/s41596-018-0113-7)
+  > Wang, B., Wang, M., Zhang, W. et al. Integrative analysis of pooled CRISPR genetic screens using MAGeCKFlute. Nat Protoc 14, 756–780 (2019).
+
 ## Software packaging/containerisation tools

 - [Anaconda](https://anaconda.com)

diff --git a/README.md b/README.md
index b9f2281e..38bad46b 100644
--- a/README.md
+++ b/README.md
@@ -1,15 +1,19 @@
-# ![nf-core/crisprseq](docs/images/nf-core-crisprseq_logo_light.png#gh-light-mode-only) ![nf-core/crisprseq](docs/images/nf-core-crisprseq_logo_dark.png#gh-dark-mode-only)
+<h1>
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="docs/images/nf-core-crisprseq_logo_dark.png">
+    <img alt="nf-core/crisprseq" src="docs/images/nf-core-crisprseq_logo_light.png">
+  </picture>
+</h1>
-[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/crisprseq/results)
-[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7598496-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7598496)
-[![GitHub Actions CI Status](https://github.com/nf-core/crisprseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/crisprseq/actions?query=workflow%3A%22nf-core+CI%22)
-[![GitHub Actions Linting Status](https://github.com/nf-core/crisprseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/crisprseq/actions?query=workflow%3A%22nf-core+linting%22)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/crisprseq/results)
+[![GitHub Actions CI Status](https://github.com/nf-core/crisprseq/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/crisprseq/actions/workflows/ci.yml)
+[![GitHub Actions Linting Status](https://github.com/nf-core/crisprseq/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/crisprseq/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/crisprseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7598496-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7598496)
+[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)

 [![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
 [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
 [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)
-[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/crisprseq)
+[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/crisprseq)

 [![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23crisprseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/crisprseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)

@@ -30,13 +34,13 @@ On release, automated continuous integration tests run the pipeline on a full-si

 ## Pipeline summary

-For crispr targeted:
-
-[picture element; alt: "Text changing depending on mode. Light: 'So light!' Dark: 'So dark!'"]
+[picture element; alt: "Text changing depending on mode. Light: 'So light!' Dark: 'So dark!'"]
+For crispr targeting:
+
 1. Merge paired-end reads ([`Pear`](https://cme.h-its.org/exelixis/web/software/pear/doc.html))
 2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
 3. Adapter trimming ([`Cutadapt`](http://dx.doi.org/10.14806/ej.17.1.200))

@@ -59,18 +63,19 @@ For crispr screening:

 1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))
 2. Read mapping ([`MAGeCK count`](https://sourceforge.net/p/mageck/wiki/usage/#count))
+   - ([`MAGeCK count`](https://sourceforge.net/p/mageck/wiki/usage/#count), _default_)
+   - ([`bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml))
 3. Optional: CNV correction and normalization with ([`CRISPRcleanR`](https://github.com/francescojm/CRISPRcleanR))
 4. Rank sgRNAs and genes:
    a. ([MAGeCK test](https://sourceforge.net/p/mageck/wiki/usage/#test))
    b. ([MAGeCK mle](https://sourceforge.net/p/mageck/wiki/Home/#mle))
+   c. ([BAGEL2](https://github.com/hart-lab/bagel))
+5. Visualize analysis

 ## Usage

-:::note
-If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how
-to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline)
-with `-profile test` before running the workflow on actual data.
-:::
+> [!NOTE]
+> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.

 First, prepare a samplesheet with your input data that looks as follows:

@@ -98,11 +103,9 @@

 Now, you can run the pipeline using:

```bash
nextflow run nf-core/crisprseq --input samplesheet.csv --analysis <targeted/screening> --outdir <OUTDIR> -profile <docker/singularity/.../institute>
```

-:::warning
-Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those
-provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
-:::
+> [!WARNING]
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
+> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).

 For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/crisprseq/usage) and the [parameter documentation](https://nf-co.re/crisprseq/parameters).

@@ -131,6 +134,7 @@ We thank the following people for their extensive assistance in the development

 - [@mashehu](https://github.com/mashehu)
 - [@msanvicente](https://github.com/msanvicente)
 - [@SusiJo](https://github.com/SusiJo)
+- [@mschaffer-incyte](https://github.com/mschaffer-incyte)

 ## Contributions and Support

diff --git a/assets/email_template.html b/assets/email_template.html
index ec162f42..07ce59a6 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -12,7 +12,7 @@
-        <h1>nf-core/crisprseq v${version}</h1>
+        <h1>nf-core/crisprseq ${version}</h1>
         <h2>Run Name: $runName</h2>
<% if (!success){ diff --git a/assets/email_template.txt b/assets/email_template.txt index da4d1c29..0061a387 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -4,7 +4,7 @@ |\\ | |__ __ / ` / \\ |__) |__ } { | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, `._,._,' - nf-core/crisprseq v${version} + nf-core/crisprseq ${version} ---------------------------------------------------- Run Name: $runName diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index baec67f1..8ddc9dd0 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,9 @@ report_comment: > - This report has been generated by the nf-core/crisprseq + + This report has been generated by the nf-core/crisprseq analysis pipeline. For information about how to interpret these results, please see the - documentation. + documentation. + report_section_order: "nf-core-crisprseq-methods-description": order: -1000 @@ -11,6 +13,7 @@ report_section_order: order: -1002 export_plots: true +disable_version_detection: true # Modules to run run_modules: @@ -143,6 +146,52 @@ custom_data: min: 0 scale: "RdYlGn" + # MAGeCK count table + mageck_count: + id: "mageck_count" + section_name: "MAGeCK count table" + plot_type: "table" + description: | + Table showing count quality for MAGeCK count. + pconfig: + id: "mageck_count" + namespace: "MAGeCK count table" + table_title: "MAGeCK count table" + headers: + File: + title: "File" + description: "File name" + Label: + title: "Label" + description: "Label name" + format: "{:,.0f}" + Reads: + title: "Reads" + description: "Number of reads" + format: "{:,.0f}" + Mapped: + title: "Mapped" + description: "Number of reads mapped" + format: "{:,.0f}" + Percentage: + title: "Percentage mapped from 0 to 1 (100%)" + description: "Percentage mapped" + max: 1 + min: 0 + TotalsgRNAs: + title: "Total number of sgRNAs in the library" + description: "Total number of sgRNAs in the library" + format: "{:,.0f}" + Zerocounts: + title: "Total number of missing sgRNAs" + description: "sgRNAs that have 0 counts, recommended: no more than 1%" + format: "{:,.0f}" + GiniIndex: + title: "Gini Index" + description: "Measure of statistical dispersion. A smaller value indicates more evenness of the count distribution." 
+ max: 1 + min: 0 + sp: edition_plot: fn: "*_edits.csv" @@ -152,11 +201,14 @@ sp: fn: "*_reads-summary.csv" cutadapt: fn: "*.cutadapt.log" + mageck_count: + fn: "*.countsummary.txt" # Define the order of sections module_order: - fastqc - cutadapt + - mageck_count - custom_content # Set the order of custom code plots and tables diff --git a/assets/nf-core-crisprseq_logo_light.png b/assets/nf-core-crisprseq_logo_light.png index 9f408dfb..98b93405 100644 Binary files a/assets/nf-core-crisprseq_logo_light.png and b/assets/nf-core-crisprseq_logo_light.png differ diff --git a/assets/slackreport.json b/assets/slackreport.json index a3cdd5e4..5206f763 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "nf-core/crisprseq v${version} - ${runName}", + "author_name": "nf-core/crisprseq ${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/bin/BAGEL.py b/bin/BAGEL.py index 205fbb44..aa2f5f97 100755 --- a/bin/BAGEL.py +++ b/bin/BAGEL.py @@ -209,8 +209,8 @@ def report_bagel_version(): """ print( "Bayesian Analysis of Gene EssentiaLity (BAGEL) suite:\n" - "Version: {VERSION}\n" - "Build: {BUILD}".format(VERSION=VERSION, BUILD=BUILD) + f"Version: {VERSION}\n" + f"Build: {BUILD}" ) @@ -220,7 +220,9 @@ def report_bagel_version(): @click.option("-c", "--control-columns", required=True) @click.option("-m", "--min-reads", type=int, default=0) @click.option("-Np", "--pseudo-count", type=int, default=5) -def calculate_fold_change(read_count_file, output_label, control_columns, min_reads, pseudo_count): +def calculate_fold_change( + read_count_file, output_label, control_columns, min_reads, pseudo_count +): """ \b Calculate fold changes from read count data outputting a fold change column: @@ -294,7 +296,9 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re normed = pd.DataFrame(index=reads.index.values) normed["GENE"] = reads.iloc[:, 0] # first column is gene name normed = ( - reads.iloc[:, list(range(1, numColumns))] / np.tile(sumReads, [numClones, 1]) * 10000000 + reads.iloc[:, list(range(1, numColumns))] + / np.tile(sumReads, [numClones, 1]) + * 10000000 ) # normalize to 10M reads # @@ -334,14 +338,26 @@ def calculate_fold_change(read_count_file, output_label, control_columns, min_re @click.option("-i", "--fold-change", required=True, type=click.Path(exists=True)) @click.option("-o", "--output-file", required=True) @click.option("-e", "--essential-genes", required=True, type=click.Path(exists=True)) -@click.option("-n", "--non-essential-genes", required=True, type=click.Path(exists=True)) +@click.option( + "-n", "--non-essential-genes", required=True, type=click.Path(exists=True) +) @click.option("-c", "--columns-to-test", required=True) -@click.option("-w", "--network-file", metavar="[network File]", default=None, type=click.Path(exists=True)) +@click.option( + "-w", + "--network-file", + metavar="[network File]", + default=None, + type=click.Path(exists=True), +) @click.option("-m", "--filter-multi-target", is_flag=True) @click.option("-m0", "--loci-without-mismatch", type=int, default=10) @click.option("-m1", "--loci-with-mismatch", type=int, default=10) @click.option( - "--align-info", metavar="--align-info [File]", 
default=None, type=click.Path(exists=True), cls=OptionRequiredIf + "--align-info", + metavar="--align-info [File]", + default=None, + type=click.Path(exists=True), + cls=OptionRequiredIf, ) @click.option("-b", "--use-bootstrapping", is_flag=True) @click.option("-NS", "--no-resampling", is_flag=True) @@ -474,7 +490,9 @@ def calculate_bayes_factors( if filter_multi_target: try: - aligninfo = pd.read_csv(align_info, header=None, index_col=0, sep="\t").fillna("") + aligninfo = pd.read_csv( + align_info, header=None, index_col=0, sep="\t" + ).fillna("") for seqid in aligninfo.index: perfectmatch = 0 mismatch_1bp = 0 @@ -488,7 +506,10 @@ def calculate_bayes_factors( mismatch_1bp = len(aligninfo[3][seqid].split(",")) if aligninfo[4][seqid] != "": mismatch_1bp_gene = len(aligninfo[4][seqid].split(",")) - if perfectmatch > loci_without_mismatch or mismatch_1bp > loci_with_mismatch: + if ( + perfectmatch > loci_without_mismatch + or mismatch_1bp > loci_with_mismatch + ): multi_targeting_sgrnas[seqid] = True elif perfectmatch > 1 or mismatch_1bp > 0: multi_targeting_sgrnas_info[seqid] = ( @@ -502,7 +523,9 @@ def calculate_bayes_factors( print("Please check align-info file") sys.exit(1) - print("Total %d multi-targeting gRNAs are discarded" % len(multi_targeting_sgrnas)) + print( + "Total %d multi-targeting gRNAs are discarded" % len(multi_targeting_sgrnas) + ) # # LOAD FOLDCHANGES @@ -521,7 +544,9 @@ def calculate_bayes_factors( except ValueError: column_labels = columns column_list = [ - x for x in range(len(fieldname) - 1) if fieldname[x + 1] in column_labels + x + for x in range(len(fieldname) - 1) + if fieldname[x + 1] in column_labels ] # +1 because of First column start 2 print("Using column: " + ", ".join(column_labels)) # print "Using column: " + ", ".join(map(str,column_list)) @@ -549,7 +574,9 @@ def calculate_bayes_factors( rna2gene[rnatag] = gsym fc[rnatag] = {} for i in column_list: - fc[rnatag][i] = float(fields[i + 1]) # per user docs, GENE is column 0, first data column is col 1. + fc[rnatag][i] = float( + fields[i + 1] + ) # per user docs, GENE is column 0, first data column is col 1. genes_array = np.array(list(genes.keys())) gene_idx = np.arange(len(genes)) @@ -590,7 +617,9 @@ def calculate_bayes_factors( for i in [0, 1]: if linearray[i] not in network: network[linearray[i]] = {} - network[linearray[i]][linearray[-1 * (i - 1)]] = 1 # save edge information + network[linearray[i]][ + linearray[-1 * (i - 1)] + ] = 1 # save edge information edgecount += 1 print("Number of network edges: " + str(edgecount)) @@ -602,7 +631,9 @@ def calculate_bayes_factors( # Define foldchange dynamic threshold. logarithm decay. # Parameters are defined by regression (achilles data) 2**-7 was used in previous version. 
- FC_THRESH = 2 ** (-1.1535 * np.log(len(np.intersect1d(genes_array, nonEss)) + 13.324) + 0.7728) + FC_THRESH = 2 ** ( + -1.1535 * np.log(len(np.intersect1d(genes_array, nonEss)) + 13.324) + 0.7728 + ) bf = {} boostedbf = {} for g in genes_array: @@ -619,12 +650,17 @@ def calculate_bayes_factors( # training_data = Training(setdiff1d(gene_idx,np.where(in1d(genes_array,coreEss))),cvnum=NUMCV) # declare training class (only for Gold-standard gene set) training_data = Training( - np.where(np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0], cvnum=no_of_cross_validations + np.where(np.in1d(genes_array, np.union1d(coreEss, nonEss)))[0], + cvnum=no_of_cross_validations, ) # all non-goldstandards - all_non_gs = np.where(np.logical_not(np.in1d(genes_array, np.union1d(coreEss, nonEss))))[0] + all_non_gs = np.where( + np.logical_not(np.in1d(genes_array, np.union1d(coreEss, nonEss))) + )[0] else: - training_data = Training(gene_idx, cvnum=no_of_cross_validations) # declare training class + training_data = Training( + gene_idx, cvnum=no_of_cross_validations + ) # declare training class if train_method == 0: LOOPCOUNT = bootstrap_iterations @@ -665,7 +701,12 @@ def calculate_bayes_factors( if run_test_mode: fp.write( - "%d\n%s\n%s\n" % (loop, ",".join(genes_array[gene_train_idx]), ",".join(genes_array[gene_test_idx])) + "%d\n%s\n%s\n" + % ( + loop, + ",".join(genes_array[gene_train_idx]), + ",".join(genes_array[gene_test_idx]), + ) ) train_ess = np.where(np.in1d(genes_array[gene_train_idx], coreEss))[0] @@ -679,16 +720,28 @@ def calculate_bayes_factors( # define ess_train: vector of observed fold changes of essential genes in training set # ess_train_fc_list_of_lists = [ - fc[rnatag] for g in genes_array[gene_train_idx[train_ess]] for rnatag in gene2rna[g] + fc[rnatag] + for g in genes_array[gene_train_idx[train_ess]] + for rnatag in gene2rna[g] + ] + ess_train_fc_flat_list = [ + obs + for sublist in ess_train_fc_list_of_lists + for obs in list(sublist.values()) ] - ess_train_fc_flat_list = [obs for sublist in ess_train_fc_list_of_lists for obs in list(sublist.values())] # # define non_train vector of observed fold changes of nonessential genes in training set # non_train_fc_list_of_lists = [ - fc[rnatag] for g in genes_array[gene_train_idx[train_non]] for rnatag in gene2rna[g] + fc[rnatag] + for g in genes_array[gene_train_idx[train_non]] + for rnatag in gene2rna[g] + ] + non_train_fc_flat_list = [ + obs + for sublist in non_train_fc_list_of_lists + for obs in list(sublist.values()) ] - non_train_fc_flat_list = [obs for sublist in non_train_fc_list_of_lists for obs in list(sublist.values())] # # calculate empirical fold change distributions for both # @@ -713,7 +766,9 @@ def calculate_bayes_factors( # logratio_lookup = {} for i in np.arange(xmin, xmax + 0.01, 0.01): - logratio_lookup[np.around(i * 100)] = np.log2(kess.evaluate(i) / knon.evaluate(i)) + logratio_lookup[np.around(i * 100)] = np.log2( + kess.evaluate(i) / knon.evaluate(i) + ) # # calculate BFs from lookup table for withheld test set # @@ -729,7 +784,9 @@ def calculate_bayes_factors( testx.append(np.around(foldchange * 100) / 100) testy.append(logratio_lookup[np.around(foldchange * 100)][0]) try: - slope, intercept, r_value, p_value, std_err = stats.linregress(np.array(testx), np.array(testy)) + slope, intercept, r_value, p_value, std_err = stats.linregress( + np.array(testx), np.array(testy) + ) except: print("Regression failed. 
Check quality of the screen") sys.exit(1) @@ -798,15 +855,23 @@ def calculate_bayes_factors( else: onlytarget.append(seqid) - if len(onlytarget) > 0: # comparsion between sgRNAs targeting one locus and multiple loci + if ( + len(onlytarget) > 0 + ): # comparsion between sgRNAs targeting one locus and multiple loci if len(multitarget) > 0: - bf_only = np.mean([sum(list(bf_mean_rna_rep[seqid].values())) for seqid in onlytarget]) + bf_only = np.mean( + [ + sum(list(bf_mean_rna_rep[seqid].values())) + for seqid in onlytarget + ] + ) for seqid in onlytarget: trainset[seqid] = [1, 0, 0] for seqid in multitarget: if ( - multi_targeting_sgrnas_info[seqid][2] > 1 or multi_targeting_sgrnas_info[seqid][3] > 0 + multi_targeting_sgrnas_info[seqid][2] > 1 + or multi_targeting_sgrnas_info[seqid][3] > 0 ): # train model using multi-targeting only targeting one protein coding gene continue @@ -821,7 +886,9 @@ def calculate_bayes_factors( if count < 10: print("Not enough train set for calculating multi-targeting effect.\n") - print("It may cause due to unmatched gRNA names between the foldchange file and the align info file.\n") + print( + "It may cause due to unmatched gRNA names between the foldchange file and the align info file.\n" + ) print("Filtering is not finished\n") filter_multi_target = False @@ -835,7 +902,9 @@ def calculate_bayes_factors( coeff_df = pd.DataFrame(regressor.coef_, X.columns, columns=["Coefficient"]) for i in [0, 1]: if coeff_df["Coefficient"][i] < 0: - print("Regression coefficient is below than zero. Substituted to zero\n") + print( + "Regression coefficient is below than zero. Substituted to zero\n" + ) coeff_df["Coefficient"][i] = 0.0 print( "Multiple effects from perfect matched loci = %.3f and 1bp mis-matched loci = %.3f" @@ -848,8 +917,10 @@ def calculate_bayes_factors( for seqid in gene2rna[g]: if seqid in multi_targeting_sgrnas_info: penalty += ( - float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df["Coefficient"][0] - + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df["Coefficient"][1] + float(multi_targeting_sgrnas_info[seqid][0] - 1) + * coeff_df["Coefficient"][0] + + float(multi_targeting_sgrnas_info[seqid][1]) + * coeff_df["Coefficient"][1] ) bf_multi_corrected_gene[g] = bf_mean[g] - penalty else: @@ -857,12 +928,16 @@ def calculate_bayes_factors( for seqid in gene2rna[g]: if seqid in multi_targeting_sgrnas_info: penalty = ( - float(multi_targeting_sgrnas_info[seqid][0] - 1) * coeff_df["Coefficient"][0] - + float(multi_targeting_sgrnas_info[seqid][1]) * coeff_df["Coefficient"][1] + float(multi_targeting_sgrnas_info[seqid][0] - 1) + * coeff_df["Coefficient"][0] + + float(multi_targeting_sgrnas_info[seqid][1]) + * coeff_df["Coefficient"][1] ) else: penalty = 0.0 - bf_multi_corrected_rna[seqid] = sum(list(bf_mean_rna_rep[seqid].values())) - penalty + bf_multi_corrected_rna[seqid] = ( + sum(list(bf_mean_rna_rep[seqid].values())) - penalty + ) # # NORMALIZE sgRNA COUNT @@ -889,7 +964,9 @@ def calculate_bayes_factors( # calculate network scores # - if network_boost == True and rna_level == False: # Network boost is only working for gene level + if ( + network_boost == True and rna_level == False + ): # Network boost is only working for gene level if run_test_mode == True: # TEST MODE fp = open(output_file + ".netscore", "w") print("\nNetwork score calculation start\n") @@ -931,8 +1008,16 @@ def calculate_bayes_factors( # # calculate Network BF for test set # - ess_ns_list = [networkscores[x] for x in genes_array[gene_train_idx[train_ess]] if x in 
networkscores] - non_ns_list = [networkscores[x] for x in genes_array[gene_train_idx[train_non]] if x in networkscores] + ess_ns_list = [ + networkscores[x] + for x in genes_array[gene_train_idx[train_ess]] + if x in networkscores + ] + non_ns_list = [ + networkscores[x] + for x in genes_array[gene_train_idx[train_non]] + if x in networkscores + ] kess = stats.gaussian_kde(ess_ns_list) knon = stats.gaussian_kde(non_ns_list) @@ -950,7 +1035,10 @@ def calculate_bayes_factors( if density_ess == 0.0 or density_non == 0.0: continue - if np.log2(density_ess / density_non) > -5 and networkscore < np.array(ess_ns_list).mean(): # reverse + if ( + np.log2(density_ess / density_non) > -5 + and networkscore < np.array(ess_ns_list).mean() + ): # reverse xmin = min(xmin, networkscore) for networkscore in np.arange(min(non_ns_list), max(non_ns_list), 0.01): @@ -958,7 +1046,10 @@ def calculate_bayes_factors( density_non = knon.evaluate(networkscore)[0] if density_ess == 0.0 or density_non == 0.0: continue - if np.log2(density_ess / density_non) < 5 and networkscore > np.array(non_ns_list).mean(): # reverse + if ( + np.log2(density_ess / density_non) < 5 + and networkscore > np.array(non_ns_list).mean() + ): # reverse xmax = max(xmax, networkscore) # # liner regression @@ -969,14 +1060,28 @@ def calculate_bayes_factors( if g in networkscores: if networkscores[g] >= xmin and networkscores[g] <= xmax: testx.append(np.around(networkscores[g] * 100) / 100) - testy.append(np.log2(kess.evaluate(networkscores[g])[0] / knon.evaluate(networkscores[g])[0])) + testy.append( + np.log2( + kess.evaluate(networkscores[g])[0] + / knon.evaluate(networkscores[g])[0] + ) + ) - slope, intercept, r_value, p_value, std_err = stats.linregress(np.array(testx), np.array(testy)) + slope, intercept, r_value, p_value, std_err = stats.linregress( + np.array(testx), np.array(testy) + ) for g in genes_array[gene_test_idx]: if g in networkscores: if run_test_mode == True: - fp.write("%s\t%f\t%f\n" % (g, networkscores[g], slope * networkscores[g] + intercept)) + fp.write( + "%s\t%f\t%f\n" + % ( + g, + networkscores[g], + slope * networkscores[g] + intercept, + ) + ) nbf = slope * networkscores[g] + intercept else: nbf = 0.0 @@ -1003,7 +1108,7 @@ def calculate_bayes_factors( if rna_level == True: fout.write("RNA\tGENE") for i in range(len(column_list)): - fout.write("\t{0:s}".format(column_labels[i])) + fout.write(f"\t{column_labels[i]:s}") if train_method == 0: fout.write("\t{0:s}".format(column_labels[i] + "_STD")) fout.write("\tBF") @@ -1013,28 +1118,30 @@ def calculate_bayes_factors( for rnatag in sorted(bf.keys()): # RNA tag - fout.write("{0:s}\t".format(rnatag)) + fout.write(f"{rnatag:s}\t") # Gene gene = rna2gene[rnatag] - fout.write("{0:s}\t".format(gene)) + fout.write(f"{gene:s}\t") # BF of replicates for rep in column_list: - fout.write("{0:4.3f}\t".format(bf_mean_rna_rep[rnatag][rep])) + fout.write(f"{bf_mean_rna_rep[rnatag][rep]:4.3f}\t") if train_method == 0: - fout.write("{0:4.3f}\t".format(bf_std_rna_rep[rnatag][rep])) + fout.write(f"{bf_std_rna_rep[rnatag][rep]:4.3f}\t") # Sum BF of replicates if filter_multi_target == True: fout.write( - "{0:4.3f}".format(float(bf_multi_corrected_rna[rnatag]) * eqf) + f"{float(bf_multi_corrected_rna[rnatag]) * eqf:4.3f}" ) # eqf = equalizing factor for the number of replicates else: - fout.write("{0:4.3f}".format(float(sum(list(bf_mean_rna_rep[rnatag].values()))) * eqf)) + fout.write( + f"{float(sum(list(bf_mean_rna_rep[rnatag].values()))) * eqf:4.3f}" + ) # Num obs if train_method == 0: - 
fout.write("\t{0:d}".format(num_obs[gene])) + fout.write(f"\t{num_obs[gene]:d}") fout.write("\n") else: fout.write("GENE") @@ -1051,27 +1158,27 @@ def calculate_bayes_factors( for g in sorted(genes.keys()): # Gene - fout.write("{0:s}".format(g)) + fout.write(f"{g:s}") if network_boost == True: boostedbf_mean = np.mean(boostedbf[g]) boostedbf_std = np.std(boostedbf[g]) - fout.write("\t{0:4.3f}".format(float(boostedbf_mean) * eqf)) + fout.write(f"\t{float(boostedbf_mean) * eqf:4.3f}") if train_method == 0: - fout.write("\t{0:4.3f}".format(float(boostedbf_std) * eqf)) + fout.write(f"\t{float(boostedbf_std) * eqf:4.3f}") # BF if filter_multi_target == True: fout.write( - "\t{0:4.3f}".format(float(bf_multi_corrected_gene[g]) * eqf) + f"\t{float(bf_multi_corrected_gene[g]) * eqf:4.3f}" ) # eqf = equalizing factor for the number of replicates else: - fout.write("\t{0:4.3f}".format(float(bf_mean[g]) * eqf)) + fout.write(f"\t{float(bf_mean[g]) * eqf:4.3f}") # STD, Count if train_method == 0: - fout.write("\t{0:4.3f}\t{1:d}".format(float(bf_std[g]), num_obs[g])) + fout.write(f"\t{float(bf_std[g]):4.3f}\t{num_obs[g]:d}") # Normalized BF if flat_sgrna == True: - fout.write("\t{0:4.3f}".format(float(bf_norm[g]))) + fout.write(f"\t{float(bf_norm[g]):4.3f}") fout.write("\n") @@ -1080,9 +1187,13 @@ def calculate_bayes_factors( @click.option("-i", "--bayes-factors", required=True, type=click.Path(exists=True)) @click.option("-o", "--output-file", required=True) @click.option("-e", "--essential-genes", required=True, type=click.Path(exists=True)) -@click.option("-n", "--non-essential-genes", required=True, type=click.Path(exists=True)) +@click.option( + "-n", "--non-essential-genes", required=True, type=click.Path(exists=True) +) @click.option("-k", "--use-column", default=None) -def calculate_precision_recall(bayes_factors, output_file, essential_genes, non_essential_genes, use_column): +def calculate_precision_recall( + bayes_factors, output_file, essential_genes, non_essential_genes, use_column +): """ Calculate precision-recall from an input Bayes Factors file: @@ -1102,7 +1213,7 @@ def calculate_precision_recall(bayes_factors, output_file, essential_genes, non_ \b Example: - BAGEL.py pr -i input.bf -o output.PR -e ref_essentials.txt -n ref_nonessentials.txt + BAGEL.py pr -i input.bf -o output.PR -e ref_essentials.txt -n ref_nonessentials.txt """ # @@ -1146,9 +1257,7 @@ def calculate_precision_recall(bayes_factors, output_file, essential_genes, non_ if (cumulative_tp > 0) | (cumulative_fp > 0): precision = cumulative_tp / (cumulative_tp + cumulative_fp) fout.write( - "{0:s}\t{1:4.3f}\t{2:4.3f}\t{3:4.3f}\t{4:4.3f}\n".format( - g, bf.loc[g, bf_column], recall, precision, 1.0 - precision - ) + f"{g:s}\t{bf.loc[g, bf_column]:4.3f}\t{recall:4.3f}\t{precision:4.3f}\t{1.0 - precision:4.3f}\n" ) diff --git a/bin/cigar_parser.R b/bin/cigar_parser.R index 2018d686..2092a926 100755 --- a/bin/cigar_parser.R +++ b/bin/cigar_parser.R @@ -1219,6 +1219,10 @@ if (dim(alignment_info)[1] != 0){ htmlwidgets::saveWidget(plotly::as_widget(fig), paste0(results_path,"_edition.html")) }else{ + reads_classes <- c("Raw reads", "Merged reads", "Quality filtered reads", "Clustered reads", "Aligned reads") + reads_counts <- c(0, 0, 0, 0, 0) + reads_summary <- data.frame(classes = unlist(reads_classes), counts = unlist(reads_counts)) + write.csv(reads_summary,file=paste0(results_path, "_reads-summary.csv")) fig<-empty_plot("No alignments were produced. 
Please check your files and references") htmlwidgets::saveWidget(plotly::as_widget(fig), paste0(results_path,"_edition.html")) @@ -1234,4 +1238,9 @@ if (dim(alignment_info)[1] != 0){ colnames(edit_summary_perc)[1] = results_path # Rename the column to add the sample ID edit_summary_perc <- t(edit_summary_perc) # t() will add classes as columns and counts as values, 1 row per sample write.csv(edit_summary_perc,file=paste0(results_path, "_edits.csv")) + prevc_classes_mqc <- c("Wt passing filter", "Wt NOT passing filter", "Indels NOT passing filter", + "Above error & in pick", "NOT above error & in pick", "NOT above error & NOT in pick", "Above error & NOT in pick") + prevc_counts_mqc <- c(0, 0, 0, 0, 0, 0, 0) + indel_filters <- data.frame(sample = unlist(prevc_counts_mqc), row.names = unlist(prevc_classes_mqc)) + write.csv(indel_filters,file=paste0(results_path, "_QC-indels.csv")) } diff --git a/bin/extract_umis.py b/bin/extract_umis.py index e4792d0f..95a186d7 100755 --- a/bin/extract_umis.py +++ b/bin/extract_umis.py @@ -36,7 +36,9 @@ def parse_args(argv): :type argv: List """ usage = "Command line interface to telemap" - parser = argparse.ArgumentParser(description=usage, formatter_class=argparse.RawDescriptionHelpFormatter) + parser = argparse.ArgumentParser( + description=usage, formatter_class=argparse.RawDescriptionHelpFormatter + ) parser.add_argument( "-l", "--log", @@ -78,8 +80,12 @@ def parse_args(argv): default=1, help="Number of threads.", ) - parser.add_argument("--tsv", dest="TSV", type=str, required=False, help="TSV output file") - parser.add_argument("-o", "--output", dest="OUT", type=str, required=False, help="FASTA output file") + parser.add_argument( + "--tsv", dest="TSV", type=str, required=False, help="TSV output file" + ) + parser.add_argument( + "-o", "--output", dest="OUT", type=str, required=False, help="FASTA output file" + ) parser.add_argument( "--fwd-context", dest="FWD_CONTEXT", @@ -108,7 +114,9 @@ def parse_args(argv): default="AAABBBBAABBBBAABBBBAABBBBAAA", help="Reverse UMI sequence", ) - parser.add_argument("INPUT_FA", type=str, nargs="+", default="/dev/stdin", help="Detected UMIs") + parser.add_argument( + "INPUT_FA", type=str, nargs="+", default="/dev/stdin", help="Detected UMIs" + ) args = parser.parse_args(argv) @@ -271,17 +279,25 @@ def extract_umis( for entry in fh: pbar.update(1) - read_5p_seq, read_3p_seq = extract_adapters(entry, max_adapter_length) + read_5p_seq, read_3p_seq = extract_adapters( + entry, max_adapter_length + ) if not read_5p_seq or not read_3p_seq: continue - strand = detect_read_strand(read_5p_seq, upstream_context_fwd, upstream_context_rev) + strand = detect_read_strand( + read_5p_seq, upstream_context_fwd, upstream_context_rev + ) strand_stats[strand] += 1 # Extract fwd UMI - result_5p_fwd_dist, result_5p_fwd_seq = align(read_5p_seq, pattern_fwd, max_pattern_dist) + result_5p_fwd_dist, result_5p_fwd_seq = align( + read_5p_seq, pattern_fwd, max_pattern_dist + ) # Extract rev UMI - result_3p_rev_dist, result_3p_rev_seq = align(read_3p_seq, pattern_rev, max_pattern_dist) + result_3p_rev_dist, result_3p_rev_seq = align( + read_3p_seq, pattern_rev, max_pattern_dist + ) if not result_5p_fwd_seq or not result_3p_rev_seq: continue @@ -302,11 +318,17 @@ def extract_umis( if strand_stats["-"]: fwd_rev_ratio = strand_stats["+"] / strand_stats["-"] logging.info( - "Found {} fwd and {} rev reads (ratio: {})".format(strand_stats["+"], strand_stats["-"], fwd_rev_ratio) + "Found {} fwd and {} rev reads (ratio: {})".format( + strand_stats["+"], 
strand_stats["-"], fwd_rev_ratio + ) ) if n_read: perc = 100.0 * n_both_umi / n_read - logging.info("{}% of reads contained both UMIs with max {} mismatches".format(perc, max_pattern_dist)) + logging.info( + "{}% of reads contained both UMIs with max {} mismatches".format( + perc, max_pattern_dist + ) + ) if tsv: print( output_file, diff --git a/bin/plotter.R b/bin/plotter.R index 8d0fd8ea..0cce6b1f 100755 --- a/bin/plotter.R +++ b/bin/plotter.R @@ -54,10 +54,17 @@ gR <- opt$gRNA_sequence substitutions_info <- opt$substitutions_info rel_cut_site <- as.numeric(opt$cut_site) -data <- read.csv(indels_info) +data <- read.csv(indels_info, colClasses = c("character")) ref_seq <- readFasta(reference) subs_plup <- read.csv(substitutions_info, row.names = 1) +data$Start <- as.numeric(data$Start) +data$Length <- as.numeric(data$Length) +data$freq <- as.numeric(data$freq) +data$Perc <- as.numeric(data$Perc) +data$cut_site <- as.numeric(data$cut_site) +data$wt_reads <- as.numeric(data$wt_reads) +data$t_reads <- as.numeric(data$t_reads) ###################### ##### CRISPR-GA-1 like plot https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4184265/ @@ -553,7 +560,7 @@ if (dim(data)[2]>3 && length(checkFaulty) == 0 && length(checkEmpty) == 0){ ### templata_based <- data$t_reads[1] ### 0 total_char <- wt + templata_based + dim(data)[1] - delCols_indels <- data %>% group_by(Modification, Start, Length, ins_nt, patterns) %>% dplyr::summarize(freq = n()) + delCols_indels <- data %>% group_by(Modification, Start, Length, ins_nt) %>% dplyr::summarize(freq = n()) unique_variants <- rbind(as.data.frame(delCols_indels), c("wt", 0, 0, NA, NA, wt), c("template-based", 0, 0, NA, NA, templata_based)) uniq_indels_sorted <- unique_variants[order(as.numeric(unique_variants$freq), decreasing = TRUE),] write.csv(uniq_indels_sorted,file=paste0(sample_name, "_unique-variants.csv")) diff --git a/conf/base.config b/conf/base.config index a0ac558c..5ff6a2fb 100644 --- a/conf/base.config +++ b/conf/base.config @@ -59,7 +59,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false - } } diff --git a/conf/modules.config b/conf/modules.config index fb82328d..d0b5594e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -104,7 +104,7 @@ process { withName: MAGECK_MLE { publishDir = [ - path: { "${params.outdir}/mageck/mle/${meta.id}/" }, + path: { "${params.outdir}/mageck/mle/${meta.treatment}_vs_${meta.reference}/" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -122,20 +122,64 @@ process { ] } + withName: MAGECK_GRAPHRRA { + containerOptions = '' + } + + withName: MAGECK_FLUTEMLE { + containerOptions = '' + publishDir = [ + path: { "${params.outdir}/mageck/FluteMLE/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: VENNDIAGRAM { + publishDir = [ + path: { "${params.outdir}/venndiagram/${meta.treatment}_vs_${meta.reference}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: MAGECK_MLE_DAY0 { + ext.args = "--day0-label '${params.day0_label}'" + publishDir = [ + path: { "${params.outdir}/mageck/mle/day0/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: CRISPRCLEANR_NORMALIZE { publishDir = [ path: { "${params.outdir}/crisprcleanr/normalization/" }, + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } withName: CUTADAPT { - if(params.analysis == 'targeted') { - ext.args = '-g file:overrepresented.fasta -N' - } - if(params.analysis == 'screening' && params.cutadapt) { - ext.args = "-a ${params.cutadapt}" - } + ext.args = '-g file:overrepresented.fasta -N' + publishDir = [ + path: { "${params.outdir}/preprocessing/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: CUTADAPT_FIVE_PRIME { + ext.args = "-g ${params.five_prime_adapter}" + publishDir = [ + path: { "${params.outdir}/preprocessing/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: CUTADAPT_THREE_PRIME { + ext.args = "-a ${params.three_prime_adapter}" publishDir = [ path: { "${params.outdir}/preprocessing/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, @@ -177,6 +221,14 @@ process { ] } + withName: MATRICESCREATION { + publishDir = [ + path: { "${params.outdir}/design_matrix" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: MINIMAP2_ALIGN_UMI_1 { ext.args = '-x map-ont' ext.prefix = { "${reads.baseName}_cycle1" } @@ -271,6 +323,7 @@ process { withName: MINIMAP2_ALIGN_TEMPLATE { ext.args = '-A 29 -B 17 -O 25 -E 2' + ext.prefix = {"${meta.id}_template-align"} publishDir = [ path: { "${params.outdir}/preprocessing/sequences" }, mode: params.publish_dir_mode, @@ -291,16 +344,8 @@ process { ] } - withName: CUSTOM_DUMPSOFTWAREVERSIONS { - publishDir = [ - path: { "${params.outdir}/pipeline_info" }, - mode: params.publish_dir_mode, - pattern: '*_versions.yml' - ] - } - withName: 'MULTIQC' { - ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/conf/test_screening.config b/conf/test_screening.config index 7661aacf..412271a0 100644 --- a/conf/test_screening.config +++ b/conf/test_screening.config @@ -20,10 +20,15 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test.csv' + input = params.pipelines_testdata_base_path + "crisprseq/testdata/samplesheet_test.csv" analysis = 'screening' crisprcleanr = "Brunello_Library" - mle_design_matrix = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/design_matrix.txt" - library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt" - rra_contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt" + library = params.pipelines_testdata_base_path + "crisprseq/testdata/brunello_target_sequence.txt" + contrasts = params.pipelines_testdata_base_path + "crisprseq/testdata/rra_contrasts.txt" +} + +process { + withName: BAGEL2_BF { + ext.args = '-s 3' // Seed to avoid random errors due to a too small sample + } } diff --git a/conf/test_screening_count_table.config b/conf/test_screening_count_table.config new file mode 100644 index 00000000..9dfefe96 --- /dev/null +++ b/conf/test_screening_count_table.config @@ -0,0 +1,28 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/crisprseq -profile test_screening_count_table, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test screening profile with an input count table' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + count_table = params.pipelines_testdata_base_path + "crisprseq/testdata/count_table.tsv" + analysis = 'screening' + mle_design_matrix = params.pipelines_testdata_base_path + "crisprseq/testdata/design_matrix.txt" + contrasts = params.pipelines_testdata_base_path + "crisprseq/testdata/rra_contrasts.txt" + +} diff --git a/conf/test_screening_full.config b/conf/test_screening_full.config index 57c0f066..2f74238c 100644 --- a/conf/test_screening_full.config +++ b/conf/test_screening_full.config @@ -15,9 +15,15 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/full_test/samplesheet_full.csv' + input = params.pipelines_testdata_base_path + "crisprseq/testdata/full_test/samplesheet_full.csv" analysis = 'screening' crisprcleanr = "Brunello_Library" - mle_design_matrix = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/full_test/drugA_drugB_vs_treatment.txt" - library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt" + mle_design_matrix = params.pipelines_testdata_base_path + "crisprseq/testdata/full_test/drugA_drugB_vs_treatment.txt" + library = params.pipelines_testdata_base_path + "crisprseq/testdata/brunello_target_sequence.txt" +} + +process { + withName: BAGEL2_BF { + ext.args = '-s 3' // Seed to avoid random errors due to a too small sample + } } diff --git a/conf/test_screening_paired.config b/conf/test_screening_paired.config index d8874eff..1115f2a6 100644 --- a/conf/test_screening_paired.config +++ b/conf/test_screening_paired.config @@ -5,7 +5,7 @@ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows: - nextflow run nf-core/crisprseq -profile test_screening, --outdir + nextflow run nf-core/crisprseq -profile test_screening_paired, --outdir ---------------------------------------------------------------------------------------- */ @@ -20,7 +20,13 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test_paired.csv' + input = params.pipelines_testdata_base_path + "crisprseq/testdata/samplesheet_test_paired.csv" analysis = 'screening' - library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt" + library = params.pipelines_testdata_base_path + "crisprseq/testdata/brunello_target_sequence.txt" +} + +process { + withName: BAGEL2_BF { + ext.args = '-s 3' // Seed to avoid random errors due to a too small sample + } } diff --git a/conf/test_screening_rra.config b/conf/test_screening_rra.config new file mode 100644 index 00000000..fb6b9d3f --- /dev/null +++ b/conf/test_screening_rra.config @@ -0,0 +1,35 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/crisprseq -profile test_screening_rra, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test screening profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = params.pipelines_testdata_base_path + "crisprseq/testdata/samplesheet_test.csv" + analysis = 'screening' + crisprcleanr = "Brunello_Library" + library = params.pipelines_testdata_base_path + "crisprseq/testdata/brunello_target_sequence.txt" + contrasts = params.pipelines_testdata_base_path + "crisprseq/testdata/rra_contrasts.txt" + rra = true +} + +process { + withName: BAGEL2_BF { + ext.args = '-s 3' // Seed to avoid random errors due to a too small sample + } +} diff --git a/conf/test_targeted.config b/conf/test_targeted.config index 9bf6cabc..1906efb2 100644 --- a/conf/test_targeted.config +++ b/conf/test_targeted.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test.csv' + input = params.pipelines_testdata_base_path + "crisprseq/testdata-edition/samplesheet_test.csv" analysis = 'targeted' // Aligner diff --git a/conf/test_full.config b/conf/test_targeted_full.config similarity index 84% rename from conf/test_full.config rename to conf/test_targeted_full.config index ba79a208..ff6f1c17 100644 --- a/conf/test_full.config +++ b/conf/test_targeted_full.config @@ -15,9 +15,12 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test_full.csv' + input = params.pipelines_testdata_base_path + "crisprseq/testdata-edition/samplesheet_test_full.csv" analysis = 'targeted' // Aligner aligner = 'minimap2' + + // Steps + overrepresented = true } diff --git 
a/conf/test_umis.config b/conf/test_umis.config
index d99effcb..d77b71f8 100644
--- a/conf/test_umis.config
+++ b/conf/test_umis.config
@@ -20,7 +20,7 @@ params {
    max_time = '6.h'

    // Input data
-    input          = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test_umis.csv'
+    input          = params.pipelines_testdata_base_path + "crisprseq/testdata-edition/samplesheet_test_umis.csv"
    analysis       = 'targeted'

    umi_clustering = true
diff --git a/docs/images/crisprseq_metropmap_all.png b/docs/images/crisprseq_metropmap_all.png
new file mode 100644
index 00000000..0c8e2053
Binary files /dev/null and b/docs/images/crisprseq_metropmap_all.png differ
diff --git a/docs/images/nf-core-crisprseq_logo_dark.png b/docs/images/nf-core-crisprseq_logo_dark.png
index bf6b79ec..d2409785 100644
Binary files a/docs/images/nf-core-crisprseq_logo_dark.png and b/docs/images/nf-core-crisprseq_logo_dark.png differ
diff --git a/docs/images/nf-core-crisprseq_logo_light.png b/docs/images/nf-core-crisprseq_logo_light.png
index 9f408dfb..642e7389 100644
Binary files a/docs/images/nf-core-crisprseq_logo_light.png and b/docs/images/nf-core-crisprseq_logo_light.png differ
diff --git a/docs/images/venn.png b/docs/images/venn.png
new file mode 100644
index 00000000..d117dd2f
Binary files /dev/null and b/docs/images/venn.png differ
diff --git a/docs/output/screening.md b/docs/output/screening.md
index aa163333..d4a7978e 100644
--- a/docs/output/screening.md
+++ b/docs/output/screening.md
@@ -17,14 +17,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

- [Preprocessing](#preprocessing)
  - [FastQC](#fastqc) - Read Quality Control
  - [cutadapt](#cutadapt) - Trimming reads from fastq files
+- [Mapping](#alignment) - Alignment of reads with bowtie2
- [Counting](#counting)
-  - [MAGeCK count](#mageck-count) - Mapping reads to reference
+  - [MAGeCK count](#mageck-count) - Mapping reads to reference library
- [CNV correction](#cnv-correction)
  - [CRISPRcleanR](#crisprcleanr-normalization) - Copy Number Variation correction and read normalization in case of knock-out screens.
- [Gene essentiality](#gene-essentiality-computation)
  - [MAGeCK rra](#mageck-rra) - modified robust ranking aggregation (RRA) algorithm
  - [MAGeCK mle](#mageck-mle) - maximum-likelihood estimation (MLE) for robust identification of CRISPR-screen hits
  - [BAGEL2](#BAGEL2) - Bayes Factor to identify essential genes
+  - [MAGeCKFlute](#flutemle) - graphics to visualise MAGeCK MLE output
- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline
- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution

@@ -57,6 +59,18 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d

[cutadapt](https://cutadapt.readthedocs.io/en/stable/). Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. MAGeCK count normally automatically detects adapter sequences and trims, however if trimming lengths are different, cutadapt can be used, as mentioned [here](https://sourceforge.net/p/mageck/wiki/advanced_tutorial/). For further reading and documentation see the [cutadapt helper page](https://cutadapt.readthedocs.io/en/stable/guide.html).

+## Alignment
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `bowtie2/`
+  - `*.log`: log file of the command run and its output
+  - `*.bam`: BAM file with the aligned reads
+  - `*.bowtie2`: bowtie2 index built from the provided fasta file
+
+</details>

## Counting

### MAGeCK count

@@ -108,8 +122,8 @@ For further reading and documentation see the [cutadapt helper page](https://cut
  - `*_count_sgrna_summary.txt`: sgRNA ranking results, tab separated file containing means, p-values
  - `*.report.Rmd`: markdown report recapping essential genes
  - `*_count_table.log`: log of the run
-  - `*_scatterview.png`: scatter view of the targeted genes and their logFC
-  - `*_rank.png`: rank view of the targeted genes
+  - `*_scatterview.png`: scatter view of the targeted genes in the library and their logFC
+  - `*_rank.png`: rank view of the targeted genes in the library
@@ -134,6 +148,34 @@ For further reading and documentation see the [cutadapt helper page](https://cut

[bagel2](https://github.com/hart-lab/bagel) is a computational tool to identify important essential genes for CRISPR-Cas9 screening experiments.

+### Venn diagram
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `venndiagram`
+  - `*_common_genes_bagel_mle.txt`: common essential genes between BAGEL2 and MAGeCK MLE
+  - `*_venn_bagel2_mageckmle.png`: Venn diagram of the common essential genes between BAGEL2 and MAGeCK MLE. An example is shown below:
+
+![Venn diagram](/docs/images/venn.png)
+
+</details>
+
+## Gene essentiality functional analysis
+
+### MAGeCKFlute
+
+<details markdown="1">
+<summary>Output files</summary>
+
+- `FluteMLE/QC`
+  - `*.txt`: Quality control tables
+  - `*.png`: Quality control plots
+- `FluteMLE/Selection`
+  - `*.txt`: Positive and negative selection tables.
+  - `*.png`: Rank and scatter view for positive and negative selection
+- `FluteMLE/Enrichment`
+  - `*.txt`: Enrichment analysis for positive and negative selection genes.
+  - `*.png`: Enrichment analysis plots for positive and negative selection genes.
+- `FluteMLE/PathwayView`
+  - `*.txt`: Pathway view tables for top enriched pathways.
+  - `*.png`: Pathway view plots for top enriched pathways.
+
+</details>
+
## MultiQC
diff --git a/docs/usage.md b/docs/usage.md
index e95bbe73..9a574565 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -121,6 +121,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `apptainer`
  - A generic configuration profile to be used with [Apptainer](https://apptainer.org/)
+- `wave`
+  - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later).
- `conda`
  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer.

@@ -197,35 +199,35 @@ The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementatio
2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags)
3. Create the custom config accordingly:

-   - For Docker:
-
-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
-         }
-     }
-     ```
-
-   - For Singularity:
-
-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
-         }
-     }
-     ```
-
-   - For Conda:
-
-     ```nextflow
-     process {
-         withName: PANGOLIN {
-             conda = 'bioconda::pangolin=3.0.5'
-         }
-     }
-     ```
+- For Docker:
+
+  ```nextflow
+  process {
+      withName: PANGOLIN {
+          container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0'
+      }
+  }
+  ```
+
+- For Singularity:
+
+  ```nextflow
+  process {
+      withName: PANGOLIN {
+          container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0'
+      }
+  }
+  ```
+
+- For Conda:
+
+  ```nextflow
+  process {
+      withName: PANGOLIN {
+          conda = 'bioconda::pangolin=3.0.5'
+      }
+  }
+  ```

:::info
If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch.
diff --git a/docs/usage/screening.md b/docs/usage/screening.md
index 113bc897..cc7f3be7 100644
--- a/docs/usage/screening.md
+++ b/docs/usage/screening.md
@@ -21,6 +21,7 @@ nextflow run nf-core/crisprseq --analysis screening --input samplesheet.csv --li
```

The following required parameters are here described.
+If you wish to input a raw or normalized count table, you can skip the samplesheet parameter as well as the library one and directly input your table with `--count_table your_count_table`. Your count table should contain the following columns: sgRNA and gene. You can find an example [here](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/count_table.csv). If your count table is normalized, be sure to set the normalization method to none in MAGeCK MLE or MAGeCK RRA using a config file.

### Full samplesheet
@@ -41,16 +42,39 @@ SRR8983580,SRR8983580.small.fastq.gz,,treatment

An [example samplesheet](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/samplesheet_test.csv) has been provided with the pipeline.

+### cutadapt
+
+MAGeCK count, which is the main alignment software used, is normally able to determine the trimming length and sgRNA length automatically in most cases. Therefore, you usually don't need this step unless MAGeCK fails to do so by itself. If the nucleotide length in front of the sgRNA varies between different reads, you can use cutadapt to remove the adapter sequences with the flags `--five_prime_adapter` or `--three_prime_adapter`.
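+For example, to trim a fixed 5' sequence before counting, the adapter flag can be passed directly on the command line (a minimal sketch; the adapter sequence and file names are placeholders):
+
+```bash
+nextflow run nf-core/crisprseq \
+    -profile docker \
+    --analysis screening \
+    --input samplesheet.csv \
+    --library library.txt \
+    --five_prime_adapter TCTTGTGGAAAGGACGAAACACCG \
+    --outdir results
+```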
+
+### bowtie2
+
+The MAGeCK count module supports BAM files, which allows you to align your reads with bowtie2 first. If you wish to do so (for instance, to allow mapping reads to the library with mismatches, or to run the aligner with specific flags), you can provide a fasta file encoding the library with `--fasta`. Currently, you also still need to provide the tab-separated library file with `--library`.
+
### library

If you are running the pipeline with fastq files and wish to obtain a count table, the library parameter is needed. The library table has three mandatory columns: id, target transcript (or gRNA sequence) and gene symbol. An [example](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/brunello_target_sequence.txt) has been provided with the pipeline. Many libraries can be found on [addgene](https://www.addgene.org/).

-After the alignment step, the pipeline currently supports 3 algorithms to detect gene essentiality, MAGeCK rra, MAGeCK mle and BAGEL2. MAGeCK MLE (Maximum Likelihood Estimation) and MAGeCK RRA (Robust Ranking Aggregation) are two different methods provided by the MAGeCK software package to analyze CRISPR-Cas9 screens. BAGEL2 identifies gene essentiality through Bayesian Analysis.
+After the alignment step, if you are performing KO (Knock-Out) screens, you can choose to correct gene-independent cell responses to CRISPR-Cas9 targeting using CRISPRcleanR. If you are performing a CRISPR interference or activation screen, this step is not needed.
+
+The pipeline currently supports 3 algorithms to detect gene essentiality: MAGeCK RRA, MAGeCK MLE and BAGEL2. MAGeCK MLE (Maximum Likelihood Estimation) and MAGeCK RRA (Robust Ranking Aggregation) are two different methods provided by the MAGeCK software package to analyze CRISPR-Cas9 screens. BAGEL2 identifies gene essentiality through Bayesian Analysis.
+We recommend running MAGeCK MLE and BAGEL2, as these are the most widely used and most recent algorithms to determine gene essentiality.
+
+### Running CRISPRcleanR
+
+[CRISPRcleanR](https://github.com/francescojm/CRISPRcleanR) is used for gene count normalization and for the removal of biases in genomic segments whose copy numbers are amplified. Currently, the pipeline supports annotation libraries already present in the R package as well as user-provided annotation files.
+Most commonly used libraries already have an annotation dataset, which you can find [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization, use `--crisprcleanr library`, where `library` is the exact name of the library in the CRISPRcleanR documentation (e.g. "AVANA_Library").
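+For example, a screening run with CRISPRcleanR normalization enabled could look like this (a minimal sketch; file names are placeholders, and "Brunello_Library" is the annotation name used in the pipeline's own test profiles):
+
+```bash
+nextflow run nf-core/crisprseq \
+    -profile docker \
+    --analysis screening \
+    --input samplesheet.csv \
+    --library brunello_target_sequence.txt \
+    --crisprcleanr Brunello_Library \
+    --outdir results
+```
+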
+Otherwise, if you wish to provide your own annotation file, please provide it in CSV format and make sure it follows this format (with the comma in front of "CODE" included):

-### MAGeCK rra
+|                      | CODE        | GENES         | EXONE | CHRM | STRAND | STARTpos | ENDpos |
+| -------------------- | ----------- | ------------- | ----- | ---- | ------ | -------- | ------ |
+| ATGGTGTCCATTATAGCCAT | NM_021446.2 | 0610007P14Rik | ex2   | 12   | +      | 85822165 |        |
+| CTCTACGAGAAGCTCTACAC | NM_021446.2 | 0610007P14Rik | ex2   | 12   | +      | 85822108 |        |
+| GACTCTATCACATCACACTG | NM_021446.2 | 0610007P14Rik | ex4   | 12   | +      | 85816419 |        |

-MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK rra, `--rra_contrasts` contains two columns : treatment and reference. These two columns should be separated with a dot comma (;) and contain the `csv` extension. You can also integrate several samples/conditions by comma separating them. Please find an example here below :
+### Running MAGeCK MLE and BAGEL2 with a contrast file
+
+To run both MAGeCK MLE and BAGEL2, you can provide a contrast file with the flag `--contrasts`, with the mandatory headers "treatment" and "reference". The two columns should be separated with a semicolon (;) and the file should have the `.csv` extension. You can also integrate several samples/conditions by comma-separating them in each column. Please find an example below:

| reference         | treatment             |
| ----------------- | --------------------- |
@@ -59,24 +83,36 @@ MAGeCK RRA performs robust ranking aggregation to identify genes that are consis

A full example can be found [here](https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/full_test/samplesheet_full.csv).

-### cutadapt
+#### Venn diagram
+
+Running MAGeCK MLE and BAGEL2 with a contrast file will also output a Venn diagram showing the common genes with an FDR < 0.1.
+
+### Running MAGeCK RRA only

-MAGeCK is normally able to automatically determine the trimming length and sgRNA length, in most cases. Therefore, you don't need to go to this step unless MAGeCK fails to do so by itself. If the nucleotide length in front of sgRNA varies between different reads, you can use cutadapt to remove the adapter sequences by using the flag `--cutadapt ADAPTER`.
+MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as stated in the previous section (with a `.txt` extension) and also specify `--rra`.

-### MAGeCK mle
+### Running MAGeCK MLE only

-MAGeCK MLE uses a maximum likelihood estimation approach to estimate the effects of gene knockout on cell fitness. It models the read count data of guide RNAs targeting each gene and estimates the dropout probability for each gene. MAGeCK mle requires a design matrix. The design matrix is a `txt` file indicating the effects of different conditions on different samples.
-An [example design matrix](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/design_matrix.txt) has been provided with the pipeline.
+#### With design matrices
+
+If you wish to run MAGeCK MLE only, you can specify several design matrices (where you state which comparisons you wish to run) with the flag `--mle_design_matrix`.
+MAGeCK MLE uses a maximum likelihood estimation approach to estimate the effects of gene knockout on cell fitness. It models the read count data of guide RNAs targeting each gene and estimates the dropout probability for each gene.
+MAGeCK MLE requires one or several design matrices. The design matrix is a `txt` file indicating the effects of different conditions on different samples.
+An [example design matrix](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/design_matrix.txt) has been provided with the pipeline. The row names need to match the condition stated in the sample sheet. If there are several designs to be run, you can input a folder containing all the design matrices. The output results will automatically take the name of the design matrix, so make sure you give a meaningful name to the file, for instance "Drug_vs_control.txt".
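+As an illustration, a design matrix for a single drug-vs-control comparison could look like the sketch below, following the format of the example design matrix linked above (tab-separated in the actual file; sample and condition names are placeholders and must match your sample sheet):
+
+```console
+Samples        baseline   Drug
+control_rep1   1          0
+control_rep2   1          0
+drug_rep1      1          1
+drug_rep2      1          1
+```
+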
-### Running CRISPRcleanR
+#### With the day0 label

-CRISPRcleanR is used for gene count normalization and the removal of biases for genomic segments for which copy numbers are amplified. Currently, the pipeline only supports annotation libraries already present in the R package and which can be found [here](https://github.com/francescojm/CRISPRcleanR/blob/master/Reference_Manual.pdf). To use CRISPRcleanR normalization, use `--crisprcleanr library`, `library` being the exact name as the library in the CRISPRcleanR documentation (e.g: "AVANA_Library").
+If you wish to run MAGeCK MLE with the day0 label, you can do so by specifying `--day0_label` and the sample names that should be used as day0.

-### BAGEL2
+### Running BAGEL2

BAGEL2 (Bayesian Analysis of Gene Essentiality with Location) is a computational tool developed by the Hart Lab. It is designed for analyzing large-scale genetic screens, particularly CRISPR-Cas9 screens, to identify genes that are essential for the survival or growth of cells under different conditions. BAGEL2 integrates information about the location of guide RNAs within a gene and leverages this information to improve the accuracy of gene essentiality predictions.

-BAGEL2 uses the same contrasts from `--rra_contrasts`.
+BAGEL2 uses the same contrasts from `--contrasts`.
+
+### MAGeCKFlute
+
+The downstream analysis distinguishes essential, non-essential, and target-associated genes. It also performs biological functional category analysis and pathway enrichment analysis for these genes, and provides visualization of genes within pathways, enhancing user exploration of screening data. MAGeCKFlute is run automatically after MAGeCK MLE, once for each MLE design matrix. If you have used `--day0_label`, MAGeCKFlute will be run on all the other conditions. Please note that the DepMap data is used for these plots.

Note that the pipeline will create the following files in your working directory:
diff --git a/docs/usage/targeted.md b/docs/usage/targeted.md
index 4c20f20f..fbbbfa35 100644
--- a/docs/usage/targeted.md
+++ b/docs/usage/targeted.md
@@ -35,7 +35,7 @@ CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz,G

The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 6 columns to match those defined in the table below.

-A final samplesheet file consisting of both single- and paired-end data may look something like the one below.
This is for 3 samples, where `chr6` is single-end and has a template sequence _(this is a reduced samplesheet, please refer to the [pipeline example saplesheet](https://nf-co.re/crisprseq/1.0/assets/samplesheet.csv) to see the full version)_. +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 3 samples, where `chr6` is single-end and has a template sequence _(this is a reduced samplesheet, please refer to the [pipeline example samplesheet](https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata-edition/samplesheet_test_full.csv) to see the full version)_. ```console sample,fastq_1,fastq_2,reference,protospacer,template @@ -50,10 +50,10 @@ chr6,chr6-61942198-61942498_R1.fastq.gz,,CAA...GGA,TTTTATGATATTTATCTTTT,TTC...CA | `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | | `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". (Optional) | | `reference` | Reference sequence of the target region. | -| `protospacer` | Sequence of the protospacer used for CRISPR editing. Must not includ the PAM. | -| `template` | Sequence of the template used in templet-based editing experiments. (Optional) | +| `protospacer` | Sequence of the protospacer used for CRISPR editing. Must not include the PAM. | +| `template` | Sequence of the template used in template-based editing experiments. (Optional) | -An [example samplesheet](https://nf-co.re/crisprseq/1.0/assets/samplesheet.csv) has been provided with the pipeline. +An [example samplesheet](https://github.com/nf-core/test-datasets/blob/crisprseq/testdata/samplesheet_test.csv) has been provided with the pipeline. ## Optional pipeline steps diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index e248e4c3..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,356 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. -// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // 
Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() - } - - // - // Construct and send a notification to a web server as JSON - // e.g. 
Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/params_${timestamp}.json") - temp_pf.delete() - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // nf-core logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} - ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} - ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} - ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} - ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100644 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
-        def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
-        def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
-
-        // Check that they are in the right order
-        def channel_priority_violation = false
-        def n = required_channels_in_order.size()
-        for (int i = 0; i < n - 1; i++) {
-            channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
-        }
-
-        if (channels_missing | channel_priority_violation) {
-            log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  There is a problem with your Conda configuration!\n\n" +
-                "  You will need to set-up the conda-forge and bioconda channels correctly.\n" +
-                "  Please refer to https://bioconda.github.io/\n" +
-                "  The observed channel order is \n" +
-                "  ${channels}\n" +
-                "  but the following channel order is required:\n" +
-                "  ${required_channels_in_order}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-        }
-    }
-}
diff --git a/lib/WorkflowCrisprseq.groovy b/lib/WorkflowCrisprseq.groovy
deleted file mode 100755
index 05ea4360..00000000
--- a/lib/WorkflowCrisprseq.groovy
+++ /dev/null
@@ -1,144 +0,0 @@
-//
-// This file holds several functions specific to the workflow/crisprseq.nf in the nf-core/crisprseq pipeline
-//
-
-import nextflow.Nextflow
-import groovy.text.SimpleTemplateEngine
-
-class WorkflowCrisprseq {
-
-    //
-    // Check and validate parameters
-    //
-    public static void initialise(params, log) {
-
-        genomeExistsError(params, log)
-    }
-
-    //
-    // Function to validate channels from input samplesheet
-    //
-    public static ArrayList validateInput(input) {
-        def (metas, fastqs) = input[1..2]
-
-        // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
-        def endedness_ok = metas.collect{ it.single_end }.unique().size == 1
-        if (!endedness_ok) {
-            Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}")
-        }
-
-        // Check that multiple runs of the same sample contain a reference or not
-        def reference_ok = metas.collect{ it.self_reference }.unique().size == 1
-        if (!reference_ok) {
-            Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must all contain a reference or not: ${metas[0].id}")
-        }
-
-        // Check that multiple runs of the same sample contain a template or not
-        def template_ok = metas.collect{ it.template }.unique().size == 1
-        if (!template_ok) {
-            Nextflow.error("Please check input samplesheet -> Multiple runs of a sample must all contain a template or not: ${metas[0].id}")
-        }
-
-        return [ metas[0], fastqs ]
-    }
-
-    //
-    // Get workflow summary for MultiQC
-    //
-    public static String paramsSummaryMultiqc(workflow, summary) {
-        String summary_section = ''
-        for (group in summary.keySet()) {
-            def group_params = summary.get(group)  // This gets the parameters of that particular group
-            if (group_params) {
-                summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n"
-                summary_section += "    <dl class=\"dl-horizontal\">\n"
-                for (param in group_params.keySet()) {
-                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n"
-                }
-                summary_section += "    </dl>\n"
-            }
-        }
-
-        String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
-        yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
-        yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
-        yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
-        yaml_file_text += "plot_type: 'html'\n"
-        yaml_file_text += "data: |\n"
-        yaml_file_text += "${summary_section}"
-        return yaml_file_text
-    }
-
-    //
-    // Generate methods description for MultiQC
-    //
-
-    public static String toolCitationText(params) {
-
-        // TODO nf-core: Optionally add in-text citation tools to this list.
-        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def citation_text = [
-                "Tools used in the workflow included:",
-                "FastQC (Andrews 2010),",
-                "MultiQC (Ewels et al. 2016)",
-                "."
-            ].join(' ').trim()
-
-        return citation_text
-    }
-
-    public static String toolBibliographyText(params) {
-
-        // TODO Optionally add bibliographic entries to this list.
-        // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "<li>Author (2023) Pub name, Journal, DOI</li>" : "",
-        // Uncomment function in methodsDescriptionText to render in MultiQC report
-        def reference_text = [
-                "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>",
-                "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354</li>"
-            ].join(' ').trim()
-
-        return reference_text
-    }
-
-    public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) {
-        // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file
-        def meta = [:]
-        meta.workflow = run_workflow.toMap()
-        meta["manifest_map"] = run_workflow.manifest.toMap()
-
-        // Pipeline DOI
-        meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : ""
-        meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>"
-
-        // Tool references
-        meta["tool_citations"] = ""
-        meta["tool_bibliography"] = ""
-
-        // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled!
-        //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".")
-        //meta["tool_bibliography"] = toolBibliographyText(params)
-
-
-        def methods_text = mqc_methods_yaml.text
-
-        def engine = new SimpleTemplateEngine()
-        def description_html = engine.createTemplate(methods_text).make(meta)
-
-        return description_html
-    }
-
-    //
-    // Exit pipeline if incorrect --genome key provided
-    //
-    private static void genomeExistsError(params, log) {
-        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
-            def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
-                "  Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
-                "  Currently, the available genome keys are:\n" +
-                "  ${params.genomes.keySet().join(", ")}\n" +
-                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
-            Nextflow.error(error_string)
-        }
-    }
-}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
deleted file mode 100755
index 0e40058b..00000000
--- a/lib/WorkflowMain.groovy
+++ /dev/null
@@ -1,57 +0,0 @@
-//
-// This file holds several functions specific to the main.nf workflow in the nf-core/crisprseq pipeline
-//
-
-import nextflow.Nextflow
-
-class WorkflowMain {
-
-    //
-    // Citation string for pipeline
-    //
-    public static String citation(workflow) {
-        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
-            "* The pipeline\n" +
-            "  https://doi.org/10.5281/zenodo.7598496\n\n" +
-            "* The nf-core framework\n" +
-            "  https://doi.org/10.1038/s41587-020-0439-x\n\n" +
-            "* Software dependencies\n" +
-            "  https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
-    }
-
-
-    //
-    // Validate parameters and print summary to screen
-    //
-    public static void initialise(workflow, params, log) {
-
-        // Print workflow version and exit on --version
-        if (params.version) {
-            String workflow_version = NfcoreTemplate.version(workflow)
-            log.info "${workflow.manifest.name} ${workflow_version}"
-            System.exit(0)
-        }
-
-        // Check that a -profile or Nextflow config has been provided to run the pipeline
-        NfcoreTemplate.checkConfigProvided(workflow, log)
-
-        // Check that conda channels are set-up correctly
-        if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
-            Utils.checkCondaChannels(log)
-        }
-
-        // Check AWS batch settings
-        NfcoreTemplate.awsBatch(workflow, params)
-    }
-    //
-    // Get attribute from genome config file e.g.
fasta - // - public static Object getGenomeAttribute(params, attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index 16c212bb..e06415e3 100644 --- a/main.nf +++ b/main.nf @@ -13,39 +13,25 @@ nextflow.enable.dsl = 2 /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -params.reference_fasta = params.reference_fasta ?: WorkflowMain.getGenomeAttribute(params, 'fasta') +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_crisprseq_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_crisprseq_pipeline' +include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_crisprseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) +params.reference_fasta = params.reference_fasta ?: getGenomeAttribute('fasta') /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ @@ -53,28 +39,83 @@ include { CRISPRSEQ_TARGETED } from './workflows/crisprseq_targeted' include { CRISPRSEQ_SCREENING } from './workflows/crisprseq_screening' // -// WORKFLOW: Run main nf-core/crisprseq analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow NFCORE_CRISPRSEQ { + + take: + reads_targeted // channel: fastqc files read in from --input + reads_screening // channel: fastqc files read in from --input + reference // channel: reference sequence read from --input + protospacer // channel: protospacer sequence read from --input + template // channel: template sequence read from --input + + main: + // + // WORKFLOW: Run pipeline + // if ( params.analysis == "targeted" ) { - CRISPRSEQ_TARGETED () + CRISPRSEQ_TARGETED ( + reads_targeted, + reference, + template, + protospacer + ) + multiqc_report_ch = CRISPRSEQ_TARGETED.out.multiqc_report } else if ( params.analysis == "screening" ) { - CRISPRSEQ_SCREENING () + CRISPRSEQ_SCREENING (reads_screening) + multiqc_report_ch = 
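The `getGenomeAttribute` helper removed from `lib/WorkflowMain.groovy` is now imported from the `utils_nfcore_crisprseq_pipeline` subworkflow instead. A minimal sketch of the lookup it performs and the `?:` fallback it feeds, assuming an iGenomes-style `params.genomes` map:

```groovy
// Returns the requested attribute (e.g. 'fasta') for params.genome,
// or null if the genome or attribute is not configured.
def getGenomeAttribute(attribute) {
    if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
        if (params.genomes[ params.genome ].containsKey(attribute)) {
            return params.genomes[ params.genome ][ attribute ]
        }
    }
    return null
}

// An explicit --reference_fasta always wins over the iGenomes lookup.
params.reference_fasta = params.reference_fasta ?: getGenomeAttribute('fasta')
```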
CRISPRSEQ_SCREENING.out.multiqc_report } -} + emit: + multiqc_report = multiqc_report_ch // channel: /path/to/multiqc_report.html +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - NFCORE_CRISPRSEQ () + + main: + + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.help, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + params.input + ) + + // + // WORKFLOW: Run main workflow + // + NFCORE_CRISPRSEQ ( + PIPELINE_INITIALISATION.out.reads_targeted, + PIPELINE_INITIALISATION.out.fastqc_screening, + PIPELINE_INITIALISATION.out.reference, + PIPELINE_INITIALISATION.out.protospacer, + PIPELINE_INITIALISATION.out.template + ) + + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + NFCORE_CRISPRSEQ.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 0adbc8ec..8e33a623 100644 --- a/modules.json +++ b/modules.json @@ -7,27 +7,27 @@ "nf-core": { "bowtie2/align": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "0fe30831abbc2ed115e46e92330edf38f56edc3d", "installed_by": ["modules"] }, "bowtie2/build": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", "installed_by": ["modules"] }, "bwa/index": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "086fa66260595e123b0ea47a6512539b72a9afa3", "installed_by": ["modules"] }, "bwa/mem": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "0c34b8159f62cde451c4ff249629c9d0a4f3f9c3", "installed_by": ["modules"] }, "cat/fastq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e", "installed_by": ["modules"] }, "crisprcleanr/normalize": { @@ -36,37 +36,32 @@ "installed_by": ["modules"], "patch": "modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff" }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "bba7e362e4afead70653f84d8700588ea28d0f9e", - "installed_by": ["modules"] - }, "cutadapt": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "6618151ed69274863dc6fe6d2920afa90abaca1f", "installed_by": ["modules"], "patch": "modules/nf-core/cutadapt/cutadapt.diff" }, "fastqc": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "285a50500f9e02578d90b3ce6382ea3c30216acd", "installed_by": ["modules"] }, "mageck/count": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "3fd600f14c03766f840acf29e1a6a1bc45d5c9a0", "installed_by": ["modules"], "patch": "modules/nf-core/mageck/count/mageck-count.diff" }, "mageck/mle": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "a179c03b8933e96cdf232e8de9addf5c83a05d24", "installed_by": ["modules"], "patch": "modules/nf-core/mageck/mle/mageck-mle.diff" }, "mageck/test": { "branch": "master", - 
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "02ddbebcea3e0aad5ddd090d243e7ceb3ae2f063", "installed_by": ["modules"], "patch": "modules/nf-core/mageck/test/mageck-test.diff" }, @@ -78,18 +73,18 @@ }, "minimap2/align": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "minimap2/index": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "72e277acfd9e61a9f1368eafb4a9e83f5bcaa9f5", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "214d575774c172062924ad3564b4f66655600730", + "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", "installed_by": ["modules"] }, "pear": { @@ -99,28 +94,28 @@ }, "racon": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "f5ed3ac0834b68e80a00a06a61d04ce8e896f275", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "5394565c5fe4c760e5b35977ec7607c62e81d1f8", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", "installed_by": ["modules"] }, "seqtk/seq": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", "installed_by": ["modules"] }, "vsearch/cluster": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "d97b335eb448073c1b680710303c02a55f40c77c", "installed_by": ["modules"], "patch": "modules/nf-core/vsearch/cluster/vsearch-cluster.diff" }, "vsearch/sort": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "97cf4c4a37a8931e19ef728de92f930a366669f2", "installed_by": ["modules"] } } diff --git a/modules/local/bagel2/bf.nf b/modules/local/bagel2/bf.nf index 3ff86603..f4e8b5f5 100644 --- a/modules/local/bagel2/bf.nf +++ b/modules/local/bagel2/bf.nf @@ -1,5 +1,5 @@ process BAGEL2_BF { - tag "$meta.treatment" + tag "${meta.treatment}_${meta.reference}" label 'process_single' @@ -23,7 +23,7 @@ process BAGEL2_BF { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.treatment}" + def prefix = task.ext.prefix ?: "${meta.treatment}_vs_${meta.reference}" """ BAGEL.py bf -i $foldchange -o '${meta.treatment}_vs_${meta.reference}.bf' $args -e $reference_essentials -n $reference_nonessentials -c ${meta.treatment} diff --git a/modules/local/bagel2/graph.nf b/modules/local/bagel2/graph.nf index 32c96046..c95dc3fc 100644 --- a/modules/local/bagel2/graph.nf +++ b/modules/local/bagel2/graph.nf @@ -19,7 +19,7 @@ process BAGEL2_GRAPH { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.treatment}_${meta.reference}" """ #!/usr/bin/env python3 @@ -64,8 +64,8 @@ process BAGEL2_GRAPH { # alas, no `pyyaml` pre-installed in the cellranger container with open("versions.yml", "w") as f: f.write('"${task.process}":\\n') - f.write(f' pandas: "{version}"\\n') - f.write(f' matplotlib.pyplot: "{matplotlib_version}"\\n') + f.write(f' pandas: {version}\\n') + f.write(f' matplotlib.pyplot: {matplotlib_version}\\n') """ diff --git a/modules/local/bagel2/pr.nf b/modules/local/bagel2/pr.nf index e27366d6..5a8faeff 100644 --- a/modules/local/bagel2/pr.nf +++ b/modules/local/bagel2/pr.nf @@ -1,5 +1,5 @@ process BAGEL2_PR { - tag "$meta.treatment" + tag "${meta.treatment}_vs_${meta.reference}" label 
'process_single' conda "python=3.11.4 pandas=2.0.3 numpy=1.25.1 scikit-learn=1.3.0 click=8.1.6" @@ -12,23 +12,23 @@ process BAGEL2_PR { tuple val(meta), path(bf), path(reference_essentials), path(reference_nonessentials) output: - tuple val(meta), path("*.pr") , emit: pr - path "versions.yml" , emit: versions + tuple val(meta), path("*.tsv"), emit: pr + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.treatment}_vs_${meta.reference}" """ - BAGEL.py pr -i $bf -o '${meta.treatment}_vs_${meta.reference}.pr' -e $reference_essentials -n $reference_nonessentials $args + BAGEL.py pr -i $bf -o '${meta.treatment}_vs_${meta.reference}.tsv' -e $reference_essentials -n $reference_nonessentials $args cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - BAGEL2: \$( BAGEL.py version | grep -o 'Version: [0-9.]*' | awk '{print \$2}' | grep -v '^\$') + "${task.process}": + python: \$(python --version | sed 's/Python //g') + BAGEL2: \$( BAGEL.py version | grep -o 'Version: [0-9.]*' | awk '{print \$2}' | grep -v '^\$') END_VERSIONS """ diff --git a/modules/local/mageck/flutemle.nf b/modules/local/mageck/flutemle.nf new file mode 100644 index 00000000..e9efbaaa --- /dev/null +++ b/modules/local/mageck/flutemle.nf @@ -0,0 +1,28 @@ + +process MAGECK_FLUTEMLE { + tag "$prefix" + label 'process_high' + + conda "bioconda::bioconductor-mageckflute=2.2.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mageckflute:2.2.0--r42hdfd78af_0': + 'biocontainers/bioconductor-mageckflute:2.2.0--r42hdfd78af_0' }" + + input: + tuple val(meta), path(gene_summary) + + output: + tuple val(meta), path("MAGeCKFlute_*/Enrichment/*") , emit: enrich + tuple val(meta), path("MAGeCKFlute_*/QC/*") , emit: qc + tuple val(meta), path("MAGeCKFlute_*/Selection/*") , emit: select + tuple val(meta), path("MAGeCKFlute_*/PathwayView/*"), emit: pathwayview + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + args = task.ext.args ?: ' ' + prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" + template 'template_fluteMLE.R' +} diff --git a/modules/local/matricescreation.nf b/modules/local/matricescreation.nf new file mode 100644 index 00000000..86c00ee4 --- /dev/null +++ b/modules/local/matricescreation.nf @@ -0,0 +1,48 @@ +process MATRICESCREATION { + label 'process_single' + + conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' : + 'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }" + + input: + val(meta) + + output: + tuple val(meta), path("*.txt"), emit: design_matrix + + when: + task.ext.when == null || task.ext.when + + script: + meta.id = "${meta.treatment}_vs_${meta.reference}" + + """ + #!/usr/bin/env Rscript + #### author: Laurence Kuhlburger + #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text. + #### + + # Loop through each row in the data + control_samples <- unlist(strsplit('$meta.reference', ",")) + treatment_samples <- unlist(strsplit('$meta.treatment', ",")) + all_samples <- unique(c(control_samples, treatment_samples)) + name = paste0(gsub(',', '_', '$meta.treatment' ),"_vs_", gsub(',', '_','$meta.reference')) + design_matrix <- data.frame(matrix(0, nrow = length(all_samples), ncol = 3, + dimnames = list(all_samples, + c("Samples", "baseline", + name)))) + + # Set baseline and treatment values in the design matrix + design_matrix[, "Samples"] <- rownames(design_matrix) + design_matrix\$baseline <- 1 + design_matrix[treatment_samples, name] <- 1 + design_matrix[treatment_samples, paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(",","_",'$meta.reference'))] <- 1 + + # Print the design matrix to a file + output_file <- paste0(gsub(',', '_', '$meta.treatment' ),"_vs_",gsub(",","_",'$meta.reference'),".txt") + write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE) + + """ +} diff --git a/modules/local/venndiagram.nf b/modules/local/venndiagram.nf new file mode 100644 index 00000000..868e6ad7 --- /dev/null +++ b/modules/local/venndiagram.nf @@ -0,0 +1,61 @@ +process VENNDIAGRAM { + tag "${meta.treatment}_vs_${meta.reference}" + label 'process_low' + + + conda "bioconda::r-venndiagram=1.6.16" + container "ghcr.io/qbic-pipelines/rnadeseq:dev" + + input: + tuple val(meta), path(bagel_pr), path(gene_summary) + + output: + tuple val(meta), path("*.txt"), emit: common_list + tuple val(meta), path("*.png"), emit: venn_diagram + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.treatment}_vs_${meta.reference}" + + """ + #!/usr/bin/env Rscript + + #### author: Laurence Kuhlburger + #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text. 
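For a quick check of what `MATRICESCREATION` writes, here is a Groovy re-sketch of the R logic above for a hypothetical `treatment1,treatment2` vs `control1` contrast (sample names are invented for illustration; the real ones come from the meta map):

```groovy
// Hypothetical contrast values standing in for meta.treatment / meta.reference.
def treatment = 'treatment1,treatment2'
def reference = 'control1'

def treatments = treatment.split(',') as List
def controls   = reference.split(',') as List
def samples    = (controls + treatments).unique()
def contrast   = "${treatment.replace(',', '_')}_vs_${reference.replace(',', '_')}"

// Mirrors the R script: every sample gets baseline = 1, and the contrast
// column is 1 only for treatment samples.
println(['Samples', 'baseline', contrast].join('\t'))
samples.each { s ->
    println([s, 1, treatments.contains(s) ? 1 : 0].join('\t'))
}
```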
+ #### + #### produce a venn diagram + library(ggvenn) + mle = read.table('$gene_summary', sep = "\t", + header=TRUE) + bagel = read.table('$bagel_pr', sep = "\t", + header=TRUE) + + filtered_precision_recall <- subset(bagel, FDR < 0.1) + name <- gsub(",","_",paste0('${prefix}',".fdr")) + filtered_mageck_mle <- mle[mle[, name] < 0.1, ] + common_genes <- intersect(filtered_mageck_mle\$Gene, + filtered_precision_recall\$Gene) + data <- list(Bagel2 = filtered_precision_recall\$Gene, + MAGeCK_MLE = filtered_mageck_mle\$Gene) + + plot_test <- ggvenn(data) + ggsave("venn_bagel2_mageckmle.png",plot_test) + write.table(common_genes, paste0('${prefix}',"_common_genes_bagel_mle.txt"),sep = "\t", quote = FALSE, row.names=FALSE) + + #version + version_file_path <- "versions.yml" + version_ggvenn <- paste(unlist(packageVersion("ggvenn")), collapse = ".") + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" ggvenn: ", f, sep = "") + writeLines(version_ggvenn, f) + close(f) + + """ + + +} diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml index afc3ea87..d2796359 100644 --- a/modules/nf-core/bowtie2/align/environment.yml +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -4,6 +4,6 @@ channels: - bioconda - defaults dependencies: - - bioconda::bowtie2=2.4.4 - - bioconda::samtools=1.16.1 + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index e67f6a06..96a7027d 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -4,19 +4,24 @@ process BOWTIE2_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : - 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" input: tuple val(meta) , path(reads) tuple val(meta2), path(index) + tuple val(meta3), path(fasta) val save_unaligned val sort_bam output: - tuple val(meta), path("*.{bam,sam}"), emit: aligned + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true tuple val(meta), path("*.log") , emit: log - tuple val(meta), path("*fastq.gz") , emit: fastq, optional:true + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true path "versions.yml" , emit: versions when: @@ -39,7 +44,10 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ - def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? 
extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" """ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` @@ -52,8 +60,8 @@ process BOWTIE2_ALIGN { --threads $task.cpus \\ $unaligned \\ $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.${extension} - + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz @@ -76,12 +84,27 @@ process BOWTIE2_ALIGN { def prefix = task.ext.prefix ?: "${meta.id}" def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } """ touch ${prefix}.${extension} + ${create_index} touch ${prefix}.bowtie2.log - touch ${prefix}.unmapped_1.fastq.gz - touch ${prefix}.unmapped_2.fastq.gz + ${create_unmapped} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000..03f1d5e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..fdc1c59d --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..03aeaf9e --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], 
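The reworked `BOWTIE2_ALIGN` command above derives the output format from `ext.args2` and only passes `--reference` when writing CRAM. A standalone sketch of that selection logic; the `args2` and `fasta` values here are made up (the former matching `tests/cram_crai.config`):

```groovy
// Hypothetical inputs: args2 as set via ext.args2, fasta as the reference path.
def args2 = '--output-fmt cram --write-index'
def fasta = 'genome.fasta'

// Same pattern and matcher handling as the module: default to BAM unless an
// explicit --output-fmt/-O value is found in args2.
def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/
def extension_matcher = (args2 =~ extension_pattern)
def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : 'bam'

// CRAM output demands a FASTA reference for samtools.
def reference = fasta && extension == 'cram' ? "--reference ${fasta}" : ''
if (!fasta && extension == 'cram') throw new IllegalStateException('Fasta reference is required for CRAM output')

assert extension == 'cram'
assert reference == '--reference genome.fasta'
```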
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + 
process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: 
true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..028e7da6 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml "${task.process}": @@ -42,9 +53,19 @@ process BWA_MEM { """ stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 'sort' : 'view' + def extension = args2.contains("--output-fmt sam") ? "sam" : + args2.contains("--output-fmt cram") ? "cram": + sort_bam && args2.contains("-O cram")? "cram": + !sort_bam && args2.contains("-C") ? 
"cram": + "bam" """ - touch ${prefix}.bam + touch ${prefix}.${extension} + touch ${prefix}.csi + touch ${prefix}.crai cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/bwa/mem/meta.yml b/modules/nf-core/bwa/mem/meta.yml index 440fb1f9..1532c261 100644 --- a/modules/nf-core/bwa/mem/meta.yml +++ b/modules/nf-core/bwa/mem/meta.yml @@ -37,6 +37,10 @@ input: type: file description: BWA genome index files pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fasta,fa}" - sort_bam: type: boolean description: use samtools sort (true) or samtools view (false) @@ -46,6 +50,18 @@ output: type: file description: Output BAM file containing read alignments pattern: "*.{bam}" + - cram: + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + - csi: + type: file + description: Optional index file for BAM file + pattern: "*.{csi}" + - crai: + type: file + description: Optional index file for CRAM file + pattern: "*.{crai}" - versions: type: file description: File containing software versions @@ -53,6 +69,8 @@ output: authors: - "@drpatelh" - "@jeremy1805" + - "@matthdsm" maintainers: - "@drpatelh" - "@jeremy1805" + - "@matthdsm" diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test b/modules/nf-core/bwa/mem/tests/main.nf.test index b199bb70..463b76f8 100644 --- a/modules/nf-core/bwa/mem/tests/main.nf.test +++ b/modules/nf-core/bwa/mem/tests/main.nf.test @@ -5,6 +5,7 @@ nextflow_process { tag "modules" tag "bwa" tag "bwa/mem" + tag "bwa/index" script "../main.nf" process "BWA_MEM" @@ -17,7 +18,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -30,11 +31,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = false + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -42,7 +44,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -57,7 +66,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -70,11 +79,12 @@ nextflow_process { input[0] = [ [ id:'test', single_end:true ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = true + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true """ } } @@ -82,7 +92,14 @@ nextflow_process { then { 
assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -97,7 +114,7 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -110,12 +127,13 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = false + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -123,7 +141,14 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } ) } @@ -138,7 +163,56 @@ nextflow_process { """ input[0] = [ [id: 'test'], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Paired-End - no fasta") { + + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -151,12 +225,60 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[:],[]] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.cram, + process.out.csi, + 
process.out.crai, + process.out.versions, + file(process.out.bam[0][1]).name + ).match() + } + ) + } + + } + + test("Single-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] ] input[1] = BWA_INDEX.out.index - input[2] = true + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false """ } } @@ -164,9 +286,56 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } ) } + } + + test("Paired-end - stub") { + options "-stub" + setup { + run("BWA_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BWA_INDEX.out.index + input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false + """ + } + } + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } } } diff --git a/modules/nf-core/bwa/mem/tests/main.nf.test.snap b/modules/nf-core/bwa/mem/tests/main.nf.test.snap index ea3bfed4..038ee7b7 100644 --- a/modules/nf-core/bwa/mem/tests/main.nf.test.snap +++ b/modules/nf-core/bwa/mem/tests/main.nf.test.snap @@ -1,126 +1,140 @@ { "Single-End": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,3d43027d4163ada97911b814001511e5" - ] - ], - "1": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ], - "bam": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,3d43027d4163ada97911b814001511e5" - ] - ], - "versions": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-10-18T11:02:55.420631681" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:32.953673185" }, "Single-End Sort": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,61eac1213d2bf5e88e225e545010e9b8" - ] - ], - "1": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ], - "bam": [ - [ - { - "id": "test", - "single_end": true - }, - "test.bam:md5,61eac1213d2bf5e88e225e545010e9b8" - ] - ], - "versions": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + 
"versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-10-18T11:03:02.646869498" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:45.27066093" }, "Paired-End": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,809ccfe4300fa5005a9d0d4dc09b1a36" - ] - ], - "1": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,809ccfe4300fa5005a9d0d4dc09b1a36" - ] - ], - "versions": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-10-18T11:03:09.793041294" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:44:57.706852274" }, "Paired-End Sort": { "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2622f4380f992c505af7dab8c256313f" - ] - ], - "1": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ], - "bam": [ - [ - { - "id": "test", - "single_end": false - }, - "test.bam:md5,2622f4380f992c505af7dab8c256313f" - ] - ], - "versions": [ - "versions.yml:md5,809f4a8c7f0c8497a9099dab9d6cc71e" - ] - } + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" ], - "timestamp": "2023-10-18T11:04:43.662093286" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:10.376505036" + }, + "Single-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:07.182072398" + }, + "Paired-End - no fasta": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ], + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:45:53.813076501" + }, + "Paired-end - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,478b816fbd37871f5e8c617833d51d80" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-20T08:46:18.412916364" } } \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml index bff93add..8c69b121 100644 --- a/modules/nf-core/cat/fastq/environment.yml +++ b/modules/nf-core/cat/fastq/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf index 3d963784..f132b2ad 100644 --- a/modules/nf-core/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -76,5 +76,4 @@ process CAT_FASTQ { """ } } - } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test index f5f94182..a71dcb8d 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -1,3 +1,5 @@ +// NOTE The version snaps may not be consistant +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 nextflow_process { name "Test Process CAT_FASTQ" @@ -16,11 +18,11 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test', single_end:true ], // meta map 
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ] - ] + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -28,8 +30,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -42,13 +43,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -56,8 +57,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -70,11 +70,11 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -82,8 +82,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -96,13 +95,13 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:false ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -110,8 +109,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } @@ -124,10 +122,10 @@ nextflow_process { } process { """ - input[0] = [ + input[0] = Channel.of([ [ id:'test', single_end:true ], // meta map - [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) """ } } @@ -135,8 +133,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.reads).match() }, - { assert path(process.out.versions.get(0)).getText().contains("cat") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap index ec2342e5..43dfe28f 100644 --- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -1,78 +1,169 @@ { "test_cat_fastq_single_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:12.990284837" + "timestamp": "2024-01-17T17:30:39.816981" }, "test_cat_fastq_single_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:31.554568147" + "timestamp": "2024-01-17T17:32:35.229332" }, "test_cat_fastq_single_end_single_file": { "content": [ - [ - [ - { - "id": "test", - "single_end": true - }, - "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af" + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:49.629360033" + "timestamp": "2024-01-17T17:34:00.058829" }, 
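These cat/fastq snapshots grew because the tests now snapshot the whole `process.out` instead of only `process.out.reads`, so each channel is recorded twice: once under its positional index (`"0"`, `"1"`) and once under its emit name (`reads`, `versions`). A toy Groovy model of that structure, with checksums elided:

```groovy
import groovy.json.JsonOutput

// Toy model of process.out as nf-test serialises it: the positional index
// and the named emission both appear, which is why each channel shows up
// twice in the .snap entries above.
def out = [
    '0'       : [[[id: 'test', single_end: true], 'test.merged.fastq.gz:md5,<checksum>']],
    '1'       : ['versions.yml:md5,<checksum>'],
    'reads'   : [[[id: 'test', single_end: true], 'test.merged.fastq.gz:md5,<checksum>']],
    'versions': ['versions.yml:md5,<checksum>']
]
println JsonOutput.prettyPrint(JsonOutput.toJson(out))
```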
"test_cat_fastq_paired_end_same_name": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ [ - "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66", - "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-17T23:19:40.711617539" + "timestamp": "2024-01-17T17:33:33.031555" }, "test_cat_fastq_paired_end": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ [ - "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d", - "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda" + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" ] - ] + } ], - "timestamp": "2023-10-18T07:53:20.923560211" + "timestamp": "2024-01-17T17:32:02.270935" } } \ No newline at end of file diff --git a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff index 50dc1a76..daa9446f 100644 --- a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff +++ b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff @@ -1,12 +1,14 @@ Changes in module 'nf-core/crisprcleanr/normalize' --- modules/nf-core/crisprcleanr/normalize/main.nf +++ modules/nf-core/crisprcleanr/normalize/main.nf -@@ -8,12 +8,13 @@ +@@ -8,12 +8,15 @@ 'biocontainers/r-crisprcleanr:3.0.0--r42hdfd78af_1' }" input: - tuple val(meta), path(count_file), path(library_file) -+ tuple val(meta), path(count_file), val(library_file) ++ tuple val(meta), path(count_file) ++ val(library_value) ++ path(library_file) val(min_reads) val(min_targeted_genes) @@ -16,42 +18,58 @@ Changes in module 'nf-core/crisprcleanr/normalize' path "versions.yml", emit: versions when: -@@ -26,20 +27,32 @@ +@@ -26,20 +29,48 @@ """ #!/usr/bin/env Rscript library(CRISPRcleanR) - library <- read.delim('${library_file}', header=T,sep="\t") - row.names(library) <- library[["CODE"]] - normANDfcs <- ccr.NormfoldChanges('${count_file}',saveToFig = FALSE,min_reads=${min_reads},EXPname='${meta.id}', libraryAnnotation=library,display=FALSE) -- gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) -- correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta.id}') -- correctedCounts <- ccr.correctCounts('${meta.id}', + library(dplyr) -+ data('${library_file}') -+ count_file <- read.delim('${count_file}',header=T,sep = "\t") -+ count_file_to_normalize <- count_file %>% dplyr::left_join(get('${library_file}'), 
by=c("sgRNA"="Target.Context.Sequence"),multiple = "all") + -+ count_file_to_normalize <- count_file_to_normalize %>% -+ dplyr::select(colnames(count_file),CODE,-sgRNA) ++ print('${library_value}') ++ count_file <- read.delim('${count_file}',header=T,sep = "\t") ++ count_file <- count_file[!duplicated(count_file\$sgRNA), ] ++ if('${library_file}' == "") { ++ data('${library_value}') ++ library <- as.data.frame(get('${library_value}')) ++ #colnames(library) ++ #print(head(count_file)) ++ #print(head(library)) ++ count_file_to_normalize <- count_file %>% dplyr::left_join(library, by=c("sgRNA"="seq"),multiple = "all") ++ count_file_to_normalize <- count_file_to_normalize %>% ++ dplyr::select(colnames(count_file),CODE,-sgRNA) + -+ names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' -+ names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' -+ count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) ++ names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' ++ names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' ++ count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) ++ } else { ++ try(library <- read.delim('${library_file}',header=T,sep = ",")) ++ duplicates <- duplicated(library[, 1]) ++ unique_rows <- !duplicates ++ library <- library[unique_rows, , drop = FALSE] ++ rownames(library) = library[,1] ++ library = library[order(rownames(library)),] ++ library = library[,-1] ++ count_file_to_normalize <- count_file ++ } + -+ #crisprcleanr function -+ normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=get('${library_file}'),display=FALSE) -+ gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],get('${library_file}')) -+ correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta}') -+ correctedCounts <- ccr.correctCounts('${meta}', ++ normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) + gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) +- correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta.id}') +- correctedCounts <- ccr.correctCounts('${meta.id}', ++ correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='crisprcleanr') ++ correctedCounts <- ccr.correctCounts('crisprcleanr', normANDfcs[["norm_counts"]], correctedFCs, -- library, -+ get('${library_file}'), + library, minTargetedGenes=${min_targeted_genes}, OutDir='./') - write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") - +- write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") +- ++ write.table(correctedCounts, file=paste0("crisprcleanr","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") ++ + #version version_file_path <- "versions.yml" version_crisprcleanr <- paste(unlist(packageVersion("CRISPRcleanR")), collapse = ".") diff --git a/modules/nf-core/crisprcleanr/normalize/main.nf b/modules/nf-core/crisprcleanr/normalize/main.nf index 33e4ecd1..d8969379 100644 --- a/modules/nf-core/crisprcleanr/normalize/main.nf +++ b/modules/nf-core/crisprcleanr/normalize/main.nf @@ -8,7 +8,9 @@ process CRISPRCLEANR_NORMALIZE { 'biocontainers/r-crisprcleanr:3.0.0--r42hdfd78af_1' }" input: - 
tuple val(meta), path(count_file), val(library_file) + tuple val(meta), path(count_file) + val(library_value) + path(library_file) val(min_reads) val(min_targeted_genes) @@ -28,30 +30,46 @@ process CRISPRCLEANR_NORMALIZE { #!/usr/bin/env Rscript library(CRISPRcleanR) library(dplyr) - data('${library_file}') - count_file <- read.delim('${count_file}',header=T,sep = "\t") - count_file_to_normalize <- count_file %>% dplyr::left_join(get('${library_file}'), by=c("sgRNA"="Target.Context.Sequence"),multiple = "all") - count_file_to_normalize <- count_file_to_normalize %>% - dplyr::select(colnames(count_file),CODE,-sgRNA) + print('${library_value}') + count_file <- read.delim('${count_file}',header=T,sep = "\t") + count_file <- count_file[!duplicated(count_file\$sgRNA), ] + if('${library_file}' == "") { + data('${library_value}') + library <- as.data.frame(get('${library_value}')) + #colnames(library) + #print(head(count_file)) + #print(head(library)) + count_file_to_normalize <- count_file %>% dplyr::left_join(library, by=c("sgRNA"="seq"),multiple = "all") + count_file_to_normalize <- count_file_to_normalize %>% + dplyr::select(colnames(count_file),CODE,-sgRNA) - names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' - names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' - count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) + names(count_file_to_normalize)[names(count_file_to_normalize) == 'Gene'] <- 'gene' + names(count_file_to_normalize)[names(count_file_to_normalize) == 'CODE'] <- 'sgRNA' + count_file_to_normalize <- count_file_to_normalize %>% dplyr::select(sgRNA, gene, everything()) + } else { + try(library <- read.delim('${library_file}',header=T,sep = ",")) + duplicates <- duplicated(library[, 1]) + unique_rows <- !duplicates + library <- library[unique_rows, , drop = FALSE] + rownames(library) = library[,1] + library = library[order(rownames(library)),] + library = library[,-1] + count_file_to_normalize <- count_file + } - #crisprcleanr function - normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=get('${library_file}'),display=FALSE) - gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],get('${library_file}')) - correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='${meta}') - correctedCounts <- ccr.correctCounts('${meta}', + normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) + gwSortedFCs <- ccr.logFCs2chromPos(normANDfcs[["logFCs"]],library) + correctedFCs <- ccr.GWclean(gwSortedFCs,display=FALSE,label='crisprcleanr') + correctedCounts <- ccr.correctCounts('crisprcleanr', normANDfcs[["norm_counts"]], correctedFCs, - get('${library_file}'), + library, minTargetedGenes=${min_targeted_genes}, OutDir='./') - write.table(correctedCounts, file=paste0("${prefix}","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") - + write.table(correctedCounts, file=paste0("crisprcleanr","_norm_table.tsv"),row.names=FALSE,quote=FALSE,sep="\t") + #version version_file_path <- "versions.yml" version_crisprcleanr <- paste(unlist(packageVersion("CRISPRcleanR")), collapse = ".") diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index f0c63f69..00000000 --- 
a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: custom_dumpsoftwareversions -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.17 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 7685b33c..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5f15a5fd..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index e55b8d43..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import platform -from textwrap import dedent - -import yaml - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
<th> Process Name </th> <th> Software </th> <th> Version </th>
<td><samp>{process if (i == 0) else ''}</samp></td> <td><samp>{tool}</samp></td> <td><samp>{version}</samp></td>
</table>
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index eec1db10..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,38 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 4274ed57..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,27 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ], - "1": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "2": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "mqc_yml": [ - "software_versions_mqc.yml:md5,2570f4ba271ad08357b0d3d32a9cf84d" - ], - "versions": [ - "versions.yml:md5,3843ac526e762117eedf8825b40683df" - ], - "yml": [ - "software_versions.yml:md5,1c851188476409cda5752ce971b20b58" - ] - } - 
], - "timestamp": "2023-11-03T14:43:22.157011" - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24a..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/cutadapt/cutadapt.diff b/modules/nf-core/cutadapt/cutadapt.diff index a69969df..2816f8ac 100644 --- a/modules/nf-core/cutadapt/cutadapt.diff +++ b/modules/nf-core/cutadapt/cutadapt.diff @@ -1,44 +1,29 @@ Changes in module 'nf-core/cutadapt' --- modules/nf-core/cutadapt/main.nf +++ modules/nf-core/cutadapt/main.nf -@@ -8,11 +8,11 @@ - 'biocontainers/cutadapt:3.4--py39h38f01e4_1' }" +@@ -8,7 +8,7 @@ + 'biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" input: - tuple val(meta), path(reads) + tuple val(meta), path(reads), path(adapter_seq) output: -- tuple val(meta), path('*.trim.fastq.gz'), emit: reads -- tuple val(meta), path('*.log') , emit: log -+ tuple val(meta), path('*.trim.fastq.gz'), optional: true, emit: reads -+ tuple val(meta), path('*.log') , optional: true, emit: log - path "versions.yml" , emit: versions - - when: -@@ -21,14 +21,21 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" -+ if (adapter_seq != []) - """ - cutadapt \\ - --cores $task.cpus \\ - $args \\ -- $trimmed \\ -+ -o ${prefix}.trim.fastq.gz \\ + tuple val(meta), path('*.trim.fastq.gz'), emit: reads +@@ -30,12 +30,13 @@ + $trimmed \\ $reads \\ > ${prefix}.cutadapt.log -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ cutadapt: \$(cutadapt --version) -+ END_VERSIONS -+ """ -+ else -+ """ ++ cat <<-END_VERSIONS > versions.yml "${task.process}": cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +- ++ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" ************************************************************ diff --git a/modules/nf-core/cutadapt/environment.yml b/modules/nf-core/cutadapt/environment.yml index d32a8f97..288ea6f0 100644 --- a/modules/nf-core/cutadapt/environment.yml +++ b/modules/nf-core/cutadapt/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::cutadapt=3.4 + - bioconda::cutadapt=4.6 diff --git a/modules/nf-core/cutadapt/main.nf b/modules/nf-core/cutadapt/main.nf index 2c6bf763..128faa88 100644 --- a/modules/nf-core/cutadapt/main.nf +++ b/modules/nf-core/cutadapt/main.nf @@ -4,15 +4,15 @@ process CUTADAPT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cutadapt:3.4--py39h38f01e4_1' : - 'biocontainers/cutadapt:3.4--py39h38f01e4_1' }" + 'https://depot.galaxyproject.org/singularity/cutadapt:4.6--py39hf95cd2a_1' : + 'biocontainers/cutadapt:4.6--py39hf95cd2a_1' }" input: tuple val(meta), path(reads), path(adapter_seq) output: - tuple val(meta), path('*.trim.fastq.gz'), optional: true, emit: reads - tuple val(meta), path('*.log') , optional: true, emit: log + tuple val(meta), path('*.trim.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log path "versions.yml" , emit: versions when: @@ -21,13 +21,16 @@ process CUTADAPT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" """ cutadapt \\ + -Z \\ --cores $task.cpus \\ $args \\ - -o ${prefix}.trim.fastq.gz \\ + $trimmed \\ $reads \\ > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml "${task.process}": cutadapt: \$(cutadapt --version) diff --git a/modules/nf-core/cutadapt/tests/main.nf.test b/modules/nf-core/cutadapt/tests/main.nf.test new file mode 100644 index 00000000..b7ea6ef9 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test @@ -0,0 +1,69 @@ +nextflow_process { + + name "Test Process CUTADAPT" + script "../main.nf" + process "CUTADAPT" + tag "modules" + tag "modules_nfcore" + tag "cutadapt" + + test("sarscov2 Illumina single end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1) ==~ ".*.trim.fastq.gz" }, + { assert snapshot(process.out.versions).match("versions_single_end") }, + { assert snapshot(path(process.out.reads.get(0).get(1)).linesGzip[0]).match() } + ) + } + } + + test("sarscov2 Illumina paired-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.reads != null }, + { assert process.out.reads.get(0).get(1).get(0) ==~ ".*.1.trim.fastq.gz" }, + { assert process.out.reads.get(0).get(1).get(1) ==~ ".*.2.trim.fastq.gz" }, + { assert snapshot(path(process.out.reads.get(0).get(1).get(1)).linesGzip[0]).match() }, + { assert snapshot(process.out.versions).match("versions_paired_end") } + ) + } + } +} diff --git a/modules/nf-core/cutadapt/tests/main.nf.test.snap b/modules/nf-core/cutadapt/tests/main.nf.test.snap new file mode 100644 index 00000000..3df7389e --- /dev/null +++ b/modules/nf-core/cutadapt/tests/main.nf.test.snap @@ -0,0 +1,46 @@ +{ + "sarscov2 Illumina single end [fastq]": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:15.235936866" + }, + "sarscov2 Illumina paired-end [fastq]": { + "content": [ + "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/2" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"23.10.1" + }, + "timestamp": "2024-03-06T10:27:24.38468252" + }, + "versions_paired_end": { + "content": [ + [ + "versions.yml:md5,bc9892c68bfa7084ec5dbffbb9e8322f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:24.38799189" + }, + "versions_single_end": { + "content": [ + [ + "versions.yml:md5,bc9892c68bfa7084ec5dbffbb9e8322f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-06T10:27:15.219246449" + } +} \ No newline at end of file diff --git a/modules/nf-core/cutadapt/tests/nextflow.config b/modules/nf-core/cutadapt/tests/nextflow.config new file mode 100644 index 00000000..6c3b4253 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: CUTADAPT { + ext.args = '-q 25' + } + +} diff --git a/modules/nf-core/cutadapt/tests/tags.yml b/modules/nf-core/cutadapt/tests/tags.yml new file mode 100644 index 00000000..f64f9975 --- /dev/null +++ b/modules/nf-core/cutadapt/tests/tags.yml @@ -0,0 +1,2 @@ +cutadapt: + - modules/nf-core/cutadapt/** diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 50e59f2b..d79f1c86 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -25,6 +25,11 @@ process FASTQC { def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } def rename_to = old_new_pairs*.join(' ').join(' ') def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) + """ printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name @@ -33,11 +38,12 @@ process FASTQC { fastqc \\ $args \\ --threads $task.cpus \\ + --memory $fastqc_memory \\ $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ @@ -49,7 +55,7 @@ process FASTQC { cat <<-END_VERSIONS > versions.yml "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index 6437a144..70edae4d 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,24 +3,20 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" - } process { """ - input[0] = [ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) """ } } @@ -28,14 +24,189 @@ nextflow_process { then { assertAll ( { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. // looks like this:
<div id="header_filename">Mon 2 Oct 2023</div><br/>test.gz</div>
    // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_single") } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_paired") } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") } ) } } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_bam") } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_multiple") } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + + { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.html.collect { file(it[1]).getName() } + + process.out.zip.collect { file(it[1]).getName() } + + process.out.versions ).match("fastqc_stub") } + ) + } + } + } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32ce..86f7c311 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,88 @@ { - "versions": { + "fastqc_versions_interleaved": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:07.293713" + }, + "fastqc_stub": { + "content": [ + [ + "test.html", + "test.zip", + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:31:01.425198" + }, + "fastqc_versions_multiple": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:40:55.797907" + }, + "fastqc_versions_bam": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + 
"timestamp": "2024-01-31T17:40:26.795862" + }, + "fastqc_versions_single": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:27.043675" + }, + "fastqc_versions_paired": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:39:47.584191" + }, + "fastqc_versions_custom_prefix": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-31T17:41:14.576531" } } \ No newline at end of file diff --git a/modules/nf-core/mageck/count/environment.yml b/modules/nf-core/mageck/count/environment.yml index 0729b284..c7221d6f 100644 --- a/modules/nf-core/mageck/count/environment.yml +++ b/modules/nf-core/mageck/count/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::mageck=0.5.9 + - bioconda::mageck=0.5.9.5 diff --git a/modules/nf-core/mageck/count/mageck-count.diff b/modules/nf-core/mageck/count/mageck-count.diff index 9becf832..fa342b5a 100644 --- a/modules/nf-core/mageck/count/mageck-count.diff +++ b/modules/nf-core/mageck/count/mageck-count.diff @@ -1,57 +1,49 @@ Changes in module 'nf-core/mageck/count' --- modules/nf-core/mageck/count/main.nf +++ modules/nf-core/mageck/count/main.nf -@@ -1,6 +1,6 @@ +@@ -1,6 +1,7 @@ process MAGECK_COUNT { tag "$meta.id" - label 'process_medium' + label 'process_high' ++ conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - -@@ -8,13 +8,16 @@ - 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" +@@ -8,12 +9,15 @@ + 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: - tuple val(meta), path(inputfile) + tuple val(meta), path(fastq1), path(fastq2) - path(library) output: -- tuple val(meta), path("*count*.txt"), emit: count -+ tuple val(meta), path("*count.txt"), emit: count +- tuple val(meta), path("*count.txt") , emit: count ++ tuple val(meta), path("*count.txt") , emit: count tuple val(meta), path("*.count_normalized.txt"), emit: norm -- path "versions.yml" , emit: versions -+ tuple val(meta), path("*.countsummary.txt"), emit: summary ++ tuple val(meta), path("*.countsummary.txt") , emit: summary + tuple val(meta), path("*.count_normalized.txt"), emit: normalized -+ tuple val(meta), path("*.log"), emit: logs -+ path "versions.yml", emit: versions ++ tuple val(meta), path("*.log") , emit: logs + path "versions.yml" , emit: versions when: - task.ext.when == null || task.ext.when -@@ -22,9 +25,15 @@ - +@@ -22,8 +26,13 @@ script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def input_file = ("$inputfile".endsWith(".fastq.gz")) ? "--fastq ${inputfile}" : "-k ${inputfile}" - def sample_label = ("$inputfile".endsWith(".fastq.gz") || "$inputfile".endsWith(".fq.gz")) ? "--sample-label ${meta.id}" : '' -- -+ // def input_file = ("$inputfile".endsWith(".fastq.gz") || "$inputfile".endsWith(".fq.gz")) ? "--fastq ${inputfile}" : "-k ${inputfile}" + def sample_label = ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) ? 
"--sample-label ${meta.id}" : '' + -+ if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz"))) { ++ if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) || "$fastq1".endsWith(".bam")) { + input = "--fastq $fastq1" - + } else { + input = "--fastq $fastq1 --fastq-2 $fastq2" + } """ mageck \\ - count \\ @@ -32,7 +41,7 @@ -l $library \\ -n $prefix \\ @@ -61,5 +53,29 @@ Changes in module 'nf-core/mageck/count' cat <<-END_VERSIONS > versions.yml - +@@ -43,14 +52,21 @@ + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def input_file = ("$inputfile".endsWith(".fastq.gz")) ? "--fastq ${inputfile}" : "-k ${inputfile}" +- def sample_label = ("$inputfile".endsWith(".fastq.gz") || "$inputfile".endsWith(".fq.gz")) ? "--sample-label ${meta.id}" : '' ++ def sample_label = ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) ? "--sample-label ${meta.id}" : '' ++ ++ if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) || "$fastq1".endsWith(".bam")) { ++ input = "--fastq $fastq1" ++ } else { ++ input = "--fastq $fastq1 --fastq-2 $fastq2" ++ } + """ + touch ${prefix}.count.txt + touch ${prefix}.count_normalized.txt ++ touch ${prefix}.countsummary.txt ++ touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mageck: \$(mageck -v) + END_VERSIONS + """ +-} ++} ************************************************************ diff --git a/modules/nf-core/mageck/count/main.nf b/modules/nf-core/mageck/count/main.nf index d48d9a30..76a0b186 100644 --- a/modules/nf-core/mageck/count/main.nf +++ b/modules/nf-core/mageck/count/main.nf @@ -2,10 +2,11 @@ process MAGECK_COUNT { tag "$meta.id" label 'process_high' + conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mageck:0.5.9--py37h6bb024c_0': - 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" + 'https://depot.galaxyproject.org/singularity/mageck:0.5.9.5--py39h1f90b4d_3': + 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: tuple val(meta), path(fastq1), path(fastq2) @@ -25,10 +26,9 @@ process MAGECK_COUNT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - // def input_file = ("$inputfile".endsWith(".fastq.gz") || "$inputfile".endsWith(".fq.gz")) ? "--fastq ${inputfile}" : "-k ${inputfile}" def sample_label = ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) ? "--sample-label ${meta.id}" : '' - if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz"))) { + if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) || "$fastq1".endsWith(".bam")) { input = "--fastq $fastq1" } else { input = "--fastq $fastq1 --fastq-2 $fastq2" @@ -49,4 +49,24 @@ process MAGECK_COUNT { mageck: \$(mageck -v) END_VERSIONS """ -} + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def sample_label = ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) ? 
"--sample-label ${meta.id}" : '' + + if (meta.single_end && ("$fastq1".endsWith(".fastq.gz") || "$fastq1".endsWith(".fq.gz")) || "$fastq1".endsWith(".bam")) { + input = "--fastq $fastq1" + } else { + input = "--fastq $fastq1 --fastq-2 $fastq2" + } + """ + touch ${prefix}.count.txt + touch ${prefix}.count_normalized.txt + touch ${prefix}.countsummary.txt + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mageck: \$(mageck -v) + END_VERSIONS + """ +} \ No newline at end of file diff --git a/modules/nf-core/mageck/count/meta.yml b/modules/nf-core/mageck/count/meta.yml index 3e3ad475..6450de0a 100644 --- a/modules/nf-core/mageck/count/meta.yml +++ b/modules/nf-core/mageck/count/meta.yml @@ -6,12 +6,12 @@ keywords: - sgRNA - CRISPR-Cas9 tools: - - "mageck": + - mageck: description: "MAGeCK (Model-based Analysis of Genome-wide CRISPR-Cas9 Knockout), an algorithm to process, QC, analyze and visualize CRISPR screening data." homepage: "https://sourceforge.net/p/mageck/wiki/Home/" documentation: "https://sourceforge.net/p/mageck/wiki/demo/#step-4-run-the-mageck-count-command" doi: "10.1186/s13059-014-0554-4" - licence: "['BSD License']" + licence: ["BSD License"] input: - meta: type: map diff --git a/modules/nf-core/mageck/count/tests/main.nf.test b/modules/nf-core/mageck/count/tests/main.nf.test new file mode 100644 index 00000000..fbfeaeb6 --- /dev/null +++ b/modules/nf-core/mageck/count/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process MAGECK_COUNT" + script "../main.nf" + process "MAGECK_COUNT" + + tag "modules" + tag "modules_nfcore" + tag "mageck" + tag "mageck/count" + + test("mus_musculus - fastq") { + + when { + process { + """ + input[0] = [ [ id:'test,test2', single_end:true] , // meta map + [file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/ERR376998.small.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/ERR376999.small.fastq.gz', checkIfExists: true)] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/yusa_library.csv') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("mus_musculus - fastq - stub" ) { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test,test2', single_end:true] , // meta map + [file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/ERR376998.small.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/ERR376999.small.fastq.gz', checkIfExists: true)] + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/yusa_library.csv') + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() + } + ) + } + + } +} diff --git a/modules/nf-core/mageck/count/tests/main.nf.test.snap b/modules/nf-core/mageck/count/tests/main.nf.test.snap new file mode 100644 index 00000000..31dbc74b --- /dev/null +++ b/modules/nf-core/mageck/count/tests/main.nf.test.snap @@ -0,0 +1,100 @@ +{ + "mus_musculus - fastq": { + "content": [ + { + "0": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count.txt:md5,2759bdc010998f863518cf7a2df812dc" + ] + ], + "1": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count_normalized.txt:md5,d79f3863168ee0f0750a59d94f70ce1a" + ] + ], + "2": [ + "versions.yml:md5,8fd54f61f3c77976003f79397f6b12d9" + 
], + "count": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count.txt:md5,2759bdc010998f863518cf7a2df812dc" + ] + ], + "norm": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count_normalized.txt:md5,d79f3863168ee0f0750a59d94f70ce1a" + ] + ], + "versions": [ + "versions.yml:md5,8fd54f61f3c77976003f79397f6b12d9" + ] + } + ], + "timestamp": "2024-06-14T17:31:00.990366" + }, + "mus_musculus - fastq - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count_normalized.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,8fd54f61f3c77976003f79397f6b12d9" + ], + "count": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "norm": [ + [ + { + "id": "test,test2", + "single_end": true + }, + "test,test2.count_normalized.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,8fd54f61f3c77976003f79397f6b12d9" + ] + } + ], + "timestamp": "2024-06-14T17:31:12.173269" + } +} \ No newline at end of file diff --git a/modules/nf-core/mageck/count/tests/tags.yml b/modules/nf-core/mageck/count/tests/tags.yml new file mode 100644 index 00000000..b1ee5528 --- /dev/null +++ b/modules/nf-core/mageck/count/tests/tags.yml @@ -0,0 +1,2 @@ +mageck/count: + - "modules/nf-core/mageck/count/**" diff --git a/modules/nf-core/mageck/mle/environment.yml b/modules/nf-core/mageck/mle/environment.yml index 27d8c762..ed2cf001 100644 --- a/modules/nf-core/mageck/mle/environment.yml +++ b/modules/nf-core/mageck/mle/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::mageck=0.5.9 + - bioconda::mageck=0.5.9.5 diff --git a/modules/nf-core/mageck/mle/mageck-mle.diff b/modules/nf-core/mageck/mle/mageck-mle.diff index f5d5c324..9cb169b0 100644 --- a/modules/nf-core/mageck/mle/mageck-mle.diff +++ b/modules/nf-core/mageck/mle/mageck-mle.diff @@ -1,23 +1,45 @@ Changes in module 'nf-core/mageck/mle' --- modules/nf-core/mageck/mle/main.nf +++ modules/nf-core/mageck/mle/main.nf -@@ -1,6 +1,6 @@ - process MAGECK_MLE { - tag "$meta.id" -- label 'process_medium' -+ label 'process_high' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -8,8 +8,7 @@ - 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" + 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: - tuple val(meta), path(count_table) - path(design_matrix) -+ tuple val(meta), path(count_table), path(design_matrix) ++ tuple val(meta), path(design_matrix), path(count_table) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary - +@@ -21,7 +20,8 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" ++ prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" ++ def design_command = design_matrix ? 
"-d $design_matrix" : '' + + """ + mageck \\ +@@ -29,8 +29,9 @@ + $args \\ + --threads $task.cpus \\ + -k $count_table \\ +- -d $design_matrix \\ +- -n $prefix ++ -n $prefix \\ ++ $design_command ++ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -49,6 +50,5 @@ + mageck: \$(mageck -v) + END_VERSIONS + """ +- +- +-} ++ ++} ************************************************************ diff --git a/modules/nf-core/mageck/mle/main.nf b/modules/nf-core/mageck/mle/main.nf index 75ff06f2..fac59c23 100644 --- a/modules/nf-core/mageck/mle/main.nf +++ b/modules/nf-core/mageck/mle/main.nf @@ -1,14 +1,14 @@ process MAGECK_MLE { tag "$meta.id" - label 'process_high' + label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mageck:0.5.9--py37h6bb024c_0': - 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" + 'https://depot.galaxyproject.org/singularity/mageck:0.5.9.5--py39h1f90b4d_3': + 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: - tuple val(meta), path(count_table), path(design_matrix) + tuple val(meta), path(design_matrix), path(count_table) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary @@ -20,7 +20,8 @@ process MAGECK_MLE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" + def design_command = design_matrix ? "-d $design_matrix" : '' """ mageck \\ @@ -28,12 +29,26 @@ process MAGECK_MLE { $args \\ --threads $task.cpus \\ -k $count_table \\ - -d $design_matrix \\ - -n $prefix + -n $prefix \\ + $design_command + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mageck: \$(mageck -v) + END_VERSIONS + """ + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.gene_summary.txt + touch ${prefix}.sgrna_summary.txt cat <<-END_VERSIONS > versions.yml "${task.process}": mageck: \$(mageck -v) END_VERSIONS """ -} + +} \ No newline at end of file diff --git a/modules/nf-core/mageck/mle/meta.yml b/modules/nf-core/mageck/mle/meta.yml index bdb3d0cb..0b944aaf 100644 --- a/modules/nf-core/mageck/mle/meta.yml +++ b/modules/nf-core/mageck/mle/meta.yml @@ -11,7 +11,7 @@ tools: documentation: "https://sourceforge.net/p/mageck/wiki/Home/" tool_dev_url: "https://bitbucket.org/liulab/mageck/src" doi: "10.1186/s13059-015-0843-6" - licence: "['BSD License']" + licence: ["BSD"] input: - meta: type: map diff --git a/modules/nf-core/mageck/mle/tests/main.nf.test b/modules/nf-core/mageck/mle/tests/main.nf.test new file mode 100644 index 00000000..5b20fd44 --- /dev/null +++ b/modules/nf-core/mageck/mle/tests/main.nf.test @@ -0,0 +1,67 @@ +nextflow_process { + + name "Test Process MAGECK_MLE" + script "../main.nf" + process "MAGECK_MLE" + + tag "modules" + tag "modules_nfcore" + tag "mageck" + tag "mageck/mle" + + test("mageck_mle - csv") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['mus_musculus']['csv']['count_table'], checkIfExists: true) + ] + input[1] = + file(params.test_data['mus_musculus']['txt']['design_matrix'], + checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match() }, + { assert file(process.out.gene_summary[0][1]).readLines().first().contains("Gene") }, + { assert file(process.out.sgrna_summary[0][1]).exists() } + ) + + } + + 
} + + test("mageck_mle - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['mus_musculus']['csv']['count_table'], checkIfExists: true) + ] + input[1] = + file(params.test_data['mus_musculus']['txt']['design_matrix'], + checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mageck/mle/tests/main.nf.test.snap b/modules/nf-core/mageck/mle/tests/main.nf.test.snap new file mode 100644 index 00000000..6fb2101d --- /dev/null +++ b/modules/nf-core/mageck/mle/tests/main.nf.test.snap @@ -0,0 +1,67 @@ +{ + "mageck_mle - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sgrna_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,d83f4ee1ef0e972c5ea9a8deee7ba3b3" + ], + "gene_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sgrna_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sgrna_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d83f4ee1ef0e972c5ea9a8deee7ba3b3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T13:27:46.709493836" + }, + "mageck_mle - csv": { + "content": [ + [ + "versions.yml:md5,d83f4ee1ef0e972c5ea9a8deee7ba3b3" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-04T10:55:23.297715246" + } +} \ No newline at end of file diff --git a/modules/nf-core/mageck/mle/tests/tags.yml b/modules/nf-core/mageck/mle/tests/tags.yml new file mode 100644 index 00000000..97929cb1 --- /dev/null +++ b/modules/nf-core/mageck/mle/tests/tags.yml @@ -0,0 +1,2 @@ +mageck/mle: + - "modules/nf-core/mageck/mle/**" diff --git a/modules/nf-core/mageck/test/environment.yml b/modules/nf-core/mageck/test/environment.yml index 622038ac..1d056e5a 100644 --- a/modules/nf-core/mageck/test/environment.yml +++ b/modules/nf-core/mageck/test/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::mageck=0.5.9 + - bioconda::mageck=0.5.9.5 diff --git a/modules/nf-core/mageck/test/mageck-test.diff b/modules/nf-core/mageck/test/mageck-test.diff index e4be2cfc..0bab2ebc 100644 --- a/modules/nf-core/mageck/test/mageck-test.diff +++ b/modules/nf-core/mageck/test/mageck-test.diff @@ -8,21 +8,22 @@ Changes in module 'nf-core/mageck/test' label 'process_medium' conda "${moduleDir}/environment.yml" -@@ -14,6 +14,8 @@ +@@ -13,7 +13,9 @@ + output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary tuple val(meta), path("*.sgrna_summary.txt") , emit: sgrna_summary - tuple val(meta), path("*.R") , emit: r_script +- tuple val(meta), path("*.R") , emit: r_script, optional: true ++ tuple val(meta), path("*.R") , emit: r_script + tuple val(meta), path("*.Rnw") , emit: r_summary + tuple val(meta), path("*.log") , emit: logs path "versions.yml" , emit: versions when: -@@ -21,14 +23,17 @@ - +@@ -22,13 +24,16 @@ script: def args = task.ext.args ?: '' -+ def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" ++ def args2 = task.ext.args2 ?: '' """ mageck \\ @@ -34,7 +35,7 @@ Changes in 
module 'nf-core/mageck/test' - -n $prefix + -n ${meta.treatment}_${meta.reference} - cat <<-END_VERSIONS > versions.yml + "${task.process}": ************************************************************ diff --git a/modules/nf-core/mageck/test/main.nf b/modules/nf-core/mageck/test/main.nf index 8f954b80..2036ced6 100644 --- a/modules/nf-core/mageck/test/main.nf +++ b/modules/nf-core/mageck/test/main.nf @@ -4,8 +4,8 @@ process MAGECK_TEST { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mageck:0.5.9--py37h6bb024c_0': - 'biocontainers/mageck:0.5.9--py37h6bb024c_0' }" + 'https://depot.galaxyproject.org/singularity/mageck:0.5.9.5--py39h1f90b4d_3': + 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: tuple val(meta), path(count_table) @@ -23,8 +23,8 @@ process MAGECK_TEST { script: def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def args2 = task.ext.args2 ?: '' """ mageck \\ @@ -35,6 +35,19 @@ process MAGECK_TEST { -k $count_table \\ -n ${meta.treatment}_${meta.reference} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mageck: \$(mageck -v) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.gene_summary.txt + touch ${prefix}.sgrna_summary.txt + touch ${prefix}.R cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/mageck/test/meta.yml b/modules/nf-core/mageck/test/meta.yml index 4749d4d7..d0d93f33 100644 --- a/modules/nf-core/mageck/test/meta.yml +++ b/modules/nf-core/mageck/test/meta.yml @@ -5,13 +5,13 @@ keywords: - rra - CRISPR tools: - - "mageck": + - mageck: description: "MAGeCK (Model-based Analysis of Genome-wide CRISPR-Cas9 Knockout), an algorithm to process, QC, analyze and visualize CRISPR screening data." 
homepage: "https://sourceforge.net/p/mageck/wiki/Home/#mle" documentation: "https://sourceforge.net/p/mageck/wiki/Home/" tool_dev_url: "https://bitbucket.org/liulab/mageck/src" doi: "10.1186/s13059-015-0843-6" - licence: "['BSD License']" + licence: ["BSD License"] input: - meta: type: map diff --git a/modules/nf-core/mageck/test/tests/main.nf.test b/modules/nf-core/mageck/test/tests/main.nf.test new file mode 100644 index 00000000..45cb3734 --- /dev/null +++ b/modules/nf-core/mageck/test/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process MAGECK_TEST" + script "../main.nf" + process "MAGECK_TEST" + + tag "modules" + tag "modules_nfcore" + tag "mageck" + tag "mageck/test" + + test("mageck-test - csv") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/count_table.csv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.gene_summary.get(0).get(1)).readLines()[0], + file(process.out.sgrna_summary.get(0).get(1)).readLines()[0], + file(process.out.r_script.get(0).get(1)).readLines()[0] + ).match() + } + ) + } + + } + + test("mageck-test-day0 - csv") { + + config "./nextflow-day0_label.config" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/count_table.csv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.versions, + file(process.out.gene_summary.get(0).get(1)[0]).readLines()[0], + file(process.out.sgrna_summary.get(0).get(1)[0]).readLines()[0] + ).match() + } + ) + } + + } + + test("mageck-test - csv - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/mus_musculus/mageck/count_table.csv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mageck/test/tests/main.nf.test.snap b/modules/nf-core/mageck/test/tests/main.nf.test.snap new file mode 100644 index 00000000..3b69c697 --- /dev/null +++ b/modules/nf-core/mageck/test/tests/main.nf.test.snap @@ -0,0 +1,102 @@ +{ + "mageck-test - csv - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sgrna_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,d63cf999d719137c7310a3f5b4e03fd8" + ], + "gene_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.gene_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "r_script": [ + [ + { + "id": "test", + "single_end": false + }, + "test.R:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sgrna_summary": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sgrna_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,d63cf999d719137c7310a3f5b4e03fd8" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.07.0" + }, + "timestamp": "2024-06-17T15:11:41.655784" + }, + "mageck-test - csv": { + "content": [ + [ + "versions.yml:md5,d63cf999d719137c7310a3f5b4e03fd8" + ], + "id\tnum\tneg|score\tneg|p-value\tneg|fdr\tneg|rank\tneg|goodsgrna\tneg|lfc\tpos|score\tpos|p-value\tpos|fdr\tpos|rank\tpos|goodsgrna\tpos|lfc", + "sgrna\tGene\tcontrol_count\ttreatment_count\tcontrol_mean\ttreat_mean\tLFC\tcontrol_var\tadj_var\tscore\tp.low\tp.high\tp.twosided\tFDR\thigh_in_treatment", + "pdf(file='test.pdf',width=4.5,height=4.5);" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.07.0" + }, + "timestamp": "2024-06-17T15:11:06.123141" + }, + "mageck-test-day0 - csv": { + "content": [ + [ + "versions.yml:md5,d63cf999d719137c7310a3f5b4e03fd8" + ], + "id\tnum\tneg|score\tneg|p-value\tneg|fdr\tneg|rank\tneg|goodsgrna\tneg|lfc\tpos|score\tpos|p-value\tpos|fdr\tpos|rank\tpos|goodsgrna\tpos|lfc", + "sgrna\tGene\tcontrol_count\ttreatment_count\tcontrol_mean\ttreat_mean\tLFC\tcontrol_var\tadj_var\tscore\tp.low\tp.high\tp.twosided\tFDR\thigh_in_treatment" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.07.0" + }, + "timestamp": "2024-06-17T15:11:25.915572" + } +} \ No newline at end of file diff --git a/modules/nf-core/mageck/test/tests/nextflow-day0_label.config b/modules/nf-core/mageck/test/tests/nextflow-day0_label.config new file mode 100644 index 00000000..4a33e5fb --- /dev/null +++ b/modules/nf-core/mageck/test/tests/nextflow-day0_label.config @@ -0,0 +1,6 @@ +process { + + withName: 'MAGECK_TEST' { + ext.args = "--day0-label 'HL60.initial' " + } +} diff --git a/modules/nf-core/mageck/test/tests/nextflow.config b/modules/nf-core/mageck/test/tests/nextflow.config new file mode 100644 index 00000000..48f87585 --- /dev/null +++ b/modules/nf-core/mageck/test/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + withName: 'MAGECK_TEST' { + ext.args = "-t 'HL60.initial' " + } +} diff --git a/modules/nf-core/mageck/test/tests/tags.yml b/modules/nf-core/mageck/test/tests/tags.yml new file mode 100644 index 00000000..1910c717 --- /dev/null +++ b/modules/nf-core/mageck/test/tests/tags.yml @@ -0,0 +1,2 @@ +mageck/test: + - "modules/nf-core/mageck/test/**" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index 60b9a8bf..051ca8ef 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -4,5 +4,6 @@ channels: - bioconda - defaults dependencies: - - bioconda::minimap2=2.24 - - bioconda::samtools=1.14 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 54683a2c..0b1ae562 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -1,12 +1,12 @@ process MINIMAP2_ALIGN { tag "$meta.id" - label 'process_medium' + label 'process_high' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' }" input: tuple val(meta), path(reads), path(reference) @@ -17,28 +17,43 @@ process MINIMAP2_ALIGN { output: tuple val(meta), path("*.paf"), optional: true, emit: paf tuple val(meta), path("*.bam"), optional: true, emit: bam + tuple val(meta), path("*.csi"), optional: true, emit: csi path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? "-a | samtools sort | samtools view -@ ${task.cpus} -b -h -o ${prefix}.bam" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' """ minimap2 \\ $args \\ -t $task.cpus \\ - "${reference ?: reads}" \\ - "$reads" \\ + ${reference ?: reads} \\ + $reads \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + """ + touch $output_file + touch ${prefix}.csi + cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 6faf1573..13a12457 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -1,4 +1,17 @@ Changes in module 'nf-core/minimap2/align' +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -9,8 +9,7 @@ + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' }" + + input: +- tuple val(meta), path(reads) +- tuple val(meta2), path(reference) ++ tuple val(meta), path(reads), path(reference) + val bam_format + val cigar_paf_format + val cigar_bam + --- modules/nf-core/minimap2/align/meta.yml +++ modules/nf-core/minimap2/align/meta.yml @@ -25,11 +25,6 @@ @@ -14,17 +27,4 @@ Changes in module 'nf-core/minimap2/align' type: file description: | ---- modules/nf-core/minimap2/align/main.nf -+++ modules/nf-core/minimap2/align/main.nf -@@ -9,8 +9,7 @@ - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:1679e915ddb9d6b4abda91880c4b48857d471bd8-0' }" - - input: -- tuple val(meta), path(reads) -- tuple val(meta2), path(reference) -+ tuple val(meta), path(reads), path(reference) - val bam_format - val cigar_paf_format - val cigar_bam - ************************************************************ diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 00000000..83cceeab --- /dev/null +++ 
b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,181 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.paf[0][1]).name, + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} \ No newline at end of file diff --git 
a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..19a8f204 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,69 @@ +{ + "sarscov2 - fastq, fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:18.939731126" + }, + "sarscov2 - fastq, fasta, true, false, false - stub": { + "content": [ + "test.bam", + "test.csi", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:34.275879844" + }, + "sarscov2 - fastq, fasta, false, false, false - stub": { + "content": [ + "test.paf", + "test.csi", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:39.227958138" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:24.265054877" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + "test.bam", + [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:14:29.27901773" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 00000000..39dba374 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml index 2a66e410..8a912a12 100644 --- a/modules/nf-core/minimap2/index/environment.yml +++ b/modules/nf-core/minimap2/index/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::minimap2=2.24 + - bioconda::minimap2=2.28 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf index 45e1cec0..38320214 100644 --- a/modules/nf-core/minimap2/index/main.nf +++ b/modules/nf-core/minimap2/index/main.nf @@ -1,11 +1,11 @@ process MINIMAP2_INDEX { - label 'process_medium' + label 'process_low' // Note: the versions here need to match the versions used in minimap2/align conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/minimap2:2.24--h7132678_1' : - 'biocontainers/minimap2:2.24--h7132678_1' }" + 'https://depot.galaxyproject.org/singularity/minimap2:2.28--he4a0461_0' : + 'biocontainers/minimap2:2.28--he4a0461_0' }" input: tuple val(meta), path(fasta) @@ -31,4 +31,14 @@ process MINIMAP2_INDEX { minimap2: \$(minimap2 --version 2>&1) END_VERSIONS """ + + stub: + """ + touch ${fasta.baseName}.mmi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ } diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test new file mode 100644 index 00000000..97840ff7 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process MINIMAP2_INDEX" + script "../main.nf" + process "MINIMAP2_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/index" + + test("minimap2 index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap new file mode 100644 index 00000000..0b098828 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ], + "index": [ + [ + { + "id": "test_ref" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,0fced0ee8015e7f50b82566e3db8f7b0" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T11:46:30.000058092" + }, + "minimap2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,2f8340380c6741e9261a284262a90bde" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-05T10:58:29.828187662" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/tests/tags.yml b/modules/nf-core/minimap2/index/tests/tags.yml new file mode 100644 index 00000000..e5ef8e19 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/index: + - modules/nf-core/minimap2/index/** diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index d2a9f21a..ca39fb67 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -4,4 +4,4 @@ channels: - bioconda - defaults dependencies: - - bioconda::multiqc=1.17 + - bioconda::multiqc=1.21 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 2bbc3983..47ac352f 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,8 +3,8 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ 
workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.17--pyhdfd78af_0' : - 'biocontainers/multiqc:1.17--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : + 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" @@ -25,12 +25,14 @@ process MULTIQC { def args = task.ext.args ?: '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' """ multiqc \\ --force \\ $args \\ $config \\ $extra_config \\ + $logo \\ . cat <<-END_VERSIONS > versions.yml @@ -41,7 +43,7 @@ process MULTIQC { stub: """ - touch multiqc_data + mkdir multiqc_data touch multiqc_plots touch multiqc_report.html diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index f1aa660e..45a9bc35 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,4 +1,3 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: multiqc description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test new file mode 100644 index 00000000..f1c4242e --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process MULTIQC" + script "../main.nf" + process "MULTIQC" + + tag "modules" + tag "modules_nfcore" + tag "multiqc" + + test("sarscov2 single-end [fastqc]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_single") } + ) + } + + } + + test("sarscov2 single-end [fastqc] [config]") { + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, + { assert process.out.data[0] ==~ ".*/multiqc_data" }, + { assert snapshot(process.out.versions).match("multiqc_versions_config") } + ) + } + } + + test("sarscov2 single-end [fastqc] - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.report.collect { file(it).getName() } + + process.out.data.collect { file(it).getName() } + + process.out.plots.collect { file(it).getName() } + + process.out.versions ).match("multiqc_stub") } + ) + } + + 
} +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap new file mode 100644 index 00000000..bfebd802 --- /dev/null +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -0,0 +1,41 @@ +{ + "multiqc_versions_single": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:48:55.657331" + }, + "multiqc_stub": { + "content": [ + [ + "multiqc_report.html", + "multiqc_data", + "multiqc_plots", + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:49.071937" + }, + "multiqc_versions_config": { + "content": [ + [ + "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-29T08:49:25.457567" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml new file mode 100644 index 00000000..bea6c0d3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/tags.yml @@ -0,0 +1,2 @@ +multiqc: + - modules/nf-core/multiqc/** diff --git a/modules/nf-core/racon/tests/main.nf.test b/modules/nf-core/racon/tests/main.nf.test new file mode 100644 index 00000000..1f97fb95 --- /dev/null +++ b/modules/nf-core/racon/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process RACON" + script "../main.nf" + process "RACON" + + tag "modules" + tag "modules_nfcore" + tag "racon" + + test("single-end - bacteroides_fragilis - [fastq_gz, fna_gz, paf]") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true), + file(params.test_data['bacteroides_fragilis']['genome']['genome_paf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} \ No newline at end of file diff --git a/modules/nf-core/racon/tests/main.nf.test.snap b/modules/nf-core/racon/tests/main.nf.test.snap new file mode 100644 index 00000000..1ac00d5d --- /dev/null +++ b/modules/nf-core/racon/tests/main.nf.test.snap @@ -0,0 +1,37 @@ +{ + "single-end - bacteroides_fragilis - [fastq_gz, fna_gz, paf]": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_assembly_consensus.fasta.gz:md5,e610bf2e1df1701b88675e36a844baf9" + ] + ], + "1": [ + "versions.yml:md5,7eeab4e50acdc04c648149c2b5a01c84" + ], + "improved_assembly": [ + [ + { + "id": "test", + "single_end": true + }, + "test_assembly_consensus.fasta.gz:md5,e610bf2e1df1701b88675e36a844baf9" + ] + ], + "versions": [ + "versions.yml:md5,7eeab4e50acdc04c648149c2b5a01c84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-23T11:31:56.975593" + } +} \ No newline at end of file diff --git a/modules/nf-core/racon/tests/tags.yml b/modules/nf-core/racon/tests/tags.yml new file mode 100644 index 00000000..5b883e80 --- /dev/null +++ b/modules/nf-core/racon/tests/tags.yml @@ -0,0 +1,2 @@ +racon: + - modules/nf-core/racon/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index 3c6f95b2..a5e50649 100644 --- 
a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::samtools=1.17 + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index 256bd7c4..dc14f98d 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' : - 'biocontainers/samtools:1.17--h00cdaf9_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index c76a9169..bb7756d1 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -8,7 +8,7 @@ nextflow_process { tag "samtools" tag "samtools/index" - test("sarscov2 [BAI]") { + test("bai") { when { params { @@ -16,10 +16,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -28,12 +28,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.bai).match("bai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("bai_versions") } ) } } - test("homo_sapiens [CRAI]") { + test("crai") { when { params { @@ -41,10 +41,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_cram'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) """ } } @@ -53,12 +53,12 @@ nextflow_process { assertAll ( { assert process.success }, { assert snapshot(process.out.crai).match("crai") }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert snapshot(process.out.versions).match("crai_versions") } ) } } - test("homo_sapiens [CSI]") { + test("csi") { config "./csi.nextflow.config" @@ -68,10 +68,10 @@ nextflow_process { } process { """ - input[0] = [ - [ id:'test' ], // meta map - file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) - ] + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) """ } } @@ -80,7 +80,7 @@ nextflow_process { assertAll ( { assert process.success }, { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert path(process.out.versions.get(0)).getText().contains("samtools") } + { assert 
snapshot(process.out.versions).match("csi_versions") } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index b3baee7f..3dc8e7de 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,28 +1,74 @@ { + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, "crai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" ] ] ], - "timestamp": "2023-11-15T15:17:37.30801" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:41:38.446424" }, "bai": { "content": [ [ [ { - "id": "test" + "id": "test", + "single_end": false }, "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" ] ] ], - "timestamp": "2023-11-15T15:17:30.869234" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" } } \ No newline at end of file diff --git a/modules/nf-core/seqtk/seq/main.nf b/modules/nf-core/seqtk/seq/main.nf index af085f0d..3199f7ec 100644 --- a/modules/nf-core/seqtk/seq/main.nf +++ b/modules/nf-core/seqtk/seq/main.nf @@ -37,4 +37,20 @@ process SEQTK_SEQ { seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz/ || "$args" ==~ /\-[aA]/ ) { + extension = "fasta" + } + """ + echo "" | gzip > ${prefix}.seqtk-seq.${extension}.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk: \$(echo \$(seqtk 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/seqtk/seq/tests/main.nf.test b/modules/nf-core/seqtk/seq/tests/main.nf.test new file mode 100644 index 00000000..82e0a0ce --- /dev/null +++ b/modules/nf-core/seqtk/seq/tests/main.nf.test @@ -0,0 +1,88 @@ +nextflow_process { + + name "Test Process SEQTK_SEQ" + script "modules/nf-core/seqtk/seq/main.nf" + process "SEQTK_SEQ" + config "./standard.config" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/seq" + + test("sarscov2_seq_fa") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.fastx[0][1]).name + ).match("seq_fa") + } + ) + } + + } + + test("sarscov2_seq_fq") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + 
{ assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.fastx[0][1]).name + ).match("seq_fq") + } + ) + } + + } + + test("sarscov2_seq_fa_stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/seqtk/seq/tests/main.nf.test.snap b/modules/nf-core/seqtk/seq/tests/main.nf.test.snap new file mode 100644 index 00000000..2940cde1 --- /dev/null +++ b/modules/nf-core/seqtk/seq/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "sarscov2_seq_fa": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,e73599798195a519ba2565c3f0275b93" + ] + ], + "1": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,e73599798195a519ba2565c3f0275b93" + ] + ], + "versions": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ] + } + ], + "timestamp": "2024-02-22T15:57:07.883846" + }, + "sarscov2_seq_fq": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "1": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,f0c5c9110ce19e9ebbc9a6b6baf9e105" + ] + ], + "versions": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ] + } + ], + "timestamp": "2024-02-22T15:57:16.282429" + }, + "sarscov2_seq_fa_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.seqtk-seq.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,d3214d3bd8c0bb57f06550146eab94fa" + ] + } + ], + "timestamp": "2024-02-22T15:57:24.751443" + }, + "seq_fa": { + "content": [ + "test.seqtk-seq.fasta.gz" + ], + "timestamp": "2024-02-22T15:57:07.936813" + }, + "seq_fq": { + "content": [ + "test.seqtk-seq.fasta.gz" + ], + "timestamp": "2024-02-22T15:57:16.311642" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/seq/tests/standard.config b/modules/nf-core/seqtk/seq/tests/standard.config new file mode 100644 index 00000000..beeffb97 --- /dev/null +++ b/modules/nf-core/seqtk/seq/tests/standard.config @@ -0,0 +1,5 @@ +process { + withName: 'SEQTK_SEQ' { + ext.args = '-A' + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/seq/tests/tags.yml b/modules/nf-core/seqtk/seq/tests/tags.yml new file mode 100644 index 00000000..d237d869 --- /dev/null +++ b/modules/nf-core/seqtk/seq/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/seq: + - "modules/nf-core/seqtk/seq/**" diff --git a/modules/nf-core/vsearch/cluster/tests/main.nf.test b/modules/nf-core/vsearch/cluster/tests/main.nf.test new file mode 100644 index 00000000..1a376793 --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/main.nf.test @@ -0,0 +1,132 @@ +nextflow_process { + + name "Test Process VSEARCH_CLUSTER" + script "../main.nf" + process "VSEARCH_CLUSTER" + + tag "modules" + tag "modules_nfcore" + tag "vsearch" + tag "vsearch/cluster" + + test("sarscov2 - fastq - cluster fast") { + + config './nextflow_fast.config' + + when { + process { + """ + input[0] = [ + [ 
id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq - cluster size") { + + config './nextflow_size.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq - cluster smallmem") { + + config './nextflow_smallmem.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq - cluster unoise") { + + config './nextflow_unoise.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq - cluster userout") { + + config './nextflow_userout.config' + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/vsearch/cluster/tests/main.nf.test.snap b/modules/nf-core/vsearch/cluster/tests/main.nf.test.snap new file mode 100644 index 00000000..cb91f4ed --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/main.nf.test.snap @@ -0,0 +1,487 @@ +{ + "sarscov2 - fastq - cluster smallmem": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,f8a6497f29c55465df843da5c5af39ca" + ] + ], + "9": [ + + ], + "aln": [ + + ], + "bam": [ + + ], + "biom": [ + + ], + "blast": [ + + ], + "centroids": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,f8a6497f29c55465df843da5c5af39ca" + ] + ], + "clusters": [ + + ], + "mothur": [ + + ], + "msa": [ + + ], + "otu": [ + + ], + "out": [ + + ], + "profile": [ + + ], + "uc": [ + + ], + "versions": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ] + } + ], + "timestamp": "2023-12-06T14:35:18.210433" + }, + "sarscov2 - fastq - cluster userout": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.tsv.gz:md5,fb3256dd2327bf3dd4b9695394f5d5a8" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "aln": [ + + ], + "bam": [ + + ], + "biom": [ + + ], + "blast": [ + + 
], + "centroids": [ + + ], + "clusters": [ + + ], + "mothur": [ + + ], + "msa": [ + + ], + "otu": [ + + ], + "out": [ + [ + { + "id": "test", + "single_end": false + }, + "test.out.tsv.gz:md5,fb3256dd2327bf3dd4b9695394f5d5a8" + ] + ], + "profile": [ + + ], + "uc": [ + + ], + "versions": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ] + } + ], + "timestamp": "2023-12-06T14:35:27.471744" + }, + "sarscov2 - fastq - cluster size": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4359e9c250a63a887feb4e8f50555cfd" + ] + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "aln": [ + + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,4359e9c250a63a887feb4e8f50555cfd" + ] + ], + "biom": [ + + ], + "blast": [ + + ], + "centroids": [ + + ], + "clusters": [ + + ], + "mothur": [ + + ], + "msa": [ + + ], + "otu": [ + + ], + "out": [ + + ], + "profile": [ + + ], + "uc": [ + + ], + "versions": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ] + } + ], + "timestamp": "2023-12-06T14:35:13.922868" + }, + "sarscov2 - fastq - cluster unoise": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,e84ed4c7d22e1b8a05400c3751c29efc" + ] + ], + "9": [ + + ], + "aln": [ + + ], + "bam": [ + + ], + "biom": [ + + ], + "blast": [ + + ], + "centroids": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,e84ed4c7d22e1b8a05400c3751c29efc" + ] + ], + "clusters": [ + + ], + "mothur": [ + + ], + "msa": [ + + ], + "otu": [ + + ], + "out": [ + + ], + "profile": [ + + ], + "uc": [ + + ], + "versions": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ] + } + ], + "timestamp": "2023-12-06T14:35:22.849537" + }, + "sarscov2 - fastq - cluster fast": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,7ce841a7bba1c14ced4e1322d73a2d1f" + ] + ], + "9": [ + + ], + "aln": [ + + ], + "bam": [ + + ], + "biom": [ + + ], + "blast": [ + + ], + "centroids": [ + [ + { + "id": "test", + "single_end": false + }, + "test.centroids.fasta.gz:md5,7ce841a7bba1c14ced4e1322d73a2d1f" + ] + ], + "clusters": [ + + ], + "mothur": [ + + ], + "msa": [ + + ], + "otu": [ + + ], + "out": [ + + ], + "profile": [ + + ], + "uc": [ + + ], + "versions": [ + "versions.yml:md5,c9f7c3c0952e510ae64bf71dc85c3c0f" + ] + } + ], + "timestamp": "2023-12-06T14:35:07.719346" + } +} \ No newline at end of file diff --git a/modules/nf-core/vsearch/cluster/tests/nextflow_fast.config b/modules/nf-core/vsearch/cluster/tests/nextflow_fast.config new file mode 100644 index 00000000..df76a060 --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/nextflow_fast.config @@ -0,0 +1,7 @@ +process { + + ext.args = '--id 0.8' + ext.args2 = "--cluster_fast" + ext.args3 = "--centroids" + 
+} diff --git a/modules/nf-core/vsearch/cluster/tests/nextflow_size.config b/modules/nf-core/vsearch/cluster/tests/nextflow_size.config new file mode 100644 index 00000000..b650043e --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/nextflow_size.config @@ -0,0 +1,7 @@ +process { + + ext.args = '--id 0.8' + ext.args2 = "--cluster_size" + ext.args3 = "--samout" // Test also sam to bam conversion + +} diff --git a/modules/nf-core/vsearch/cluster/tests/nextflow_smallmem.config b/modules/nf-core/vsearch/cluster/tests/nextflow_smallmem.config new file mode 100644 index 00000000..5e4efb8b --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/nextflow_smallmem.config @@ -0,0 +1,7 @@ +process { + + ext.args = '--id 0.8 --usersort' + ext.args2 = "--cluster_smallmem" + ext.args3 = "--centroids" + +} diff --git a/modules/nf-core/vsearch/cluster/tests/nextflow_unoise.config b/modules/nf-core/vsearch/cluster/tests/nextflow_unoise.config new file mode 100644 index 00000000..96ccf725 --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/nextflow_unoise.config @@ -0,0 +1,7 @@ +process { + + ext.args = '--id 0.8 --minsize 1' + ext.args2 = "--cluster_unoise" + ext.args3 = "--centroids" + +} diff --git a/modules/nf-core/vsearch/cluster/tests/nextflow_userout.config b/modules/nf-core/vsearch/cluster/tests/nextflow_userout.config new file mode 100644 index 00000000..63b04555 --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/nextflow_userout.config @@ -0,0 +1,7 @@ +process { + + ext.args = '--id 0.8 --userfields query+target+id' + ext.args2 = "--cluster_fast" + ext.args3 = "--userout" + +} diff --git a/modules/nf-core/vsearch/cluster/tests/tags.yml b/modules/nf-core/vsearch/cluster/tests/tags.yml new file mode 100644 index 00000000..7238024f --- /dev/null +++ b/modules/nf-core/vsearch/cluster/tests/tags.yml @@ -0,0 +1,2 @@ +vsearch/cluster: + - "modules/nf-core/vsearch/cluster/**" diff --git a/modules/nf-core/vsearch/sort/tests/main.nf.test b/modules/nf-core/vsearch/sort/tests/main.nf.test new file mode 100644 index 00000000..db043851 --- /dev/null +++ b/modules/nf-core/vsearch/sort/tests/main.nf.test @@ -0,0 +1,60 @@ +nextflow_process { + + name "Test Process VSEARCH_SORT" + script "../main.nf" + process "VSEARCH_SORT" + + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "vsearch" + tag "vsearch/sort" + + test("vsearch-sort - sort - size") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = "--sortbysize" + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("vsearch-sort - sort - length") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = "--sortbylength" + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/vsearch/sort/tests/main.nf.test.snap b/modules/nf-core/vsearch/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..0a278d10 --- /dev/null +++ b/modules/nf-core/vsearch/sort/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "vsearch-sort - sort - size": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test_sorted.fasta:md5,86457b12184a7215734ff1e4d868ac51" + ] + ], + "1": [ + "versions.yml:md5,19dba7cb81af6ce4b80f3bf99829ee9d" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test_sorted.fasta:md5,86457b12184a7215734ff1e4d868ac51" + ] + ], + "versions": [ + "versions.yml:md5,19dba7cb81af6ce4b80f3bf99829ee9d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-24T13:05:54.524638" + }, + "vsearch-sort - sort - length": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test_sorted.fasta:md5,e733a34d0c655966408e773de876e5fb" + ] + ], + "1": [ + "versions.yml:md5,19dba7cb81af6ce4b80f3bf99829ee9d" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test_sorted.fasta:md5,e733a34d0c655966408e773de876e5fb" + ] + ], + "versions": [ + "versions.yml:md5,19dba7cb81af6ce4b80f3bf99829ee9d" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.1" + }, + "timestamp": "2024-05-24T13:06:01.210745" + } +} \ No newline at end of file diff --git a/modules/nf-core/vsearch/sort/tests/nextflow.config b/modules/nf-core/vsearch/sort/tests/nextflow.config new file mode 100644 index 00000000..7b3f6a9c --- /dev/null +++ b/modules/nf-core/vsearch/sort/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.prefix = { "${meta.id}_sorted" } + +} diff --git a/modules/nf-core/vsearch/sort/tests/tags.yml b/modules/nf-core/vsearch/sort/tests/tags.yml new file mode 100644 index 00000000..d7220281 --- /dev/null +++ b/modules/nf-core/vsearch/sort/tests/tags.yml @@ -0,0 +1,2 @@ +vsearch/sort: + - modules/nf-core/vsearch/sort/** diff --git a/nextflow.config b/nextflow.config index 6dd777af..40c69e2e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,12 +16,16 @@ params { protospacer = null library = null crisprcleanr = null - cutadapt = null - rra_contrasts = null + contrasts = null mle_design_matrix = null count_table = null + fasta = null + five_prime_adapter = null + day0_label = null + three_prime_adapter = null min_reads = 30 min_targeted_genes = 3 + rra = false bagel_reference_essentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/CEGv2.txt' bagel_reference_nonessentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/NEGv1.txt' @@ -40,9 +44,8 @@ params { // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false - reference_fasta = null // MultiQC options multiqc_config = null @@ -52,15 +55,16 @@ params { multiqc_methods_description = null // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false + outdir = null + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + hook_url = null + help = false + version = false + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' // Config options config_profile_name = null @@ -70,7 +74,6 @@ params { config_profile_contact = null config_profile_url = null - // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -97,109 +100,120 @@ try { } // Load nf-core/crisprseq custom profiles from different institutions. 
-// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! -// try { -// includeConfig "${params.custom_config_base}/pipeline/crisprseq.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/crisprseq profiles: ${params.custom_config_base}/pipeline/crisprseq.config") -// } +try { + includeConfig "${params.custom_config_base}/pipeline/crisprseq.config" +} catch (Exception e) { + System.err.println("WARNING: Could not load nf-core/config/crisprseq profiles: ${params.custom_config_base}/pipeline/crisprseq.config") +} profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false + nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda', 'defaults'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - docker.userEmulation = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + process.containerOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + process.containerOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + 
apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + apptainer.enabled = false } apptainer { - apptainer.enabled = true - apptainer.autoMounts = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false + apptainer.enabled = true + apptainer.autoMounts = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + wave { + apptainer.ociAutoPull = true + singularity.ociAutoPull = true + wave.enabled = true + wave.freeze = true + wave.strategy = 'conda,container' } gitpod { - executor.name = 'local' - executor.cpus = 4 - executor.memory = 8.GB + executor.name = 'local' + executor.cpus = 4 + executor.memory = 8.GB } - test { includeConfig 'conf/test_targeted.config' } - test_targeted { includeConfig 'conf/test_targeted.config' } - test_full { includeConfig 'conf/test_full.config' } - test_umis { includeConfig 'conf/test_umis.config' } - test_screening_full { includeConfig 'conf/test_screening_full.config' } - test_screening { includeConfig 'conf/test_screening.config' } - test_screening_paired { includeConfig 'conf/test_screening_paired.config' } + test { includeConfig 'conf/test_targeted.config' } + test_targeted { includeConfig 'conf/test_targeted.config' } + test_full { includeConfig 'conf/test_targeted_full.config' } + test_targeted_full { includeConfig 'conf/test_targeted_full.config' } + test_umis { includeConfig 'conf/test_umis.config' } + test_screening_full { includeConfig 'conf/test_screening_full.config' } + test_screening { includeConfig 'conf/test_screening.config' } + test_screening_paired { includeConfig 'conf/test_screening_paired.config' } + test_screening_rra { includeConfig 'conf/test_screening_rra.config' } + test_screening_count_table { includeConfig 'conf/test_screening_count_table.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -212,7 +226,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Load igenomes.config if required @@ -235,9 +249,13 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +// Disable process selector warnings by default. Use debug profile to enable warnings. 
+nextflow.enable.configProcessNamesValidation = false + + // Set default registry for Docker, Singularity and Podman independent of -profile // Will not be used unless Docker, Singularity and Podman are enabled // Set to your registry if you have a mirror of containers +apptainer.registry = 'quay.io' docker.registry = 'quay.io' podman.registry = 'quay.io' singularity.registry = 'quay.io' @@ -267,7 +285,7 @@ manifest { description = """Pipeline for the analysis of CRISPR data""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '2.1.1' + version = '2.2.0' doi = 'https://doi.org/10.5281/zenodo.7598496' } diff --git a/nextflow_schema.json b/nextflow_schema.json index c2da2600..dc4fe674 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -82,7 +82,7 @@ }, "medaka_model": { "type": "string", - "default": "r941_min_high_g360", + "default": "r941_min_high_g303", "fa_icon": "fas fa-font", "description": "Medaka model (-m) to use according to the basecaller used." } @@ -104,6 +104,8 @@ }, "protospacer": { "type": "string", + "pattern": "^[ACGTacgt]+$", + "errorMessage": "The protospacer must be a valid DNA sequence.", "fa_icon": "fas fa-grip-lines", "description": "Provide the same protospacer sequence for all samples. Will override protospacer sequences provided by an input samplesheet." } @@ -150,18 +152,47 @@ "description": "Parameters used for functional genomic screenings", "default": "", "properties": { + "library": { + "type": "string", + "format": "file-path", + "pattern": "^\\S+\\.(tsv|txt)$", + "mimetype": "text/tsv", + "exists": true, + "fa_icon": "far fa-address-book", + "description": "sgRNAs and their targeted genes, tab separated" + }, + "five_prime_adapter": { + "type": "string", + "description": "Sequencing adapter sequence to use for trimming on the 5' end" + }, + "three_prime_adapter": { + "type": "string", + "description": "Sequencing adapter sequence to use for trimming on the 3' end" + }, + "fasta": { + "type": "string", + "description": "Library in FASTA format, in case you want to map with Bowtie2 and then run MAGeCK count" + }, + "day0_label": { + "type": "string", + "description": "Specify the label for the control sample (usually day 0 or plasmid). Every other sample label will be treated as a treatment condition and compared with the control sample in MAGeCK MLE" + }, "mle_design_matrix": { "type": "string", "format": "file-path", "exists": true, "description": "Design matrix used for MAGeCK MLE to call essential genes under multiple conditions while considering sgRNA knockout efficiency" }, - "rra_contrasts": { + "contrasts": { "type": "string", "format": "file-path", "exists": true, "description": "Comma-separated file with the conditions to be compared. The first one will be the reference (control)" }, + "rra": { + "type": "boolean", + "description": "Parameter indicating if MAGeCK RRA should be run instead of MAGeCK MLE." }, "count_table": { "type": "string", "format": "file-path", @@ -170,32 +201,19 @@ "exists": true, "description": "Please provide your count table if the mageck test should be skipped."
}, - "library": { - "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.(tsv|txt)$", - "mimetype": "text/tsv", - "exists": true, - "fa_icon": "far fa-address-book", - "description": "sgRNA and targetting genes, tab separated" - }, "crisprcleanr": { "type": "string", "description": "sgRNA library annotation for crisprcleanR" }, - "cutadapt": { - "type": "string", - "description": "cut adapter for screening analysis" - }, "min_reads": { "type": "number", "description": "a filter threshold value for sgRNAs, based on their average counts in the control sample", - "default": 30.0 + "default": 30 }, "min_targeted_genes": { "type": "number", "description": "Minimal number of different genes targeted by sgRNAs in a biased segment in order for the corresponding counts to be corrected for CRISPRcleanR", - "default": 3.0 + "default": 3 }, "bagel_reference_essentials": { "type": "string", @@ -424,7 +442,7 @@ }, "validationSchemaIgnoreParams": { "type": "string", - "default": "genomes", + "default": "genomes,igenomes_base", "description": "Ignore JSON schema validation of the following params", "fa_icon": "fas fa-ban", "hidden": true @@ -442,6 +460,13 @@ "description": "Validation of parameters in lenient more.", "hidden": true, "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." + }, + "pipelines_testdata_base_path": { + "type": "string", + "fa_icon": "far fa-check-circle", + "description": "Base URL or local path to location of pipeline test dataset files", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/", + "hidden": true } } } diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 0d62beb6..00000000 --- a/pyproject.toml +++ /dev/null @@ -1,10 +0,0 @@ -# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. -# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf new file mode 100644 index 00000000..1a73efd1 --- /dev/null +++ b/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf @@ -0,0 +1,484 @@ +// +// Subworkflow with functionality specific to the nf-core/crisprseq pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin' +include { paramsSummaryMap } from 'plugin/nf-validation' +include { fromSamplesheet } from 'plugin/nf-validation' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { dashedLine } from '../../nf-core/utils_nfcore_pipeline' +include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE +======================================================================================== +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + help // boolean: Display help text + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + input // string: Path to input samplesheet + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + pre_help_text = nfCoreLogo(monochrome_logs) + post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) + def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input samplesheet.csv --outdir <OUTDIR>" + UTILS_NFVALIDATION_PLUGIN ( + help, + workflow_command, + pre_help_text, + post_help_text, + validate_params, + "nextflow_schema.json" + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + // + // Custom validation for pipeline parameters + // + validateInputParameters() + + reads_targeted = Channel.empty() + reads_screening = Channel.empty() + fastqc_screening = Channel.empty() + reference = Channel.empty() + protospacer = Channel.empty() + template = Channel.empty() + versions = Channel.empty() + + // + // Create channel from input file provided through params.input + // + if(params.input) { + Channel
.fromSamplesheet("input") + .multiMap { + meta, fastq_1, fastq_2, reference, protospacer, template -> + if (fastq_2) { + files = [ fastq_1, fastq_2 ] + } else { + files = [ fastq_1 ] + } + reads_targeted: [ meta.id, meta - meta.subMap('condition') + [ single_end : fastq_2 ? false : true, self_reference : reference ? false : true, template : template ? true : false ], files ] + reads_screening:[ meta + [ single_end:fastq_2?false:true ], files ] + reference: [meta - meta.subMap('condition') + [ single_end : fastq_2 ? false : true, self_reference : reference ? false : true, template : template ? true : false ], reference] + protospacer: [meta - meta.subMap('condition') + [ single_end : fastq_2 ? false : true, self_reference : reference ? false : true, template : template ? true : false ], protospacer] + template: [meta - meta.subMap('condition') + [ single_end : fastq_2 ? false : true, self_reference : reference ? false : true, template : template ? true : false ], template] + } + .set { ch_input } + + // + // Validate input samplesheet + // + ch_input.reads_targeted + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .set { reads_targeted } + + fastqc_screening = ch_input.reads_screening + reference = ch_input.reference + protospacer = ch_input.protospacer + template = ch_input.template + } else { + ch_input = Channel.empty() + } + + emit: + reads_targeted + fastqc_screening + reference + protospacer + template + versions = ch_versions +} + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE CHANNELS - SCREENING +======================================================================================== +*/ + +workflow INITIALISATION_CHANNEL_CREATION_SCREENING { + + take: + + main: + + ch_library = Channel.empty() + ch_crisprcleanr = Channel.empty() + ch_design = Channel.empty() + + // Library + if (params.library) { + ch_library = Channel.fromPath(params.library) + } + + // Crisprcleanr + if (params.crisprcleanr) { + if(params.crisprcleanr.endsWith(".csv")) { + ch_crisprcleanr = Channel.fromPath(params.crisprcleanr) + } else { + ch_crisprcleanr = Channel.value(params.crisprcleanr) + } + } + + // MLE design matrix + if(params.mle_design_matrix) { + ch_design = Channel.fromPath(params.mle_design_matrix) + } + + + emit: + library = ch_library // channel: library file + crisprcleanr = ch_crisprcleanr // channel: crisprcleanr file or value + design = ch_design // channel: design matrix file +} + +/* +======================================================================================== + SUBWORKFLOW TO INITIALISE PIPELINE CHANNELS - TARGETED +======================================================================================== +*/ + +workflow INITIALISATION_CHANNEL_CREATION_TARGETED { + + take: + input_reads + input_reference + input_template + input_protospacer + + main: + + // + // Separate samples by the ones containing all reads in one file or the ones with many files to be concatenated + // + input_reads + .groupTuple() + .branch { + meta, fastqs -> + single : fastqs.size() == 1 + return [ meta, fastqs.flatten() ] + multiple: fastqs.size() > 1 + return [ meta, fastqs.flatten() ] + } + .set { ch_fastq } + + // + // Add reference sequences to file + // + input_reference + .tap{ meta_reference } + .filter{ meta, sequence -> sequence instanceof String } + .collectFile() { meta, reference -> + [ "${meta.id}_reference.fasta", ">${meta.id}\n${reference}\n" ] // Write each reference sequence to a file + } + 
.map{ new_file -> + [new_file.baseName.split("_reference")[0], new_file] // create a channel with the meta.id and the new file + } + .join(meta_reference + .map{ meta, reference -> + [meta.id, meta] // Join the channel by meta.id with the meta map + } + ) + .map{ metaid, new_file, meta -> + [meta, new_file] // Obtain the final channel with meta map and the new file + } + .set{ ch_seq_reference } + + + // + // Add template sequences to file + // + input_template + .tap{ meta_template } + .filter{ meta, sequence -> sequence instanceof String } + .collectFile() { meta, template -> + [ "${meta.id}_template.fasta", ">${meta.id}\n${template}\n" ] // Write each template sequence to a file + } + .map{ new_file -> + [new_file.baseName.split("_template")[0], new_file] // create a channel with the meta.id and the new file + } + .join(meta_template + .map{ meta, template -> + [meta.id, meta] // Join the channel by meta.id with the meta map + } + ) + .map{ metaid, new_file, meta -> + [meta, new_file] // Obtain the final channel with meta map and the new file + } + .set{ ch_seq_template } + + + // Join channels with reference and protospacer + // to channel: [ meta, reference, protospacer] + if (!params.reference_fasta && !params.protospacer) { + ch_seq_reference + .join(input_protospacer) + .set{ reference_protospacer } + } else if (!params.reference_fasta) { + // If a protospacer was provided through the --protospacer param instead of the samplesheet + ch_protospacer = Channel.of(params.protospacer) + ch_seq_reference + .combine(ch_protospacer) + .set{ reference_protospacer } + } else if (!params.protospacer) { + // If a reference was provided through a fasta file or igenomes instead of the samplesheet + ch_reference = Channel.fromPath(params.reference_fasta) + input_protospacer + .combine(ch_reference) + .map{ meta, protospacer, reference -> [ meta, reference, protospacer ]} // Change the order of the channel + .set{ reference_protospacer } + } else { + ch_reference = Channel.fromPath(params.reference_fasta) + ch_protospacer = Channel.of(params.protospacer) + input_reads + .combine(ch_reference) + .combine(ch_protospacer) + .map{ meta, fastqs, reference, protospacer -> [ meta, reference, protospacer ]} // Don't add fastqs to the channel + .set{ reference_protospacer } + } + + emit: + fastq_multiple = ch_fastq.multiple // [ meta, fastqs ] // Channel with the samples with multiple files + fastq_single = ch_fastq.single // [ meta, fastqs ] // Channel with the samples with only one file + template = ch_seq_template // [ meta, template ] // Channel with the template sequences + reference_protospacer = reference_protospacer // [ meta, reference, protospacer] // Channel with the reference and protospacer sequences + +} + +/* +======================================================================================== + SUBWORKFLOW FOR PIPELINE COMPLETION +======================================================================================== +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + + // + // 
Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs, multiqc_report.toList()) + } + + completionSummary(monochrome_logs) + + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ +// +// Check and validate pipeline parameters +// +def validateInputParameters() { + genomeExistsError() +} + +// +// Validate channels from input samplesheet +// +def validateInputSamplesheet(input) { + def (metas, fastqs) = input[1..2] + + // Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + def endedness_ok = metas.collect{ it.single_end }.unique().size == 1 + if (!endedness_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end: ${metas[0].id}") + } + + // Check that multiple runs of the same sample contain a reference or not + def reference_ok = metas.collect{ it.self_reference }.unique().size == 1 + if (!reference_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must all contain a reference or not: ${metas[0].id}") + } + + // Check that multiple runs of the same sample contain a template or not + def template_ok = metas.collect{ it.template }.unique().size == 1 + if (!template_ok) { + error("Please check input samplesheet -> Multiple runs of a sample must all contain a template or not: ${metas[0].id}") + } + + return [ metas[0], fastqs ] +} +// +// Get attribute from genome config file e.g. fasta +// +def getGenomeAttribute(attribute) { + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + if (params.genomes[ params.genome ].containsKey(attribute)) { + return params.genomes[ params.genome ][ attribute ] + } + } + return null +} + +// +// Exit pipeline if incorrect --genome key provided +// +def genomeExistsError() { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + def error_string = "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + error(error_string) + } +} + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // TODO nf-core: Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // TODO nf-core: Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" + ].join(' ').trim() + + return reference_text +} + +def methodsDescriptionText(mqc_methods_yaml) { + // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file + def meta = [:] + meta.workflow = workflow.toMap() + meta["manifest_map"] = workflow.manifest.toMap() + + // Pipeline DOI + if (meta.manifest_map.doi) { + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + def temp_doi_ref = "" + String[] manifest_doi = meta.manifest_map.doi.tokenize(",") + for (String doi_ref: manifest_doi) temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), " + meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2) + } else meta["doi_text"] = "" + meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used. </li>"
+ + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + +def validateParametersScreening() { + if(params.rra && params.mle_design_matrix) { + log.warn "mle_design_matrix will only be used for the MAGeCK MLE computations" + } + + if(params.fasta && params.count_table) { + error "Please provide either a fasta file or a count_table" + } + + if(params.fasta && !params.library) { + error "Please provide a fasta file and the library file" + } + + if(params.day0_label && params.mle_design_matrix) { + log.warn "MAGeCK MLE module will be run twice, once with the design matrix and once with day0-label" + } + + if(params.rra && !params.contrasts) { + error "Please also provide the contrasts table to compare the samples for MAGeCK RRA" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..ac31f28f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +import org.yaml.snakeyaml.Yaml +import groovy.json.JsonOutput +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info "${workflow.manifest.name} ${getWorkflowVersion()}" + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ?
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = JsonOutput.toJson(params) + temp_pf.text = JsonOutput.prettyPrint(jsonStr) + + FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + Yaml parser = new Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } catch(NullPointerException | IOException e) { + log.warn "Could not verify conda channel configuration." + return + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " There is a problem with your Conda configuration!\n\n" + + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
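To make the channel-priority rule enforced by `checkCondaChannels()` above concrete, the following standalone Groovy sketch reproduces the two conditions it tests: every required channel must be present, and the required channels must keep their relative order in the observed list. The observed list here is a hypothetical user configuration:

```groovy
// Standalone sketch of the priority test performed by checkCondaChannels() above.
def required = ['conda-forge', 'bioconda', 'defaults']
def observed = ['conda-forge', 'bioconda', 'defaults', 'r']   // hypothetical `conda config` output

// Condition 1: no required channel may be missing
def missing = (required as Set) - (observed as Set)

// Condition 2: required channels must appear in the same relative order
def ordered = (0..<required.size() - 1).every { i ->
    observed.indexOf(required[i]) < observed.indexOf(required[i + 1])
}

assert missing.isEmpty() && ordered   // passes for the list above; otherwise the subworkflow warns
```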
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..ca964ce8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,111 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert 
workflow.stdout.contains("nextflow_workflow v9.9.9") } + ) + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..14558c39 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,446 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +import org.yaml.snakeyaml.Yaml +import nextflow.extension.FilesEx + +/* +======================================================================================== + SUBWORKFLOW DEFINITION +======================================================================================== +*/ + +workflow UTILS_NFCORE_PIPELINE { + + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + valid_config = true + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + + "Please refer to the quick start section and usage docs for the pipeline.\n " + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } + if (nextflow_cli_args[0]) { + log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + } +} + +// +// Citation string for pipeline +// +def workflowCitation() { + def temp_doi_ref = "" + String[] manifest_doi = workflow.manifest.doi.tokenize(",") + // Using a loop to handle multiple DOIs + // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers + // Removing ` ` since the manifest.doi is a string and not a proper list + for (String doi_ref: manifest_doi) temp_doi_ref += " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" + return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + + "* The pipeline\n" + + temp_doi_ref + "\n" + + "* The nf-core framework\n" + + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + + "* Software dependencies\n" + + " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + String version_string = "" + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + Yaml yaml = new Yaml() + versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + $workflow.manifest.name: ${getWorkflowVersion()} + Nextflow: $workflow.nextflow.version + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions + .unique() + .map { processVersionsFromYAML(it) } + .unique() + .mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + for (group in summary_params.keySet()) { + def group_params = summary_params.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "
    <p style=\"font-size:110%\"><b>$group</b></p>\n" + summary_section += "    <dl class=\"dl-horizontal\">\n" + for (param in group_params.keySet()) { + summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n" + } + summary_section += "    </dl>
    \n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// nf-core logo +// +def nfCoreLogo(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + String.format( + """\n + ${dashedLine(monochrome_logs)} + ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset} + ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset} + ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} + ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} + ${colors.green}`._,._,\'${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset} + ${dashedLine(monochrome_logs)} + """.stripIndent() + ) +} + +// +// Return dashed line +// +def dashedLine(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + return "-${colors.dim}----------------------------------------------------${colors.reset}-" +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + Map colorcodes = [:] + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? 
'' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// +// Attach the multiqc report to email +// +def attachMultiqcReport(multiqc_report) { + def mqc_report = null + try { + if (workflow.success) { + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { + if (mqc_report.size() > 1) { + log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" + } + mqc_report = mqc_report[0] + } + } + } catch (all) { + if (multiqc_report) { + log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" + } + } + return mqc_report +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[$workflow.manifest.name] Successful: $workflow.runName" + if (!workflow.success) { + subject = "[$workflow.manifest.name] FAILED: $workflow.runName" + } + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = attachMultiqcReport(multiqc_report) + + // Check if we are only sending emails on failure + def 
email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + Map colors = logColours(monochrome_logs) + if (email_address) { + try { + if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" + } catch (all) { + // Catch failures and try with plaintext + def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] + mail_cmd.execute() << email_html + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + Map colors = logColours(monochrome_logs) + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + } + } else { + log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..1dc317f8 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,134 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function workflowCitation") { + + function "workflowCitation" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function nfCoreLogo") { + + function "nfCoreLogo" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dashedLine") { + + function "dashedLine" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..1037232c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function nfCoreLogo": { + "content": [ + "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:10.562934" + }, + "Test Function workflowCitation": { + "content": [ + "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:07.019761" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": "", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function dashedLine": { + "content": [ + "-\u001b[2m----------------------------------------------------\u001b[0m-" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:14.366181" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": 
"\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf new file mode 100644 index 00000000..2585b65d --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf @@ -0,0 +1,62 @@ +// +// Subworkflow that uses the nf-validation plugin to render help text and parameter summary +// + +/* +======================================================================================== + IMPORT NF-VALIDATION PLUGIN +======================================================================================== +*/ + +include { paramsHelp } from 'plugin/nf-validation' +include { paramsSummaryLog } from 'plugin/nf-validation' +include { validateParameters } from 
'plugin/nf-validation'
+
+/*
+========================================================================================
+    SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NFVALIDATION_PLUGIN {
+
+    take:
+    print_help       // boolean: print help
+    workflow_command // string: default command used to run pipeline
+    pre_help_text    // string: string to be printed before help text and summary log
+    post_help_text   // string: string to be printed after help text and summary log
+    validate_params  // boolean: validate parameters
+    schema_filename  // path: JSON schema file, null to use default value
+
+    main:
+
+    log.debug "Using schema file: ${schema_filename}"
+
+    // Default values for strings
+    pre_help_text    = pre_help_text    ?: ''
+    post_help_text   = post_help_text   ?: ''
+    workflow_command = workflow_command ?: ''
+
+    //
+    // Print help message if needed
+    //
+    if (print_help) {
+        log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text
+        System.exit(0)
+    }
+
+    //
+    // Print parameter summary to stdout
+    //
+    log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text
+
+    //
+    // Validate parameters relative to the parameter JSON schema
+    //
+    if (validate_params){
+        validateParameters(parameters_schema: schema_filename)
+    }
+
+    emit:
+    dummy_emit = true
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
new file mode 100644
index 00000000..3d4a6b04
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
@@ -0,0 +1,44 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFVALIDATION_PLUGIN"
+description: Use nf-validation to initiate and validate a pipeline
+keywords:
+  - utility
+  - pipeline
+  - initialise
+  - validation
+components: []
+input:
+  - print_help:
+      type: boolean
+      description: |
+        Print help message and exit
+  - workflow_command:
+      type: string
+      description: |
+        The command to run the workflow e.g. "nextflow run main.nf"
+  - pre_help_text:
+      type: string
+      description: |
+        Text to print before the help message
+  - post_help_text:
+      type: string
+      description: |
+        Text to print after the help message
+  - validate_params:
+      type: boolean
+      description: |
+        Validate the parameters and error if invalid.
+  - schema_filename:
+      type: string
+      description: |
+        The filename of the schema to validate against.
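+# A minimal, hypothetical sketch of how a pipeline's initialisation code might
+# wire these inputs up (the command string and schema path are illustrative only):
+#   UTILS_NFVALIDATION_PLUGIN (
+#       params.help,            // print_help
+#       "nextflow run nf-core/crisprseq --input samplesheet.csv --outdir <OUTDIR>",
+#       pre_help_text,          // e.g. the nf-core logo
+#       post_help_text,         // e.g. a citation note
+#       params.validate_params, // validate_params
+#       "nextflow_schema.json"  // schema_filename
+#   )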
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test new file mode 100644 index 00000000..5784a33f --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test @@ -0,0 +1,200 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFVALIDATION_PLUGIN" + script "../main.nf" + workflow "UTILS_NFVALIDATION_PLUGIN" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "plugin/nf-validation" + tag "'plugin/nf-validation'" + tag "utils_nfvalidation_plugin" + tag "subworkflows/utils_nfvalidation_plugin" + + test("Should run nothing") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should run help") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with command") { + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = null + post_help_text = null + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } } + ) + } + } + + test("Should run help with extra text") { + + + when { + + params { + monochrome_logs = true + test_data = '' + } + workflow { + """ + help = true + workflow_command = "nextflow run noorg/doesntexist" + pre_help_text = "pre-help-text" + post_help_text = "post-help-text" + validate_params = false + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.exitStatus == 0 }, + { assert workflow.stdout.any { 
it.contains('pre-help-text') } }, + { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, + { assert workflow.stdout.any { it.contains('Input/output options') } }, + { assert workflow.stdout.any { it.contains('--outdir') } }, + { assert workflow.stdout.any { it.contains('post-help-text') } } + ) + } + } + + test("Should validate params") { + + when { + + params { + monochrome_logs = true + test_data = '' + outdir = 1 + } + workflow { + """ + help = false + workflow_command = null + pre_help_text = null + post_help_text = null + validate_params = true + schema_filename = "$moduleTestDir/nextflow_schema.json" + + input[0] = help + input[1] = workflow_command + input[2] = pre_help_text + input[3] = post_help_text + input[4] = validate_params + input[5] = schema_filename + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..7626c1c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". pipeline parameters", + "description": "", + "type": "object", + "definitions": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. 
You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/definitions/input_output_options" + }, + { + "$ref": "#/definitions/generic_options" + } + ] +} diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml new file mode 100644 index 00000000..60b1cfff --- /dev/null +++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfvalidation_plugin: + - subworkflows/nf-core/utils_nfvalidation_plugin/** diff --git a/templates/alignment_summary.py b/templates/alignment_summary.py index 035e7b90..1af7a9f6 100644 --- a/templates/alignment_summary.py +++ b/templates/alignment_summary.py @@ -24,10 +24,14 @@ if "aligned-reads" not in line: output_file.write(line) else: - output_file.write(f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n") + output_file.write( + f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n" + ) add_line = False if add_line: - output_file.write(f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n") + output_file.write( + f"aligned-reads, {mapped_reads_count} ({round(mapped_reads_percentage, 1)}%)\\n" + ) with open("versions.yml", "w") as f: f.write('"${task.process}":\\n') diff --git a/templates/preprocessing_summary.py b/templates/preprocessing_summary.py index 13c698ce..122733d1 100755 --- a/templates/preprocessing_summary.py +++ b/templates/preprocessing_summary.py @@ -18,7 +18,9 @@ assembled_reads_count = 0 else: with gzip.open("$assembled_reads", "rt") as handle: - assembled_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Merged reads R1+R2 + assembled_reads_count = len( + list(SeqIO.parse(handle, "fastq")) + ) # Merged reads R1+R2 with gzip.open("$trimmed_reads", "rt") as handle: trimmed_reads_count = len(list(SeqIO.parse(handle, "fastq"))) # Filtered reads @@ -34,7 +36,9 @@ if field.isdigit(): adapters_count = field # reads with adapters if "%" in field: - adapters_percentage = field # percentage of reads with adapters: ex. "(100.0%)" + adapters_percentage = ( + field # percentage of reads with adapters: ex. "(100.0%)" + ) if "$task.ext.prefix" != "null": prefix = "$task.ext.prefix" diff --git a/templates/template_fluteMLE.R b/templates/template_fluteMLE.R new file mode 100644 index 00000000..1a9c10fd --- /dev/null +++ b/templates/template_fluteMLE.R @@ -0,0 +1,47 @@ + #!/usr/bin/env Rscript + #### author: Laurence Kuhlburger + #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text. 
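+    #### Note: this file is a Nextflow script template; ${gene_summary}, ${prefix}
+    #### and ${args} are placeholders substituted by Nextflow before the script runs.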
+ #### + #### graphs mageck MLE + + library(MAGeCKFlute) + library(clusterProfiler) + library(ggplot2) + + library(pathview) + options(ggrepel.max.overlaps = Inf) + mle <- read.table("${gene_summary}", header = TRUE, sep = "\t", stringsAsFactors = FALSE) + + if("${prefix}" == "day0") { + beta_strings <- grep("\\\\.beta", colnames(mle), value = TRUE) + before_beta <- sub("\\\\.beta.*", "", beta_strings) + unique_strings <- unique(before_beta) + for(i in unique_strings) { + FluteMLE(mle, treatname= i, proj=i, pathview.top=5) + } + } else { + beta_strings <- grep("\\\\.beta", colnames(mle), value = TRUE) + before_beta <- sub("\\\\.beta.*", "", beta_strings) + unique_strings <- unique(before_beta) + for(i in unique_strings) { + FluteMLE(mle, treatname= i, proj=i, ${args}, pathview.top=5) + } + } + + version_file_path <- "versions.yml" + version_flute <- paste(unlist(packageVersion("MAGeCKFlute")), collapse = ".") + version_ggplot <- paste(unlist(packageVersion("ggplot2")), collapse = ".") + version_clusterprofiler <- paste(unlist(packageVersion("clusterProfiler")), collapse = ".") + version_pathview <- paste(unlist(packageVersion("pathview")), collapse = ".") + + f <- file(version_file_path, "w") + writeLines('"${task.process}":', f) + writeLines(" MAGeCKFlute: ", f, sep = "") + writeLines(version_flute, f) + writeLines(" ggplot2: ", f, sep = "") + writeLines(version_ggplot, f) + writeLines(" clusterProfiler: ", f, sep = "") + writeLines(version_clusterprofiler, f) + writeLines(" pathview: ", f, sep = "") + writeLines(version_pathview, f) + close(f) diff --git a/tower.yml b/tower.yml index 787aedfe..03cbd716 100644 --- a/tower.yml +++ b/tower.yml @@ -3,3 +3,33 @@ reports: display: "MultiQC HTML report" samplesheet.csv: display: "Auto-created samplesheet with collated metadata and FASTQ paths" + "plots/*accumulative.html": + display: "accumulative" + "plots/*delAlleles_plot.png": + display: "delAlleles_plot" + "plots/*Deletions.html": + display: "Deletions" + "plots/*Insertions.html": + display: "Insertions" + "plots/*subs-perc_plot_LOGO.png": + display: "subs-perc_plot_LOGO" + "plots/*subs-perc_plot.png": + display: "subs-perc_plot" + "plots/*top-alleles_LOGO.png": + display: "top-alleles_LOGO" + "plots/*top.html": + display: "top" + "cigar/*_cutSite.json": + display: "cutSite" + "cigar/*_edition.html": + display: "edition" + "cigar/*_edits.csv": + display: "edits" + "cigar/*_indels.csv": + display: "indels" + "cigar/*_QC-indels.html": + display: "QC-indels" + "cigar/*_reads.html": + display: "reads" + "cigar/*_subs-perc.csv": + display: "subs-perc" diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf index 7607a4c6..5ccb9310 100644 --- a/workflows/crisprseq_screening.nf +++ b/workflows/crisprseq_screening.nf @@ -1,96 +1,69 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowCrisprseq.initialise(params, log) - -// Set screening parameters and channels -if (params.library) { ch_library = 
file(params.library) } -if (params.crisprcleanr) { ch_crisprcleanr = Channel.value(params.crisprcleanr) } - -if(params.mle_design_matrix) { - Channel.fromPath(params.mle_design_matrix) - .set { ch_design } -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +// Local modules +include { BAGEL2_FC } from '../modules/local/bagel2/fc' +include { BAGEL2_BF } from '../modules/local/bagel2/bf' +include { BAGEL2_PR } from '../modules/local/bagel2/pr' +include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph' +include { MATRICESCREATION } from '../modules/local/matricescreation' +include { MAGECK_FLUTEMLE } from '../modules/local/mageck/flutemle' +include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_CONTRASTS } from '../modules/local/mageck/flutemle' +include { MAGECK_FLUTEMLE as MAGECK_FLUTEMLE_DAY0 } from '../modules/local/mageck/flutemle' +include { VENNDIAGRAM } from '../modules/local/venndiagram' +// nf-core modules +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { CUTADAPT as CUTADAPT_THREE_PRIME } from '../modules/nf-core/cutadapt/main' +include { CUTADAPT as CUTADAPT_FIVE_PRIME } from '../modules/nf-core/cutadapt/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MAGECK_COUNT } from '../modules/nf-core/mageck/count/main' +include { MAGECK_MLE } from '../modules/nf-core/mageck/mle/main' +include { MAGECK_TEST } from '../modules/nf-core/mageck/test/main' +include { MAGECK_GRAPHRRA } from '../modules/local/mageck/graphrra' +include { CRISPRCLEANR_NORMALIZE } from '../modules/nf-core/crisprcleanr/normalize/main' +include { MAGECK_MLE as MAGECK_MLE_MATRIX } from '../modules/nf-core/mageck/mle/main' +include { MAGECK_MLE as MAGECK_MLE_DAY0 } from '../modules/nf-core/mageck/mle/main' +include { BOWTIE2_BUILD } from '../modules/nf-core/bowtie2/build/main' +include { BOWTIE2_ALIGN } from '../modules/nf-core/bowtie2/align/main' +// Local subworkflows +include { INITIALISATION_CHANNEL_CREATION_SCREENING } from '../subworkflows/local/utils_nfcore_crisprseq_pipeline' +// Functions +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crisprseq_pipeline' +include { validateParametersScreening } from '../subworkflows/local/utils_nfcore_crisprseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// - -/* 
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    IMPORT NF-CORE MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+workflow CRISPRSEQ_SCREENING {

-//
-// MODULE: Installed directly from nf-core/modules
-//
-include { FASTQC                      } from '../modules/nf-core/fastqc/main'
-include { CUTADAPT                    } from '../modules/nf-core/cutadapt/main'
-include { MULTIQC                     } from '../modules/nf-core/multiqc/main'
-include { MAGECK_COUNT                } from '../modules/nf-core/mageck/count/main'
-include { MAGECK_MLE                  } from '../modules/nf-core/mageck/mle/main'
-include { MAGECK_TEST                 } from '../modules/nf-core/mageck/test/main'
-include { MAGECK_GRAPHRRA             } from '../modules/local/mageck/graphrra'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-include { CRISPRCLEANR_NORMALIZE      } from '../modules/nf-core/crisprcleanr/normalize/main'
-include { BAGEL2_FC                   } from '../modules/local/bagel2/fc'
-include { BAGEL2_BF                   } from '../modules/local/bagel2/bf'
-include { BAGEL2_PR                   } from '../modules/local/bagel2/pr'
-include { BAGEL2_GRAPH                } from '../modules/local/bagel2/graph'
+    take:
+    ch_samplesheet // channel: samplesheet read in from --input

-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    RUN MAIN WORKFLOW
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
+    main:

-// Info required for completion email and summary
-def multiqc_report = []
+    // Set screening parameters and channels
+    ch_versions = Channel.empty()
+    ch_multiqc_files = Channel.empty()

-workflow CRISPRSEQ_SCREENING {
+    // Validate parameters specific to the screening subworkflow
+    validateParametersScreening()

-    ch_versions = Channel.empty()
+    //
+    // Initialise channels
+    //
+    INITIALISATION_CHANNEL_CREATION_SCREENING()

     if(!params.count_table){
-        //
-        // Create input channel from input file provided through params.input
-        //
-        Channel.fromSamplesheet("input")
-            .map{ meta, fastq_1, fastq_2, x, y, z ->
-                // x (reference), y (protospacer), and z (template) are part of the targeted workflows and we don't need them
-                return [ meta + [ single_end:fastq_2?false:true ], fastq_2?[ fastq_1, fastq_2 ]:[ fastq_1 ] ] }
-            .set { ch_input }
-
+        ch_input = ch_samplesheet

         //
         // MODULE: Run FastQC
@@ -98,26 +71,67 @@ workflow CRISPRSEQ_SCREENING {
         FASTQC (
             ch_input
         )
+        ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]})
         ch_versions = ch_versions.mix(FASTQC.out.versions.first())

+        // Set the adapter sequence to null to make the channel compatible with the targeted workflow
+        ch_cutadapt = ch_input.combine(Channel.value([[]]))
+        if(params.five_prime_adapter) {
+            CUTADAPT_FIVE_PRIME(
+                ch_cutadapt
+            )
+            CUTADAPT_FIVE_PRIME.out.reads.combine(Channel.value([[]])).set { ch_cutadapt }
+            ch_cutadapt.map{ meta, fastq, proto ->
+                        meta.id = "${meta.id}_trim"
+                        [meta, fastq, proto]
+                        }.set { ch_cutadapt }
+
+            ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT_FIVE_PRIME.out.log.collect{it[1]})
+            ch_versions = ch_versions.mix(CUTADAPT_FIVE_PRIME.out.versions)
+        }

-        ch_input_cutadapt = ch_input.combine(Channel.value([[]]))
+        if(params.three_prime_adapter) {
+            CUTADAPT_THREE_PRIME(
+                ch_cutadapt
+            )
+            ch_cutadapt = CUTADAPT_THREE_PRIME.out.reads.combine(Channel.value([[]]))
+            ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT_THREE_PRIME.out.log.collect{it[1]})
+            ch_versions = ch_versions.mix(CUTADAPT_THREE_PRIME.out.versions)
+        }

-        if(params.cutadapt) {
-            CUTADAPT(
-                ch_input_cutadapt
-            )
-
ch_versions = ch_versions.mix(CUTADAPT.out.versions) - CUTADAPT.out.reads - .map{ meta, fastq -> - [meta, [fastq]] + if(params.five_prime_adapter || params.three_prime_adapter) { + ch_cutadapt + .map{ meta, fastq, empty -> + [meta, fastq] + } + .set { ch_input } } - .set { ch_input } + + if(params.fasta){ + Channel.of("fasta") + .combine(Channel.fromPath(params.fasta)) + .set{ ch_fasta } + + BOWTIE2_BUILD(ch_fasta) + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + + BOWTIE2_ALIGN ( + ch_input, + BOWTIE2_BUILD.out.index, + false, + false + ) + + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + + BOWTIE2_ALIGN.out.aligned.map{ meta, bam -> + [meta, [bam]] + }.set{ch_input} } // this is to concatenate everything for mageck count - ch_input .map { meta, fastqs -> if(fastqs.size() == 1){ @@ -145,10 +159,11 @@ workflow CRISPRSEQ_SCREENING { // MAGECK_COUNT ( joined, - ch_library + INITIALISATION_CHANNEL_CREATION_SCREENING.out.library ) ch_versions = ch_versions.mix(MAGECK_COUNT.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(MAGECK_COUNT.out.summary.collect{it[1]}) MAGECK_COUNT.out.count.map { it -> it[1] @@ -159,14 +174,26 @@ workflow CRISPRSEQ_SCREENING { .set { ch_counts } } - if(params.crisprcleanr) { - ch_crispr_normalize = Channel.of([id: "count_table_normalize"]) - CRISPRCLEANR_NORMALIZE( - ch_crispr_normalize.concat(ch_counts,ch_crisprcleanr).collect(), - params.min_reads, - params.min_targeted_genes - ) + ch_crispr_normalize = Channel.of([id: "count_table_normalize"]).concat(ch_counts) + + if(params.crisprcleanr.endsWith(".csv")) { + CRISPRCLEANR_NORMALIZE( + ch_crispr_normalize.collect(), + '', + INITIALISATION_CHANNEL_CREATION_SCREENING.out.crisprcleanr, + params.min_reads, + params.min_targeted_genes + ) } else + { + ch_crispr_normalize = Channel.of([id: "count_table_normalize"]).concat(ch_counts) + CRISPRCLEANR_NORMALIZE( + ch_crispr_normalize.collect(), + INITIALISATION_CHANNEL_CREATION_SCREENING.out.crisprcleanr, + [], + params.min_reads, + params.min_targeted_genes) + } ch_versions = ch_versions.mix(CRISPRCLEANR_NORMALIZE.out.versions) @@ -176,8 +203,8 @@ workflow CRISPRSEQ_SCREENING { }.set { ch_counts } } - if(params.rra_contrasts) { - Channel.fromPath(params.rra_contrasts) + if(params.rra) { + Channel.fromPath(params.contrasts) .splitCsv(header:true, sep:';' ) .set { ch_contrasts } counts = ch_contrasts.combine(ch_counts) @@ -194,15 +221,16 @@ workflow CRISPRSEQ_SCREENING { ch_versions = ch_versions.mix(MAGECK_GRAPHRRA.out.versions) } - if(params.rra_contrasts) { - Channel.fromPath(params.rra_contrasts) + if(params.contrasts) { + Channel.fromPath(params.contrasts) .splitCsv(header:true, sep:';' ) - .set { ch_bagel } - counts = ch_bagel.combine(ch_counts) + .set { ch_contrasts } + counts = ch_contrasts.combine(ch_counts) + //Define non essential and essential genes channels for bagel2 - ch_bagel_reference_essentials= Channel.value(params.bagel_reference_essentials) - ch_bagel_reference_nonessentials= Channel.value(params.bagel_reference_nonessentials) + ch_bagel_reference_essentials= Channel.fromPath(params.bagel_reference_essentials).first() + ch_bagel_reference_nonessentials= Channel.fromPath(params.bagel_reference_nonessentials).first() BAGEL2_FC ( counts @@ -224,7 +252,6 @@ workflow CRISPRSEQ_SCREENING { BAGEL2_PR ( ch_bagel_pr ) - ch_versions = ch_versions.mix(BAGEL2_PR.out.versions) BAGEL2_GRAPH ( @@ -235,73 +262,67 @@ workflow CRISPRSEQ_SCREENING { } - if(params.mle_design_matrix) { - ch_mle = ch_counts.combine(ch_design) - 
ch_mle.map { - it -> [[id: it[1].getBaseName()], it[0], it[1]] - }.set { ch_designed_mle } - - MAGECK_MLE ( - ch_designed_mle - ) - - ch_versions = ch_versions.mix(MAGECK_MLE.out.versions) - - + if((params.mle_design_matrix) || (params.contrasts && !params.rra) || (params.day0_label)) { + if(params.mle_design_matrix) { + INITIALISATION_CHANNEL_CREATION_SCREENING.out.design.map { + it -> [[id: it.getBaseName()], it] + }.set { ch_designed_mle } + + ch_mle = ch_designed_mle.combine(ch_counts) + MAGECK_MLE_MATRIX (ch_mle) + ch_versions = ch_versions.mix(MAGECK_MLE_MATRIX.out.versions) + MAGECK_FLUTEMLE(MAGECK_MLE_MATRIX.out.gene_summary) + ch_versions = ch_versions.mix(MAGECK_FLUTEMLE.out.versions) + } + if(params.contrasts) { + MATRICESCREATION(ch_contrasts) + ch_mle = MATRICESCREATION.out.design_matrix.combine(ch_counts) + MAGECK_MLE (ch_mle) + ch_versions = ch_versions.mix(MAGECK_MLE.out.versions) + MAGECK_FLUTEMLE_CONTRASTS(MAGECK_MLE.out.gene_summary) + ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_CONTRASTS.out.versions) + ch_venndiagram = BAGEL2_PR.out.pr.join(MAGECK_MLE.out.gene_summary) + VENNDIAGRAM(ch_venndiagram) + ch_versions = ch_versions.mix(VENNDIAGRAM.out.versions) + } + if(params.day0_label) { + ch_mle = Channel.of([id: "day0"]).merge(Channel.of([[]])).merge(ch_counts) + MAGECK_MLE_DAY0 (ch_mle) + ch_versions = ch_versions.mix(MAGECK_MLE_DAY0.out.versions) + MAGECK_FLUTEMLE_DAY0(MAGECK_MLE_DAY0.out.gene_summary) + ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_DAY0.out.versions) + } } - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') - ) + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowCrisprseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowCrisprseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - if(!params.count_table) { - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - } else { - ch_multiqc_files = channel.empty() - } + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), - ch_multiqc_config.collect().ifEmpty([]), - ch_multiqc_custom_config.collect().ifEmpty([]), - ch_multiqc_logo.collect().ifEmpty([]) + ch_multiqc_config.toList(), + ch_multiqc_custom_config.toList(), + ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() - ch_versions = ch_versions.mix(MULTIQC.out.versions) -} -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.adaptivecard(workflow, params, summary_params, projectDir, log) - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/crisprseq_targeted.nf b/workflows/crisprseq_targeted.nf index abf67bb2..857867c7 100644 --- a/workflows/crisprseq_targeted.nf +++ b/workflows/crisprseq_targeted.nf @@ -1,65 +1,22 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY + IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowCrisprseq.initialise(params, log) -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules -// - -// -// MODULE -// -include { FIND_ADAPTERS } from '../modules/local/find_adapters' -include { EXTRACT_UMIS } from '../modules/local/extract_umis' -include { ORIENT_REFERENCE } from '../modules/local/orient_reference' -include { CIGAR_PARSER } from '../modules/local/cigar_parser' -include { PREPROCESSING_SUMMARY } from '../modules/local/preprocessing_summary' -include { CLUSTERING_SUMMARY } from '../modules/local/clustering_summary' -include { ALIGNMENT_SUMMARY } from '../modules/local/alignment_summary' -include { TEMPLATE_REFERENCE } from '../modules/local/template_reference' -include { CRISPRSEQ_PLOTTER } from '../modules/local/crisprseq_plotter' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -// -// MODULE: Installed directly from nf-core/modules -// +// Local modules +include { FIND_ADAPTERS } from '../modules/local/find_adapters' +include { EXTRACT_UMIS } from '../modules/local/extract_umis' +include { ORIENT_REFERENCE } from '../modules/local/orient_reference' +include { CIGAR_PARSER } from '../modules/local/cigar_parser' +include { PREPROCESSING_SUMMARY } from '../modules/local/preprocessing_summary' +include { CLUSTERING_SUMMARY } from '../modules/local/clustering_summary' +include { ALIGNMENT_SUMMARY } from '../modules/local/alignment_summary' +include { TEMPLATE_REFERENCE } from '../modules/local/template_reference' +include { CRISPRSEQ_PLOTTER } from '../modules/local/crisprseq_plotter' +// nf-core modules include { FASTQC } from '../modules/nf-core/fastqc/main' include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' include { PEAR } from '../modules/nf-core/pear/main' include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' include { SEQTK_SEQ as SEQTK_SEQ_MASK } from '../modules/nf-core/seqtk/seq/main' @@ -80,7 +37,13 @@ include { MINIMAP2_INDEX } from '../modules/nf-core/m include { MEDAKA } from '../modules/nf-core/medaka/main' include { CUTADAPT } from '../modules/nf-core/cutadapt/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' - +// Local subworkflows +include { INITIALISATION_CHANNEL_CREATION_TARGETED } from '../subworkflows/local/utils_nfcore_crisprseq_pipeline' +// Functions +include { paramsSummaryMap } from 'plugin/nf-validation' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_crisprseq_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -127,122 +90,34 @@ def umi_to_sequence_centroid(cluster) { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow 
CRISPRSEQ_TARGETED { - ch_versions = Channel.empty() + take: + ch_input_reads // channel: input reads read in from --input + ch_input_reference // channel: reference sequence read in from --input + ch_input_template // channel: template sequence read in from --input + ch_input_protospacer // channel: protospacer sequence read in from --input - // - // Create input channel from input file provided through params.input - // - Channel.fromSamplesheet("input") - .multiMap { meta, fastq_1, fastq_2, reference, protospacer, template -> - // meta.condition is part of the screening workflow and we need to remove it - reads: [ meta.id, meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], fastq_2?[ fastq_1, fastq_2 ]:[ fastq_1 ] ] - reference: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], reference] - protospacer: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], protospacer] - template: [meta - meta.subMap('condition') + [ single_end:fastq_2?false:true, self_reference:reference?false:true, template:template?true:false ], template] - } - .set { ch_input } - - ch_input - .reads - .groupTuple() - .map { - WorkflowCrisprseq.validateInput(it) - } - // Separate samples by the ones containing all reads in one file or the ones with many files to be concatenated - .branch { - meta, fastqs -> - single : fastqs.size() == 1 - return [ meta, fastqs.flatten() ] - multiple: fastqs.size() > 1 - return [ meta, fastqs.flatten() ] - } - .set { ch_fastq } + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // - // Add reference sequences to file + // Initialise channels // - ch_input.reference - .tap{ meta_reference } - .filter{ meta, sequence -> sequence instanceof String } - .collectFile() { meta, reference -> - [ "${meta.id}_reference.fasta", ">${meta.id}\n${reference}\n" ] // Write each reference sequence to a file - } - .map{ new_file -> - [new_file.baseName.split("_reference")[0], new_file] // create a channel with the meta.id and the new file - } - .join(meta_reference - .map{ meta, reference -> - [meta.id, meta] // Join the channel by meta.id with the meta map - } + INITIALISATION_CHANNEL_CREATION_TARGETED( + ch_input_reads, + ch_input_reference, + ch_input_template, + ch_input_protospacer ) - .map{ metaid, new_file, meta -> - [meta, new_file] // Obtain the final channel with meta map and the new file - } - .set{ ch_seq_reference } - - - // - // Add template sequences to file - // - ch_input.template - .tap{ meta_template } - .filter{ meta, sequence -> sequence instanceof String } - .collectFile() { meta, template -> - [ "${meta.id}_template.fasta", ">${meta.id}\n${template}\n" ] // Write each template sequence to a file - } - .map{ new_file -> - [new_file.baseName.split("_template")[0], new_file] // create a channel with the meta.id and the new file - } - .join(meta_template - .map{ meta, template -> - [meta.id, meta] // Join the channel by meta.id with the meta map - } - ) - .map{ metaid, new_file, meta -> - [meta, new_file] // Obtain the final channel with meta map and the new file - } - .set{ ch_seq_template } - - - // Join channels with reference and protospacer - // to channel: [ meta, reference, protospacer] - if (!params.reference_fasta && !params.protospacer) { - ch_seq_reference - .join(ch_input.protospacer) - .set{ 
reference_protospacer } - } else if (!params.reference_fasta) { - // If a protospacer was provided through the --protospacer param instead of the samplesheet - ch_protospacer = Channel.of(params.protospacer) - ch_seq_reference - .combine(ch_protospacer) - .set{ reference_protospacer } - } else if (!params.protospacer) { - // If a reference was provided through a fasta file or igenomes instead of the samplesheet - ch_reference = Channel.fromPath(params.reference_fasta) - ch_input.protospacer - .combine(ch_reference) - .set{ reference_protospacer } - } else { - ch_reference = Channel.fromPath(params.reference_fasta) - ch_protospacer = Channel.of(params.protospacer) - ch_input.reads - .combine(ch_reference) - .combine(ch_protospacer) - .set{ reference_protospacer } - } - // // MODULE: Prepare reference sequence // ORIENT_REFERENCE ( - reference_protospacer + INITIALISATION_CHANNEL_CREATION_TARGETED.out.reference_protospacer ) ch_versions = ch_versions.mix(ORIENT_REFERENCE.out.versions) @@ -250,11 +125,11 @@ workflow CRISPRSEQ_TARGETED { // MODULE: Concatenate FastQ files from same sample if required // CAT_FASTQ ( - ch_fastq.multiple + INITIALISATION_CHANNEL_CREATION_TARGETED.out.fastq_multiple ) .reads .groupTuple(by: [0]) - .mix(ch_fastq.single) + .mix(INITIALISATION_CHANNEL_CREATION_TARGETED.out.fastq_single) // Separate samples by paired-end or single-end .branch { meta, fastq -> @@ -273,16 +148,66 @@ workflow CRISPRSEQ_TARGETED { ch_cat_fastq.paired ) .assembled + .map { + // Set single_end to true for the assembled reads + meta, assembled -> + return [ meta - meta.subMap('single_end') + [ single_end:true ], assembled ] + } .mix( ch_cat_fastq.single ) .set { ch_pear_fastq } ch_versions = ch_versions.mix(PEAR.out.versions) + // Change reference, protospacer and template channels to have the same meta information as the reads + ch_pear_fastq + .map {meta, reads -> + // save single_end value and remove the key from the meta map + single_end = meta.single_end + return [ meta - meta.subMap('single_end'), reads, single_end ] + } + .tap { no_single_end } + .join( + ORIENT_REFERENCE.out.reference + .map {meta, reference -> + // Remove single_end from the meta map to allow joining two channels with different single_end values + return [ meta - meta.subMap('single_end'), reference ] + } + ) + .map {meta, reads, single_end, reference -> + // Add the correct single_end value to the reference meta map. 
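+            // (illustrative) a meta map [id:'sample1'] with single_end=true becomes
+            // [ [id:'sample1', single_end:true], reference.fasta ]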
+ return [ meta + ["single_end": single_end], reference ] + } + .tap{ ch_oriented_reference } + no_single_end + .join( + INITIALISATION_CHANNEL_CREATION_TARGETED.out.template + .map {meta, template -> + return [ meta - meta.subMap('single_end'), template ] + } + ) + .map {meta, reads, single_end, template -> + return [ meta + ["single_end": single_end], template ] + } + .set{ ch_template } + no_single_end + .join( + INITIALISATION_CHANNEL_CREATION_TARGETED.out.reference_protospacer + .map {meta, reference, protospacer -> + return [ meta - meta.subMap('single_end'), protospacer ] + } + ) + .map {meta, reads, single_end, protospacer -> + return [ meta + ["single_end": single_end], protospacer ] + } + .set{ ch_protospacer } + + // // MODULE: Run FastQC // FASTQC ( ch_pear_fastq ) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) ch_versions = ch_versions.mix(FASTQC.out.versions.first()) ch_trimmed = Channel.empty() @@ -314,6 +239,7 @@ workflow CRISPRSEQ_TARGETED { CUTADAPT ( ch_adapter_seqs.adapters ) + ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect{it[1]}) ch_versions = ch_versions.mix(CUTADAPT.out.versions) ch_adapter_seqs.no_adapters @@ -341,25 +267,50 @@ workflow CRISPRSEQ_TARGETED { ch_cat_fastq.paired .mix(ch_cat_fastq.single) .join(PEAR.out.assembled, remainder: true) - .join(SEQTK_SEQ_MASK.out.fastx) - .join(CUTADAPT.out.log) - .map { meta, reads, assembled, masked, trimmed -> + .map { meta, reads, assembled -> + // Remove the single_end key from the meta map to allow joining channels with different single_end values + return [ meta - meta.subMap('single_end'), reads, assembled] + } + .join( + SEQTK_SEQ_MASK.out.fastx + .map { meta, masked -> + single_end = meta.single_end + return [ meta - meta.subMap('single_end'), masked, single_end] + } + ) + .join( + CUTADAPT.out.log + .map { meta, trimmed -> + return [ meta - meta.subMap('single_end'), trimmed] + } + ) + .map { meta, reads, assembled, masked, single_end, trimmed -> if (assembled == null) { assembled = [] } - return [ meta, reads, assembled, masked, trimmed ] + return [ meta + ["single_end": single_end], reads, assembled, masked, trimmed ] } .set { ch_preprocessing_summary_data } } else { ch_cat_fastq.paired .mix(ch_cat_fastq.single) .join(PEAR.out.assembled, remainder: true) - .join(SEQTK_SEQ_MASK.out.fastx) - .map { meta, reads, assembled, masked -> + .map { meta, reads, assembled -> + // Remove the single_end key from the meta map to allow joining channels with different single_end values + return [ meta - meta.subMap('single_end'), reads, assembled] + } + .join( + SEQTK_SEQ_MASK.out.fastx + .map { meta, masked -> + single_end = meta.single_end + return [ meta - meta.subMap('single_end'), masked, single_end] + } + ) + .map { meta, reads, assembled, masked, single_end -> if (assembled == null) { assembled = [] } - return [ meta, reads, assembled, masked, [] ] + return [ meta + ["single_end": single_end], reads, assembled, masked, [] ] } .set { ch_preprocessing_summary_data } } @@ -612,7 +563,7 @@ workflow CRISPRSEQ_TARGETED { if (params.aligner == "minimap2") { MINIMAP2_ALIGN_ORIGINAL ( ch_preprocess_reads - .join(ORIENT_REFERENCE.out.reference), + .join(ch_oriented_reference), true, false, true @@ -626,7 +577,7 @@ workflow CRISPRSEQ_TARGETED { // if (params.aligner == "bwa") { BWA_INDEX ( - ORIENT_REFERENCE.out.reference + ch_oriented_reference ) ch_versions = ch_versions.mix(BWA_INDEX.out.versions) BWA_MEM ( @@ -643,7 +594,7 @@ workflow CRISPRSEQ_TARGETED { // if (params.aligner == 
"bowtie2") { BOWTIE2_BUILD ( - ORIENT_REFERENCE.out.reference + ch_oriented_reference ) ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) BOWTIE2_ALIGN ( @@ -679,8 +630,8 @@ workflow CRISPRSEQ_TARGETED { // MODULE: Obtain a new reference with the template modification // TEMPLATE_REFERENCE ( - ORIENT_REFERENCE.out.reference - .join(ch_seq_template) + ch_oriented_reference + .join(ch_template) ) ch_versions = ch_versions.mix(TEMPLATE_REFERENCE.out.versions.first()) @@ -690,30 +641,20 @@ workflow CRISPRSEQ_TARGETED { // MINIMAP2_ALIGN_TEMPLATE ( TEMPLATE_REFERENCE.out.fasta - .join(ORIENT_REFERENCE.out.reference), + .join(ch_oriented_reference), true, false, true ) .bam - .map { - meta, bam -> - if (bam.baseName.contains("template-align")) { - return [ meta, bam ] - } else { - new_file = bam.parent / bam.baseName + "_template-align." + bam.extension - bam.renameTo(new_file) - return[ meta, new_file ] - } - } .set { ch_template_bam } ch_versions = ch_versions.mix(MINIMAP2_ALIGN_TEMPLATE.out.versions) ch_mapped_bam .join(SAMTOOLS_INDEX.out.bai) - .join(ORIENT_REFERENCE.out.reference) - .join(ch_input.protospacer) - .join(ch_seq_template, remainder: true) + .join(ch_oriented_reference) + .join(ch_protospacer) + .join(ch_template, remainder: true) .join(ch_template_bam, remainder: true) .join(TEMPLATE_REFERENCE.out.fasta, remainder: true) .join(ALIGNMENT_SUMMARY.out.summary) @@ -738,6 +679,9 @@ workflow CRISPRSEQ_TARGETED { CIGAR_PARSER ( ch_to_parse_cigar ) + ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.processing.collect{it[1]}) + ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.edition.collect{it[2]}) + ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.qcindels.collect{it[1]}) ch_versions = ch_versions.mix(CIGAR_PARSER.out.versions.first()) @@ -746,39 +690,32 @@ workflow CRISPRSEQ_TARGETED { // CRISPRSEQ_PLOTTER ( CIGAR_PARSER.out.indels - .join(ORIENT_REFERENCE.out.reference) - .join(ch_input.protospacer) + .join(ch_oriented_reference) + .join(ch_protospacer) ) ch_versions = ch_versions.mix(CRISPRSEQ_PLOTTER.out.versions.first()) // - // MODULE: Dump software versions + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique{ it.text }.collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_pipeline_software_mqc_versions.yml', sort: true, newLine: true) + .set { ch_collated_versions } // // MODULE: MultiQC // - workflow_summary = WorkflowCrisprseq.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowCrisprseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) - - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.processing.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.edition.collect{it[2]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(CIGAR_PARSER.out.qcindels.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) - ch_multiqc_files = 
ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) - if (params.overrepresented) { - ch_multiqc_files = ch_multiqc_files.mix(CUTADAPT.out.log.collect{it[1]}.ifEmpty([])) - } + ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath(params.multiqc_logo, checkIfExists: true) : Channel.empty() + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value(methodsDescriptionText(ch_multiqc_custom_methods_description)) + ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml', sort: false)) MULTIQC ( ch_multiqc_files.collect(), @@ -786,28 +723,8 @@ workflow CRISPRSEQ_TARGETED { ch_multiqc_custom_config.toList(), ch_multiqc_logo.toList() ) - multiqc_report = MULTIQC.out.report.toList() -} -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/