diff --git a/.github/workflow-templates/readme-template.md b/.github/workflow-templates/readme-template.md deleted file mode 100644 index 0bc4b0d70..000000000 --- a/.github/workflow-templates/readme-template.md +++ /dev/null @@ -1,16 +0,0 @@ -# container - -Main tool : [](link to program) - -Additional tools: -- list - -Full documentation: link to documentation or wiki - - - -## Example Usage - -```bash - -``` diff --git a/.github/workflows/build-to-deploy.yml b/.github/workflows/build-to-deploy.yml index d3d21c94e..6f49535d7 100644 --- a/.github/workflows/build-to-deploy.yml +++ b/.github/workflows/build-to-deploy.yml @@ -51,7 +51,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Check Out Repo (+ download Git LFS dependencies) # each job runs in an isolated environment, so need to check out the repo in each job - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: lfs: true @@ -61,10 +61,10 @@ jobs: - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Cache Docker layers # also need to set up the cache in each job, using the same prefix (here ${{ runner.os }}-buildx) makes it sharable between jobs - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /tmp/.buildx-cache-${{ inputs.cache }} key: ${{ runner.os }}-buildx-${{ inputs.cache }}-${{ github.sha }} @@ -82,13 +82,13 @@ jobs: quay.io/${{ inputs.repository_name }}/${{ inputs.container_name }}:latest - name: Login to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.docker_username }} password: ${{ secrets.docker_access_token }} - name: Login to Quay - uses: docker/login-action@v1 + uses: docker/login-action@v2 if: ${{ inputs.push_quay }} with: registry: quay.io @@ -97,7 +97,7 @@ jobs: - name: Build and push user-defined tag to DockerHub id: docker_build_user_defined_tag - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: context: ${{ inputs.path_to_context }} file: ${{ inputs.path_to_context }}/${{ inputs.dockerfile_name }} @@ -111,7 +111,7 @@ jobs: - name: Build and push latest tag to DockerHub id: docker_build_latest_tag - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 if: ${{ inputs.push_latest_tag }} with: context: ${{ inputs.path_to_context }} @@ -126,7 +126,7 @@ jobs: - name: Build and push user-defined tag to Quay id: quay_build_user_defined_tag - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 if: ${{ inputs.push_quay }} with: context: ${{ inputs.path_to_context }} @@ -141,7 +141,7 @@ jobs: - name: Build and push latest tag to Quay id: quay_build_latest_tag - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 if: ${{ inputs.push_quay && inputs.push_latest_tag }} with: context: ${{ inputs.path_to_context }} diff --git a/.github/workflows/build-to-test.yml b/.github/workflows/build-to-test.yml index 4cd45388e..11b4f8dad 100644 --- a/.github/workflows/build-to-test.yml +++ b/.github/workflows/build-to-test.yml @@ -22,14 +22,17 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Check Out Repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Cache Docker layers - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /tmp/.buildx-cache-${{ inputs.cache }} key: ${{ runner.os }}-buildx-${{ inputs.cache }}-${{ github.sha }} @@ 
-38,7 +41,7 @@ jobs: - name: Build to test id: docker_build_to_test - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: context: ${{ inputs.path_to_context }} file: ${{ inputs.path_to_context }}/${{ inputs.dockerfile_name }} diff --git a/.github/workflows/manual-deploy.yml b/.github/workflows/manual-deploy.yml index c4b3e5070..2cd7c250a 100644 --- a/.github/workflows/manual-deploy.yml +++ b/.github/workflows/manual-deploy.yml @@ -30,6 +30,8 @@ on: description: "Repository name. /tool:tag (Usually staphb)" default: "staphb" +run-name: Deploy ${{ github.event.inputs.tool }} version ${{ github.event.inputs.version }} + jobs: # This job calls a workflow to build the image to the 'test' stage diff --git a/.github/workflows/run-singularity.yml b/.github/workflows/run-singularity.yml index 9ad1e86ed..251890e89 100644 --- a/.github/workflows/run-singularity.yml +++ b/.github/workflows/run-singularity.yml @@ -23,7 +23,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Check out Repo - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Singularity uses: eWaterCycle/setup-singularity@v7 diff --git a/.github/workflows/test-PR-dockerfiles.yml b/.github/workflows/test-PR-dockerfiles.yml index bf8bdc3ef..bbb53d2f8 100644 --- a/.github/workflows/test-PR-dockerfiles.yml +++ b/.github/workflows/test-PR-dockerfiles.yml @@ -15,9 +15,9 @@ jobs: outputs: json: ${{ steps.files.outputs.added_modified }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - id: files - uses: Ana06/get-changed-files@v2.1.0 + uses: Ana06/get-changed-files@v2.2.0 with: format: 'json' filter: 'Dockerfile' @@ -31,60 +31,96 @@ jobs: matrix: added_modified: ${{ fromJson(needs.find_new_dockerfiles.outputs.json) }} steps: - - uses: actions/checkout@v2 - name: parse file path id: parse run: | - tool=$(echo "${{matrix.added_modified}}" | cut -f 1 -d "/" ) - version=$(echo "${{matrix.added_modified}}" | cut -f 2 -d "/" ) - echo "::set-output name=tool::$tool" - echo "::set-output name=version::$version" - + tool=$(echo "${{ matrix.added_modified }}" | cut -f 1 -d "/" ) + version=$(echo "${{ matrix.added_modified }}" | cut -f 2 -d "/" ) + echo "tool=$tool" >> $GITHUB_OUTPUT + echo "version=$version" >> $GITHUB_OUTPUT + ##### --------------------------------------------------------------------------- ##### -##### Attempted shortcut ##### +##### Workflows still cannot be run in parallel as of 2022-12-01 ##### ##### --------------------------------------------------------------------------- ##### -# - name: test -# uses: ./.github/workflows/build-to-test.yml -# with: -# path_to_context: "./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }}" -# cache: ${{ steps.parse.outputs.tool }} -# I think the ideal is to call ./.github/workflows/build-to-test.yml, but I kept getting the error: -# Can't find 'action.yml', 'action.yaml' or 'Dockerfile' under '/home/runner/work/docker-builds/docker-builds/.github/workflows/build-to-test.yml'. Did you forget to run actions/checkout before running your local action? -# Instead, I've created a stop-gap by copying the steps from ./.github/workflows/build-to-test.yml to here, which seems to work fine. 
+# - name: test files +# uses: ./.github/workflows/build-to-test.yml +# with: +# path_to_context: "./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }}" +# cache: ${{ steps.parse.outputs.tool }} ##### --------------------------------------------------------------------------- ##### -##### The long way (which works) ##### -##### --------------------------------------------------------------------------- ##### +##### The steps of ./.github/workflows/build-to-test.yml are copied here. ##### +##### --------------------------------------------------------------------------- ##### + + - name: Checkout + uses: actions/checkout@v3 + + - name: Layer check + run: | + #checking layers + warning='' + app_layer=$(grep FROM ./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }}/Dockerfile | grep "as app") + tst_layer=$(grep FROM ./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }}/Dockerfile | grep "as test") + if [ -z "$app_layer" ] ; then echo "FATAL : app layer is missing" ; warning='warning' ; fi + if [ -z "$tst_layer" ] ; then echo "FATAL : test layer is missing" ; warning='warning' ; fi + if [ -n "$warning" ] ; then echo "Please see template for recommended format https://github.com/StaPH-B/docker-builds/blob/master/dockerfile-template/Dockerfile" ; exit 1 ; fi + + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Cache Docker layers - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /tmp/.buildx-cache-${{ steps.parse.outputs.tool }} key: ${{ runner.os }}-buildx-${{ steps.parse.outputs.tool }}-${{ github.sha }} restore-keys: | ${{ runner.os }}-buildx-${{ steps.parse.outputs.tool }} - + - name: Build to test id: docker_build_to_test - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: context: ./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }} file: ./${{ steps.parse.outputs.tool }}/${{ steps.parse.outputs.version }}/Dockerfile target: test + tags: ${{ steps.parse.outputs.tool }}:${{ steps.parse.outputs.version }} + load: true push: false cache-from: type=local,src=/tmp/.buildx-cache-${{ steps.parse.outputs.tool }} cache-to: type=local,mode=max,dest=/tmp/.buildx-cache-${{ steps.parse.outputs.tool }}-new + - name: Check labels + run: | + # checking labels + warning='' + for label in base.image dockerfile.version software software.version description website maintainer maintainer.email + do + value=$(docker inspect --format '{{ index .Config.Labels "'$label'"}}' ${{ steps.parse.outputs.tool }}:${{ steps.parse.outputs.version }} ) + if [ -z "$value" ] ; then warning='warning' ; echo "FATAL : $label label not found in ${{ steps.parse.outputs.tool }}:${{ steps.parse.outputs.version }}" ; fi + done + + if [ -z "$(docker inspect --format '{{.Config.WorkingDir}}' ${{ steps.parse.outputs.tool }}:${{ steps.parse.outputs.version }} )" ] ; + then + warning='warning' + echo "FATAL : WORKDIR not set." 
+ fi + + if [ -n "$warning" ] ; then echo "Please see template for recommended format https://github.com/StaPH-B/docker-builds/blob/master/dockerfile-template/Dockerfile" ; exit 1 ; fi + + - name: Check commonly overlooked commands + run: | + # checking commands + docker run ${{ steps.parse.outputs.tool }}:${{ steps.parse.outputs.version }} ps --help + - name: Move cache # apparently prevents the cache from growing in size forever run: | rm -rf /tmp/.buildx-cache-${{ steps.parse.outputs.tool }} mv /tmp/.buildx-cache-${{ steps.parse.outputs.tool }}-new /tmp/.buildx-cache-${{ steps.parse.outputs.tool }} - + - name: Image digest run: echo ${{ steps.docker_build.outputs.digest }} - diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 000000000..e5a3bb083 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,25 @@ +# shamelessly stolen and modified from: https://github.com/rpetit3/pasty/blob/main/.gitpod.yml + +# includes docker, nextflow, conda, mamba +# I believe this is the source dockerfile for nfcore/gitpod:latest https://github.com/seqeralabs/nf-training-public/blob/master/docker/Dockerfile +image: nfcore/gitpod:latest + +tasks: + - name: Initialize + init: | + pip install graphviz + . /opt/conda/etc/profile.d/conda.sh + conda activate base + git checkout main +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code + - anwar.papyrus-pdf # PDF preview \ No newline at end of file diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..9841f061e --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,95 @@ +# Citizen Code of Conduct + +## 1. Purpose + +A primary goal of StaPH-B/docker-builds is to be inclusive to every bioinformatician in public health (or adjacent interests), with the most varied and diverse backgrounds possible. As such, we are committed to providing a friendly, safe and welcoming environment for all, regardless of gender, sexual orientation, ability, ethnicity, socioeconomic status, and religion (or lack thereof). + +This code of conduct outlines our expectations for all those who participate in our community, as well as the consequences for unacceptable behavior. + +We invite all those who participate in Docker Builds to help us create safe and positive experiences for everyone. + +## 2. Open [Source/Culture/Tech] Citizenship + +A supplemental goal of this Code of Conduct is to increase open [source/culture/tech] citizenship by encouraging participants to recognize and strengthen the relationships between our actions and their effects on our community. + +Communities mirror the societies in which they exist and positive action is essential to counteract the many forms of inequality and abuses of power that exist in society. + +## 3. 
Expected Behavior + +The following behaviors are expected and requested of all community members: + + * Participate in an authentic and active way. In doing so, you contribute to the health and longevity of this community. + * Exercise consideration and respect in your speech and actions. + * Attempt collaboration before conflict. + * Refrain from demeaning, discriminatory, or harassing behavior and speech. + * Be mindful of your surroundings and of your fellow participants. Alert community leaders if you notice a dangerous situation, someone in distress, or violations of this Code of Conduct, even if they seem inconsequential. + * Remember that community event venues may be shared with members of the public; please be respectful to all patrons of these locations. + +## 4. Unacceptable Behavior + +The following behaviors are considered harassment and are unacceptable within our community: + + * Violence, threats of violence or violent language directed against another person. + * Sexist, racist, homophobic, transphobic, ableist or otherwise discriminatory jokes and language. + * Posting or displaying sexually explicit or violent material. + * Posting or threatening to post other people's personally identifying information ("doxing"). + * Personal insults, particularly those related to gender, sexual orientation, race, religion, or disability. + * Inappropriate photography or recording. + * Inappropriate physical contact. You should have someone's consent before touching them. + * Unwelcome sexual attention. This includes, sexualized comments or jokes; inappropriate touching, groping, and unwelcomed sexual advances. + * Deliberate intimidation, stalking or following (online or in person). + * Advocating for, or encouraging, any of the above behavior. + * Sustained disruption of community events, including talks and presentations. + +## 5. Weapons Policy + +No weapons will be allowed at StaPH-B/docker-builds events, community spaces, or in other spaces covered by the scope of this Code of Conduct. Weapons include but are not limited to guns, explosives (including fireworks), and large knives such as those used for hunting or display, as well as any other item used for the purpose of causing injury or harm to others. Anyone seen in possession of one of these items will be asked to leave immediately, and will only be allowed to return without the weapon. Community members are further expected to comply with all state and local laws on this matter. + +## 6. Consequences of Unacceptable Behavior + +Unacceptable behavior from any community member, including sponsors and those with decision-making authority, will not be tolerated. + +Anyone asked to stop unacceptable behavior is expected to comply immediately. + +If a community member engages in unacceptable behavior, the community organizers may take any action they deem appropriate, up to and including a temporary ban or permanent expulsion from the community without warning (and without refund in the case of a paid event). + +## 7. Reporting Guidelines + +If you are subject to or witness unacceptable behavior, or have any other concerns, please notify a community organizer as soon as possible. + +A community organizer includes +- [Erin Young](eriny@utah.gov) +- [Curtis Kapsak](kapsakcj@gmail.com) +- Anyone on the StaPH-B steering committee + +Additionally, community organizers are available to help community members engage with local law enforcement or to otherwise help those experiencing unacceptable behavior feel safe. 
In the context of in-person events, organizers will also provide escorts as desired by the person experiencing distress. + +## 8. Addressing Grievances + +If you feel you have been falsely or unfairly accused of violating this Code of Conduct, you should notify StaPH-B with a concise description of your grievance. Your grievance will be handled in accordance with our existing governing policies. + + + +## 9. Scope + +We expect all community participants (contributors, paid or otherwise; sponsors; and other guests) to abide by this Code of Conduct in all community venues--online and in-person--as well as in all one-on-one communications pertaining to community business. + +This code of conduct and its related procedures also applies to unacceptable behavior occurring outside the scope of community activities when such behavior has the potential to adversely affect the safety and well-being of community members. + +## 10. Contact info + +If somebody is observed to be in breach of this code of conduct, please contact Curtis (kapsakcj@gmail.com) and Erin (eriny@utah.gov) privately via email. + +## 11. License and attribution + +The Citizen Code of Conduct is distributed by [Stumptown Syndicate](http://stumptownsyndicate.org) under a [Creative Commons Attribution-ShareAlike license](http://creativecommons.org/licenses/by-sa/3.0/). + +Portions of text derived from the [Django Code of Conduct](https://www.djangoproject.com/conduct/) and the [Geek Feminism Anti-Harassment Policy](http://geekfeminism.wikia.com/wiki/Conference_anti-harassment/Policy). + +_Revision 2.3. Posted 6 March 2017._ + +_Revision 2.2. Posted 4 February 2016._ + +_Revision 2.1. Posted 23 June 2014._ + +_Revision 2.0, adopted by the [Stumptown Syndicate](http://stumptownsyndicate.org) board on 10 January 2013. Posted 17 March 2013._ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..7f2cce0ae --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,3 @@ +Thank you for your interest in contributing to the StaPH-B/docker-builds repository! + +Please see [https://staphb.org/docker-builds/contribute/](https://staphb.org/docker-builds/contribute/) for more information on how to contribute. diff --git a/Program_Licenses.md b/Program_Licenses.md index c948ead30..bcd0c34d2 100644 --- a/Program_Licenses.md +++ b/Program_Licenses.md @@ -17,6 +17,7 @@ The licenses of the open-source software that is contained in these Docker image | bedtools | MIT | https://github.com/arq5x/bedtools2/blob/master/LICENSE | | blast+ | Public Domain | https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE | | bowtie2 | GNU GPLv3 | https://github.com/BenLangmead/bowtie2/blob/master/LICENSE | +| BUSCO | MIT | https://gitlab.com/ezlab/busco/-/raw/master/LICENSE | | BWA | GNU GPLv3 | https://github.com/lh3/bwa/blob/master/COPYING | | Canu
Racon
Minimap2 | GNU GPLv3 (Canu),
MIT (Racon),
MIT (Minimap2) | https://github.com/marbl/canu/blob/master/README.license.GPL https://github.com/isovic/racon/blob/master/LICENSE https://github.com/lh3/minimap2/blob/master/LICENSE.txt | | centroid | GitHub No License | https://github.com/https://github.com/stjacqrm/centroid | @@ -24,9 +25,13 @@ The licenses of the open-source software that is contained in these Docker image | cfsan-snp-pipeline | non-standard license see --> | https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/LICENSE.txt | | Circlator | GNU GPLv3 | https://github.com/sanger-pathogens/circlator/blob/master/LICENSE | | colorid | MIT | https://github.com/hcdenbakker/colorid/blob/master/LICENSE | +| datasets-sars-cov-2 | Apache 2.0 | https://github.com/CDCgov/datasets-sars-cov-2/blob/master/LICENSE | +| dnaapler | MIT | https://github.com/gbouras13/dnaapler/blob/main/LICENSE | +| dragonflye | GNU GPLv3 | https://github.com/rpetit3/dragonflye/blob/main/LICENSE | | DSK | GNU Affero GPLv3 | https://github.com/GATB/dsk/blob/master/LICENSE | -| emm-typing-tool | GNU GPLv3 | https://github.com/phe-bioinformatics/emm-typing-tool/blob/master/LICENCE | | emboss | GNU GPLv3 | http://emboss.sourceforge.net/licence/ | +| emmtyper | GNU GPLv3 | https://github.com/MDU-PHL/emmtyper/blob/master/LICENSE | +| emm-typing-tool | GNU GPLv3 | https://github.com/phe-bioinformatics/emm-typing-tool/blob/master/LICENCE | | EToKi | GNU GPLv3 | https://github.com/zheminzhou/EToKi/blob/master/LICENSE | | FastANI | Apache v2.0 | https://github.com/ParBLiSS/FastANI/blob/master/LICENSE | | Fastp | MIT | https://github.com/OpenGene/fastp/blob/master/LICENSE | @@ -38,10 +43,12 @@ The licenses of the open-source software that is contained in these Docker image | FLASH | GNU GPLv3 | https://github.com/ebiggers/flash/blob/master/COPYING | | Flye | BSD-3 | https://github.com/fenderglass/Flye/blob/flye/LICENSE | | Freyja | BSD-2 | https://github.com/andersen-lab/Freyja/blob/main/LICENSE | -| GAMBIT | GNU aGPLv3 | https://github.com/hesslab-gambit/gambit/blob/master/LICENSE | +| GAMBIT | GNU aGPLv3 | https://github.com/jlumpe/gambit/blob/master/LICENSE | | GAMMA | Apache 2.0 | https://github.com/rastanton/GAMMA/blob/main/LICENSE | | Genotyphi | GNU GPLv3 | https://github.com/katholt/genotyphi/blob/main/LICENSE | +| gfastats | MIT | https://github.com/vgl-hub/gfastats/blob/main/LICENSE | | Hmmer | BSD-3 | http://eddylab.org/software/hmmer/Userguide.pdf | +| homopolish | GNU GPLv3 | https://github.com/ythuang0522/homopolish/blob/master/LICENSE | | htslib | MIT | https://github.com/samtools/htslib/blob/develop/LICENSE | | iqtree | GNU GPLv2 | https://github.com/Cibiv/IQ-TREE/blob/master/LICENSE | | iqtree2 | GNU GPLv2 | https://github.com/iqtree/iqtree2/blob/master/LICENSE | @@ -63,6 +70,7 @@ The licenses of the open-source software that is contained in these Docker image | MaSuRCA | GNU GPLv3 | https://github.com/alekseyzimin/masurca/blob/master/LICENSE.txt | Medaka | Mozilla Public License 2.0 | https://github.com/nanoporetech/medaka/blob/master/LICENSE.md | | Metaphlan | MIT | https://github.com/biobakery/MetaPhlAn/blob/3.0/license.txt | +| MIDAS | GNU GPLv3 |https://github.com/snayfach/MIDAS/blob/master/LICENSE | | minimap2 | MIT | https://github.com/lh3/minimap2/blob/master/LICENSE.txt | | miniasm | MIT | https://github.com/lh3/miniasm/blob/master/LICENSE.txt | | minipolish | GNU GPLv3 | https://github.com/rrwick/Minipolish/blob/main/LICENSE | @@ -74,10 +82,12 @@ The licenses of the open-source software that is contained in these Docker image | 
NanoPlot | GNU GPLv3 | https://github.com/wdecoster/NanoPlot/blob/master/LICENSE | | NCBI AMRFinderPlus | Public Domain | https://github.com/ncbi/amr/blob/master/LICENSE | | NCBI Datasets | Public Domain | https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md | +| NCBI table2asn | Public Domain | unknown | | ngmaster | GNU GPLv3 | https://github.com/MDU-PHL/ngmaster/blob/master/LICENSE | | OrthoFinder | GNU GPLv3 | https://github.com/davidemms/OrthoFinder/blob/master/License.md | | Panaroo | MIT | https://github.com/gtonkinhill/panaroo/blob/master/LICENSE | | Pangolin | GNU GPLv3 | https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt | +| pasty | Apache 2.0 | https://github.com/rpetit3/pasty/blob/main/LICENSE | | pbptyper | MIT | https://github.com/rpetit3/pbptyper/blob/main/LICENSE | | Phyml | GNU GPLv3 | https://github.com/stephaneguindon/phyml/blob/master/COPYING | | Piggy | GNU GPLv3 | https://github.com/harry-thorpe/piggy/blob/master/LICENSE | @@ -85,7 +95,9 @@ The licenses of the open-source software that is contained in these Docker image | Piranha | GNU GPLv3 | https://github.com/polio-nanopore/piranha/blob/main/LICENSE | | PlasmidSeeker | BSD 3-Clause | https://github.com/bioinfo-ut/PlasmidSeeker/blob/master/LICENSE | | pmga | GNU GPLv2 | https://github.com/rpetit3/pmga/blob/master/LICENSE | +| polypolish | GNU GPLv3 | https://github.com/rrwick/Polypolish/blob/main/LICENSE | | PopPUNK | Apache 2.0 | https://github.com/bacpop/PopPUNK/blob/master/LICENSE | +| Porechop | GNU GPLv3 | https://github.com/rrwick/Porechop/blob/master/LICENSE | | Prokka | GNU GPLv3 | https://raw.githubusercontent.com/tseemann/prokka/master/doc/LICENSE.Prokka | | pyGenomeViz | MIT | https://github.com/moshi4/pyGenomeViz/blob/main/LICENSE | | QUAST | GNU GPLv2 | https://github.com/ablab/quast/blob/master/LICENSE.txt | @@ -98,6 +110,7 @@ The licenses of the open-source software that is contained in these Docker image | Roary | GNU GPLv3 | https://github.com/sanger-pathogens/Roary/blob/master/GPL-LICENSE | | SalmID| MIT | https://github.com/hcdenbakker/SalmID/blob/master/LICENSE | | Samtools | GNU GPLv3 | https://github.com/samtools/samtools/blob/develop/LICENSE | +| SeqKit | MIT | https://github.com/shenwei356/seqkit/blob/master/LICENSE | SeqSero | GNU GPLv2 | https://github.com/denglab/SeqSero/blob/master/LICENSE | | SeqSero2 | GNU GPLv2 | https://github.com/denglab/SeqSero2/blob/master/LICENSE | | seqyclean | MIT | https://github.com/ibest/seqyclean/blob/master/LICENSE | @@ -109,6 +122,7 @@ The licenses of the open-source software that is contained in these Docker image | Shovill | GNU GPLv3 | https://github.com/tseemann/shovill/blob/master/LICENSE | | SISTR | Apache 2.0 | https://github.com/phac-nml/sistr_cmd/blob/master/LICENSE | | SKA | MIT | https://github.com/simonrharris/SKA/blob/master/LICENSE | +| skani | MIT | https://github.com/bluenote-1577/skani/blob/main/LICENSE | | SKESA | Public Domain | https://github.com/ncbi/SKESA/blob/master/LICENSE | | Smalt | GNU GPLv3 | https://www.sanger.ac.uk/tool/smalt-0/ | | SnpEff | MIT | https://github.com/pcingola/SnpEff/blob/master/LICENSE.md | @@ -122,6 +136,7 @@ The licenses of the open-source software that is contained in these Docker image | Staramr | Apache 2.0 | https://github.com/phac-nml/staramr/blob/master/LICENSE | | TBProfiler | GNU GPLv3 | https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE | | TipToft | GNU GPLv3 | https://github.com/andrewjpage/tiptoft/blob/master/LICENSE | +| Tostadas | Apache-2.0 
license | https://github.com/CDCgov/tostadas/blob/master/LICENSE | | Treemmer | GNU GPLv3 | https://github.com/fmenardo/Treemmer/blob/master/COPYING | | Trimmomatic | GNU GPLv3 | https://github.com/usadellab/Trimmomatic/blob/main/distSrc/LICENSE | | Trycycler | GNU GPLv3 | https://github.com/rrwick/Trycycler/blob/master/LICENSE | @@ -130,4 +145,5 @@ The licenses of the open-source software that is contained in these Docker image | VIBRANT | GNU GPLv3 | https://github.com/AnantharamanLab/VIBRANT/blob/master/LICENSE | | VIGOR4 | GNU GPLv3 | https://github.com/JCVenterInstitute/VIGOR4/blob/master/LICENSE.txt | | VirSorter2 | GNU GPLv2 | https://github.com/jiarong/VirSorter2/blob/master/LICENSE | +| VirulenceFinder | Apache 2.0 | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/ | | wtdbg2 | GNU GPLv3 | https://github.com/ruanjue/wtdbg2/blob/master/LICENSE.txt | diff --git a/README.md b/README.md index e61172f26..311b41a3c 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,96 @@ +[![Gitpod ready-to-code](https://img.shields.io/badge/Gitpod-ready--to--code-908a85?logo=gitpod)](https://gitpod.io/#https://github.com/StaPH-B/docker-builds) + # [docker-builds](#) This repository contains the Dockerfiles and other assorted files necessary for building Docker images for a variety of programs used by members of the StaPH-B (State Public Health Lab Bioinformatics) consortium. The purpose of this repository is to provide a centralized location for Docker images that is easily accessible for users, with clear documentation on how the containers were built and how to use them. -If you would like to contribute with a Docker image or improve upon the existing images, please fork the repository, make your changes/additions, and submit a pull request. If you are having an issue with an existing image, please submit an issue. We welcome any and all feedback! -[See more details on how to contribute here](https://staph-b.github.io/docker-builds/contribute/) +There are several community projects that create and share containers for bioinformatic tools. This repository contains the Dockerfiles and other assorted files necessary for building Docker images for a variety of tools used by members of the StaPH-B (State Public Health Lab Bioinformatics) consortium. The purpose of this repository is to provide a centralized location for Docker images that is easily accessible for users, with clear documentation on how the containers were built and how to use them. -## [Docker User Guide](https://staph-b.github.io/docker-builds/) -We have also created a user guide that outlines methods and best practices for using and developing docker containers. -[Docker User Guide](https://staph-b.github.io/docker-builds/) +This is a community resource, built and maintained by users from varied backgrounds and expertise levels. As such, we have provided some [templates for contributing to this repository](./dockerfile-template). If **you** would like to add a Docker image or improve upon the existing images, please fork the repository, make your changes/additions, and submit a pull request. If you are having an issue with an existing image, please submit an issue. We welcome any and all feedback! -#### What about Singularity? -For many people Docker is not an option, but Singularity is. Most Docker containers are compatible with Singularity and can easily be converted to Singularity format. 
Please see the User Guide linked above to for instructions on how to download docker images from dockerhub and how to run them using Singularity. We've worked hard to ensure that our containers are compatibile with Singularity, but if you find one that isn't, please leave an issue and let us know! +[See more details on how to contribute here](https://staph-b.github.io/docker-builds/contribute/) ## Docker image repositories & hosting -We host all of our docker images on two different repositories and periodically sync the images between the two: - 1. Dockerhub - https://hub.docker.com/r/staphb/ - 2. Quay.io - https://quay.io/organization/staphb/ +We host all of our docker images on two different repositories: + + 1. [Dockerhub - https://hub.docker.com/r/staphb/](https://hub.docker.com/r/staphb/) + 2. [Quay.io - https://quay.io/organization/staphb/](https://quay.io/organization/staphb/) + +The development process of creating a new image is summarized as follows: + +```mermaid +graph TD + A[fork staphb/docker-builds repo]-->B[create tool/version directory] + B-->C[create readme] + B-->D[create dockerfile] + D-->G[create app and test layers] + A-->E[add License to Program_Licenses.md] + A-->F[add tool to list in this readme] + E-->H[submit PR] + F-->H + G-->H + C-->H + H-->I{build to test} + I--success-->J[PR merged and docker image pushed to dockerhub and quay] + I--failure-->K[edit dockerfile] + K-->I +``` + +## User Guide + +The [StaPH-B Docker User Guide](https://staphb.org/docker-builds/) was created to outline methods and best practices for using and developing docker containers. There are chapters for: + +- [Contributing](https://staphb.org/docker-builds/contribute/) +- [Downloading Docker images](https://staphb.org/docker-builds/get_containers/) +- [Running Docker containers](https://staphb.org/docker-builds/run_containers/) +- [Developing Docker images](https://staphb.org/docker-builds/make_containers/) +- [Useful links](https://staphb.org/docker-builds/useful_links/) + +### Summarized usage guide for docker + +```bash +# Build a docker image to the 'test' layer +docker build --tag tool:test --target test +docker build --tag samtools:test --target test samtools/1.15 + +# Download a docker image from dockerhub (most tools have a 'latest' version tag) +docker pull staphb/tool:version +docker pull staphb/shigatyper:2.0.2 + +# Run the container (don't forget to mount your volumes!) +docker run --rm -u $(id -u):$(id -g) -v :/data tool:version +docker run --rm -u $(id -u):$(id -g) -v $(pwd)/amrfinder_test_files:/data amrfinder:3.10 amrfinder --nucleotide 2021CK-01854_contigs.fa --threads 20 --name 2021CK-01854 --output /data/2021CK-01854.txt --organism Klebsiella +``` + +Further documentation can be found at [docs.docker.com](https://docs.docker.com/engine/reference/run/) + +### Templates + +Several template files are provided. These are intended to be copied and edited by contributors. + +1. [dockerfile-template/Dockerfile](./dockerfile-template/Dockerfile) is the basic template useful for most images +2. [dockerfile-template/Dockerfile_mamba](./dockerfile-template/Dockerfile_mamba) is a basic template for using the micromamba base image +3. [dockerfile-template/README.md](./dockerfile-template/README.md) is a basic readme file template to assist others in using the image + +### What about Singularity? + +For many people, Docker is not an option, but Singularity is. Most Docker containers are compatible with Singularity and can easily be converted to Singularity format. 
Please see the [User Guide](https://staphb.org/docker-builds/) for instructions on how to download docker images from dockerhub and how to run them using Singularity. We've worked hard to ensure that our containers are compatible with Singularity, but if you find one that isn't, please leave an issue and let us know! + +### Summarized usage guide for singularity + +```bash +# Pulling a container from dockerhub (creates a file) +singularity pull --name docker://staphb/bbtools:38.96 +singularity pull --name staphb-bbtools-38.96.simg docker://staphb/bbtools:38.96 + +# Running the container (don't forget to mount your volumes!) +singularity exec --bind :/data +singularity exec --bind $(pwd)/fastq:/data staphb-bbtools-38.96.simg bbduk.sh in1=sample1_R1.fastq.gz in2=sample1_R2.fastq.gz out1=bbduk/sample1_rmphix_R1.fastq.gz out2=bbduk/sample1_rmphix_R2.fastq.gz outm=bbduk/sample1.matched_phix.fq ref=/opt/bbmap/resources/phix174_ill.ref.fa.gz stats=bbduk/sample1.phix.stats.txt threads=4 +``` + +Further documentation can be found at [docs.sylabs.io](https://docs.sylabs.io/guides/3.1/user-guide/cli.html) + +## Logs In November 2020, Docker began to implement pull rate limits for images hosted on dockerhub. This limits the number of `docker pull`'s per time period (e.g. anonymous users allowed 100 pulls per six hours). We applied and were approved for Docker's "Open Source Program," which should have removed the pull rate limits for all `staphb` docker images! 🎉 🥳 If you encounter an error such as `ERROR: toomanyrequests: Too Many Requests.` or `You have reached your pull rate limit. You may increase the limit by authenticating and upgrading: https://www.docker.com/increase-rate-limits.` , please let us know by [submitting an issue.](https://github.com/StaPH-B/docker-builds/issues) @@ -31,7 +106,7 @@ To learn more about the docker pull rate limits and the open source software pro | :--------: | ------- | -------- | | [ABRicate](https://hub.docker.com/r/staphb/abricate/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/abricate)](https://hub.docker.com/r/staphb/abricate) |
  • 0.8.7
  • 0.8.13
  • 0.8.13s (+serotypefinder db)
  • 0.9.8
  • 1.0.0
  • [1.0.1 (+ A. baumannii plasmid typing db)](https://github.com/StaPH-B/docker-builds/blob/master/abricate/1.0.1-Abaum-plasmid)
  • [1.0.1 (+ InsaFlu db)](https://github.com/StaPH-B/docker-builds/blob/master/abricate/1.0.1-insaflu-220727)
| https://github.com/tseemann/abricate | | [any2fasta](https://hub.docker.com/r/staphb/any2fasta/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/any2fasta)](https://hub.docker.com/r/staphb/any2fasta) |
  • 0.4.2
| https://github.com/tseemann/any2fasta | -| [ARIBA](https://hub.docker.com/r/staphb/ariba/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ariba)](https://hub.docker.com/r/staphb/ariba) |
  • 2.14.4
| https://github.com/sanger-pathogens/ariba | +| [ARIBA](https://hub.docker.com/r/staphb/ariba/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ariba)](https://hub.docker.com/r/staphb/ariba) |
  • 2.14.4
  • [2.14.6](ariba/2.14.6/)
| https://github.com/sanger-pathogens/ariba | | [artic-ncov2019](https://hub.docker.com/r/staphb/artic-ncov2019)
[![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019)](https://hub.docker.com/r/staphb/artic-ncov2019) |
  • 1.3.0
| https://github.com/artic-network/fieldbioinformatics | | [artic-ncov2019-epi2me](https://hub.docker.com/r/staphb/artic-ncov2019-epi2me)
[![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019-epi2me)](https://hub.docker.com/r/staphb/artic-ncov2019-epi2me) |
  • 0.3.10
| https://github.com/epi2me-labs/wf-artic | | [artic-ncov2019-medaka](https://hub.docker.com/r/staphb/artic-ncov2019-medaka)
[![docker pulls](https://badgen.net/docker/pulls/staphb/artic-ncov2019-medaka)](https://hub.docker.com/r/staphb/artic-ncov2019-medaka) |
  • 1.1.0
| https://github.com/artic-network/artic-ncov2019 | @@ -39,87 +114,98 @@ To learn more about the docker pull rate limits and the open source software pro | [Augur](https://github.com/nextstrain/augur)
[![docker pulls](https://badgen.net/docker/pulls/staphb/augur)](https://hub.docker.com/r/staphb/augur) |
  • 6.3.0
  • 7.0.2
  • 8.0.0
  • 9.0.0
  • 16.0.3
| https://github.com/nextstrain/augur | | [Auspice](https://github.com/nextstrain/auspice)
[![docker pulls](https://badgen.net/docker/pulls/staphb/auspice)](https://hub.docker.com/r/staphb/auspice) |
  • 2.12.0
| https://github.com/nextstrain/auspice | | [BBTools](https://hub.docker.com/r/staphb/bbtools/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bbtools)](https://hub.docker.com/r/staphb/bbtools) |
  • 38.76
  • 38.86
  • 38.95
  • 38.96
  • 38.97
  • 38.98
  • 38.99
  • 39.00
  • 39.01
| https://jgi.doe.gov/data-and-tools/bbtools/ | -| [bcftools](https://hub.docker.com/r/staphb/bcftools/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bcftools)](https://hub.docker.com/r/staphb/bcftools) |
  • 1.10.2
  • 1.11
  • 1.12
  • 1.13
  • 1.14
  • 1.15
  • 1.16
| https://github.com/samtools/bcftools | -| [bedtools](https://hub.docker.com/r/staphb/bedtools/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bedtools)](https://hub.docker.com/r/staphb/bedtools) |
  • 2.29.2
  • 2.30.0
| https://bedtools.readthedocs.io/en/latest/
https://github.com/arq5x/bedtools2 | +| [bcftools](https://hub.docker.com/r/staphb/bcftools/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bcftools)](https://hub.docker.com/r/staphb/bcftools) |
  • 1.10.2
  • 1.11
  • 1.12
  • 1.13
  • 1.14
  • 1.15
  • 1.16
  • 1.17
  • [1.18](bcftools/1.18/)
| https://github.com/samtools/bcftools | +| [bedtools](https://hub.docker.com/r/staphb/bedtools/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bedtools)](https://hub.docker.com/r/staphb/bedtools) |
  • 2.29.2
  • 2.30.0
  • [2.31.0](bedtools/2.31.0/)
| https://bedtools.readthedocs.io/en/latest/
https://github.com/arq5x/bedtools2 | | [berrywood-report-env](https://hub.docker.com/r/staphb/berrywood-report-env/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/berrywood-report-env)](https://hub.docker.com/r/staphb/berrywood-report-env) |
  • 1.0
| none | -| [blast+](https://hub.docker.com/r/staphb/blast/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/blast)](https://hub.docker.com/r/staphb/blast) |
  • 2.13.0
| https://www.ncbi.nlm.nih.gov/books/NBK279690/ | -| [bowtie2](https://hub.docker.com/r/staphb/bowtie2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bowtie2)](https://hub.docker.com/r/staphb/bowtie2) |
  • 2.4.4
  • 2.4.5
| http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
https://github.com/BenLangmead/bowtie2 | +| [blast+](https://hub.docker.com/r/staphb/blast/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/blast)](https://hub.docker.com/r/staphb/blast) |
  • 2.13.0
  • [2.14.0](blast/2.14.0/)
  • [2.14.1](blast/2.14.1/)
| https://www.ncbi.nlm.nih.gov/books/NBK279690/ | +| [bowtie2](https://hub.docker.com/r/staphb/bowtie2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bowtie2)](https://hub.docker.com/r/staphb/bowtie2) |
  • 2.4.4
  • 2.4.5
  • 2.5.1
| http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml
https://github.com/BenLangmead/bowtie2 | +| [BUSCO](https://hub.docker.com/r/staphb/busco/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/busco)](https://hub.docker.com/r/staphb/busco) |
  • 5.4.7
| https://busco.ezlab.org/busco_userguide.html
https://gitlab.com/ezlab/busco | | [BWA](https://hub.docker.com/r/staphb/bwa)
[![docker pulls](https://badgen.net/docker/pulls/staphb/bwa)](https://hub.docker.com/r/staphb/bwa) |
  • 0.7.17
| https://github.com/lh3/bwa | | [Canu](https://hub.docker.com/r/staphb/canu)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu?)](https://hub.docker.com/r/staphb/canu)|
  • 2.0
  • 2.1.1
  • 2.2
| https://canu.readthedocs.io/en/latest/
https://github.com/marbl/canu | | [Canu-Racon](https://hub.docker.com/r/staphb/canu-racon/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/canu-racon)](https://hub.docker.com/r/staphb/canu-racon) |
  • 1.7.1 (Canu), 1.3.1 (Racon), 2.13 (minimap2)
  • 1.9 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
  • 1.9i (Canu), 1.4.3 (Racon), 2.17 (minimap2), (+racon_preprocess.py)
  • 2.0 (Canu), 1.4.3 (Racon), 2.17 (minimap2)
| https://canu.readthedocs.io/en/latest/
https://github.com/lbcb-sci/racon
https://github.com/isovic/racon (ARCHIVED)
https://lh3.github.io/minimap2/ | | [centroid](https://hub.docker.com/r/staphb/centroid/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/centroid)](https://hub.docker.com/r/staphb/centroid) |
  • 1.0.0
| https://github.com/stjacqrm/centroid | | [CDC-SPN](https://hub.docker.com/r/staphb/cdc-spn/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cdc-spn)](https://hub.docker.com/r/staphb/cdc-spn) |
  • 0.1 (no version)
| https://github.com/BenJamesMetcalf/Spn_Scripts_Reference | -| [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) |
  • 2.0.2
| https://github.com/CFSAN-Biostatistics/snp-pipeline | -| [Circlator](https://hub.docker.com/r/staphb/circlator)
[![docker pulls](https://badgen.net/docker/pulls/staphb/circlator)](https://hub.docker.com/r/staphb/circlator) |
  • 1.5.6
| https://github.com/sanger-pathogens/circlator | +| [cfsan-snp-pipeline](https://hub.docker.com/r/staphb/cfsan-snp-pipeline)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cfsan-snp-pipeline)](https://hub.docker.com/r/staphb/cfsan-snp-pipeline) |
  • 2.0.2
  • 2.2.1
| https://github.com/CFSAN-Biostatistics/snp-pipeline | +| [Circlator](https://hub.docker.com/r/staphb/circlator)
[![docker pulls](https://badgen.net/docker/pulls/staphb/circlator)](https://hub.docker.com/r/staphb/circlator) |
  • 1.5.6
  • 1.5.5
| https://github.com/sanger-pathogens/circlator | | [Clustalo](https://hub.docker.com/r/staphb/clustalo)
[![docker pulls](https://badgen.net/docker/pulls/staphb/clustalo)](https://hub.docker.com/r/staphb/clustalo) |
  • 1.2.4
| http://www.clustal.org/omega/ | | [colorid](https://hub.docker.com/r/staphb/colorid)
[![docker pulls](https://badgen.net/docker/pulls/staphb/colorid)](https://hub.docker.com/r/staphb/colorid) |
  • 0.1.4.3
| https://github.com/hcdenbakker/colorid | | [cutshaw-report-env](https://hub.docker.com/r/staphb/cutshaw-report-env)
[![docker pulls](https://badgen.net/docker/pulls/staphb/cutshaw-report-env)](https://hub.docker.com/r/staphb/cutshaw-report-env) |
  • 1.0.0
| https://github.com/VADGS/CutShaw | | [datasets-sars-cov-2](https://github.com/CDCgov/datasets-sars-cov-2)
[![docker pulls](https://badgen.net/docker/pulls/staphb/datasets-sars-cov-2)](https://hub.docker.com/r/staphb/datasets-sars-cov-2) |
  • 0.6.2
  • 0.6.3
  • 0.7.2
| https://github.com/CDCgov/datasets-sars-cov-2 | +| [dnaapler](https://hub.docker.com/r/staphb/dnaapler)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dnaapler)](https://hub.docker.com/r/staphb/dnaapler) |
  • [0.1.0](dnaapler/0.1.0/)
| https://github.com/gbouras13/dnaapler | +| [dragonflye](https://hub.docker.com/r/staphb/dragonflye)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dragonflye)](https://hub.docker.com/r/staphb/dragonflye) |
  • 1.0.14
  • [1.1.1](dragonflye/1.1.1/)
| https://github.com/rpetit3/dragonflye | | [DSK](https://hub.docker.com/r/staphb/dsk)
[![docker pulls](https://badgen.net/docker/pulls/staphb/dsk)](https://hub.docker.com/r/staphb/dsk) |
  • 0.0.100
| https://gatb.inria.fr/software/dsk/ | | [emboss](https://hub.docker.com/r/staphb/emboss)
[![docker pulls](https://badgen.net/docker/pulls/staphb/emboss)](https://hub.docker.com/r/staphb/emboss) |
  • 6.6.0 (no version)
| http://emboss.sourceforge.net | +| [emmtyper](https://hub.docker.com/r/staphb/emmtyper)
[![docker pulls](https://badgen.net/docker/pulls/staphb/emmtyper)](https://hub.docker.com/r/staphb/emmtyper) |
  • 0.2.0
| https://github.com/MDU-PHL/emmtyper | | [emm-typing-tool](https://hub.docker.com/r/staphb/emm-typing-tool)
[![docker pulls](https://badgen.net/docker/pulls/staphb/emm-typing-tool)](https://hub.docker.com/r/staphb/emm-typing-tool) |
  • 0.0.1 (no version)
| https://github.com/phe-bioinformatics/emm-typing-tool | | [EToKi](https://hub.docker.com/r/staphb/etoki)
[![docker pulls](https://badgen.net/docker/pulls/staphb/etoki)](https://hub.docker.com/r/staphb/etoki) |
  • 1.2.1
| https://github.com/zheminzhou/EToKi | -| [FastANI](https://hub.docker.com/r/staphb/fastani)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastani)](https://hub.docker.com/r/staphb/fastani) |
  • 1.1
  • 1.32
  • 1.33
  • 1.33 + RGDv2
| https://github.com/ParBLiSS/FastANI | -| [Fastp](https://hub.docker.com/r/staphb/fastp)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastp)](https://hub.docker.com/r/staphb/fastp) |
  • 0.23.2
| http://opengene.org/fastp/
https://github.com/OpenGene/fastp | +| [FastANI](https://hub.docker.com/r/staphb/fastani)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastani)](https://hub.docker.com/r/staphb/fastani) |
  • 1.1
  • 1.32
  • 1.33
  • 1.33 + RGDv2
  • [1.34](fastani/1.34)
  • [1.34 + RGDv2](fastani/1.34-RGDV2/)
| https://github.com/ParBLiSS/FastANI | +| [Fastp](https://hub.docker.com/r/staphb/fastp)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastp)](https://hub.docker.com/r/staphb/fastp) |
  • 0.23.2
  • [0.23.4](fastp/0.23.4/)
| http://opengene.org/fastp/
https://github.com/OpenGene/fastp | | [FastTree](https://hub.docker.com/r/staphb/fasttree)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fasttree)](https://hub.docker.com/r/staphb/fasttree) |
  • 2.1.11
| http://www.microbesonline.org/fasttree/ | -| [FastQC](https://hub.docker.com/r/staphb/fastqc)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastqc)](https://hub.docker.com/r/staphb/fastqc) |
  • 0.11.8
  • 0.11.9
| https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
https://github.com/s-andrews/FastQC | +| [FastQC](https://hub.docker.com/r/staphb/fastqc)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastqc)](https://hub.docker.com/r/staphb/fastqc) |
  • 0.11.8
  • 0.11.9
  • 0.12.1
| https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
https://github.com/s-andrews/FastQC | | [fastq-scan](https://hub.docker.com/r/staphb/fastq-scan)
[![docker pulls](https://badgen.net/docker/pulls/staphb/fastq-scan)](https://hub.docker.com/r/staphb/fastq-scan) |
  • 0.4.3
  • 0.4.4
  • 1.0.0
  • 1.0.1
| https://github.com/rpetit3/fastq-scan | | [Freebayes](https://hub.docker.com/r/staphb/freebayes)
[![docker pulls](https://badgen.net/docker/pulls/staphb/freebayes)](https://hub.docker.com/r/staphb/freebayes) |
  • 1.3.6
| https://github.com/freebayes/freebayes | | [Filtlong](https://hub.docker.com/r/staphb/filtlong)
[![docker pulls](https://badgen.net/docker/pulls/staphb/filtlong)](https://hub.docker.com/r/staphb/filtlong) |
  • 0.2.0
  • 0.2.1
| https://github.com/rrwick/filtlong | | [FLASH](https://hub.docker.com/r/staphb/flash)
[![docker pulls](https://badgen.net/docker/pulls/staphb/flash)](https://hub.docker.com/r/staphb/flash) |
  • 1.2.11
| http://ccb.jhu.edu/software/FLASH | -| [Flye](https://hub.docker.com/r/staphb/flye)
[![docker pulls](https://badgen.net/docker/pulls/staphb/flye)](https://hub.docker.com/r/staphb/flye) |
  • 2.5
  • 2.7
  • 2.8
  • 2.9
  • 2.9.1
| https://github.com/fenderglass/Flye | -| [Freyja](https://hub.docker.com/r/staphb/freyja)
[![docker pulls](https://badgen.net/docker/pulls/staphb/freyja)](https://hub.docker.com/r/staphb/freyja) |
  • 1.2
  • 1.2.1
  • 1.3.1
  • 1.3.2
  • 1.3.4
  • 1.3.7
  • 1.3.8
  • 1.3.9
  • 1.3.10
  • 1.3.11
| https://github.com/andersen-lab/Freyja | -| [GAMBIT](https://hub.docker.com/r/staphb/gambit)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gambit)](https://hub.docker.com/r/staphb/gambit) |
  • 0.3.0
  • 0.4.0
  • 0.5.0
| https://github.com/hesslab-gambit/gambit | +| [Flye](https://hub.docker.com/r/staphb/flye)
[![docker pulls](https://badgen.net/docker/pulls/staphb/flye)](https://hub.docker.com/r/staphb/flye) |
  • 2.5
  • 2.7
  • 2.8
  • 2.9
  • 2.9.1
  • 2.9.2
| https://github.com/fenderglass/Flye | +| [Freyja](https://hub.docker.com/r/staphb/freyja)
[![docker pulls](https://badgen.net/docker/pulls/staphb/freyja)](https://hub.docker.com/r/staphb/freyja) |
  • 1.2
  • 1.2.1
  • 1.3.1
  • 1.3.2
  • 1.3.4
  • 1.3.7
  • 1.3.8
  • 1.3.9
  • 1.3.10
  • 1.3.11
  • 1.3.12
  • 1.4.2
  • [1.4.3](freyja/1.4.3/)
  • [1.4.4](freyja/1.4.4/)
  • [1.4.5](freyja/1.4.5/)
| https://github.com/andersen-lab/Freyja | +| [GAMBIT](https://hub.docker.com/r/staphb/gambit)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gambit)](https://hub.docker.com/r/staphb/gambit) |
  • 0.3.0
  • 0.4.0
  • 0.5.0
  • 1.0.0
| https://github.com/jlumpe/gambit | | [GAMMA](https://hub.docker.com/r/staphb/gamma)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gamma)](https://hub.docker.com/r/staphb/gamma) |
  • 1.4
  • 2.1
  • 2.2
| https://github.com/rastanton/GAMMA/ | -| [hmmer](https://hub.docker.com/r/staphb/hmmer)
[![docker pulls](https://badgen.net/docker/pulls/staphb/hmmer)](https://hub.docker.com/r/staphb/hmmer) |
  • 3.3
| http://hmmer.org/ | -| [htslib](https://hub.docker.com/r/staphb/htslib)
[![docker pulls](https://badgen.net/docker/pulls/staphb/htslib)](https://hub.docker.com/r/staphb/htslib) |
  • 1.14
  • 1.15
  • 1.16
| https://www.htslib.org/ | +| [gfastats](https://hub.docker.com/r/staphb/gfastats)
[![docker pulls](https://badgen.net/docker/pulls/staphb/gfastats)](https://hub.docker.com/r/staphb/gfastats) |
  • 1.3.6
| https://github.com/vgl-hub/gfastats | +| [hmmer](https://hub.docker.com/r/staphb/hmmer)
[![docker pulls](https://badgen.net/docker/pulls/staphb/hmmer)](https://hub.docker.com/r/staphb/hmmer) |
  • [3.3](hmmer/3.3/)
  • [3.3.2](hmmer/3.3.2/)
| http://hmmer.org/ | +| [homopolish](https://hub.docker.com/r/staphb/homopolish)
[![docker pulls](https://badgen.net/docker/pulls/staphb/homopolish)](https://hub.docker.com/r/staphb/homopolish) |
  • 0.4.1
| https://github.com/ythuang0522/homopolish/ | +| [htslib](https://hub.docker.com/r/staphb/htslib)
[![docker pulls](https://badgen.net/docker/pulls/staphb/htslib)](https://hub.docker.com/r/staphb/htslib) |
  • 1.14
  • 1.15
  • 1.16
  • 1.17
  • [1.18](htslib/1.18/)
| https://www.htslib.org/ | | [iqtree](https://hub.docker.com/r/staphb/iqtree/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/iqtree)](https://hub.docker.com/r/staphb/iqtree) |
  • 1.6.7
| http://www.iqtree.org/ | -| [iqtree2](https://hub.docker.com/r/staphb/iqtree2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/iqtree2)](https://hub.docker.com/r/staphb/iqtree2) |
  • 2.1.2
| http://www.iqtree.org/ | +| [iqtree2](https://hub.docker.com/r/staphb/iqtree2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/iqtree2)](https://hub.docker.com/r/staphb/iqtree2) |
  • 2.1.2
  • 2.2.2.2
  • [2.2.2.6](iqtree2/2.2.2.6/)
| http://www.iqtree.org/ | | [IRMA](https://hub.docker.com/r/staphb/irma/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/irma)](https://hub.docker.com/r/staphb/irma) |
  • 1.0.2
  • 1.0.3
| https://wonder.cdc.gov/amd/flu/irma/| -| [iVar](https://hub.docker.com/r/staphb/ivar/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ivar)](https://hub.docker.com/r/staphb/ivar) |
  • 1.1
  • 1.1 (+SARS-CoV2 reference)
  • 1.2.1
  • 1.2.1 (+SC2 ref)
  • 1.2.2 (+SC2 ref and artic bedfiles)
  • 1.3
  • 1.3.1
| https://github.com/andersen-lab/ivar | -| [Kaptive](https://hub.docker.com/r/staphb/kaptive/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kaptive)](https://hub.docker.com/r/staphb/kaptive) |
  • 2.0.0
  • 2.0.3
| https://github.com/katholt/Kaptive/
https://github.com/katholt/Kaptive/ | -| [Kleborate](https://hub.docker.com/r/staphb/kleborate/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kleborate)](https://hub.docker.com/r/staphb/kleborate) |
  • 2.0.4
  • 2.1.0
  • 2.2.0
| https://github.com/katholt/Kleborate/
https://github.com/katholt/Kaptive/ | -| [kma](https://hub.docker.com/r/staphb/kma/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kma)](https://hub.docker.com/r/staphb/kma) |
  • 1.2.21
| https://bitbucket.org/genomicepidemiology/kma/ | +| [iVar](https://hub.docker.com/r/staphb/ivar/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ivar)](https://hub.docker.com/r/staphb/ivar) |
  • 1.1
  • 1.1 (+SARS-CoV2 reference)
  • 1.2.1
  • 1.2.1 (+SC2 ref)
  • 1.2.2 (+SC2 ref and artic bedfiles)
  • 1.3
  • 1.3.1
  • 1.3.2
  • 1.4.1
  • 1.4.2
| https://github.com/andersen-lab/ivar | +| [Kaptive](https://hub.docker.com/r/staphb/kaptive/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kaptive)](https://hub.docker.com/r/staphb/kaptive) |
  • 2.0.0
  • 2.0.3
  • 2.0.5
| https://github.com/katholt/Kaptive/ | +| [Kleborate](https://hub.docker.com/r/staphb/kleborate/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kleborate)](https://hub.docker.com/r/staphb/kleborate) |
  • 2.0.4
  • 2.1.0
  • 2.2.0
  • 2.3.2
  • [2.3.2-2023-05](kleborate/2.3.2-2023-05/README.md)
| https://github.com/katholt/Kleborate/
https://github.com/katholt/Kaptive/ | +| [kma](https://hub.docker.com/r/staphb/kma/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kma)](https://hub.docker.com/r/staphb/kma) |
  • 1.2.21
  • 1.4.10 (no database)
| https://bitbucket.org/genomicepidemiology/kma/ | | [Kraken](https://hub.docker.com/r/staphb/kraken/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kraken)](https://hub.docker.com/r/staphb/kraken) |
  • 1.0
  • 1.1.1
  • 1.1.1 (no database)
| https://github.com/DerrickWood/kraken | -| [Kraken2](https://hub.docker.com/r/staphb/kraken2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kraken2)](https://hub.docker.com/r/staphb/kraken2) |
  • 2.0.8-beta (no database)
  • 2.0.8-beta (MiniKraken2_v1_8GB db)
  • 2.0.8-beta_hv (human + virus db)
  • 2.0.9-beta (no db)
  • 2.0.9-beta (Minikraken v2 RefSeq: bacteria, archaea, viral, and human 8GB db)
  • 2.1.0 (no db)
  • 2.1.1 (no db)
  • 2.1.2 (no db)
| https://github.com/DerrickWood/kraken2 | +| [Kraken2](https://hub.docker.com/r/staphb/kraken2/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/kraken2)](https://hub.docker.com/r/staphb/kraken2) |
  • 2.0.8-beta (no database)
  • 2.0.8-beta (MiniKraken2_v1_8GB db)
  • 2.0.8-beta_hv (human + virus db)
  • 2.0.9-beta (no db)
  • 2.0.9-beta (Minikraken v2 RefSeq: bacteria, archaea, viral, and human 8GB db)
  • 2.1.0 (no db)
  • 2.1.1 (no db)
  • 2.1.2 (no db)
  • [2.1.3](kraken2/2.1.3/) (no db)
| https://github.com/DerrickWood/kraken2 | | [kSNP3](https://hub.docker.com/r/staphb/ksnp3/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ksnp3)](https://hub.docker.com/r/staphb/ksnp3)|
  • 3.1
| https://sourceforge.net/projects/ksnp/ | | [kSNP4](https://hub.docker.com/r/staphb/ksnp4/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/ksnp4)](https://hub.docker.com/r/staphb/ksnp4)|
  • 4.0
| https://sourceforge.net/projects/ksnp/ | | [legsta](https://hub.docker.com/r/staphb/legsta/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/legsta)](https://hub.docker.com/r/staphb/legsta)|
  • 0.3.7
  • 0.5.1
| https://github.com/tseemann/legsta | | [liftoff](https://hub.docker.com/r/staphb/liftoff/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/liftoff)](https://hub.docker.com/r/staphb/liftoff)|
  • 1.6.3
| https://github.com/agshumate/Liftoff | | [Lyve-SET (includes CG-Pipeline scripts and raxml)](https://hub.docker.com/r/staphb/lyveset/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/lyveset)](https://hub.docker.com/r/staphb/lyveset) |
  • 1.1.4f
  • 2.0.1
| https://github.com/lskatz/lyve-SET https://github.com/lskatz/CG-Pipeline | -| [MAFFT](https://hub.docker.com/r/staphb/mafft/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/mafft)](https://hub.docker.com/r/staphb/mafft) |
  • 7.450
  • 7.475
| https://mafft.cbrc.jp/alignment/software/ | +| [MAFFT](https://hub.docker.com/r/staphb/mafft/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/mafft)](https://hub.docker.com/r/staphb/mafft) |
  • 7.450
  • 7.475
  • 7.505
| https://mafft.cbrc.jp/alignment/software/ | | [Mash](https://hub.docker.com/r/staphb/mash/)
[![docker pulls](https://badgen.net/docker/pulls/staphb/mash)](https://hub.docker.com/r/staphb/mash) |
  • 2.1
  • 2.2
  • 2.3
| https://github.com/marbl/Mash | | [mashtree](https://hub.docker.com/r/staphb/mashtree)
[![docker pulls](https://badgen.net/docker/pulls/staphb/mashtree)](https://hub.docker.com/r/staphb/mashtree) |
  • 0.52.0
  • 0.57.0
  • 1.0.4
  • 1.2.0
| https://github.com/lskatz/mashtree | -| [MaSuRCA](https://hub.docker.com/r/staphb/masurca)
[![docker pulls](https://badgen.net/docker/pulls/staphb/masurca)](https://hub.docker.com/r/staphb/masurca) |
  • 4.0.8
  • 4.0.9
| https://github.com/alekseyzimin/masurca | +| [MaSuRCA](https://hub.docker.com/r/staphb/masurca)
[![docker pulls](https://badgen.net/docker/pulls/staphb/masurca)](https://hub.docker.com/r/staphb/masurca) |
  • 4.0.8
  • 4.0.9
  • 4.1.0
| https://github.com/alekseyzimin/masurca | | [medaka](https://hub.docker.com/r/staphb/medaka)
[![docker pulls](https://badgen.net/docker/pulls/staphb/medaka)](https://hub.docker.com/r/staphb/medaka) |
  • 0.8.1
  • 1.0.1
  • 1.2.0
| https://github.com/nanoporetech/medaka | | [metaphlan](https://hub.docker.com/r/staphb/metaphlan)
[![docker pulls](https://badgen.net/docker/pulls/staphb/metaphlan)](https://hub.docker.com/r/staphb/metaphlan) |
  • 3.0.3-no-db (no database)
  • 3.0.3 (~3GB db) | https://github.com/biobakery/MetaPhlAn/tree/3.0 | -| [minimap2](https://hub.docker.com/r/staphb/minimap2)
    [![docker pulls](https://badgen.net/docker/pulls/staphb/minimap2)](https://hub.docker.com/r/staphb/minimap2) |
    • 2.17
    • 2.18
    • 2.21
    • 2.22
    • 2.23
    • 2.24
    | https://github.com/lh3/minimap2 | +| [MIDAS](https://hub.docker.com/r/staphb/midas)
    [![docker pulls](https://badgen.net/docker/pulls/staphb/midas)](https://hub.docker.com/r/staphb/midas) |
    • 1.3.2 (no database)
      | https://github.com/snayfach/MIDAS | +| [minimap2](https://hub.docker.com/r/staphb/minimap2)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/minimap2)](https://hub.docker.com/r/staphb/minimap2) |
      • 2.17
      • 2.18
      • 2.21
      • 2.22
      • 2.23
      • 2.24
      • 2.25
      | https://github.com/lh3/minimap2 | | [minipolish](https://hub.docker.com/r/staphb/minipolish)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/minipolish)](https://hub.docker.com/r/staphb/minipolish) |
      • 0.1.3
      | https://github.com/rrwick/Minipolish | -| [mlst](https://hub.docker.com/r/staphb/mlst)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mlst)](https://hub.docker.com/r/staphb/mlst) |
      • 2.16.2
      • 2.17.6
      • 2.19.0
      • 2.22.0
      • 2.22.1
      • 2.23.0
      | https://github.com/tseemann/mlst | +| [mlst](https://hub.docker.com/r/staphb/mlst)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mlst)](https://hub.docker.com/r/staphb/mlst) |
      • 2.16.2
      • 2.17.6
      • 2.19.0
      • 2.22.0
      • 2.22.1
      • 2.23.0
      • [2.23.0-2023-07 (databases updated July 2023)](mlst/2.23.0-2023-07/)
      • [2.23.0-2023-08 (databases updated Aug 2023)](mlst/2.23.0-2023-08/)
      | https://github.com/tseemann/mlst | | [Mugsy](https://hub.docker.com/r/staphb/mugsy)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mugsy)](https://hub.docker.com/r/staphb/mugsy) |
      • 1r2.3
      | http://mugsy.sourceforge.net/ | | [MultiQC](https://hub.docker.com/r/staphb/multiqc)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/multiqc)](https://hub.docker.com/r/staphb/multiqc) |
      • 1.7
      • 1.8
      | https://github.com/ewels/MultiQC | -| [Mummer](https://hub.docker.com/r/staphb/mummer)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mummer)](https://hub.docker.com/r/staphb/mummer) |
      • 4.0.0
      • 4.0.0 + RGDv2
      | https://github.com/mummer4/mummer | -| [Mykrobe + Genotyphi + sonneityping](https://hub.docker.com/r/staphb/mykrobe)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mykrobe)](https://hub.docker.com/r/staphb/mykrobe) |
      • 0.11.0 (Mykrobe) & 1.9.1 (Genotyphi)
      • 0.12.1 (Mykrobe) & 1.9.1 (Genotyphi) & v20210201 (sonneityping)
      | https://github.com/Mykrobe-tools/mykrobe
      https://github.com/katholt/genotyphi
      https://github.com/katholt/sonneityping | -| [NanoPlot](https://hub.docker.com/r/staphb/nanoplot)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/nanoplot)](https://hub.docker.com/r/staphb/nanoplot) |
      • 1.27.0
      • 1.29.0
      • 1.30.1
      • 1.32.0
      • 1.33.0
      • 1.40.0
      | https://github.com/wdecoster/NanoPlot | +| [Mummer](https://hub.docker.com/r/staphb/mummer)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mummer)](https://hub.docker.com/r/staphb/mummer) |
      • 4.0.0
      • 4.0.0 + RGDv2
      • 4.0.0 + RGDv2 + gnuplot
      | https://github.com/mummer4/mummer | +| [Mykrobe + Genotyphi + sonneityping](https://hub.docker.com/r/staphb/mykrobe)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/mykrobe)](https://hub.docker.com/r/staphb/mykrobe) |
      • 0.11.0 (Mykrobe) & 1.9.1 (Genotyphi)
      • 0.12.1 (Mykrobe) & 1.9.1 (Genotyphi) & v20210201 (sonneityping)
      • 0.12.1 (Mykrobe) & 2.0 (Genotyphi) & v20210201 (sonneityping)
      | https://github.com/Mykrobe-tools/mykrobe
      https://github.com/typhoidgenomics/genotyphi
      https://github.com/katholt/sonneityping | +| [NanoPlot](https://hub.docker.com/r/staphb/nanoplot)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/nanoplot)](https://hub.docker.com/r/staphb/nanoplot) |
      • 1.27.0
      • 1.29.0
      • 1.30.1
      • 1.32.0
      • 1.33.0
      • 1.40.0
      • [1.41.6](nanoplot/1.41.6/)
      | https://github.com/wdecoster/NanoPlot | | [ngmaster](https://hub.docker.com/r/staphb/ngmaster)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ngmaster)](https://hub.docker.com/r/staphb/ngmaster) |
      • 0.5.8
      • 1.0.0
      | https://github.com/MDU-PHL/ngmaster | -| [NCBI Datasets](https://hub.docker.com/r/staphb/ncbi-datasets)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ncbi-datasets)](https://hub.docker.com/r/staphb/ncbi-datasets) |
      • 13.31.0
      • 13.35.0
      • 13.43.2
      • 14.0.0
      | [https://github.com/ncbi/datasets](https://github.com/ncbi/datasets)
      [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/) | -| [NCBI AMRFinderPlus](https://hub.docker.com/r/staphb/ncbi-amrfinderplus)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ncbi-amrfinderplus)](https://hub.docker.com/r/staphb/ncbi-amrfinderplus) |
      • 3.1.1b
      • 3.8.4
      • 3.8.28
      • 3.9.3
      • 3.9.8
      • 3.10.1
      • 3.10.5
      • 3.10.16
      • 3.10.20
      • 3.10.24
      • 3.10.30
      • 3.10.36
      • 3.10.42
      | [https://github.com/ncbi/amr](https://github.com/ncbi/amr) | +| [NCBI Datasets](https://hub.docker.com/r/staphb/ncbi-datasets)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ncbi-datasets)](https://hub.docker.com/r/staphb/ncbi-datasets) |
      Click to see all datasets versions **datasets versions**
      • 13.31.0
      • 13.35.0
      • 13.43.2
      • 14.0.0
      • 14.3.0
      • 14.7.0
      • 14.13.2
      • 14.20.0
      • [14.27.0](ncbi-datasets/14.27.0/)
      • [15.1.0](ncbi-datasets/15.1.0/)
      • [15.2.0](ncbi-datasets/15.2.0/)
      • [15.11.0](ncbi-datasets/15.11.0/)
      | [https://github.com/ncbi/datasets](https://github.com/ncbi/datasets)
      [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/) | +| [NCBI AMRFinderPlus](https://hub.docker.com/r/staphb/ncbi-amrfinderplus)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ncbi-amrfinderplus)](https://hub.docker.com/r/staphb/ncbi-amrfinderplus) | **AMRFinderPlus & database version**
      Click to see AMRFinderPlus v3.11.4 and older versions!
      • 3.1.1b
      • 3.8.4
      • 3.8.28
      • 3.9.3
      • 3.9.8
      • 3.10.1
      • 3.10.5
      • 3.10.16
      • 3.10.20
      • 3.10.24
      • 3.10.30
      • 3.10.36
      • 3.10.42
      • 3.11.2 & 2022-12-19.1
      • [3.11.2 & 2023-02-23.1](ncbi-amrfinderplus/3.11.2-2023-02-23.1/)
      • [3.11.4 & 2023-02-23.1](ncbi-amrfinderplus/3.11.4-2023-02-23.1/)
      • [3.11.8 & 2023-02-23.1](ncbi-amrfinderplus/3.11.8-2023-02-23.1/)
      • [3.11.11 & 2023-04-17.1](ncbi-amrfinderplus/3.11.11-2023-04-17.1)
      • [3.11.14 & 2023-04-17.1](ncbi-amrfinderplus/3.11.14-2023-04-17.1/)
      • [3.11.17 & 2023-07-13.2](ncbi-amrfinderplus/3.11.17-2023-07-13.2/)
      • [3.11.18 & 2023-08-08.2](ncbi-amrfinderplus/3.11.18-2023-08-08.2/)
      • [3.11.20 & 2023-09-26.1](ncbi-amrfinderplus/3.11.20-2023-09-26.1/)
      | [https://github.com/ncbi/amr](https://github.com/ncbi/amr) | +| [NCBI table2asn](https://hub.docker.com/r/staphb/ncbi-table2asn)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ncbi-table2asn)](https://hub.docker.com/r/staphb/ncbi-table2asn) |
      • 1.26.678
      | [https://www.ncbi.nlm.nih.gov/genbank/table2asn/](https://www.ncbi.nlm.nih.gov/genbank/table2asn/)
      [https://ftp.ncbi.nlm.nih.gov/asn1-converters/versions/2022-06-14/by_program/table2asn/](https://ftp.ncbi.nlm.nih.gov/asn1-converters/versions/2022-06-14/by_program/table2asn/) | | [OrthoFinder](https://hub.docker.com/r/staphb/orthofinder)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/orthofinder)](https://hub.docker.com/r/staphb/orthofinder) |
      • 2.17
      | https://github.com/davidemms/OrthoFinder | | [Panaroo](https://hub.docker.com/r/staphb/panaroo)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/panaroo)](https://hub.docker.com/r/staphb/panaroo) |
      • 1.2.10
      | https://github.com/gtonkinhill/panaroo | -| [Pangolin](https://hub.docker.com/r/staphb/pangolin)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pangolin)](https://hub.docker.com/r/staphb/pangolin) |
      Click to see Pangolin v3 and older versions! **Pangolin version & pangoLEARN data release date**
      • 1.1.14
      • 2.0.4 & 2020-07-20
      • 2.0.5 & 2020-07-20
      • 2.1.1 & 2020-12-17
      • 2.1.3 & 2020-12-17
      • 2.1.6 & 2021-01-06
      • 2.1.7 & 2021-01-11
      • 2.1.7 & 2021-01-20
      • 2.1.8 & 2021-01-22
      • 2.1.10 & 2021-02-01
      • 2.1.11 & 2021-02-01
      • 2.1.11 & 2021-02-05
      • 2.2.1 & 2021-02-06
      • 2.2.2 & 2021-02-06
      • 2.2.2 & 2021-02-11
      • 2.2.2 & 2021-02-12
      • 2.3.0 & 2021-02-12
      • 2.3.0 & 2021-02-18
      • 2.3.0 & 2021-02-21
      • 2.3.2 & 2021-02-21
      • 2.3.3 & 2021-03-16
      • 2.3.4 & 2021-03-16
      • 2.3.5 & 2021-03-16
      • 2.3.6 & 2021-03-16
      • 2.3.6 & 2021-03-29
      • 2.3.8 & 2021-04-01
      • 2.3.8 & 2021-04-14
      • 2.3.8 & 2021-04-21
      • 2.3.8 & 2021-04-23
      • 2.4 & 2021-04-28
      • 2.4.1 & 2021-04-28
      • 2.4.2 & 2021-04-28
      • 2.4.2 & 2021-05-10
      • 2.4.2 & 2021-05-11
      • 2.4.2 & 2021-05-19
      • 3.0.5 & 2021-06-05
      • 3.1.3 & 2021-06-15
      • 3.1.5 & 2021-06-15
      • 3.1.5 & 2021-07-07-2
      • 3.1.7 & 2021-07-09
      • 3.1.8 & 2021-07-28
      • 3.1.10 & 2021-07-28
      • 3.1.11 & 2021-08-09
      • 3.1.11 & 2021-08-24
      • 3.1.11 & 2021-09-17
      • 3.1.14 & 2021-09-28
      • 3.1.14 & 2021-10-13
      • 3.1.16 & 2021-10-18
      • 3.1.16 & 2021-11-04
      • 3.1.16 & 2021-11-09
      • 3.1.16 & 2021-11-18
      • 3.1.16 & 2021-11-25
      • 3.1.17 & 2021-11-25
      • 3.1.17 & 2021-12-06
      • 3.1.17 & 2022-01-05
      • 3.1.18 & 2022-01-20
      • 3.1.19 & 2022-01-20
      • 3.1.20 & 2022-02-02
      • 3.1.20 & 2022-02-28
      **Pangolin version & pangolin-data version**
      • 4.0 & 1.2.133
      • 4.0.1 & 1.2.133
      • 4.0.2 & 1.2.133
      • 4.0.3 & 1.2.133
      • 4.0.4 & 1.2.133
      • 4.0.5 & 1.3
      • 4.0.6 & 1.6
      • 4.0.6 & 1.8
      • 4.0.6 & 1.9
      • 4.1.1 & 1.11
      • 4.1.2 & 1.12
      • 4.1.2 & 1.13
      • 4.1.2 & 1.14
      • 4.1.3 & 1.15.1
      • 4.1.3 & 1.16
      | https://github.com/cov-lineages/pangolin
      https://github.com/cov-lineages/pangoLEARN
      https://github.com/cov-lineages/pango-designation
      https://github.com/cov-lineages/scorpio
      https://github.com/cov-lineages/constellations
      https://github.com/cov-lineages/lineages (archived)
      https://github.com/hCoV-2019/pangolin (archived) | +| [Pangolin](https://hub.docker.com/r/staphb/pangolin)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pangolin)](https://hub.docker.com/r/staphb/pangolin) |
      Click to see Pangolin v4.2 and older versions! **Pangolin version & pangoLEARN data release date**
      • 1.1.14
      • 2.0.4 & 2020-07-20
      • 2.0.5 & 2020-07-20
      • 2.1.1 & 2020-12-17
      • 2.1.3 & 2020-12-17
      • 2.1.6 & 2021-01-06
      • 2.1.7 & 2021-01-11
      • 2.1.7 & 2021-01-20
      • 2.1.8 & 2021-01-22
      • 2.1.10 & 2021-02-01
      • 2.1.11 & 2021-02-01
      • 2.1.11 & 2021-02-05
      • 2.2.1 & 2021-02-06
      • 2.2.2 & 2021-02-06
      • 2.2.2 & 2021-02-11
      • 2.2.2 & 2021-02-12
      • 2.3.0 & 2021-02-12
      • 2.3.0 & 2021-02-18
      • 2.3.0 & 2021-02-21
      • 2.3.2 & 2021-02-21
      • 2.3.3 & 2021-03-16
      • 2.3.4 & 2021-03-16
      • 2.3.5 & 2021-03-16
      • 2.3.6 & 2021-03-16
      • 2.3.6 & 2021-03-29
      • 2.3.8 & 2021-04-01
      • 2.3.8 & 2021-04-14
      • 2.3.8 & 2021-04-21
      • 2.3.8 & 2021-04-23
      • 2.4 & 2021-04-28
      • 2.4.1 & 2021-04-28
      • 2.4.2 & 2021-04-28
      • 2.4.2 & 2021-05-10
      • 2.4.2 & 2021-05-11
      • 2.4.2 & 2021-05-19
      • 3.0.5 & 2021-06-05
      • 3.1.3 & 2021-06-15
      • 3.1.5 & 2021-06-15
      • 3.1.5 & 2021-07-07-2
      • 3.1.7 & 2021-07-09
      • 3.1.8 & 2021-07-28
      • 3.1.10 & 2021-07-28
      • 3.1.11 & 2021-08-09
      • 3.1.11 & 2021-08-24
      • 3.1.11 & 2021-09-17
      • 3.1.14 & 2021-09-28
      • 3.1.14 & 2021-10-13
      • 3.1.16 & 2021-10-18
      • 3.1.16 & 2021-11-04
      • 3.1.16 & 2021-11-09
      • 3.1.16 & 2021-11-18
      • 3.1.16 & 2021-11-25
      • 3.1.17 & 2021-11-25
      • 3.1.17 & 2021-12-06
      • 3.1.17 & 2022-01-05
      • 3.1.18 & 2022-01-20
      • 3.1.19 & 2022-01-20
      • 3.1.20 & 2022-02-02
      • 3.1.20 & 2022-02-28
      **Pangolin version & pangolin-data version**
      • 4.0 & 1.2.133
      • 4.0.1 & 1.2.133
      • 4.0.2 & 1.2.133
      • 4.0.3 & 1.2.133
      • 4.0.4 & 1.2.133
      • 4.0.5 & 1.3
      • 4.0.6 & 1.6
      • 4.0.6 & 1.8
      • 4.0.6 & 1.9
      • 4.1.1 & 1.11
      • 4.1.2 & 1.12
      • 4.1.2 & 1.13
      • 4.1.2 & 1.14
      • 4.1.3 & 1.15.1
      • 4.1.3 & 1.16
      • 4.1.3 & 1.17
      • 4.2 & 1.18
      • 4.2 & 1.18.1
      • 4.2 & 1.18.1.1
      • 4.2 & 1.19
      **Pangolin version & pangolin-data version**
      • [4.3 & 1.20](pangolin/4.3-pdata-1.20/)
      • [4.3 & 1.21](pangolin/4.3-pdata-1.21/)
      • [4.3.1 & 1.22](pangolin/4.3.1-pdata-1.22/)
      | https://github.com/cov-lineages/pangolin
      https://github.com/cov-lineages/pangoLEARN
      https://github.com/cov-lineages/pango-designation
      https://github.com/cov-lineages/scorpio
      https://github.com/cov-lineages/constellations
      https://github.com/cov-lineages/lineages (archived)
      https://github.com/hCoV-2019/pangolin (archived) | | [parallel-perl](https://hub.docker.com/r/staphb/parallel-perl)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/parallel-perl)](https://hub.docker.com/r/staphb/parallel-perl) |
      • 20200722
      | https://www.gnu.org/software/parallel | +| [pasty](https://hub.docker.com/r/staphb/pasty)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pasty)](https://hub.docker.com/r/staphb/pasty) |
      • 1.0.2
      | https://github.com/rpetit3/pasty | | [pbptyper](https://hub.docker.com/r/staphb/pbptyper)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pbptyper)](https://hub.docker.com/r/staphb/pbptyper) |
      • 1.0.0
      • 1.0.1
      • 1.0.4
      | https://github.com/rpetit3/pbptyper | | [Phyml](https://hub.docker.com/r/staphb/phyml)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/phyml)](https://hub.docker.com/r/staphb/phyml) |
      • 3.3.20220408
      | https://github.com/stephaneguindon/phyml | | [Piggy](https://hub.docker.com/r/staphb/piggy)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/piggy)](https://hub.docker.com/r/staphb/piggy) |
      • 1.5
      | https://github.com/harry-thorpe/piggy | -| [Pilon](https://hub.docker.com/r/staphb/pilon)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pilon)](https://hub.docker.com/r/staphb/pilon) |
      • 1.23.0
      | https://github.com/broadinstitute/pilon | +| [Pilon](https://hub.docker.com/r/staphb/pilon)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pilon)](https://hub.docker.com/r/staphb/pilon) |
      • 1.23.0
      • 1.24
      | https://github.com/broadinstitute/pilon | | [Piranha](https://hub.docker.com/r/staphb/piranha)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/piranha)](https://hub.docker.com/r/staphb/piranha) |
      • 1.0.4
      | https://github.com/polio-nanopore/piranha | | [PlasmidFinder](https://hub.docker.com/r/staphb/plasmidfinder)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/plasmidfinder)](https://hub.docker.com/r/staphb/plasmidfinder) |
      • 2.1.6
      | https://bitbucket.org/genomicepidemiology/plasmidfinder/src/master/ | | [PlasmidSeeker](https://hub.docker.com/r/staphb/plasmidseeker)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/plasmidseeker)](https://hub.docker.com/r/staphb/plasmidseeker) |
      • 1.0
      • 1.3
      | https://github.com/bioinfo-ut/PlasmidSeeker | | [pmga](https://hub.docker.com/r/staphb/pmga/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pmga)](https://hub.docker.com/r/staphb/pmga) |
      • 3.0.2
      | https://github.com/rpetit3/pmga | -| [PopPUNK](https://hub.docker.com/r/staphb/poppunk/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/poppunk)](https://hub.docker.com/r/staphb/poppunk) |
      • 2.4.0
      • 2.5.0
      | https://github.com/bacpop/PopPUNK | -| [Prokka](https://hub.docker.com/r/staphb/prokka/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/prokka)](https://hub.docker.com/r/staphb/prokka) |
      • 1.13.4
      • 1.14.0
      • 1.14.5
      | https://github.com/tseemann/prokka | +| [polypolish](https://hub.docker.com/r/staphb/polypolish/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/polypolish)](https://hub.docker.com/r/staphb/polypolish) |
      • 0.5.0
      | https://github.com/rrwick/Polypolish | +| [PopPUNK](https://hub.docker.com/r/staphb/poppunk/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/poppunk)](https://hub.docker.com/r/staphb/poppunk) |
      • 2.4.0
      • 2.5.0
      • 2.6.0
      | https://github.com/bacpop/PopPUNK | +| [Porechop](https://hub.docker.com/r/staphb/porechop/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/porechop)](https://hub.docker.com/r/staphb/porechop) |
      • 0.2.4
      | https://github.com/rrwick/Porechop | +| [Prokka](https://hub.docker.com/r/staphb/prokka/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/prokka)](https://hub.docker.com/r/staphb/prokka) |
      • 1.13.4
      • 1.14.0
      • 1.14.5
      • 1.14.6
      | https://github.com/tseemann/prokka | | [pyGenomeViz](https://hub.docker.com/r/staphb/pygenomeviz/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/pygenomeviz)](https://hub.docker.com/r/staphb/pygenomeviz) |
      • 0.2.2
      • 0.3.2
      • [0.4.2](pygenomeviz/0.4.2/)
      • [0.4.3](pygenomeviz/0.4.3/)
      | https://github.com/moshi4/pyGenomeViz | -| [QUAST](https://hub.docker.com/r/staphb/quast/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/quast)](https://hub.docker.com/r/staphb/quast) |
      • 5.0.0
      • 5.0.2
      | https://github.com/ablab/quast | +| [QUAST](https://hub.docker.com/r/staphb/quast/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/quast)](https://hub.docker.com/r/staphb/quast) |
      • 5.0.0
      • 5.0.2
      • [5.2.0](./quast/5.2.0)
      | https://github.com/ablab/quast | | [QuickSNP](https://hub.docker.com/r/staphb/quicksnp/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/quicksnp)](https://hub.docker.com/r/staphb/quicksnp) |
      • 1.0.1
      | https://github.com/k-florek/QuickSNP | | [racon](https://hub.docker.com/r/staphb/racon)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/racon)](https://hub.docker.com/r/staphb/racon)|
      • 1.4.3
      • 1.4.20
      | https://github.com/lbcb-sci/racon
      https://github.com/isovic/racon (ARCHIVED) | | [rasusa](https://hub.docker.com/r/staphb/rasusa/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/rasusa)](https://hub.docker.com/r/staphb/rasusa) |
      • 0.1.0
      • 0.2.0
      • 0.3.0
      • 0.6.0
      • 0.7.0
      | https://github.com/mbhall88/rasusa | @@ -128,19 +214,21 @@ To learn more about the docker pull rate limits and the open source software pro | [ResFinder](https://hub.docker.com/r/staphb/resfinder/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/resfinder)](https://hub.docker.com/r/staphb/resfinder) |
      • 4.1.1
      | https://bitbucket.org/genomicepidemiology/resfinder/src/master/ | | [Roary](https://hub.docker.com/r/staphb/roary/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/roary)](https://hub.docker.com/r/staphb/roary) |
      • 3.12.0
      • 3.13.0
      | https://github.com/sanger-pathogens/Roary | | [SalmID](https://hub.docker.com/r/staphb/salmid)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/salmid)](https://hub.docker.com/r/staphb/salmid) |
      • 0.1.23
      | https://github.com/hcdenbakker/SalmID | -| [Samtools](https://hub.docker.com/r/staphb/samtools)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/samtools)](https://hub.docker.com/r/staphb/samtools) |
      • 1.9
      • 1.10
      • 1.11
      • 1.12
      • 1.13
      • 1.14
      • 1.15
      • 1.16
      | https://github.com/samtools/samtools | +| [Samtools](https://hub.docker.com/r/staphb/samtools)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/samtools)](https://hub.docker.com/r/staphb/samtools) |
      • 1.9
      • 1.10
      • 1.11
      • 1.12
      • 1.13
      • 1.14
      • 1.15
      • 1.16
      • 1.16.1
      • 1.17
      • [1.17-2023-06](samtools/1.17-2023-06/)
      • [1.18](samtools/1.18/)
      | https://github.com/samtools/samtools | +| [SeqKit](https://hub.docker.com/r/staphb/seqkit)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seqkit)](https://hub.docker.com/r/staphb/seqkit) |
      • 2.3.1
      | https://github.com/shenwei356/seqkit | | [SeqSero](https://hub.docker.com/r/staphb/seqsero/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seqsero)](https://hub.docker.com/r/staphb/seqsero) |
      • 1.0.1
      | https://github.com/denglab/SeqSero | | [SeqSero2](https://hub.docker.com/r/staphb/seqsero2/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seqsero2)](https://hub.docker.com/r/staphb/seqsero2) |
      • 0.1.0
      • 1.0.0
      • 1.0.2
      • 1.1.0
      • 1.1.1
      • 1.2.1
      | https://github.com/denglab/SeqSero2/ | | [seqtk](https://hub.docker.com/r/staphb/seqtk)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seqtk)](https://hub.docker.com/r/staphb/seqtk) |
      • 1.3
      | https://github.com/lh3/seqtk | | [seqyclean](https://hub.docker.com/r/staphb/seqyclean)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seqyclean)](https://hub.docker.com/r/staphb/seqyclean) |
      • 1.10.09
      | https://github.com/ibest/seqyclean | | [Seroba](https://hub.docker.com/r/staphb/seroba)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/seroba)](https://hub.docker.com/r/staphb/seroba) |
      • 1.0.0
      • 1.0.2
      | https://github.com/sanger-pathogens/seroba | | [SerotypeFinder](https://hub.docker.com/r/staphb/serotypefinder/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/serotypefinder)](https://hub.docker.com/r/staphb/serotypefinder) |
      • 1.1 (perl version)
      • 2.0.1 (python version)
      | https://bitbucket.org/genomicepidemiology/serotypefinder/ | -| [shigatyper](https://hub.docker.com/r/staphb/shigatyper/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shigatyper)](https://hub.docker.com/r/staphb/shigatyper) |
      • 2.0.1
      • 2.0.2
      | https://github.com/CFSAN-Biostatistics/shigatyper | -| [ShigEiFinder](https://hub.docker.com/r/staphb/shigeifinder/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shigeifinder)](https://hub.docker.com/r/staphb/shigeifinder) |
      • 1.3.2
      | https://github.com/LanLab/ShigEiFinder | +| [shigatyper](https://hub.docker.com/r/staphb/shigatyper/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shigatyper)](https://hub.docker.com/r/staphb/shigatyper) |
      • 2.0.1
      • 2.0.2
      • 2.0.3
      • [2.0.4](shigatyper/2.0.4/)
      • [2.0.5](shigatyper/2.0.5/)
      | https://github.com/CFSAN-Biostatistics/shigatyper | +| [ShigEiFinder](https://hub.docker.com/r/staphb/shigeifinder/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shigeifinder)](https://hub.docker.com/r/staphb/shigeifinder) |
      • [1.3.2](shigeifinder/1.3.2/)
      • [1.3.3](shigeifinder/1.3.3/)
      • [1.3.5](shigeifinder/1.3.5/)
      | https://github.com/LanLab/ShigEiFinder | | [Shovill](https://hub.docker.com/r/staphb/shovill/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shovill)](https://hub.docker.com/r/staphb/shovill) |
      • 1.0.4
      • 1.1.0
      | https://github.com/tseemann/shovill | | [Shovill-se](https://hub.docker.com/r/staphb/shovill-se/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/shovill-se)](https://hub.docker.com/r/staphb/shovill-se) |
      • 1.1.0
      | https://github.com/rpetit3/shovill/tree/v1.1.0se | | [SISTR](https://hub.docker.com/r/staphb/sistr/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/sistr)](https://hub.docker.com/r/staphb/sistr) |
      • 1.0.2
      • 1.1.1
      | https://github.com/phac-nml/sistr_cmd | | [SKA](https://hub.docker.com/r/staphb/ska/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/ska)](https://hub.docker.com/r/staphb/ska) |
      • 1.0
      | https://github.com/simonrharris/SKA | +| [skani](https://hub.docker.com/r/staphb/skani/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/skani)](https://hub.docker.com/r/staphb/skani) |
      • [0.2.0](./skani/0.2.0)
      • [0.2.1](./skani/0.2.1)
      | https://github.com/bluenote-1577/skani | | [SKESA](https://hub.docker.com/r/staphb/skesa)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/skesa)](https://hub.docker.com/r/staphb/skesa) |
      • 2.3.0
      • 2.4.0 (`gfa_connector` & `kmercounter` included)
      | https://github.com/ncbi/SKESA | | [Smalt](https://hub.docker.com/r/staphb/smalt)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/smalt)](https://hub.docker.com/r/staphb/smalt) |
      • 0.7.6
      | https://www.sanger.ac.uk/tool/smalt-0/ | | [snpeff](https://hub.docker.com/r/staphb/snpeff)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/snpeff)](https://hub.docker.com/r/staphb/snpeff) |
      • 5.1
      | https://pcingola.github.io/SnpEff | @@ -150,18 +238,20 @@ To learn more about the docker pull rate limits and the open source software pro | [SNVPhyl-tools](https://hub.docker.com/r/staphb/snvphyl-tools)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/snvphyl-tools)](https://hub.docker.com/r/staphb/snvphyl-tools) |
      • 1.8.2
      | https://github.com/phac-nml/snvphyl-tools | | [SPAdes](https://hub.docker.com/r/staphb/spades/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/spades)](https://hub.docker.com/r/staphb/spades) |
      • 3.8.2
      • 3.12.0
      • 3.13.0
      • 3.14.0
      • 3.14.1
      • 3.15.0
      • 3.15.1
      • 3.15.2
      • 3.15.3
      • 3.15.4
      • 3.15.5
      | https://github.com/ablab/spades
      http://cab.spbu.ru/software/spades/ | | [SRA-toolkit](https://hub.docker.com/r/staphb/sratoolkit/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/sratoolkit)](https://hub.docker.com/r/staphb/sratoolkit) |
      • 2.9.2
      | https://github.com/ncbi/sra-tools | -| [SRST2](https://hub.docker.com/r/staphb/srst2/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/srst2)](https://hub.docker.com/r/staphb/srst2) |
      • 0.2.0
      | https://github.com/katholt/srst2 | +| [SRST2](https://hub.docker.com/r/staphb/srst2/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/srst2)](https://hub.docker.com/r/staphb/srst2) |
      • 0.2.0
      • [0.2.0 + custom Vibrio cholerae database](srst2/0.2.0-vibrio-230224/README.md)
      | https://github.com/katholt/srst2 | | [Staramr](https://hub.docker.com/r/staphb/staramr/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/staramr)](https://hub.docker.com/r/staphb/staramr) |
      • 0.5.1
      • 0.7.1
      • 0.8.0
      | https://github.com/phac-nml/staramr | -| [TBProfiler](https://hub.docker.com/r/staphb/tbprofiler/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/tbprofiler)](https://hub.docker.com/r/staphb/tbprofiler) |
      • 4.3.0
      | https://github.com/jodyphelan/TBProfiler | +| [TBProfiler](https://hub.docker.com/r/staphb/tbprofiler/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/tbprofiler)](https://hub.docker.com/r/staphb/tbprofiler) |
      • 4.3.0
      • 4.4.0
      • 4.4.2
      | https://github.com/jodyphelan/TBProfiler | | [TipToft](https://hub.docker.com/r/staphb/tiptoft/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/tiptoft)](https://hub.docker.com/r/staphb/tiptoft) |
      • 1.0.0
      • 1.0.2
      | https://github.com/andrewjpage/tiptoft | +| [Tostadas](https://hub.docker.com/r/staphb/tostadas/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/tostadas)](https://hub.docker.com/r/staphb/tostadas) |
      • 0.2.0-beta
      | https://github.com/CDCgov/tostadas | | [Treemmer](https://hub.docker.com/r/staphb/treemmer/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/treemmer)](https://hub.docker.com/r/staphb/treemmer) |
      • 0.3
      | https://git.scicore.unibas.ch/TBRU/Treemmer (archived, moved to GitHub)
      https://github.com/fmenardo/Treemmer | | [Trimmomatic](https://hub.docker.com/r/staphb/trimmomatic/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/trimmomatic)](https://hub.docker.com/r/staphb/trimmomatic) |
      • 0.38
      • 0.39
      | http://www.usadellab.org/cms/?page=trimmomatic
      https://github.com/usadellab/Trimmomatic | -| [Trycycler](https://hub.docker.com/r/staphb/trycycler/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/trycycler)](https://hub.docker.com/r/staphb/trycycler) |
      • 0.3.1
      • 0.3.2
      • 0.3.3
      • 0.5.0
      | https://github.com/rrwick/Trycycler | +| [Trycycler](https://hub.docker.com/r/staphb/trycycler/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/trycycler)](https://hub.docker.com/r/staphb/trycycler) |
      • 0.3.1
      • 0.3.2
      • 0.3.3
      • 0.5.0
      • 0.5.3
      • 0.5.4
      | https://github.com/rrwick/Trycycler | | [Unicycler](https://hub.docker.com/r/staphb/unicycler/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/unicycler)](https://hub.docker.com/r/staphb/unicycler) |
      • 0.4.7
      • 0.4.8
      • 0.4.9
      • 0.5.0
      | https://github.com/rrwick/Unicycler | -| [VADR](https://hub.docker.com/r/staphb/vadr/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/vadr)](https://hub.docker.com/r/staphb/vadr) |
      • 1.1
      • 1.1.2
      • 1.1.3
      • 1.2
      • 1.2.1
      • 1.3 & SARS-CoV-2 models 1.3-1
      • 1.3 & SARS-CoV-2 models 1.3-2
      • 1.4 & SARS-CoV-2 models 1.3-2
      • 1.4.1 & SARS-CoV-2 models 1.3-2
      • 1.4.2 & SARS-CoV-2 models 1.3-2, MPXV models 1.4.2-1
      • 1.5 & SARS-CoV-2 models 1.3-2, MPXV models 1.4.2-1
      | https://github.com/nawrockie/vadr (archived, now redirects to ncbi/vadr)
      https://github.com/ncbi/vadr | +| [VADR](https://hub.docker.com/r/staphb/vadr/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/vadr)](https://hub.docker.com/r/staphb/vadr) |
      • 1.1
      • 1.1.2
      • 1.1.3
      • 1.2
      • 1.2.1
      • 1.3 & SARS-CoV-2 models 1.3-1
      • 1.3 & SARS-CoV-2 models 1.3-2
      • 1.4 & SARS-CoV-2 models 1.3-2
      • 1.4.1 & SARS-CoV-2 models 1.3-2
      • 1.4.2 & SARS-CoV-2 models 1.3-2, MPXV models 1.4.2-1
      • 1.5 & SARS-CoV-2 models 1.3-2, MPXV models 1.4.2-1
      • 1.5.1 & SARS-CoV-2 models 1.3-2, MPXV models 1.4.2-1, RSV models 1.5-2
      | https://github.com/nawrockie/vadr (archived, now redirects to ncbi/vadr)
      https://github.com/ncbi/vadr | | [VIBRANT](https://hub.docker.com/r/staphb/vibrant/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/vibrant)](https://hub.docker.com/r/staphb/vibrant) |
      • 1.2.1
      | https://github.com/AnantharamanLab/VIBRANT | | [VIGOR4](https://hub.docker.com/r/staphb/vigor4/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/vigor4)](https://hub.docker.com/r/staphb/vigor4) |
      • 4.1.20190131
      | https://github.com/JCVenterInstitute/VIGOR4 | | [VirSorter2](https://hub.docker.com/r/staphb/virsorter2/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/virsorter2)](https://hub.docker.com/r/staphb/virsorter2/) |
      • 2.1
      | https://github.com/jiarong/VirSorter2 | +| [VirulenceFinder](https://hub.docker.com/r/staphb/virulencefinder/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/virulencefinder)](https://hub.docker.com/r/staphb/virulencefinder/) |
      • [2.0.4](virulencefinder/2.0.4/)
      | https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/
      https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/ | | [wtdbg2](https://hub.docker.com/r/staphb/wtdbg2/)
      [![docker pulls](https://badgen.net/docker/pulls/staphb/wtdbg2)](https://hub.docker.com/r/staphb/wtdbg2) |
      • 2.5
      | https://github.com/ruanjue/wtdbg2 | You can also view the list of images on Docker hub here: https://hub.docker.com/r/staphb/ @@ -202,4 +292,27 @@ Each Dockerfile lists the author(s)/maintainer(s) as a metadata `LABEL`, but the * [@idolawoye](https://github.com/idolawoye) * [@michellescribner](https://github.com/michellescribner) * [@cjjossart](https://github.com/cjjossart) + * [@jlumpe](https://github.com/jlumpe) * [@kissake](https://github.com/kissake) + * [@sage-wright](https://github.com/sage-wright) + * [@ankushkgupta](https://github.com/ankushkgupta) + * [@kyleoconnell](https://github.com/kyleoconnell) + * [@CTindall-1](https://github.com/CTindall-1) + * [@hkunerth](https://github.com/hkunerth) + * [@eetueklund](https://github.com/eetueklund) + * [@wchen190](https://github.com/wchen190) + * [@shelby-bennett](https://github.com/shelby-bennett) + * [@cjalcorta](https://github.com/cjalcorta) + * [@mndoucette](https://github.com/mndoucette) + * [@jcw349](https://github.com/jcw349) + * [@poojasgupta](https://github.com/poojasgupta) + * [@hollygene](https://github.com/hollygene) + * [@HarryHung](https://github.com/HarryHung) + * [@sam-baird](https://github.com/sam-baird) + * [@eproctor118](https://github.com/eproctor118) + * [@cimendes](https://github.com/cimendes) + * [@golden75](https://github.com/golden75) + * [@Kincekara](https://github.com/Kincekara) + * [@Haikelnb](https://github.com/Haikelnb) + * [@kprus](https://github.com/kprus) + diff --git a/any2fasta/0.4.2/Dockerfile b/any2fasta/0.4.2/Dockerfile index c066533d6..e76679da2 100644 --- a/any2fasta/0.4.2/Dockerfile +++ b/any2fasta/0.4.2/Dockerfile @@ -1,34 +1,88 @@ -# base image -FROM ubuntu:focal - -# metadata -LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="1" -LABEL software="any2fasta" -LABEL software.version="0.4.2" -LABEL description="Turns files into fastas." -LABEL website="https://github.com/tseemann/any2fasta" -LABEL license="https://github.com/tseemann/any2fasta/blob/master/LICENSE" -LABEL maintainer="Erin Young" -LABEL maintainer.email="eriny@utah.gov" - -# version to build with -ARG ANY2FASTA_VERSION=0.4.2 - -# install dependencies via apt-get or yum if using a centos or fedora base -RUN apt-get update && apt-get install -y wget perl && \ - apt-get autoclean && rm -rf /var/lib/apt/lists/* - -# install any2fasta -RUN wget https://github.com/tseemann/any2fasta/archive/refs/tags/v${ANY2FASTA_VERSION}.tar.gz && \ - tar -xvf v${ANY2FASTA_VERSION}.tar.gz && \ - rm v${ANY2FASTA_VERSION}.tar.gz && \ - cd any2fasta-${ANY2FASTA_VERSION} && \ - chmod +x any2fasta - -# set environmental variables e.g. $PATH and locale settings for singularity compatibility -ENV PATH="/any2fasta-${ANY2FASTA_VERSION}:$PATH" \ - LC_ALL=C - -# set working directory -WORKDIR /data +ARG ANY2FASTA_VERSION=0.4.2 + +# base image +FROM ubuntu:focal as app + +# version to build with +ARG ANY2FASTA_VERSION + +# metadata +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="2" +LABEL software="any2fasta" +LABEL software.version="0.4.2" +LABEL description="Turns files into fastas." 
+LABEL website="https://github.com/tseemann/any2fasta" +LABEL license="https://github.com/tseemann/any2fasta/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# install dependencies via apt-get or yum if using a centos or fedora base +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + perl \ + zip \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install any2fasta +RUN wget https://github.com/tseemann/any2fasta/archive/refs/tags/v${ANY2FASTA_VERSION}.tar.gz && \ + tar -xvf v${ANY2FASTA_VERSION}.tar.gz && \ + rm v${ANY2FASTA_VERSION}.tar.gz && \ + cd any2fasta-${ANY2FASTA_VERSION} && \ + chmod +x any2fasta + +# set environmental variables e.g. $PATH and locale settings for singularity compatibility +ENV PATH="/any2fasta-${ANY2FASTA_VERSION}:$PATH" \ + LC_ALL=C + +# set working directory +WORKDIR /data + +####################### +##### NEXT STAGE! ##### +####################### + +# running a 'test' in the 'test' stage +FROM app as test + +ARG ANY2FASTA_VERSION + +# check version and print help options +RUN any2fasta -v && \ + any2fasta -h && \ + any2fasta -h | grep github + +WORKDIR /test + +RUN echo "move test data here..." && \ + mv /any2fasta-${ANY2FASTA_VERSION}/test.* . + +RUN echo "running any2fasta on test data..." && \ + any2fasta /dev/null 2>&1 | grep 'ERROR' && \ + any2fasta test.noseq.gff 2>&1 | grep 'ERROR' && \ + any2fasta test.gbk | grep -m 3 '^>' && \ + any2fasta test.gff | grep -m 3 '^>' && \ + any2fasta test.fna | grep -m 3 '^>' && \ + any2fasta test.gfa | grep -m 3 '^>' && \ + any2fasta test.fq | grep -m 3 '^>' && \ + any2fasta test.embl | grep -m 3 '^>' && \ + any2fasta test.clw | grep -m 3 '^>' && \ + any2fasta test.sth | grep -m 3 '^>' && \ + any2fasta test.fna | grep 'CRYANT' && \ + any2fasta -n test.fna | grep 'CNNANT' && \ + any2fasta test.gfa | grep '^>24292$' && \ + any2fasta test.fq | grep '^>ERR1163317.999' && \ + any2fasta test.sth | grep '^>O83071' && \ + any2fasta test.clw | grep '^>gene03' && \ + any2fasta -l test.gbk | grep 'taagaatgagtagaaggttttga' && \ + any2fasta -u test.gbk | grep 'TAAGAATGAGTAGAAGGTTTTGA' && \ + any2fasta -u test.embl | grep 'K02675' && \ + any2fasta -q -l -n test.fq | wc -l | grep '^2000$' && \ + any2fasta - < test.gbk | grep -m 1 -F 'NZ_AHMY02000074' && \ + gzip -c test.gbk | any2fasta - | grep -m 1 -F 'NZ_AHMY02000074' && \ + bzip2 -c test.gbk | any2fasta - | grep -m 1 -F 'NZ_AHMY02000074' && \ + zip test.gbk.zip test.gbk && \ + any2fasta test.gbk.zip | grep -m 1 -F 'NZ_AHMY02000074' diff --git a/ariba/2.14.6/Dockerfile b/ariba/2.14.6/Dockerfile new file mode 100644 index 000000000..ee0dc734c --- /dev/null +++ b/ariba/2.14.6/Dockerfile @@ -0,0 +1,88 @@ +FROM ubuntu:bionic as app + +# for easy upgrade later. 
ARG variables only persist during build time +# ARIBA=2.14.6, pysam=0.15.4; pysam=0.16.0 breaks ariba 2.14.4 (June 2020) +ARG PYSAM_VER="0.15.4" +ARG ARIBA_VER="2.14.6" +ARG SPADES_VER="3.15.5" + +LABEL base.image="ubuntu:bionic" +LABEL dockerfile.version="1" +LABEL software="ARIBA" +LABEL software.version="2.14.6" +LABEL description="ARIBA: Antimicrobial Resistance Identification By Assembly" +LABEL website="https://github.com/sanger-pathogens/ariba" +LABEL license="https://github.com/sanger-pathogens/ariba/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Harry Hung" +LABEL maintainer2.email="ch31@sanger.ac.uk" + +# prevents having to enter commands during apt-get install +ARG DEBIAN_FRONTEND=noninteractive + +# dependencies +RUN apt-get update && apt-get install --no-install-recommends -y \ + python3-dev \ + python3-pip \ + python3-tk \ + python3-setuptools \ + python3-wheel \ + cython3 \ + zlib1g-dev \ + bowtie2 \ + mummer \ + cd-hit \ + wget \ + curl \ + gawk \ + locales-all \ + build-essential \ + libbz2-dev \ + libjpeg-dev \ + liblzma-dev \ + autoconf \ + automake \ + perl \ + libcurl4-gnutls-dev \ + libssl-dev && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +ENV LANG=en_US.UTF-8 \ + LANGUAGE=en_US:en + +# optional installation to enable the usage of SPAdes assembler +RUN wget https://github.com/ablab/spades/releases/download/v${SPADES_VER}/SPAdes-${SPADES_VER}-Linux.tar.gz && \ + tar -xzf SPAdes-${SPADES_VER}-Linux.tar.gz && \ + rm -r SPAdes-${SPADES_VER}-Linux.tar.gz && \ + mkdir /data + +# needed to avoid a matplotlib error +# https://github.com/sanger-pathogens/ariba/blob/b1b524c9d9588cba9d998c9121bd74c63856526/Dockerfile#L49 +ENV MPLBACKEND="agg" + +RUN pip3 install pysam==${PYSAM_VER} ariba==${ARIBA_VER} + +# set $PATH and locale settings for singularity compatibility +ENV PATH="$PATH:/SPAdes-${SPADES_VER}-Linux/bin" \ + LC_ALL=C + +WORKDIR /data + +# default command is to pull up help options +CMD ["ariba", "--help"] + +# new base for testing +FROM app as test + +# print out ARIBA version and +RUN ariba version + +# run built-in test and confirm files are successfully created +RUN ariba test out && \ + ls out/OUT/assemblies.fa.gz && \ + ls out/OUT/assembled_genes.fa.gz && \ + ls out/OUT/assembled_seqs.fa.gz && \ + ls out/OUT/log.clusters.gz && \ + ls out/OUT/report.tsv && \ + ls out/OUT/version_info.txt diff --git a/ariba/2.14.6/README.md b/ariba/2.14.6/README.md new file mode 100644 index 000000000..83d8870e2 --- /dev/null +++ b/ariba/2.14.6/README.md @@ -0,0 +1,32 @@ +# ARIBA container + +Main tool: [ARIBA](https://github.com/sanger-pathogens/ariba) + +Additional tools: +- [pysam](https://github.com/pysam-developers/pysam) 0.15.4 +- [SPAdes](https://github.com/ablab/spades) 3.15.5 + +Full documentation: [https://github.com/sanger-pathogens/ariba/wiki](https://github.com/sanger-pathogens/ariba/wiki) + +Antimicrobial Resistance Identification By Assembly + +ARIBA is a tool that identifies antibiotic resistance genes by running local assemblies. It can also be used for MLST calling. + +The input is a FASTA file of reference sequences (can be a mix of genes and noncoding sequences) and paired sequencing reads. ARIBA reports which of the reference sequences were found, plus detailed information on the quality of the assemblies and any variants between the sequencing reads and the reference sequences. 
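A minimal sketch of calling ARIBA through this container rather than a local install; the `staphb/ariba:2.14.6` tag, the mount point, and the read file names are assumptions for illustration, not taken from the ARIBA documentation:

```bash
# assumption: the prepared reference directory and paired reads live in the current
# host directory, which is mounted over the image's /data working directory
docker run --rm -v "$PWD":/data staphb/ariba:2.14.6 \
  ariba run out.ncbi.prepareref reads1.fastq reads2.fastq out.run
```

The same arguments should work under Singularity via `singularity exec docker://staphb/ariba:2.14.6 ariba run ...`, since the image sets locale variables for Singularity compatibility.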
+ +## Example Usage + +```bash +# Get reference data, for instance from CARD +ariba getref ncbi out.ncbi + +# Prepare reference data for ARIBA +ariba prepareref -f out.ncbi.fa -m out.ncbi.tsv out.ncbi.prepareref + +# Run local assemblies and call variants +ariba run out.ncbi.prepareref reads1.fastq reads2.fastq out.run + +#Summarise data from several runs +ariba summary out.summary out.run1/report1.tsv out.run2/report2.tsv out.run3/report3.tsv +``` +Please read the [ARIBA wiki page](https://github.com/sanger-pathogens/ariba/wiki) for full usage instructions. \ No newline at end of file diff --git a/bcftools/1.17/Dockerfile b/bcftools/1.17/Dockerfile new file mode 100644 index 000000000..fa3e94ce6 --- /dev/null +++ b/bcftools/1.17/Dockerfile @@ -0,0 +1,59 @@ +FROM ubuntu:focal as app + +# for easy upgrade later. ARG variables only persist during build time +ARG bcftoolsVer="1.17" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="bcftools" +LABEL software.version="${bcftoolsVer}" +LABEL description="Variant calling and manipulating files in the Variant Call Format (VCF) and its binary counterpart BCF" +LABEL website="https://github.com/samtools/bcftools" +LABEL license="https://github.com/samtools/bcftools/blob/develop/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install dependencies, cleanup apt garbage +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + ca-certificates \ + perl \ + bzip2 \ + autoconf \ + automake \ + make \ + gcc \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + libperl-dev \ + libgsl0-dev \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# get bcftools and make /data +RUN wget https://github.com/samtools/bcftools/releases/download/${bcftoolsVer}/bcftools-${bcftoolsVer}.tar.bz2 && \ + tar -vxjf bcftools-${bcftoolsVer}.tar.bz2 && \ + rm bcftools-${bcftoolsVer}.tar.bz2 && \ + cd bcftools-${bcftoolsVer} && \ + make && \ + make install && \ + mkdir /data + +# set $PATH (honestly unnecessary here, lol) and locale settings for singularity compatibility +ENV PATH="$PATH" \ + LC_ALL=C + +# set working directory +WORKDIR /data + +FROM app as test + +RUN wget https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4/SARS-CoV-2.reference.fasta && \ + wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + bcftools mpileup -A -d 200 -B -Q 0 -f SARS-CoV-2.reference.fasta SRR13957123.primertrim.sorted.bam | \ + bcftools call -mv -Ov -o SRR13957123.vcf diff --git a/bcftools/1.17/README.md b/bcftools/1.17/README.md new file mode 100644 index 000000000..4625b40e1 --- /dev/null +++ b/bcftools/1.17/README.md @@ -0,0 +1,18 @@ +# bcftools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/bcftools) + +Additional tools: + +* perl 5.30.0 + +## Example Usage + +```bash +bcftools mpileup -A -d 200 -B -Q 0 -f {reference_genome} {bam} | bcftools call -mv -Ov -o bcftools_variants/{sample}.vcf +``` + +Better documentation can be found at [https://www.htslib.org/doc/bcftools.html](https://www.htslib.org/doc/bcftools.html) diff --git a/bcftools/1.18/Dockerfile b/bcftools/1.18/Dockerfile new file mode 100644 index 000000000..893761600 --- /dev/null +++ b/bcftools/1.18/Dockerfile @@ -0,0 +1,96 
@@ +# for easy upgrade later. ARG variables only persist during build time +ARG BCFTOOLS_VER="1.18" + +FROM ubuntu:jammy as builder + +# re-instantiate variable +ARG BCFTOOLS_VER + +# install dependencies, cleanup apt garbage +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + ca-certificates \ + perl \ + bzip2 \ + autoconf \ + automake \ + make \ + gcc \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + libperl-dev \ + libgsl0-dev \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# download, compile, and install bcftools +RUN wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VER}/bcftools-${BCFTOOLS_VER}.tar.bz2 && \ + tar -xjf bcftools-${BCFTOOLS_VER}.tar.bz2 && \ + rm -v bcftools-${BCFTOOLS_VER}.tar.bz2 && \ + cd bcftools-${BCFTOOLS_VER} && \ + make && \ + make install && \ + make test + +### start of app stage ### +FROM ubuntu:jammy as app + +# re-instantiate variable +ARG BCFTOOLS_VER + +# putting the labels in +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="bcftools" +LABEL software.version="${BCFTOOLS_VER}" +LABEL description="Variant calling and manipulating files in the Variant Call Format (VCF) and its binary counterpart BCF" +LABEL website="https://github.com/samtools/bcftools" +LABEL license="https://github.com/samtools/bcftools/blob/develop/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install dependencies required for running bcftools +# https://github.com/samtools/bcftools/blob/develop/INSTALL#L29 +RUN apt-get update && apt-get install --no-install-recommends -y \ + perl \ + zlib1g \ + libncurses5 \ + bzip2 \ + liblzma-dev \ + libcurl4-gnutls-dev \ + procps \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ + mkdir /data + +# copy in bcftools executables from builder stage +COPY --from=builder /usr/local/bin/* /usr/local/bin/ + +# set locale settings for singularity compatibility +ENV LC_ALL=C + +# set final working directory +WORKDIR /data + +# default command is to pull up help optoins +CMD ["bcftools", "--help"] + +### start of test stage ### +FROM app as test + +# install wget for downloading test files +RUN apt-get update && apt-get install --no-install-recommends -y wget ca-certificates + +RUN echo "downloading test SC2 BAM and FASTA and running bcftools mpileup and bcftools call test commands..." && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V4/SARS-CoV-2.reference.fasta && \ + wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + bcftools mpileup -A -d 200 -B -Q 0 -f SARS-CoV-2.reference.fasta SRR13957123.primertrim.sorted.bam | \ + bcftools call -mv -Ov -o SRR13957123.vcf + +# FYI Test suite "make test" is performed in the builder stage since app and +# test stages do not include bcftools source code. 
+# This is to avoid having to re-download source code simply to run test suite diff --git a/bcftools/1.18/README.md b/bcftools/1.18/README.md new file mode 100644 index 000000000..b69b1ab64 --- /dev/null +++ b/bcftools/1.18/README.md @@ -0,0 +1,18 @@ +# bcftools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/bcftools) + +Additional tools: + +* perl 5.34.0 + +## Example Usage + +```bash +bcftools mpileup -A -d 200 -B -Q 0 -f {reference_genome} {bam} | bcftools call -mv -Ov -o bcftools_variants/{sample}.vcf +``` + +Better documentation can be found at [https://www.htslib.org/doc/bcftools.html](https://www.htslib.org/doc/bcftools.html) diff --git a/bedtools/2.31.0/Dockerfile b/bedtools/2.31.0/Dockerfile new file mode 100644 index 000000000..ba75985d1 --- /dev/null +++ b/bedtools/2.31.0/Dockerfile @@ -0,0 +1,83 @@ +ARG BEDTOOLS_VER="2.31.0" + +### builder stage for compiling bedtools code ### +FROM ubuntu:jammy as builder + +# re-instantiate variable so we can use it in builder stage +ARG BEDTOOLS_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="bedtools" +LABEL software.version="${BEDTOOLS_VER}" +LABEL description="bedtools - the swiss army knife for genome arithmetic" +LABEL website="https://github.com/arq5x/bedtools2" +LABEL license="https://github.com/arq5x/bedtools2/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Erin Young" +LABEL maintainer2.email="eriny@utah.gov" + +# install deps via apt-get, these are mainly for compiling bedtools code and for running tests. some are for downloading files (wget, ca-certificates) +# last command is to point 'python' cmd to `python3` so that bedtools test scripts work. 
There are bash scripts that call 'python' +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + zlib1g-dev \ + libghc-bzlib-dev \ + liblzma-dev \ + wget \ + ca-certificates \ + python3 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3 10 + +# python3 required when compiling via `make` command for creating old CLI executables +# dependencies listed here (albeit for v2.30.0, still should be identical): https://packages.ubuntu.com/jammy/bedtools +# requires libghc-bzlib-dev, build-essential, zlib1g-dev, and a few others +# 'make install' should place binary executable files in /usr/local/bin +RUN wget -q https://github.com/arq5x/bedtools2/archive/refs/tags/v${BEDTOOLS_VER}.tar.gz && \ + tar -xzf v${BEDTOOLS_VER}.tar.gz && \ + cd bedtools2-${BEDTOOLS_VER} && \ + make && \ + make install + +### keeping old installation cmds here in case we want to install via pre-compiled binary again in the future ### +# RUN cd /usr/local/bin && \ +# wget https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VER}/bedtools.static && \ +# mv bedtools.static bedtools && \ +# chmod +x bedtools && \ +# mkdir /data + +### test stage ### +FROM builder as test + +# re-instantiate variable so we can use it again in the test stage +ARG BEDTOOLS_VER + +# test scripts expect to be run from the bedtools root dir +WORKDIR /bedtools2-${BEDTOOLS_VER} + +# commenting out ulimit command in test/test.sh (gitpod complains) +# run tests included with bedtools code +RUN sed -i 's/ulimit/#ulimit/g' test/test.sh && \ +make test + +### final app stage ### +# starting from fresh base image instead of a previous stage +FROM ubuntu:jammy as app + +# copy in all bedtools executable files from builder stage to final app stage +COPY --from=builder /usr/local/bin/* /usr/local/bin + +# setting just in case for singularity compatibility +ENV LC_ALL=C + +# default command is to print help options +CMD [ "bedtools", "--help" ] + +# set final working directory to /data +WORKDIR /data + +# check help options and version +RUN bedtools --help && \ +bedtools --version \ No newline at end of file diff --git a/bedtools/2.31.0/README.md b/bedtools/2.31.0/README.md new file mode 100644 index 000000000..d5391c74a --- /dev/null +++ b/bedtools/2.31.0/README.md @@ -0,0 +1,63 @@ +# bedtools2 container + +GitHub Repo: [bedtools](https://github.com/arq5x/bedtools2/) + +Full documentation: [https://bedtools.readthedocs.io/en/latest/index.html](https://bedtools.readthedocs.io/en/latest/index.html) + +> Collectively, the bedtools utilities are a swiss-army knife of tools for a wide-range of genomics analysis tasks. The most widely-used tools enable genome arithmetic: that is, set theory on the genome. For example, bedtools allows one to intersect, merge, count, complement, and shuffle genomic intervals from multiple files in widely-used genomic file formats such as BAM, BED, GFF/GTF, VCF. While each individual tool is designed to do a relatively simple task (e.g., intersect two interval files), quite sophisticated analyses can be conducted by combining multiple bedtools operations on the UNIX command line. 
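As a quick hedged sketch of that genome arithmetic run through this container; the `staphb/bedtools:2.31.0` tag and the BED file name are assumptions for illustration only:

```bash
# assumption: sorted.bed sits in the current host directory, mounted over /data;
# bedtools merge collapses overlapping intervals in a position-sorted BED file
docker run --rm -v "$PWD":/data staphb/bedtools:2.31.0 \
  bedtools merge -i sorted.bed > merged.bed
```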
+ +List of sub-commands in bedtools 2.31.0: + +- annotate +- bamtobed +- bamtofastq +- bed12tobed6 +- bedpetobam +- bedtobam +- closest +- cluster +- complement +- coverage +- expand +- flank +- fisher +- genomecov +- getfasta +- groupby +- igv +- intersect +- jaccard +- links +- makewindows +- map +- maskfasta +- merge +- multicov +- multiinter +- nuc +- overlap +- pairtobed +- pairtopair +- random +- reldist +- shift +- shuffle +- slop +- sort +- subtract +- summary +- tag +- unionbedg +- window + +## Example Usage + +```bash +# bedtools consists of a suite of sub-commands that are invoked as follows: +# bedtools [sub-command] [options] + +# For example, to intersect two BED files, one would invoke the following: +bedtools intersect -a a.bed -b b.bed +``` + +More examples are found in the [bedtools tutorial](http://quinlanlab.org/tutorials/bedtools/bedtools.html) diff --git a/blast/2.14.0/Dockerfile b/blast/2.14.0/Dockerfile new file mode 100644 index 000000000..f6307b5c5 --- /dev/null +++ b/blast/2.14.0/Dockerfile @@ -0,0 +1,61 @@ +FROM ubuntu:focal as app + +ARG BLAST_VER="2.14.0" + +# LABEL instructions tag the image with metadata that might be important to the user +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="blast+" +LABEL software.version=$BLAST_VER +LABEL description="Finds matches in sequencing reads" +LABEL website="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download" +LABEL license="https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + libgomp1 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install and/or setup more things. Make /data for use as a working dir +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + mkdir /data + +# ENV instructions set environment variables that persist from the build into the resulting image +# Use for e.g. 
$PATH and locale settings for compatibility with Singularity +ENV PATH="/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# WORKDIR sets working directory +WORKDIR /data + +# default command is to pull up help options for virulencefinder +# yes, there are more tools than blastn, but it's likely the most common one used +CMD [ "blastn", "-help" ] + +# A second FROM insruction creates a new stage +# We use `test` for the test image +FROM app as test + +# getting all the exectubles in bin +RUN ls /ncbi-blast-*/bin/ + +# getting a genome +RUN mkdir db && \ + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz -P db && \ + gunzip db/GCF_000005845.2_ASM584v2_genomic.fna.gz && \ + makeblastdb -dbtype nucl -in db/GCF_000005845.2_ASM584v2_genomic.fna + +# getting a list of genes +RUN wget https://raw.githubusercontent.com/rrwick/Unicycler/main/unicycler/gene_data/dnaA.fasta + +# getting some blast results +RUN tblastn -query dnaA.fasta \ + -db db/GCF_000005845.2_ASM584v2_genomic.fna \ + -outfmt '6' \ + -out blast_hits.txt && \ + head blast_hits.txt diff --git a/blast/2.14.0/README.md b/blast/2.14.0/README.md new file mode 100644 index 000000000..052b8be52 --- /dev/null +++ b/blast/2.14.0/README.md @@ -0,0 +1,60 @@ +# blast+ container + +Main tools: + +- [blast+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download) + +This is meant to assist in local blast searches. No blast databases will be maintained in this container. Be sure to mount your relevant Volumes with `--volumes` or `-v` when using the command line. + +blast+ is actually a suite of tools. blast+ v.2.14.0 includes: + +```bash +$ ls /ncbi-blast-2.14.0+/bin +blast_formatter +blast_formatter_vdb +blast_vdb_cmd +blastdb_aliastool +blastdbcheck +blastdbcmd +blastn +blastn_vdb +blastp +blastx +cleanup-blastdb-volumes.py +convert2blastmask +deltablast +dustmasker +get_species_taxids.sh +legacy_blast.pl +makeblastdb +makembindex +makeprofiledb +psiblast +rpsblast +rpstblastn +segmasker +tblastn +tblastn_vdb +tblastx +update_blastdb.pl +windowmasker +``` + +Currently not supported, but could be: + +```bash +get_species_taxids.sh # requires E-direct +update_blastdb.pl # requires perl +``` + +## Example Usage + +```bash +# making a blast database +makeblastdb -dbtype nucl -in fasta.fa + +# query +tblastn -query query.fasta -db fasta.fa -outfmt '6' -out blast_hits.txt +``` + +More documentation can be found at [https://www.ncbi.nlm.nih.gov/books/NBK569856/](https://www.ncbi.nlm.nih.gov/books/NBK569856/) and [https://www.ncbi.nlm.nih.gov/books/NBK279690/](https://www.ncbi.nlm.nih.gov/books/NBK279690/) diff --git a/blast/2.14.1/Dockerfile b/blast/2.14.1/Dockerfile new file mode 100644 index 000000000..edecf6952 --- /dev/null +++ b/blast/2.14.1/Dockerfile @@ -0,0 +1,63 @@ +FROM ubuntu:focal as app + +ARG BLAST_VER="2.14.1" + +# LABEL instructions tag the image with metadata that might be important to the user +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="blast+" +LABEL software.version=$BLAST_VER +LABEL description="Finds matches in sequencing reads" +LABEL website="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download" +LABEL license="https://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/lxr/source/scripts/projects/blast/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + 
ca-certificates \ + libgomp1 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install and/or setup more things. Make /data for use as a working dir +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + mkdir /data + +# ENV instructions set environment variables that persist from the build into the resulting image +# Use for e.g. $PATH and locale settings for compatibility with Singularity +ENV PATH="/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# WORKDIR sets working directory +WORKDIR /data + +# default command is to pull up help options for virulencefinder +# yes, there are more tools than blastn, but it's likely the most common one used +CMD [ "blastn", "-help" ] + + + +# A second FROM insruction creates a new stage +# We use `test` for the test image +FROM app as test + +# getting all the exectubles in bin +RUN ls /ncbi-blast-*/bin/ + +# getting a genome +RUN mkdir db && \ + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/005/845/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz -P db && \ + gunzip db/GCF_000005845.2_ASM584v2_genomic.fna.gz && \ + makeblastdb -dbtype nucl -in db/GCF_000005845.2_ASM584v2_genomic.fna + +# getting a list of genes +RUN wget https://raw.githubusercontent.com/rrwick/Unicycler/main/unicycler/gene_data/dnaA.fasta + +# getting some blast results +RUN tblastn -query dnaA.fasta \ + -db db/GCF_000005845.2_ASM584v2_genomic.fna \ + -outfmt '6' \ + -out blast_hits.txt && \ + head blast_hits.txt diff --git a/blast/2.14.1/README.md b/blast/2.14.1/README.md new file mode 100644 index 000000000..5724be14b --- /dev/null +++ b/blast/2.14.1/README.md @@ -0,0 +1,60 @@ +# blast+ container + +Main tools: + +- [blast+](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download) + +This is meant to assist in local blast searches. No blast databases will be maintained in this container. Be sure to mount your relevant Volumes with `--volumes` or `-v` when using the command line. + +blast+ is actually a suite of tools. blast+ v.2.14.1 includes: + +```bash +$ ls /ncbi-blast-2.14.1+/bin +blast_formatter +blast_formatter_vdb +blast_vdb_cmd +blastdb_aliastool +blastdbcheck +blastdbcmd +blastn +blastn_vdb +blastp +blastx +cleanup-blastdb-volumes.py +convert2blastmask +deltablast +dustmasker +get_species_taxids.sh +legacy_blast.pl +makeblastdb +makembindex +makeprofiledb +psiblast +rpsblast +rpstblastn +segmasker +tblastn +tblastn_vdb +tblastx +update_blastdb.pl +windowmasker +``` + +Currently not supported, but could be: + +```bash +get_species_taxids.sh # requires E-direct +update_blastdb.pl # requires perl +``` + +## Example Usage + +```bash +# making a blast database +makeblastdb -dbtype nucl -in fasta.fa + +# query +tblastn -query query.fasta -db fasta.fa -outfmt '6' -out blast_hits.txt +``` + +More documentation can be found at [https://www.ncbi.nlm.nih.gov/books/NBK569856/](https://www.ncbi.nlm.nih.gov/books/NBK569856/) and [https://www.ncbi.nlm.nih.gov/books/NBK279690/](https://www.ncbi.nlm.nih.gov/books/NBK279690/) diff --git a/bowtie2/2.5.1/Dockerfile b/bowtie2/2.5.1/Dockerfile new file mode 100644 index 000000000..32a53cc68 --- /dev/null +++ b/bowtie2/2.5.1/Dockerfile @@ -0,0 +1,57 @@ +# FROM defines the base docker image to start from. 
This command has to come first in the file +FROM --platform=linux/x86_64 ubuntu:xenial as app + +ARG SAMTOOLSVER=1.15 +ARG BOWTIE2VER=2.5.1 + +# metadata (there are a few other labels you can add, these are optional but preferred!) +LABEL base.image="ubuntu:xenial" +LABEL dockerfile.version="1" +LABEL software="Bowtie2" +LABEL software.version=$BOWTIE2VER +LABEL description="Bowtie2: Genome assembler using a reference and mapping\n Samtools: a set of tools for interacting with and reformatting sequence data" +LABEL website="http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml\n https://www.htslib.org/" +LABEL maintainer="Holly Halstead" +LABEL maintainer.email="holly.halstead@doh.wa.gov" + +# install dependencies, cleanup apt garbage. +RUN apt-get update && apt-get install -y --no-install-recommends\ + build-essential=12.1ubuntu2 \ + autoconf=2.69-9 \ + zlib1g-dev=1:1.2.8.dfsg-2ubuntu4.3 \ + python3=3.5.1-3 \ + wget=1.17.1-1ubuntu1.5 \ + libbz2-dev=1.0.6-8ubuntu0.2 \ + liblzma-dev=5.1.1alpha+20120614-2ubuntu2 \ + libncurses5-dev=6.0+20160213-1ubuntu1 \ + bedtools=2.25.0-1 \ + python3-pip=8.1.1-2ubuntu0.6 \ + unzip=6.0-20ubuntu1.1 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# download, unpack Bowtie2 and Samtools +RUN wget -q -O bowtie2.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie2/${BOWTIE2VER}/bowtie2-${BOWTIE2VER}-linux-x86_64.zip/download; \ + unzip bowtie2.zip -d /opt/; \ + ln -s /opt/bowtie2-${BOWTIE2VER}-linux-x86_64/ /opt/bowtie2; \ + rm bowtie2.zip + ENV PATH $PATH:/opt/bowtie2 + + +#samtools# SAMtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# set working directory +WORKDIR /data + +FROM app as test + +RUN wget https://raw.githubusercontent.com/BenLangmead/bowtie2/master/example/reads/longreads.fq && \ + wget https://raw.githubusercontent.com/BenLangmead/bowtie2/master/example/reference/lambda_virus.fa && \ + bowtie2-build lambda_virus.fa lambda_virus && \ + bowtie2 -x lambda_virus -U longreads.fq diff --git a/bowtie2/2.5.1/README.md b/bowtie2/2.5.1/README.md new file mode 100644 index 000000000..6a87ebb5c --- /dev/null +++ b/bowtie2/2.5.1/README.md @@ -0,0 +1,21 @@ +# bowtie2 container +Main tool : [bowtie2](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml) + +Required tool : [samtools](https://www.htslib.org/) + +Bowtie2 : Genome assembler using a reference and mapping + +Samtools: a set of tools for interacting with and reformatting sequence data + +# Example Usage + +``` +bowtie2-build lambda_virus.fa /index/lambda_virus +``` +``` +bowtie2 -x /index/lambda_virus -U longreads.fq +``` +``` +bowtie2-inspect --summary /index/lambda_virus +``` +Better documentation can be found at [https://github.com/BenLangmead/bowtie2](https://github.com/BenLangmead/bowtie2) diff --git a/busco/5.4.7/Dockerfile b/busco/5.4.7/Dockerfile new file mode 100644 index 000000000..99618b0cf --- /dev/null +++ b/busco/5.4.7/Dockerfile @@ -0,0 +1,85 @@ +FROM ubuntu:focal as app + +ARG BUSCO_VER="5.4.7" +ARG BBMAP_VER="39.01" +ARG BLAST_VER="2.14.0" +ARG DEBIAN_FRONTEND=noninteractive + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="BUSCO" +LABEL software.version="${BUSCO_VER}" +LABEL description="Assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy 
Orthologs" +LABEL website="https://busco.ezlab.org/" +LABEL license="https://gitlab.com/ezlab/busco/-/raw/master/LICENSE" +LABEL maintainer="Kutluhan Incekara" +LABEL maintainer.email="kutluhan.incekara@ct.gov" + +# install dependencies +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + python3-biopython \ + python3-pandas \ + python3-setuptools\ + hmmer \ + prodigal \ + augustus \ + r-cran-ggplot2 \ + gcc-x86-64-linux-gnu \ + default-jre \ + libjenkins-json-java \ + libgoogle-gson-java \ + libjson-java \ + && rm -rf /var/lib/apt/lists/* && apt-get autoclean \ + && ln -s /usr/bin/python3 /usr/bin/python + +# install other necessary tools +# blast +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.14.0/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz &&\ + tar -xvf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz +# sepp (greengenes version) +RUN wget https://raw.githubusercontent.com/smirarab/sepp-refs/54415e8905c5fa26cdd631c526b21f2bcdba95b5/gg/sepp-package.tar.bz &&\ + tar xvfj sepp-package.tar.bz && rm sepp-package.tar.bz &&\ + cd sepp-package/sepp &&\ + python setup.py config -c && chmod 755 run_* +# bbtools +RUN wget https://sourceforge.net/projects/bbmap/files/BBMap_${BBMAP_VER}.tar.gz &&\ + tar -xvf BBMap_${BBMAP_VER}.tar.gz && rm BBMap_${BBMAP_VER}.tar.gz &&\ + mv /bbmap/* /usr/local/bin/ +# metaeuk +RUN wget https://github.com/soedinglab/metaeuk/releases/download/6-a5d39d9/metaeuk-linux-sse41.tar.gz &&\ + tar -xvf metaeuk-linux-sse41.tar.gz && rm metaeuk-linux-sse41.tar.gz &&\ + mv /metaeuk/bin/* /usr/local/bin/ + +# and finally busco +RUN wget https://gitlab.com/ezlab/busco/-/archive/${BUSCO_VER}/busco-${BUSCO_VER}.tar.gz &&\ + tar -xvf busco-${BUSCO_VER}.tar.gz && \ + rm busco-${BUSCO_VER}.tar.gz &&\ + cd busco-${BUSCO_VER} && \ + python3 setup.py install + +ENV AUGUSTUS_CONFIG_PATH="/usr/share/augustus/config/" +ENV PATH="${PATH}:/ncbi-blast-${BLAST_VER}+/bin:/sepp-package/sepp:/usr/share/augustus/scripts" +ENV LC_ALL=C + +WORKDIR /data + +CMD busco -h + +## Tests ## +FROM app as test +# run tests for bacteria and eukaryota +RUN busco -i /busco-5.4.7/test_data/bacteria/genome.fna -c 8 -m geno -f --out test_bacteria +RUN busco -i /busco-5.4.7/test_data/eukaryota/genome.fna -c 8 -m geno -f --out test_eukaryota +RUN busco -i /busco-5.4.7/test_data/eukaryota/genome.fna -l eukaryota_odb10 -c 8 -m geno -f --out test_eukaryota_augustus --augustus + +# generate plot +RUN mkdir my_summaries &&\ + find . 
-name "short_summary.*.txt" -exec cp {} my_summaries \; &&\ + python3 /busco-5.4.7/scripts/generate_plot.py -wd my_summaries + +# using actual data (Salmonella genome) +RUN wget -q https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + busco -m genome -i GCA_010941835.1_PDT000052640.3_genomic.fna -o busco_GCA_010941835.1 --cpu 4 --auto-lineage-prok && \ + head busco_GCA_010941835.1/short_summary*.txt diff --git a/busco/5.4.7/README.md b/busco/5.4.7/README.md new file mode 100644 index 000000000..414b7f640 --- /dev/null +++ b/busco/5.4.7/README.md @@ -0,0 +1,94 @@ +# Assessing genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs (BUSCO) container + +Main tool : [BUSCO](https://gitlab.com/ezlab/busco/) + +Additional tools: +- BBTools 39.01 +- HMMER 3.3 +- Prodigal 2.6.3 +- BLAST+ 2.14.0 +- AUGUSTUS 3.3.3 +- MetaEuk (Release 6-a5d39d9) +- SEPP 4.5.1 +- Python 3.8.10 +- BioPython 1.76 +- R 3.6.3 +- Perl 5.30.0 +- OpenJDK 11.0.20 + +Full documentation: https://busco.ezlab.org/busco_userguide.html + +This fully functional BUSCO docker image allows you to use all the program options. All additional tools were added to satisfy the requirements of those functions. This image does not contain any lineage dataset. BUSCO downloads the passed dataset name automatically while running. If a full path is given as lineage, this automated management will be disabled. The usage options are given below. Please refer to the BUSCO manual for further information. +## Example Usage +### Specific lineage +```bash +busco -i assembly.fasta -l bacteria_odb10 -o output -m genome +``` +or +```bash +busco -i assembly.fasta -l /path/to/folder/bacteria_odb10 -o output -m genome +``` +### Auto lineage selection: +```bash +busco -i assembly.fasta -o output -m genome --auto-lineage-prok +``` +### Additional options: +```bash + -i FASTA FILE, --in FASTA FILE + Input sequence file in FASTA format. Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set. + -o OUTPUT, --out OUTPUT + Give your analysis run a recognisable short name. Output folders and files will be labelled with this name. WARNING: do not provide a path + -m MODE, --mode MODE Specify which BUSCO analysis mode to run. + There are three valid modes: + - geno or genome, for genome assemblies (DNA) + - tran or transcriptome, for transcriptome assemblies (DNA) + - prot or proteins, for annotated gene sets (protein) + -l LINEAGE, --lineage_dataset LINEAGE + Specify the name of the BUSCO lineage to be used. + --auto-lineage Run auto-lineage to find optimum lineage path + --auto-lineage-prok Run auto-lineage just on non-eukaryote trees to find optimum lineage path + --auto-lineage-euk Run auto-placement just on eukaryote tree to find optimum lineage path + -c N, --cpu N Specify the number (N=integer) of threads/cores to use. + -f, --force Force rewriting of existing files. Must be used when output files with the provided name already exist. + -r, --restart Continue a run that had already partially completed. + -q, --quiet Disable the info logs, displays only errors + --out_path OUTPUT_PATH + Optional location for results folder, excluding results folder name. Default is current working directory. 
+ --download_path DOWNLOAD_PATH + Specify local filepath for storing BUSCO dataset downloads + --datasets_version DATASETS_VERSION + Specify the version of BUSCO datasets, e.g. odb10 + --download_base_url DOWNLOAD_BASE_URL + Set the url to the remote BUSCO dataset location + --update-data Download and replace with last versions all lineages datasets and files necessary to their automated selection + --offline To indicate that BUSCO cannot attempt to download files + --metaeuk_parameters METAEUK_PARAMETERS + Pass additional arguments to Metaeuk for the first run. All arguments should be contained within a single pair of quotation marks, separated by commas. E.g. "--param1=1,--param2=2" + --metaeuk_rerun_parameters METAEUK_RERUN_PARAMETERS + Pass additional arguments to Metaeuk for the second run. All arguments should be contained within a single pair of quotation marks, separated by commas. E.g. "--param1=1,--param2=2" + -e N, --evalue N E-value cutoff for BLAST searches. Allowed formats, 0.001 or 1e-03 (Default: 1e-03) + --limit REGION_LIMIT How many candidate regions (contig or transcript) to consider per BUSCO (default: 3) + --augustus Use augustus gene predictor for eukaryote runs + --augustus_parameters AUGUSTUS_PARAMETERS + Pass additional arguments to Augustus. All arguments should be contained within a single pair of quotation marks, separated by commas. E.g. "--param1=1,--param2=2" + --augustus_species AUGUSTUS_SPECIES + Specify a species for Augustus training. + --long Optimization Augustus self-training mode (Default: Off); adds considerably to the run time, but can improve results for some non-model organisms + --config CONFIG_FILE Provide a config file + -v, --version Show this version and exit + -h, --help Show this help message and exit + --list-datasets Print the list of available BUSCO datasets +``` +### Plot +Example usage of plotting script: +```bash +# collect short summaries +mkdir my_summaries +cp SPEC1/short_summary.generic.lineage1_odb10.SPEC1.txt my_summaries/. +cp SPEC2/short_summary.generic.lineage2_odb10.SPEC2.txt my_summaries/. +cp SPEC3/short_summary.specific.lineage2_odb10.SPEC3.txt my_summaries/. +cp SPEC4/short_summary.generic.lineage3_odb10.SPEC4.txt my_summaries/. +cp SPEC5/short_summary.generic.lineage4_odb10.SPEC5.txt my_summaries/. 
+# plot via script +python3 scripts/generate_plot.py -wd my_summaries +``` diff --git a/bwa/0.7.17/Dockerfile b/bwa/0.7.17/Dockerfile index 17f7ff0e4..440464002 100644 --- a/bwa/0.7.17/Dockerfile +++ b/bwa/0.7.17/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:xenial +FROM ubuntu:xenial as app LABEL base.image="ubuntu:xenial" LABEL version="1" @@ -9,12 +9,17 @@ LABEL website="https://github.com/lh3/bwa" LABEL license="https://github.com/lh3/bwa/blob/master/COPYING" LABEL maintainer="Curtis Kapsak" LABEL maintainer.email="pjx8@cdc.gov" +LABEL dockerfile.version="2" -RUN apt-get update && apt-get install -y make \ - wget \ - gcc \ - zlib1g-dev \ - bzip2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + make \ + gcc \ + zlib1g-dev \ + bzip2 \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* RUN mkdir bwa &&\ mkdir /data &&\ @@ -27,3 +32,9 @@ RUN mkdir bwa &&\ ENV PATH="${PATH}:/bwa/bwa-0.7.17" WORKDIR /data + +FROM app as test + +RUN wget "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nucleotide&id=5&rettype=fasta" \ + -O "sequence.fasta" &&\ + bwa index "sequence.fasta" diff --git a/bwa/0.7.17/README.md b/bwa/0.7.17/README.md new file mode 100644 index 000000000..9bd4ab999 --- /dev/null +++ b/bwa/0.7.17/README.md @@ -0,0 +1,16 @@ +# bwa container + +Main tool: + +* [https://bio-bwa.sourceforge.net/](https://bio-bwa.sourceforge.net/) +* [GitHub](https://github.com/lh3/bwa) + +## Example Usage + +```bash +bwa mem ref.fa reads.fq > aln.sam + +bwa aln ref.fa reads.fq > reads.sai; bwa samse ref.fa reads.sai reads.fq > aln-se.sam +``` + +Better documentation can be found at [https://bio-bwa.sourceforge.net/bwa.shtml](https://bio-bwa.sourceforge.net/bwa.shtml) diff --git a/canu/2.2/Dockerfile b/canu/2.2/Dockerfile index 984fe6956..0dbac7d5b 100644 --- a/canu/2.2/Dockerfile +++ b/canu/2.2/Dockerfile @@ -1,5 +1,5 @@ # Use ubuntu as base image -FROM ubuntu:xenial +FROM ubuntu:xenial as app # metadata LABEL base.image="ubuntu:xenial" @@ -43,3 +43,26 @@ WORKDIR /data # set perl locale settings ENV LC_ALL=C + +# ======== Adding a test layer ======== + +FROM app as test + +# creating test working DIR. +WORKDIR /test_canu + +# download test input file into /test_canu/ directory +RUN wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz + +# testing analysis RUN, canu needs to know approx genome size (for coverage calculation) and sequencing platform +RUN canu \ + -p portiera -d portiera-nanopore \ + genomeSize=0.3m \ + -nanopore ERR3772599.fastq.gz \ + maxMemory=7 maxThreads=2 \ + redMemory=7 redThreads=2 \ + corMemory=7 corThreads=2 \ + oeaMemory=7 oeaThreads=2 \ + batMemory=7 batThreads=2 \ + cnsMemory=7 cnsThreads=2 + diff --git a/cfsan-snp-pipeline/2.0.2/Dockerfile b/cfsan-snp-pipeline/2.0.2/Dockerfile index 2660f5146..7f66d3608 100644 --- a/cfsan-snp-pipeline/2.0.2/Dockerfile +++ b/cfsan-snp-pipeline/2.0.2/Dockerfile @@ -1,8 +1,8 @@ -FROM ubuntu:xenial +FROM ubuntu:xenial as app # metadata LABEL base.image="ubuntu:xenial" -LABEL version="1" +LABEL dockerfile.version="2" LABEL software="cfsan_snp_pipeline" LABEL software.version="2.0.2" LABEL description="A pipeline for the production of SNP matrices from sequence data used in the phylogenetic analysis of pathogenic organisms sequenced from samples of interest to food safety."
@@ -15,11 +15,11 @@ LABEL maintainer.email="pjx8@cdc.gov" # This Dockerfile is based on a Docker file from Justin Payne which can be found here: # https://hub.docker.com/r/crashfrog/snp-pipeline/dockerfile -# The dockerfile has been modified from it's original form by Curtis Kapsak +# The dockerfile has been modified from it's original form by Curtis Kapsak and Sam Baird WORKDIR /tmp/ RUN apt-get update -y \ - && apt-get install -y \ + && apt-get install -y --no-install-recommends \ bowtie2 \ openjdk-8-jre \ g++ \ @@ -44,8 +44,11 @@ RUN apt-get update -y \ libperl-dev \ && apt-get clean +ARG SNP_PIPELINE_VER +ENV SNP_PIPELINE_VER=${SNP_PIPELINE_VER:-2.0.2} + # install pip (code originally from Justin) -RUN wget https://bootstrap.pypa.io/get-pip.py -q \ +RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py -q \ && python get-pip.py # install samtools, since the version in apt-get is too far out-of-date @@ -78,19 +81,13 @@ RUN mkdir /picard &&\ # get GATK 3.8-1-0 jar flie RUN mkdir /gatk &&\ cd /gatk &&\ - wget -O ./gatk.tar.bz2 'https://software.broadinstitute.org/gatk/download/auth?package=GATK-archive&version=3.8-1-0-gf15c1c3ef' &&\ - tar -xjf gatk.tar.bz2 &&\ + wget -O ./gatk.tar.bz2 'https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef.tar.bz2' &&\ + tar -xjvf gatk.tar.bz2 &&\ rm gatk.tar.bz2 # get VarScan and SRA-toolkit RUN wget http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.7.jar -q \ && cp VarScan.v2.3.7.jar /usr/bin/VarScan.jar \ - && wget http://www.niehs.nih.gov/research/resources/assets/docs/artsrcchocolatecherrycake031915linuxtgz.tgz -q \ - && tar -zxf /tmp/artsrcchocolatecherrycake031915linuxtgz.tgz \ - && cd /tmp/art_src_ChocolateCherryCake_Linux \ - && ./configure \ - && make \ - && make install \ && cd /tmp/ \ && wget http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/2.3.5-2/sratoolkit.2.3.5-2-ubuntu64.tar.gz -q \ && tar -zxf /tmp/sratoolkit.2.3.5-2-ubuntu64.tar.gz \ @@ -98,24 +95,27 @@ RUN wget http://downloads.sourceforge.net/project/varscan/VarScan.v2.3.7.jar -q && rm -r /tmp/* #install snp-pipeline and snp-mutator -RUN pip install numpy snp-pipeline biopython snp-mutator +RUN pip install numpy snp-pipeline==$SNP_PIPELINE_VER biopython==1.68 snp-mutator # set java CLASSPATH variables ENV CLASSPATH /usr/bin/VarScan.jar:/picard/picard.jar:/gatk/GenomeAnalysisTK-3.8-1-0-gf15c1c3ef/GenomeAnalysisTK.jar # not sure why this is here, or what program uses NUMCORES, but I'm going to leave it for now. ENV NUMCORES 1 -# Test snp_pipeline (code originally from Justin). Running copy_snppipeline_data.py generates an message the the script is deprecated +# make a DIR called /data for running cfsan-snp-pipeline in +WORKDIR /data + +# Test snp_pipeline (code modified from Justin). Running copy_snppipeline_data.py generates an message the the script is deprecated # but I figured I would leave it since the below commands test that the image runs properly. 
+FROM app as test WORKDIR /test/ RUN copy_snppipeline_data.py lambdaVirusInputs testLambdaVirus \ && cd testLambdaVirus \ && run_snp_pipeline.sh -s samples reference/lambda_virus.fasta \ && copy_snppipeline_data.py lambdaVirusExpectedResults expectedResults \ - && diff -q snplist.txt expectedResults/snplist.txt \ - && diff -q snpma.fasta expectedResults/snpma.fasta \ - && diff -q referenceSNP.fasta expectedResults/referenceSNP.fasta + && diff -q snplist.txt expectedResults/snplist.txt >> diffOutput.txt \ + && diff -q snpma.fasta expectedResults/snpma.fasta >> diffOutput.txt \ + && diff -q referenceSNP.fasta expectedResults/referenceSNP.fasta >> diffOutput.txt \ + && if [ -s diffOutput.txt ]; then echo "testLambdaVirus failed"; else echo "testLambdaVirus passed"; fi -# make a DIR called /data for running cfsan-snp-pipeline in -RUN mkdir /data -WORKDIR /data +WORKDIR /data \ No newline at end of file diff --git a/cfsan-snp-pipeline/2.2.1/Dockerfile b/cfsan-snp-pipeline/2.2.1/Dockerfile new file mode 100644 index 000000000..b89ff2f97 --- /dev/null +++ b/cfsan-snp-pipeline/2.2.1/Dockerfile @@ -0,0 +1,121 @@ +FROM ubuntu:xenial as app + +# metadata +LABEL base.image="ubuntu:xenial" +LABEL dockerfile.version="1" +LABEL software="cfsan_snp_pipeline" +LABEL software.version="2.2.1" +LABEL description="A pipeline for the production of SNP matrices from sequence data used in the phylogenetic analysis of pathogenic organisms sequenced from samples of interest to food safety." +LABEL website="https://github.com/CFSAN-Biostatistics/snp-pipeline" +LABEL license="https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/LICENSE.txt" +LABEL original.maintainer="Justin Payne" +LABEL original.maintainer.email="justin.payne@fda.hhs.gov" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="pjx8@cdc.gov" + +# This Dockerfile is based on a Docker file from Justin Payne which can be found here: +# https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/master/Dockerfile +# The dockerfile has been modified from it's original form by Curtis Kapsak and Sam Baird + +WORKDIR /tmp/ +RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + bzip2 \ + default-jre \ + g++ \ + gcc \ + git \ + gsl-bin \ + libgsl0-dev \ + libtbb-dev \ + libbz2-dev \ + liblzma-dev \ + make \ + ncurses-dev \ + python \ + python-dev \ + tabix \ + wget \ + zlib1g-dev \ + ca-certificates \ + && apt-get clean + +# Dependency version, can be updated in the build with build_args +ARG SNP_PIPELINE_VER +ENV SNP_PIPELINE_VER=${SNP_PIPELINE_VER:-2.2.1} + +ARG BCFTOOLS_VER +ENV BCFTOOLS_VER=${BCFTOOLS_VER:-1.8} +ARG BOWTIE2_VER +ENV BOWTIE2_VER=${BOWTIE2_VER:-2.3.4.1} +ARG HTSLIB_VER +ENV HTSLIB_VER=${HTSLIB_VER:-1.3.2} +ARG GATK_VER +ENV GATK_VER=${GATK_VER:-3.4-46-gbc02625} +ARG PICARD_VER +ENV PICARD_VER=${PICARD_VER:-2.18.4} +ARG SAMTOOLS_VER +ENV SAMTOOLS_VER=${SAMTOOLS_VER:-1.8} +ARG SRATOOLKIT_VER +ENV SRATOOLKIT_VER=${SRATOOLKIT_VER:-2.8.1} +ARG VARSCAN_VER +ENV VARSCAN_VER=${VARSCAN_VER:-2.3.9} +ARG NUMPY_VER +ENV NUMPY_VER=${NUMPY_VER:-1.16.6} +ARG BIOPYTHON_VER +ENV BIOPYTHON_VER=${BIOPYTHON_VER:-1.68} + + +# install pip (code originally from Justin) +RUN wget https://bootstrap.pypa.io/pip/2.7/get-pip.py -q \ + && python get-pip.py + +#install bowtie2 +#add -std:c++11 to make for compiler support +RUN wget https://github.com/BenLangmead/bowtie2/archive/v$BOWTIE2_VER.tar.gz -qO - | tar xz && cd bowtie2-$BOWTIE2_VER && make && make install && cd /tmp + +#install samtools +RUN wget 
https://github.com/samtools/htslib/releases/download/$HTSLIB_VER/htslib-$HTSLIB_VER.tar.bz2 -qO - | tar xj && (cd htslib-$HTSLIB_VER && make && make install && cd /tmp) +RUN wget https://github.com/samtools/samtools/releases/download/$SAMTOOLS_VER/samtools-$SAMTOOLS_VER.tar.bz2 -qO - | tar xj && (cd samtools-$SAMTOOLS_VER && make && make install && cd /tmp) +RUN wget https://github.com/samtools/bcftools/releases/download/$BCFTOOLS_VER/bcftools-$BCFTOOLS_VER.tar.bz2 -qO - | tar xj && (cd bcftools-$BCFTOOLS_VER && make && make install && cd /tmp) + +#install VARSCAN, ART, SRA Toolkit, GATK, Picard +RUN wget http://downloads.sourceforge.net/project/varscan/VarScan.v$VARSCAN_VER.jar -q \ + && cp VarScan.v$VARSCAN_VER.jar /usr/bin/VarScan.jar +RUN wget http://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/$SRATOOLKIT_VER/sratoolkit.$SRATOOLKIT_VER-ubuntu64.tar.gz -q \ + && tar -zxf /tmp/sratoolkit.$SRATOOLKIT_VER-ubuntu64.tar.gz \ + && cp /tmp/sratoolkit.$SRATOOLKIT_VER-ubuntu64/bin/fastq-dump.$SRATOOLKIT_VER /usr/bin/fastq-dump +RUN wget https://storage.googleapis.com/gatk-software/package-archive/gatk/GenomeAnalysisTK-$GATK_VER.tar.bz2 \ + && tar -jxf /tmp/GenomeAnalysisTK-$GATK_VER.tar.bz2 \ + && cp /tmp/GenomeAnalysisTK.jar /usr/bin/GenomeAnalysisTK.jar +RUN wget https://github.com/broadinstitute/picard/releases/download/$PICARD_VER/picard.jar -q \ + && cp picard.jar /usr/bin/picard.jar + +#install snp-pipeline and snp-mutator +RUN pip install numpy==$NUMPY_VER \ + snp-pipeline==$SNP_PIPELINE_VER \ + biopython==$BIOPYTHON_VER + + +ENV PATH "$PATH:/tmp/samtools-$SAMTOOLS_VER/bin:/tmp/bcftools-$BCFTOOLS_VER/bin:/tmp/bowtie2-$BOWTIE2_VER/bin" +ENV CLASSPATH "/usr/bin/VarScan.jar:/usr/bin/picard.jar:/usr/bin/GenomeAnalysisTK.jar" +ENV NUMCORES 1 + +# make a DIR called /data for running cfsan-snp-pipeline in +WORKDIR /data + +# Test snp_pipeline (code modified from Justin). Running copy_snppipeline_data.py generates a message that the script is deprecated +# but I figured I would leave it since the below commands test that the image runs properly. +FROM app as test +WORKDIR /test/ +RUN cfsan_snp_pipeline data lambdaVirusInputs testLambdaVirus \ + && cd testLambdaVirus \ + && cfsan_snp_pipeline run -s samples reference/lambda_virus.fasta \ + && copy_snppipeline_data.py lambdaVirusExpectedResults expectedResults \ + && diff -q snplist.txt expectedResults/snplist.txt >> diffOutput.txt \ + && diff -q snpma.fasta expectedResults/snpma.fasta >> diffOutput.txt \ + && diff -q referenceSNP.fasta expectedResults/referenceSNP.fasta >> diffOutput.txt \ + && if [ -s diffOutput.txt ]; then echo "testLambdaVirus failed"; else echo "testLambdaVirus passed"; fi + +WORKDIR /data \ No newline at end of file diff --git a/cfsan-snp-pipeline/2.2.1/README.md b/cfsan-snp-pipeline/2.2.1/README.md new file mode 100644 index 000000000..2d9a50a99 --- /dev/null +++ b/cfsan-snp-pipeline/2.2.1/README.md @@ -0,0 +1,28 @@ +# CFSAN SNP Pipeline + +CFSAN SNP Pipeline [v2.2.1](https://github.com/CFSAN-Biostatistics/snp-pipeline/tree/v2.2.1) + +## About +For producing SNP matrices from sequencing data for phylogenetic analysis of closely-related pathogenic organisms of interest to food safety. The pipeline was developed by the United States Food and Drug Administration, Center for Food Safety and Applied Nutrition. + +## Usage +The all-in-one `cfsan_snp_pipeline run` command will perform alignment, variant calling, and SNP matrix generation given a reference FASTA file and sample FASTQ files for groups of related organisms.
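+ +When running the pipeline through this container, the project directory can be mounted at the image's `/data` working directory; a minimal sketch (the `staphb/cfsan-snp-pipeline:2.2.1` tag and the host path are illustrative): + +```bash +# mount the host project directory at /data (the image's working directory) and run the pipeline there +docker run --rm -v /path/to/example:/data staphb/cfsan-snp-pipeline:2.2.1 \ + cfsan_snp_pipeline run -s samples reference/reference.fasta +```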
+ +Given the following example directory structure for the inputs: +``` +/data/example/reference/reference.fasta +/data/example/samples/sample1/sample1_1.fastq +/data/example/samples/sample1/sample1_2.fastq +/data/example/samples/sample2/sample2_1.fastq +/data/example/samples/sample2/sample2_2.fastq +... +``` + +In the `example` directory, run the pipeline: +```bash +cfsan_snp_pipeline run -s samples reference/reference.fasta +``` + +Full documentation for the pipeline can be found here: https://snp-pipeline.readthedocs.io/en/latest/readme.html + +This Dockerfile is based on a Dockerfile by Justin Payne: https://github.com/CFSAN-Biostatistics/snp-pipeline/blob/v2.2.1/Dockerfile diff --git a/circlator/1.5.5/Dockerfile b/circlator/1.5.5/Dockerfile new file mode 100644 index 000000000..aad6f2a2a --- /dev/null +++ b/circlator/1.5.5/Dockerfile @@ -0,0 +1,80 @@ +FROM ubuntu:jammy as app + +ARG CIRCLATOR_VER="1.5.5" +ARG SAMTOOLS_VER="1.16.1" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="circlator" +LABEL software.version=$CIRCLATOR_VER +LABEL description="A tool to circularize genome assemblies" +LABEL website="https://github.com/sanger-pathogens/circlator" +LABEL license="https://github.com/sanger-pathogens/circlator/blob/master/LICENSE" +LABEL maintainer="Abigail Shockey" +LABEL maintainer.email="abigail.shockey@slh.wisc.edu" +LABEL maintainer2="Erin Young" +LABEL maintainer2.email="eriny@utah.gov" + +# mummer version 3.23+dfsg-7 +# bwa version 0.7.17 +# prodigal version 2.6.3 +# canu version 2.0 +# spades version 3.13.1 +# spades version gives the following error: +# WARNING: SPAdes version 3.13.1 is being used. It will work, but better results are usually obtained from Circlator using SPAdes version 3.7.1. Although 3.7.1 is not the latest version, we recommend it for Circlator.
+RUN apt-get update && apt-get install -y --no-install-recommends \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + wget \ + make \ + perl \ + bzip2 \ + gnuplot \ + gawk \ + ca-certificates \ + procps \ + bwa \ + prodigal \ + mummer \ + canu \ + spades \ + python3 \ + python3-pip \ + xz-utils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/python python /usr/bin/python3 10 + +# install samtools +RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ + rm samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +# install circlator +RUN pip3 install circlator==${CIRCLATOR_VER} + +ENV PATH="/circlator-v${CIRCLATOR_VER}/bin:$PATH" \ + LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN circlator --help && circlator version && circlator progcheck + +RUN circlator test testdir && ls testdir + +RUN wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/8b9559a4293f64b1d99c2376fe0e1b94f854e3db/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + circlator fixstart SRR13957123.consensus.fa SRR13957123_fixstart && \ + ls && cat SRR13957123_fixstart.log + + diff --git a/circlator/1.5.5/README.md b/circlator/1.5.5/README.md new file mode 100644 index 000000000..cbc0a3f89 --- /dev/null +++ b/circlator/1.5.5/README.md @@ -0,0 +1,24 @@ +# Circlator container + +Main tool : [circlator](https://sanger-pathogens.github.io/circlator/) + +Additional tools: + +- bwa version 0.7.17 +- canu version 2.0 +- nucmer version 3.1 +- perl 5.34.0 +- prodigal 2.6.3 (Feb, 2016) +- python 3.10.6 +- samtools version 1.16.1 +- spades 3.13.1 + +Full documentation: https://github.com/sanger-pathogens/circlator/wiki + +Circlator is a genome assembly workflow, but is more commonly used for its fixstart feature - which will rotate input sequences to start with dnaA if found. 
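+ +As a quick sanity check, circlator can confirm that its dependencies are found and run on its bundled test data; these mirror the commands used in this image's test stage: + +```bash +# check that the required dependency programs are available +circlator progcheck + +# run circlator end-to-end on its built-in test data, writing results to a new directory +circlator test testdir +```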
+ +## Example Usage + +```bash +circlator fixstart input.fasta output +``` diff --git a/dnaapler/0.1.0/Dockerfile b/dnaapler/0.1.0/Dockerfile new file mode 100644 index 000000000..3c4eacac5 --- /dev/null +++ b/dnaapler/0.1.0/Dockerfile @@ -0,0 +1,62 @@ +FROM mambaorg/micromamba:1.4.1 as app + +USER root + +WORKDIR / + +ARG DNAAPLER_VER="0.1.0" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="dnaapler" +LABEL software.version="${DNAAPLER_VER}" +LABEL description="Rotates chromosomes and more" +LABEL website="https://github.com/gbouras13/dnaapler" +LABEL license="MIT" +LABEL license.url="https://github.com/gbouras13/dnaapler/blob/main/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# create the conda environment, install mykrobe via bioconda package; cleanup conda garbage +RUN micromamba create -n dnaapler -y -c bioconda -c defaults -c conda-forge dnaapler=${DNAAPLER_VER} && \ + micromamba clean -a -y + +# set the PATH and LC_ALL for singularity compatibility +ENV PATH="/opt/conda/envs/dnaapler/bin/:${PATH}" \ + LC_ALL=C.UTF-8 + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="dnaapler" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# set final working directory as /data +WORKDIR /data + +# default command is to print help options +CMD [ "dnaapler", "--help" ] + +# new base for testing +FROM app as test + +# set working directory to /test +WORKDIR /test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="dnaapler" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# downloads genome sequence and then extracts the last plasmid in the laziest way possible +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/025/259/185/GCA_025259185.1_ASM2525918v1/GCA_025259185.1_ASM2525918v1_genomic.fna.gz && \ + gunzip GCA_025259185.1_ASM2525918v1_genomic.fna.gz && \ + grep "CP104365.1" GCA_025259185.1_ASM2525918v1_genomic.fna -A 50000 > CP104365.1.fasta && \ + dnaapler mystery --prefix mystery_test --output mystery_test -i CP104365.1.fasta && \ + dnaapler plasmid --prefix plasmid_test --output plasmid_test -i CP104365.1.fasta && \ + ls mystery_test plasmid_test diff --git a/dnaapler/0.1.0/README.md b/dnaapler/0.1.0/README.md new file mode 100644 index 000000000..9dde6b2c6 --- /dev/null +++ b/dnaapler/0.1.0/README.md @@ -0,0 +1,41 @@ +# dnaapler container + +Main tool : [dnappler](https://github.com/gbouras13/dnaapler) + +Additional tools: + +- [blast](https://blast.ncbi.nlm.nih.gov/Blast.cgi) 2.14.0 + +Full documentation: [https://github.com/gbouras13/dnaapler](https://github.com/gbouras13/dnaapler) + +> `dnaapler` is a simple python program that takes a single nucleotide input sequence (in FASTA format), finds the desired start gene using blastx against an amino acid sequence database, checks that the start codon of this gene is found, and if so, then reorients the chromosome to begin with this gene on the forward strand. + +dnaapler has several commands for chromosomes, plasmids, and more. + +``` +Usage: dnaapler [OPTIONS] COMMAND [ARGS]... + +Options: + -h, --help Show this message and exit. + -V, --version Show the version and exit. + +Commands: + chromosome Reorients your sequence to begin with the dnaA chromosomal... 
+ citation Print the citation(s) for this tool + custom Reorients your sequence with a custom database + mystery Reorients your sequence with a random gene + phage Reorients your sequence to begin with the terL large... + plasmid Reorients your sequence to begin with the repA replication... +``` + +WARNING: Does not support multifasta files. Each sequence must be processed individually. + +## Example Usage + +```bash +# for a fasta of a chromsome sequence +dnaapler chromosome --input chromosome.fasta --output dnaapler_chr + +# for a fasta of a plasmid sequence +dnaapler plasmid --input plasmid.fasta --output dnaapler_plasmid +``` diff --git a/dockerfile-template/Dockerfile b/dockerfile-template/Dockerfile index aa8a73c4d..24064fcdc 100644 --- a/dockerfile-template/Dockerfile +++ b/dockerfile-template/Dockerfile @@ -1,60 +1,92 @@ -# FROM defines the base docker image. This command has to come first in the file -# The 'as' keyword lets you name the folowing stage. We use `app` for the production image -FROM ubuntu:focal as app +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Thank you for using this Dockerfile template! ##### +##### This is an outline for the flow of building a docker image. ##### +##### The docker image is built to the 'app' stage on dockerhub/quay. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### -# ARG sets environment variables during the build stage +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 1. Set up the base image in the first stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# 'FROM' defines where the Dockerfile is starting to build from. This command has to come first in the file +# The 'as' keyword lets you name the folowing stage. The production image uses everything to the 'app' stage. + +FROM ubuntu:jammy as app + +# List all software versions are ARGs near the top of the dockerfile +# 'ARG' sets environment variables during the build stage +# ARG variables are ONLY available during image build, they do not persist in the final image ARG SOFTWARENAME_VER="1.0.0" -# LABEL instructions tag the image with metadata that might be important to the user -# Optional, but highly recommended -LABEL base.image="ubuntu:focal" +# 'LABEL' instructions tag the image with metadata that might be important to the user +LABEL base.image="ubuntu:jammy" LABEL dockerfile.version="1" LABEL software="SoftwareName" -LABEL software.version=$SOFTWARENAME_VER +LABEL software.version="${SOFTWARENAME_VER}" LABEL description="This software does X, Y, AND Z!" LABEL website="https://github.com/StaPH-B/docker-builds" LABEL license="https://github.com/StaPH-B/docker-builds/blob/master/LICENSE" LABEL maintainer="FirstName LastName" LABEL maintainer.email="my.email@email.com" -# RUN executes code during the build +# 'RUN' executes code during the build # Install dependencies via apt-get or yum if using a centos or fedora base -RUN apt-get update && apt-get install -y \ +RUN apt-get update && apt-get install -y --no-install-recommends \ dependency_a \ dependency_b \ - dependency_c + dependency_c && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* # Install and/or setup more things. 
Make /data for use as a working dir -# Example: ncbi-blast+ 2.9.0 -RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.9.0/ncbi-blast-2.9.0+-x64-linux.tar.gz && \ - tar -xzf ncbi-blast-2.9.0+-x64-linux.tar.gz && \ - rm ncbi-blast-2.9.0+-x64-linux.tar.gz && \ +# For readability, limit one install per 'RUN' statement. + +# Example: install ncbi-blast+ 2.9.0 pre-compiled linux binaries +ARG BLAST_VER=2.9.0 + +RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ mkdir /data -# ENV instructions set environment variables that persist from the build into the resulting image +# 'ENV' instructions set environment variables that persist from the build into the resulting image # Use for e.g. $PATH and locale settings for compatibility with Singularity -ENV PATH="/ncbi-blast-2.9.0+/bin:$PATH" \ +ENV PATH="/software-${SOFTWARENAME_VER}/bin:$PATH" \ LC_ALL=C -# WORKDIR sets working directory +# 'CMD' instructions set a default command when the container is run. This is typically 'tool --help.' +CMD [ "tool", "--help" ] + +# 'WORKDIR' sets working directory WORKDIR /data +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 2. Set up the testing stage. ##### +##### The docker image is built to the 'test' stage before merging, but ##### +##### the test stage (or any stage after 'app') will be lost. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + # A second FROM insruction creates a new stage -# We use `test` for the test image FROM app as test -# Demonstrate that the program is successfully installed +# set working directory so that all test inputs & outputs are kept in /test +WORKDIR /test + +# print help and version info; check dependencies (not all software has these options available) +# Mostly this ensures the tool of choice is in path and is executable +RUN softwarename --help && \ + softwarename --check && \ + softwarename --version -# Option 1: run the program's internal tests, for example with SPAdes: +# Demonstrate that the program is successfully installed - which is highly dependant on what the tool is. + +# Run the program's internal tests if available, for example with SPAdes: RUN spades.py --test -# Option 2: write your own tests in a bash script in the same directory as your Dockerfile: +# Option 1: write your own tests in a bash script in the same directory as your Dockerfile and copy them: COPY my_tests.sh . 
RUN bash my_tests.sh -# Option 3: write python unit tests in a tests/ directory in the same directory as your Dockerfile: -RUN apt-get install -y python3 -RUN mkdir tests/ -COPY tests/ tests/ -RUN python3 -m unittest discover -s tests - +# Option 2: write below common usage cases, for example with tb-profiler: +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt diff --git a/dockerfile-template/Dockerfile_builder b/dockerfile-template/Dockerfile_builder new file mode 100644 index 000000000..469ff46a9 --- /dev/null +++ b/dockerfile-template/Dockerfile_builder @@ -0,0 +1,129 @@ +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Thank you for using this Dockerfile template! ##### +##### This is an outline for the flow of building a docker image. ##### +##### The docker image is built to the 'app' stage on dockerhub/quay. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Sometimes tools required to build a tool are not needed to run ##### +##### it. This means that images are larger than they need to be. A way ##### +##### to reduce the size of an image, is to have a stage prior to 'app' ##### +##### where these temporarily-required tools are installed. Then, only ##### +##### relevant executables and files are copied in to the 'app' stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 1. Set up the builder stage as the first stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# Please note, all 'LABEL', 'ENV', and 'CMD' instructions will be lost +# Also, files and executables are not carried over unless they are explicitly copied. + +# 'FROM' defines where the Dockerfile is starting to build from. This command has to come first in the file +FROM ubuntu:jammy as builder + +# 'RUN' executes code during the build +# Install dependencies via apt-get or yum if using a centos or fedora base +RUN apt-get update && apt-get install -y --no-install-recommends \ + dependency_a \ + dependency_b \ + dependency_c && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install and/or setup more things. Make /data for use as a working dir +# For readability, limit one install per 'RUN' statement. + +# Example: install ncbi-blast+ 2.9.0 pre-compiled linux binaries +ARG BLAST_VER=2.9.0 + +RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 2. Set up the base image in the 'app' stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# 'FROM' defines where the Dockerfile is starting to build from. This command has to come first in the file +# The 'as' keyword lets you name the folowing stage. The production image uses everything to the 'app' stage. 
+ +FROM ubuntu:jammy as app + +# List all software versions are ARGs near the top of the dockerfile +# 'ARG' sets environment variables during the build stage +# ARG variables are ONLY available during image build, they do not persist in the final image +ARG SOFTWARENAME_VER="1.0.0" + +# 'LABEL' instructions tag the image with metadata that might be important to the user +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="SoftwareName" +LABEL software.version="${SOFTWARENAME_VER}" +LABEL description="This software does X, Y, AND Z!" +LABEL website="https://github.com/StaPH-B/docker-builds" +LABEL license="https://github.com/StaPH-B/docker-builds/blob/master/LICENSE" +LABEL maintainer="FirstName LastName" +LABEL maintainer.email="my.email@email.com" + +# copy in files and executables into app stage +COPY --from=builder + +# example copy in blast executable +ARG BLAST_VER=2.9.0 +COPY --from=builder /bwa/bwa-${BWA_VER}/bwa /usr/local/bin + +# 'RUN' executes code during the build +# Install dependencies via apt-get or yum if using a centos or fedora base +# Please ensure ALL dependencies for running the tool make it into this stage +RUN apt-get update && apt-get install -y --no-install-recommends \ + dependency_a \ + dependency_b \ + dependency_c && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install and/or setup more things. Make /data for use as a working dir +# For readability, limit one install per 'RUN' statement. + +# 'ENV' instructions set environment variables that persist from the build into the resulting image +# Use for e.g. $PATH and locale settings for compatibility with Singularity +ENV PATH="/software-${SOFTWARENAME_VER}/bin:$PATH" \ + LC_ALL=C + +# 'CMD' instructions set a default command when the container is run. This is typically 'tool --help.' +CMD [ "tool", "--help" ] + +# 'WORKDIR' sets working directory +WORKDIR /data + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 3. Set up the testing stage. ##### +##### The docker image is built to the 'test' stage before merging, but ##### +##### the test stage (or any stage after 'app') will be lost. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# A second FROM insruction creates a new stage +# The test stage must be downstream from 'app' +FROM app as test + +# set working directory so that all test inputs & outputs are kept in /test +WORKDIR /test + +# print help and version info; check dependencies (not all software has these options available) +# Mostly this ensures the tool of choice is in path and is executable +RUN softwarename --help && \ + softwarename --check && \ + softwarename --version + +# Demonstrate that the program is successfully installed - which is highly dependant on what the tool is. + +# Run the program's internal tests if available, for example with SPAdes: +RUN spades.py --test + +# Option 1: write your own tests in a bash script in the same directory as your Dockerfile and copy them: +COPY my_tests.sh . 
+RUN bash my_tests.sh + +# Option 2: write below common usage cases, for example with tb-profiler: +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt diff --git a/dockerfile-template/Dockerfile_mamba b/dockerfile-template/Dockerfile_mamba new file mode 100644 index 000000000..8cbe1233d --- /dev/null +++ b/dockerfile-template/Dockerfile_mamba @@ -0,0 +1,113 @@ +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Thank you for using this Dockerfile template! ##### +##### This is an outline for the flow of building a docker image. ##### +##### The docker image is built to the 'app' stage on dockerhub/quay. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 1. Set up the base image in the first stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Conda can be a useful packaging manager that handles the ##### +##### installation of tools and their dependencies. Generally, images ##### +##### built via this method are larger, and may have file ownership ##### +##### errors - which is why we generally recommend attempting to build ##### +##### from source first. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# 'FROM' defines the base docker image. This command has to come first in the file +# The 'as' keyword lets you name the folowing stage. The production image uses everything to the 'app' stage. +FROM mambaorg/micromamba:1.4.9 as app + +# List all software versions are ARGs near the top of the dockerfile +# 'ARG' sets environment variables during the build stage +# 'ARG' variables are ONLY available during image build, they do not persist in the final image +ARG SOFTWARENAME_VERSION="1.0.4" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# 'LABEL' instructions tag the image with metadata that might be important to the user + +LABEL base.image="mambaorg/micromamba:1.4.9" +LABEL dockerfile.version="1" +LABEL software="SOFTWARENAME" +LABEL software.version="${SOFTWARENAME_VERSION}" +LABEL description="This software does X, Y, AND Z!" 
+LABEL website="https://github.com/StaPH-B/docker-builds" +LABEL license="https://github.com/StaPH-B/docker-builds/blob/master/LICENSE" +LABEL maintainer="FirstName LastName" +LABEL maintainer.email="my.email@email.com" + +# 'RUN' executes code during the build +# Install dependencies via apt-get or yum if using a centos or fedora base +RUN apt-get update && apt-get install -y --no-install-recommends \ + dependency_a \ + dependency_b \ + dependency_c && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Example apt-get command for commonly-missing dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install your desired software into the base conda/micromamba environment, pinning the version +# clean up conda garbage +# make /data to use as a working directory +RUN micromamba install --name base -c conda-forge -c bioconda -c defaults SOFTWARENAME=${SOFTWARENAME_VERSION} && \ + micromamba clean -a -y && \ + mkdir /data + +# 'ENV' instructions set environment variables that persist from the build into the resulting image +# set the environment, add base conda/micromamba bin directory into path +# set locale settings to UTF-8 +ENV PATH="/opt/conda/bin/:${PATH}" \ + LC_ALL=C.UTF-8 + +# 'CMD' instructions set a default command when the container is run. This is typically 'tool --help'. +CMD [ "tool", "--help" ] + +# set final working directory to /data +WORKDIR /data + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 2. Set up the testing stage. ##### +##### The docker image is built to the 'test' stage before merging, but ##### +##### the test stage (or any stage after 'app') will be lost. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# A second FROM instruction creates a new stage +# new base for testing +FROM app as test + +# so that conda/micromamba env is active when running below commands +ENV ENV_NAME="base" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# set working directory so that all test inputs & outputs are kept in /test +WORKDIR /test + +# print help and version info; check dependencies (not all software has these options available) +# Mostly this ensures the tool of choice is in path and is executable +RUN softwarename --help && \ + softwarename --check && \ + softwarename --version + +# Run the program's internal tests if available, for example with SPAdes: +RUN spades.py --test + +# Option 1: write your own tests in a bash script in the same directory as your Dockerfile and copy them: +COPY my_tests.sh . +RUN bash my_tests.sh + +# Option 2: write common usage cases below, for example with tb-profiler: +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt + diff --git a/dockerfile-template/Dockerfile_python3 b/dockerfile-template/Dockerfile_python3 new file mode 100644 index 000000000..7bdc747cf --- /dev/null +++ b/dockerfile-template/Dockerfile_python3 @@ -0,0 +1,101 @@ +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Thank you for using this Dockerfile template! ##### +##### This is an outline for the flow of building a docker image. ##### +##### The docker image is built to the 'app' stage on dockerhub/quay.
##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Pip/pypi can be a useful packaging manager that handles the ##### +##### installation of tools and their dependencies. This template uses ##### +##### a base image for which python3 and pip are already installed, and ##### +##### is intended to be used for tools installed solely via pip (it's ##### +##### like the pypi version of the Dockerfile_mamba template for conda ##### +##### installations.) +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 1. Set up the base image in the first stage. ##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# 'FROM' defines where the Dockerfile is starting to build from. This command has to come first in the file +# The 'as' keyword lets you name the following stage. The production image uses everything up to the 'app' stage. + +FROM python:3.9.17-slim as app + +# List all software versions as ARGs near the top of the dockerfile +# 'ARG' sets environment variables during the build stage +# ARG variables are ONLY available during image build, they do not persist in the final image +ARG SOFTWARENAME_VER="1.0.0" + +# 'LABEL' instructions tag the image with metadata that might be important to the user +LABEL base.image="python:3.9.17-slim" +LABEL dockerfile.version="1" +LABEL software="SoftwareName" +LABEL software.version="${SOFTWARENAME_VER}" +LABEL description="This software does X, Y, AND Z!" +LABEL website="https://github.com/StaPH-B/docker-builds" +LABEL license="https://github.com/StaPH-B/docker-builds/blob/master/LICENSE" +LABEL maintainer="FirstName LastName" +LABEL maintainer.email="my.email@email.com" + +# 'RUN' executes code during the build +# Install dependencies via apt-get or yum if using a centos or fedora base +RUN apt-get update && apt-get install -y --no-install-recommends \ + procps \ + dependency_a \ + dependency_b \ + dependency_c && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Install and/or set up more things. Make /data for use as a working dir +# For readability, limit one install per 'RUN' statement. +RUN pip install --no-cache software==${SOFTWARENAME_VER} + + +# Example: install pygenomeviz 0.3.2 +ARG PYGENOMEVIZ_VER="0.3.2" + +RUN pip install --no-cache pygenomeviz==$PYGENOMEVIZ_VER + +# 'ENV' instructions set environment variables that persist from the build into the resulting image +# Use for e.g. $PATH and locale settings for compatibility with Singularity +ENV PATH="/software-${SOFTWARENAME_VER}/bin:$PATH" \ + LC_ALL=C + +# 'CMD' instructions set a default command when the container is run. This is typically 'tool --help'. +CMD [ "tool", "--help" ] + +# 'WORKDIR' sets the working directory +WORKDIR /data + +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### +##### Step 2. Set up the testing stage. ##### +##### The docker image is built to the 'test' stage before merging, but ##### +##### the test stage (or any stage after 'app') will be lost.
##### +##### ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ##### + +# A second FROM instruction creates a new stage +FROM app as test + +# set working directory so that all test inputs & outputs are kept in /test +WORKDIR /test + +# print help and version info; check dependencies (not all software has these options available) +# Mostly this ensures the tool of choice is in path and is executable +RUN softwarename --help && \ + softwarename --check && \ + softwarename --version + +# Demonstrate that the program is successfully installed - which is highly dependent on what the tool is. + +# Run the program's internal tests if available, for example with SPAdes: +RUN spades.py --test + +# Option 1: write your own tests in a bash script in the same directory as your Dockerfile and copy them: +COPY my_tests.sh . +RUN bash my_tests.sh + +# Option 2: write common usage cases below, for example with tb-profiler: +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt diff --git a/dockerfile-template/README.md b/dockerfile-template/README.md new file mode 100644 index 000000000..d205b147b --- /dev/null +++ b/dockerfile-template/README.md @@ -0,0 +1,62 @@ + + +# container + +Main tool: [](link to program) + +Code repository: + +Additional tools: +- list: version + +Basic information on how to use this tool: +- executable: +- help: <-h> +- version: <-v> +- description: + +Additional information: + + + +Full documentation: link to documentation or wiki + +## Example Usage + +```bash + +``` + + + diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 964d5edcc..b31e39028 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -221,25 +221,23 @@ GEM rb-inotify (~> 0.9, >= 0.9.7) ruby_dep (~> 1.2) mercenary (0.3.6) - mini_portile2 (2.8.0) + mini_portile2 (2.8.1) minima (2.5.0) jekyll (~> 3.5) jekyll-feed (~> 0.9) jekyll-seo-tag (~> 2.1) minitest (5.15.0) multipart-post (2.1.1) - nokogiri (1.13.6) + nokogiri (1.14.3) mini_portile2 (~> 2.8.0) racc (~> 1.4) - nokogiri (1.13.6-x64-mingw32) - racc (~> 1.4) octokit (4.22.0) faraday (>= 0.9) sawyer (~> 0.8.0, >= 0.5.3) pathutil (0.16.2) forwardable-extended (~> 2.6) public_suffix (2.0.5) - racc (1.6.0) + racc (1.6.2) rb-fsevent (0.11.0) rb-inotify (0.10.1) ffi (~> 1.0) diff --git a/docs/contribute.md b/docs/contribute.md index 6227022fa..2c1ef4be9 100644 --- a/docs/contribute.md +++ b/docs/contribute.md @@ -5,18 +5,19 @@ layout: page ## Contributing -So far, we've created a bunch of docker images that we use in our day-to-day activities, but if there is a specific tool you do not see on our list and would like to add your own docker image, we would love to add it to the list. - Discover a bug or having issues with our images? Do you have a suggestion or advice for improving our docker images? [Please submit an issue under the Issues tab on our github repository](https://github.com/StaPH-B/docker-builds/issues) +Regardless of how many docker images are available via staphb/docker-builds or other efforts, there are still tools that are not available in a container. We would welcome your contribution if you do not see a tool on our list and would like to add your own docker image. + ### How to contribute a new Docker image 1.
Build your own docker image using a Dockerfile (information on this in [Develop your own container](https://staph-b.github.io/docker-builds/make_containers/)) 2. Fork this github repository using the fork button at the top right hand of the github page. + * There are Dockerfile and README templates that can be copied from https://github.com/StaPH-B/docker-builds/tree/master/dockerfile-template and edited 3. Add your Dockerfile, README, and test files to your forked repository following these conventions: * The first directory should be the name of the program with special characters removed, and it's preferable to remove uppercase - `/spades` * The second directory should be the version number of the program, in X.X.X format - `/spades/3.12.0` * The Dockerfile and any other files required for building and testing belong in the sub-directory - `/spades/3.12.0/Dockerfile` and `/spades/3.12.0/my_spades_tests.sh` - * NOTE: There is a file size limit for github (I believe 100MB/file), so if you have a program with a huge database or file of some kind - we won't be able to store the it in our github repository, and that database should be downloaded as part of the Dockerfile instructions with `wget`, `curl`, etc. + * NOTE: There is a file size limit for github (~100MB/file), so if you have a program with a huge database or file of some kind - we won't be able to store it in our github repository, and that database should be downloaded as part of the Dockerfile instructions with `wget`, `curl`, etc. 4. Please edit the `README.md` and `LICENSE.md` with the appropriate information for your program (follow alphabetical order please!) and commit changes to your forked repo. It's easiest to copy a line for an existing program from the raw markdown documents [README.md](https://raw.githubusercontent.com/StaPH-B/docker-builds/master/README.md) or [LICENSE.md](https://raw.githubusercontent.com/StaPH-B/docker-builds/master/LICENSE.md) and replace the information with your new image. 5. Open your github actions tab in the forked repository in the GitHub user interface and use the 'Manual Test' with the corresponding tool and version to test. If it passes, the files are ready to submit in a pull request! 6. Visit our docker-builds github repository and a green "Submit Pull Request" button should appear near the top of the page. Click it, and submit a pull request to merge your forked repository into our repository. diff --git a/docs/run_containers.md b/docs/run_containers.md index 1bceb743a..149aebf0a 100644 --- a/docs/run_containers.md +++ b/docs/run_containers.md @@ -30,12 +30,15 @@ To run a staphb image you simply name the image you want to use. ``` docker run staphb/trimmomatic ``` -The previous command did not give any instructions to the container so the container simply started and then exited. +The previous command did not give any instructions to the container so the container simply started and then exited. Many images have a 'CMD' instruction that prints the help command for the main tool of the image, but some do not. To see an executed command run the following command. ``` docker run staphb/trimmomatic sh -c "echo Hello from inside the trimmomatic container" ``` + + + There are a number of options you can also supply to the run command. We have found the following command is useful for running containers: ``` docker run --rm=True -v $PWD:/data -u $(id -u):$(id -g) staphb/: <--flags --go --here> @@ -68,6 +71,8 @@ Here is a description of each of the flags we used in the previous command.
based on the user and group from the local machine, resulting in the correct file ownership. ``` +By design, containers only interact with the directories given to them. If you cannot see your files in the container or the result files after running something in a container, please check which directories are mounted with `-v`. + ##### NOTE: This can be incorporated into a bash function by including the following into your .bashrc ``` #docker_run function diff --git a/dragonflye/1.0.14/Dockerfile b/dragonflye/1.0.14/Dockerfile new file mode 100644 index 000000000..5e1351dff --- /dev/null +++ b/dragonflye/1.0.14/Dockerfile @@ -0,0 +1,94 @@ +FROM mambaorg/micromamba:1.2.0 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end of app layer +WORKDIR / + +# ARG variables only persist during build time +ARG DRAGONFLYE_VER="1.0.14" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.2.0" +LABEL dockerfile.version="1" +LABEL software="dragonflye" +LABEL software.version=${DRAGONFLYE_VER} +LABEL description="Conda environment for dragonflye. Dragonflye: Assemble bacterial isolate genomes from Nanopore reads." +LABEL website="https://github.com/rpetit3/dragonflye" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/rpetit3/dragonflye/blob/main/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# create the conda environment; install dragonflye and dependencies based on bioconda package; cleanup conda garbage +RUN micromamba install -n base -c conda-forge -c bioconda -c defaults -y dragonflye=${DRAGONFLYE_VER} && \ + micromamba clean -a -y && \ + mkdir /data + +WORKDIR /data + +# hardcode base conda environment into the PATH variable; LC_ALL for singularity compatibility +ENV PATH="$PATH:/opt/conda/bin/" \ + LC_ALL=C.UTF-8 + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="base" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# show help options and check dependencies +RUN dragonflye --help && \ + dragonflye --check + +# so that testing outputs are inside /test +WORKDIR /test + +# download test data (ONT and ILMN FASTQs) +RUN echo "downloading ILMN and ONT test data from bactopia/bactopia-tests on GitHub..." && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/illumina/SRR2838702_R1.fastq.gz && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/illumina/SRR2838702_R2.fastq.gz + +# test assembly and polishing algorithms with test data +# modified code from here: https://github.com/rpetit3/dragonflye/blob/main/.github/workflows/test-dragonflye.yml +RUN echo "Testing Raven Assembler (quality filtered)..." && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599 --cpus 0 --nopolish --outdir raven-minquality --gsize 300000 --assembler raven --minquality 8 && \ +echo "Test Raven Assembler (quality filtered, no length filter)..."
&& \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 && \ +echo "Test Raven Assembler (quality filtered, no length filter, trimming)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen-trim --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 --trim && \ +echo "Test Raven Assembler (quality filtered, no length filter, trim opts)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen-trimopts --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 --trim --trimopts '--adapter_threshold 95' && \ +echo "Testing Raven Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-raven --cpus 0 --nopolish --depth 5 --outdir raven --gsize 300000 --assembler raven && \ +echo "Testing Raven Assembler + Racon Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-racon --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Medaka Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-medaka --gsize 300000 --assembler raven --racon 0 --model r103_min_high_g345 && \ +echo "Testing Flye Assembler + Medaka Polish + --medaka_opts" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-medaka-opts --gsize 300000 --assembler raven --racon 0 --model r103_min_high_g345 --medaka_opts '-b 200' && \ +echo "Testing Flye Assembler + Racon & Medaka Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-both --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Racon & Pilon Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --R1 /test/SRR2838702_R1.fastq.gz --R2 /test/SRR2838702_R2.fastq.gz --cpus 0 --outdir raven-polypolish --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Racon & Polypolish Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --R1 /test/SRR2838702_R1.fastq.gz --R2 /test/SRR2838702_R2.fastq.gz --cpus 0 --outdir raven-pilon --gsize 300000 --assembler raven --polypolish 0 --pilon 1 && \ +echo "Testing Miniasm Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599_mini.asm --cpus 1 --nopolish --outdir miniasm --gsize 300000 --assembler miniasm && \ +echo "Testing Flye Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir flye --gsize 300000 --assembler flye && \ +echo "Testing Flye Assembler (with --nano-hq)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-nano.hq --cpus 0 --nopolish --outdir flyehq --gsize 300000 --assembler flye --nanohq && \ +echo "Testing --list_models" && \ +dragonflye --list_models 2>&1 | grep r941_min_sup_g507 diff --git a/dragonflye/1.0.14/README.md b/dragonflye/1.0.14/README.md new file mode 100644 index 000000000..4eb55cac9 --- /dev/null +++ b/dragonflye/1.0.14/README.md @@ -0,0 +1,43 @@ +# dragonflye docker image + +Main tool : [dragonflye](https://github.com/rpetit3/dragonflye) + +> dragonflye: Assemble bacterial isolate genomes from Nanopore reads + +## Additional tools + +Protip: run command `docker run staphb/dragonflye:latest micromamba list` to see full micromamba environment: + +- [any2fasta 0.4.2](https://github.com/tseemann/any2fasta) +- [assembly-scan 0.4.1](https://github.com/rpetit3/assembly-scan) +- [bwa 
0.7.17-r1188](https://github.com/lh3/bwa) +- [bcftools 1.15.1](https://github.com/samtools/bcftools) +- biopython 1.80 +- [fastp 0.23.2](https://github.com/OpenGene/fastp) +- [flye 2.9.1](https://github.com/fenderglass/Flye) +- [kmc 3.2.1](https://github.com/refresh-bio/KMC) +- [medaka 1.6.1](https://github.com/nanoporetech/medaka) +- [miniasm 0.3](https://github.com/lh3/miniasm) +- [minimap2 2.24-r1122](https://github.com/lh3/minimap2) +- [nanoq 0.9.0](https://github.com/esteinig/nanoq) +- perl 5.32.1 +- [pigz 2.6](https://zlib.net/pigz/) +- [polypolish 0.5.0](https://github.com/rrwick/Polypolish) +- [porechop 0.2.4](https://github.com/rrwick/Porechop) +- python 3.8.15 +- [racon 1.5.0](https://github.com/lbcb-sci/racon) +- [rasusa 0.7.0](https://github.com/mbhall88/rasusa) +- [raven 1.8.1](https://github.com/lbcb-sci/raven) +- [samclip 0.4.0](https://github.com/tseemann/samclip) +- [samtools 1.15.1](https://github.com/samtools/samtools) +- [seqtk 1.3-r106](https://github.com/lh3/seqtk) + +## Example Usage + +```bash +# download ONT FASTQs for testing +wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz + +# run dragonflye using flye as the assembly algorithm +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-nano.hq --cpus 0 --nopolish --outdir flyehq --gsize 300000 --assembler flye --nanohq +``` diff --git a/dragonflye/1.1.1/Dockerfile b/dragonflye/1.1.1/Dockerfile new file mode 100644 index 000000000..0fdebaa75 --- /dev/null +++ b/dragonflye/1.1.1/Dockerfile @@ -0,0 +1,97 @@ +FROM mambaorg/micromamba:1.4.4 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end of app layer +WORKDIR / + +# ARG variables only persist during build time +ARG DRAGONFLYE_VER="1.1.1" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.4" +LABEL dockerfile.version="1" +LABEL software="dragonflye" +LABEL software.version=${DRAGONFLYE_VER} +LABEL description="Conda environment for dragonflye. Dragonflye: Assemble bacterial isolate genomes from Nanopore reads." 
+LABEL website="https://github.com/rpetit3/dragonflye" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/rpetit3/dragonflye/blob/main/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer1="Erin Young" +LABEL maintainer1.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# create the conda environment; install dragonflye and dependencies based on bioconda package; cleanup conda garbage +RUN micromamba install -n base -c conda-forge -c bioconda -c defaults -y dragonflye=${DRAGONFLYE_VER} && \ + micromamba clean -a -y && \ + mkdir /data + +WORKDIR /data + +# hardcode base conda environment into the PATH variable; LC_ALL for singularity compatibility +ENV PATH="$PATH:/opt/conda/bin/" \ + LC_ALL=C.UTF-8 + +CMD dragonflye --help + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="base" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# show help options and check dependencies +RUN dragonflye --help && \ + dragonflye --check + +# so that testing outputs are inside /test +WORKDIR /test + +# download test data (ONT and ILMN FASTQs) +RUN echo "downloading ILMN and ONT test data from bactopia/bactopia-tests on GitHub..." && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/illumina/SRR2838702_R1.fastq.gz && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/illumina/SRR2838702_R2.fastq.gz + +# test assembly and polishing algorithms with test data +# modified code from here: https://github.com/rpetit3/dragonflye/blob/main/.github/workflows/test-dragonflye.yml +RUN echo "Testing Raven Assembler (quality filtered)..." && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599 --cpus 0 --nopolish --outdir raven-minquality --gsize 300000 --assembler raven --minquality 8 && \ +echo "Test Raven Assembler (quality filtered, no length filter)..." 
&& \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 && \ +echo "Test Raven Assembler (quality filtered, no length filter, trimming)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen-trim --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 --trim && \ +echo "Test Raven Assembler (quality filtered, no length filter, trim opts)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir raven-minquality-nominreadlen-trimopts --gsize 300000 --assembler raven --minquality 6 --minreadlen 0 --trim --trimopts '--adapter_threshold 95' && \ +echo "Testing Raven Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-raven --cpus 0 --nopolish --depth 5 --outdir raven --gsize 300000 --assembler raven && \ +echo "Testing Raven Assembler + Racon Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-racon --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Medaka Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-medaka --gsize 300000 --assembler raven --racon 0 --model r103_min_high_g345 && \ +echo "Testing Flye Assembler + Medaka Polish + --medaka_opts" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-medaka-opts --gsize 300000 --assembler raven --racon 0 --model r103_min_high_g345 --medaka_opts '-b 200' && \ +echo "Testing Flye Assembler + Racon & Medaka Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --outdir raven-both --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Racon & Pilon Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --R1 /test/SRR2838702_R1.fastq.gz --R2 /test/SRR2838702_R2.fastq.gz --cpus 0 --outdir raven-polypolish --gsize 300000 --assembler raven && \ +echo "Testing Flye Assembler + Racon & Polypolish Polish" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --R1 /test/SRR2838702_R1.fastq.gz --R2 /test/SRR2838702_R2.fastq.gz --cpus 0 --outdir raven-pilon --gsize 300000 --assembler raven --polypolish 0 --pilon 1 && \ +echo "Testing Miniasm Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599_mini.asm --cpus 1 --nopolish --outdir miniasm --gsize 300000 --assembler miniasm && \ +echo "Testing Flye Assembler" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --cpus 0 --nopolish --outdir flye --gsize 300000 --assembler flye && \ +echo "Testing Flye Assembler (with --nano-hq)" && \ +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-nano.hq --cpus 0 --nopolish --outdir flyehq --gsize 300000 --assembler flye --nanohq && \ +echo "Testing --list_models" && \ +dragonflye --list_models 2>&1 | grep r941_min_sup_g507 diff --git a/dragonflye/1.1.1/README.md b/dragonflye/1.1.1/README.md new file mode 100644 index 000000000..0bd009a0b --- /dev/null +++ b/dragonflye/1.1.1/README.md @@ -0,0 +1,43 @@ +# dragonflye docker image + +Main tool : [dragonflye v1.1.1](https://github.com/rpetit3/dragonflye) + +> dragonflye: Assemble bacterial isolate genomes from Nanopore reads + +## Additional tools + +Protip: run command `docker run staphb/dragonflye:latest micromamba list` to see full micromamba environment: + +- [any2fasta 0.4.2](https://github.com/tseemann/any2fasta) +- [assembly-scan 0.4.1](https://github.com/rpetit3/assembly-scan) +- [bwa 
0.7.17-r1188](https://github.com/lh3/bwa) +- [bcftools 1.15.1](https://github.com/samtools/bcftools) +- biopython 1.80 +- [fastp 0.23.2](https://github.com/OpenGene/fastp) +- [flye 2.9.1](https://github.com/fenderglass/Flye) +- [kmc 3.2.1](https://github.com/refresh-bio/KMC) +- [medaka 1.6.1](https://github.com/nanoporetech/medaka) +- [miniasm 0.3](https://github.com/lh3/miniasm) +- [minimap2 2.24-r1122](https://github.com/lh3/minimap2) +- [nanoq 0.9.0](https://github.com/esteinig/nanoq) +- perl 5.32.1 +- [pigz 2.6](https://zlib.net/pigz/) +- [polypolish 0.5.0](https://github.com/rrwick/Polypolish) +- [porechop 0.2.4](https://github.com/rrwick/Porechop) +- python 3.8.15 +- [racon 1.5.0](https://github.com/lbcb-sci/racon) +- [rasusa 0.7.0](https://github.com/mbhall88/rasusa) +- [raven 1.8.1](https://github.com/lbcb-sci/raven) +- [samclip 0.4.0](https://github.com/tseemann/samclip) +- [samtools 1.15.1](https://github.com/samtools/samtools) +- [seqtk 1.3-r106](https://github.com/lh3/seqtk) + +## Example Usage + +```bash +# download ONT FASTQs for testing +wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz + +# run dragonflye using flye as the assembly algorithm +dragonflye --reads /test/ERR3772599.fastq.gz --prefix ERR3772599-nano.hq --cpus 0 --nopolish --outdir flyehq --gsize 300000 --assembler flye --nanohq +``` diff --git a/emmtyper/0.2.0/Dockerfile b/emmtyper/0.2.0/Dockerfile new file mode 100644 index 000000000..7d49db49b --- /dev/null +++ b/emmtyper/0.2.0/Dockerfile @@ -0,0 +1,54 @@ +FROM mambaorg/micromamba:1.2.0 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building +WORKDIR / + +ARG EMMTYPER_VER="0.2.0" + +LABEL base.image="mambaorg/micromamba:1.2.0" +LABEL dockerfile.version="1" +LABEL software="emmtyper" +LABEL software.version=${EMMTYPER_VER} +LABEL description="Conda environment for emmtyper. emmtyper is a command line tool for emm-typing of Streptococcus pyogenes using a de novo or complete assembly." 
+LABEL website="https://github.com/MDU-PHL/emmtyper" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/MDU-PHL/emmtyper/blob/master/LICENSE" +LABEL maintainer="Henry Kunerth" +LABEL maintainer.email="henrykunerth@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# set the environment +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# install emmtyper and dependencies +RUN micromamba create -n emmtyper && \ + eval "$(micromamba shell hook --shell=bash)" && \ + micromamba activate emmtyper && \ + micromamba install --yes --name emmtyper -c conda-forge -c bioconda -c defaults \ + emmtyper=${EMMTYPER_VER} && \ + micromamba clean --all --yes + + +ENV ENV_NAME="emmtyper" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +FROM app as test + +ENV ENV_NAME="emmtyper" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN wget 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/006/785/GCA_000006785.2_ASM678v2/GCA_000006785.2_ASM678v2_genomic.fna.gz' && \ + gunzip GCA_000006785.2_ASM678v2_genomic.fna.gz && \ + mv GCA_000006785.2_ASM678v2_genomic.fna test_data.fasta +RUN emmtyper test_data.fasta && \ + emmtyper -w pcr test_data.fasta -o test_out && \ + head -10 test_out diff --git a/emmtyper/0.2.0/README.md b/emmtyper/0.2.0/README.md new file mode 100644 index 000000000..4c81d3636 --- /dev/null +++ b/emmtyper/0.2.0/README.md @@ -0,0 +1,46 @@ +# emmtyper container + +Main tool : [emmtyper](https://github.com/MDU-PHL/emmtyper) + +'emmtyper' is a command line tool for emm-typing of _Streptococcus pyogenes_ using a _de novo_ or complete assembly. + +## Authors + +- Andre Tan +- Torsten Seemann +- Jake Lacey +- Mark Davies +- Liam Mcintyre +- Hannah Frost +- Deborah Williamson +- Anders Gonçalves da Silva + +The codebase for `emmtyper` was primarily written by Andre Tan as part of his Master's +Degree in Bioinformatics. Torsten Seemann, Deborah Williamson, and Anders Gonçalves da Silva provided supervision and assistance. + +Hannah Frost contributed with EMM clustering by suggesting we incorporate it into the code, and providing the necessary information to do so and test it. + +Jake Lacey, Liam Mcintyre, and Mark Davies provided assistance in validating `emmtyper`. + +## Maintainer + +The code is actively maintained by the MDU Bioinformatics Team. + +Contact the principal maintainer at andersgs at gmail dot com.
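+
+The commands in the Example Usage section below are what you would run inside the container. A minimal sketch of wrapping one of them in `docker run` from the host, assuming the published `staphb/emmtyper:0.2.0` tag and an assembly named `my_assembly.fasta` in the current directory (both names are illustrative):
+
+```bash
+# mount the current directory to /data and emm-type an assembly found there
+docker run --rm -v $PWD:/data -u $(id -u):$(id -g) staphb/emmtyper:0.2.0 \
+  emmtyper /data/my_assembly.fasta -o /data/my_assembly_emmtyper.tsv
+```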
+ +## Example Usage + +```bash +# run emmtyper in BLAST (default) mode: + +emmtyper .fasta -o + +# or with output written in verbose format: + +emmtyper .fasta -o -f verbose + +# run emmtyper in PCR mode (useful for troubleshooting, see documentation) + +emmtyper -w pcr .fasta -o + +``` \ No newline at end of file diff --git a/emmtypingtool/0.0.1/Dockerfile b/emmtypingtool/0.0.1/Dockerfile index 6ad0bf06f..d15a43ccc 100644 --- a/emmtypingtool/0.0.1/Dockerfile +++ b/emmtypingtool/0.0.1/Dockerfile @@ -1,9 +1,9 @@ -# Base Image -FROM ubuntu:xenial +# Base Image and app layer +FROM ubuntu:xenial as app # Metadata LABEL base.image="ubuntu:xenial" -LABEL version="1" +LABEL dockerfile.version="2" LABEL software="emm-typing-tool" LABEL software.version="0.0.1" LABEL description="Assign emm type and subtype by querying the CDC M-type specific database " @@ -12,10 +12,9 @@ LABEL license="https://raw.githubusercontent.com/phe-bioinformatics/emm-typing-t LABEL maintainer="Jake Garfin" LABEL maintainer.email="jake.garfin@state.mn.us" - RUN apt-get update && apt-get install -y \ python=2.7.12-1~16.04 \ - python-pip=8.1.1-2ubuntu0.4 \ + python-pip=8.1.1-2ubuntu0.6 \ wget \ zlib1g-dev \ ncbi-blast+=2.2.31-4 \ @@ -46,11 +45,12 @@ RUN wget 'ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz' && \ chmod -R 777 emboss # Install python packages -RUN pip install --upgrade pip && \ - python -m pip install pyyaml==5.1.2 numpy==1.16.4 lxml==4.4.1 biopython==1.74 +RUN python -m pip install pyyaml==5.1.2 numpy==1.16.4 lxml==4.4.1 biopython==1.74 # Install PHE emm-typing-tool -RUN git clone https://github.com/phe-bioinformatics/emm-typing-tool.git +RUN git clone https://github.com/phe-bioinformatics/emm-typing-tool.git &&\ +cd emm-typing-tool && \ +git checkout 500d048ee49006d7aa3245fa4ee4ae7f99c398b4 # Setup emm database RUN mkdir /db && cd /db && \ @@ -65,4 +65,29 @@ RUN mkdir /db && cd /db && \ ENV PATH="${PATH}:/samtools-0.1.19:/bowtie2-2.2.9:/EMBOSS-6.6.0/emboss:/emm-typing-tool" -WORKDIR /data \ No newline at end of file +WORKDIR /data + +# Adding test layer +FROM app as test + +ARG SRATOOLKIT_VER="3.0.2" +ARG SRA_ACCESSION="ERR10762520" + +# Install SRA toolkit +RUN mkdir /test && cd /test && \ +wget https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRATOOLKIT_VER}/sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz && \ +tar -vxzf sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz && \ +rm sratoolkit.${SRATOOLKIT_VER}-ubuntu64.tar.gz + +ENV PATH="${PATH}:/test/sratoolkit.${SRATOOLKIT_VER}-ubuntu64/bin" + +# Download fastq files for test +RUN mkdir /test/raw_reads && cd /test/raw_reads && \ +fasterq-dump --split-files ${SRA_ACCESSION} && \ +gzip ${SRA_ACCESSION}_* + +WORKDIR /data + +# Run emmtypingtool on test files +RUN mkdir /data/test && \ +emm_typing.py -m /db -i /test/raw_reads -o /data/test diff --git a/emmtypingtool/0.0.1/README.md b/emmtypingtool/0.0.1/README.md new file mode 100644 index 000000000..c5fd8763e --- /dev/null +++ b/emmtypingtool/0.0.1/README.md @@ -0,0 +1,27 @@ +# emmtypingtool docker image + +> emmtypingtool: Assign emm type and subtype by querying the CDC M-type specific database + +Main tool : [emmtypingtool](https://github.com/phe-bioinformatics/emm-typing-tool) + +Additional tools: +- python 2.7.12 +- ncbi-blast+ 2.2.31 +- bowtie2 2.2.9 +- samtools 0.1.19 +- emboss 6.6.0 +- pyyaml 5.1.2 +- numpy 1.16.4 +- lxml 4.4.1 +- biopython 1.74 +- sratoolkit 3.0.2 + +## Example Usage + +```bash +# Running emmtypingtool on /raw_reads directory containing fastq files (.fastq.gz) +emm_typing.py -m /db -i 
/raw_reads -o /output_directory + +# Running emmtypingtool on specified fastq files (sample1) +emm_typing.py -m /db -1 sample1.R1.fastq.gz -2 sample1.R2.fastq.gz -o /output_directory +``` diff --git a/fastani/1.34-RGDV2/Dockerfile b/fastani/1.34-RGDV2/Dockerfile new file mode 100644 index 000000000..7321352de --- /dev/null +++ b/fastani/1.34-RGDV2/Dockerfile @@ -0,0 +1,77 @@ +## build RGDv2 ## +FROM staphb/ncbi-datasets:15.11.0 as stage + +# copy in list of NCBI accessions and species list +COPY RGDv2-NCBI-assembly-accessions.txt /RGDv2/RGDv2-NCBI-assembly-accessions.txt +COPY RGDv2-NCBI-assembly-accessions-and-species.txt /RGDv2/RGDv2-NCBI-assembly-accessions-and-species.txt + +# download RGD genomes using NCBI datasets tools; cleanup unneccessary files; +# move and re-name assemblies to include Species in the filename +# make fasta files readable to all users; create File Of FileNames for all 43 assemblies (to be used with fastANI) +RUN for ID in $(cat /RGDv2/RGDv2-NCBI-assembly-accessions.txt); do \ + SPECIES=$(grep "${ID}" /RGDv2/RGDv2-NCBI-assembly-accessions-and-species.txt | cut -f 1) && \ + echo "downloading $ID, species "${SPECIES}", from NCBI..."; \ + datasets download genome accession ${ID} --filename ${ID}.zip; \ + unzip ${ID}.zip; \ + rm ${ID}.zip; \ + mv -v ncbi_dataset/data/${ID}/${ID}*.fna /RGDv2/${ID}.${SPECIES}.fasta; \ + rm -rfv ncbi_dataset/; \ + rm -v README.md; \ +done && \ +ls /RGDv2/*.fasta >/RGDv2/FOFN-RGDv2.txt &&\ +chmod 664 /RGDv2/* + +## App ## +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist at build time +ARG FASTANI_VER="v1.34" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="FastANI" +LABEL software.version=${FASTANI_VER} +LABEL description="Fast alignment-free computation of whole-genome Average Nucleotide Identity" +LABEL website="https://github.com/ParBLiSS/FastANI" +LABEL license="https://github.com/ParBLiSS/FastANI/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Kutluhan Incekara" +LABEL maintainer3.email="kutluhan.incekara@ct.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + unzip \ + libgomp1 && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# download pre-compiled binary; unzip; put binary in /usr/local/bin +# apt dependencies: libgomp1 unzip wget +RUN wget --no-check-certificate https://github.com/ParBLiSS/FastANI/releases/download/${FASTANI_VER}/fastANI-Linux64-${FASTANI_VER}.zip && \ + unzip fastANI-Linux64-${FASTANI_VER}.zip -d /usr/local/bin && \ + rm fastANI-Linux64-${FASTANI_VER}.zip + +# copy RGDv2 from stage +COPY --from=stage /RGDv2/ /RGDv2/ + +# default run command +CMD fastANI -h + +# singularity compatibility +ENV LC_ALL=C + +# set working directory +WORKDIR /data + +## Test ## +FROM app as test + +# test against RGDv2 +RUN wget --no-check-certificate -P /data https://github.com/ParBLiSS/FastANI/raw/master/tests/data/Escherichia_coli_str_K12_MG1655.fna && \ + fastANI -t 8 -q /data/Escherichia_coli_str_K12_MG1655.fna --rl /RGDv2/FOFN-RGDv2.txt -o fastANI.RGDv2.out.tsv &&\ + echo "output TSV from fastANI test:" && \ + cat fastANI.RGDv2.out.tsv + diff --git a/fastani/1.34-RGDV2/README.md b/fastani/1.34-RGDV2/README.md new file mode 100644 index 000000000..e27a4dbf0 --- /dev/null +++ b/fastani/1.34-RGDV2/README.md @@ -0,0 +1,21 @@ +# 
fastANI container + +Main tool : [fastANI](https://github.com/ParBLiSS/FastANI) + +Full documentation: https://github.com/ParBLiSS/FastANI + +FastANI was developed for fast alignment-free computation of whole-genome Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide identity of orthologous gene pairs shared between two microbial genomes. + +This docker image contains the Reference Genome Database version 2 (RGDv2) from the Enteric Diseases Laboratory Branch at the CDC. It contains the genomes of 43 enteric bacterial isolates that are used to for species identification of bacterial isolate WGS data. This database is NOT meant to be comprehensive - it contains the genomes of enteric pathogens commonly sequenced by EDLB and some closely related species. + +The FASTA files for RGDv2 can be found within `/RGDv2/` inside the docker image. + +## Example Usage + +```bash +# query one genome against another genome +fastANI -t 8 -q bacterial-genome1.fasta -r bacterial-genome2.fasta -o fastANI.out.tsv + +# query one genome against the 43 genomes in RGDv2 (requires a File Of FileNames as input) +fastANI -t 8 -q bacterial-genome.fasta --rl /RGDv2/FOFN-RGDv2.txt -o fastANI.RGDv2.out.tsv +``` diff --git a/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions-and-species.txt b/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions-and-species.txt new file mode 100644 index 000000000..e47fb399d --- /dev/null +++ b/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions-and-species.txt @@ -0,0 +1,43 @@ +Campylobacter_coli GCA_008011635.1 +Campylobacter_fetus GCA_000015085.1 +Campylobacter_fetus GCA_000495505.1 +Campylobacter_fetus GCA_000759515.1 +Campylobacter_hyointestinalis GCA_001643955.1 +Campylobacter_jejuni GCA_000017485.1 +Campylobacter_jejuni GCA_008011525.1 +Campylobacter_lari GCA_000019205.1 +Campylobacter_lari GCA_000816225.1 +Campylobacter_upsaliensis GCA_008011615.1 +Escherichia_albertii GCA_000512125.1 +Escherichia_coli GCA_002741475.1 +Escherichia_fergusonii GCA_000026225.1 +Grimontia_hollisae GCA_009665295.1 +Listeria_innocua GCA_017363615.1 +Listeria_innocua GCA_017363655.1 +Listeria_ivanovii GCA_000252975.1 +Listeria_marthii GCA_017363645.1 +Listeria_monocytogenes GCA_001466295.1 +Listeria_monocytogenes GCA_013625895.1 +Listeria_monocytogenes GCA_013625995.1 +Listeria_monocytogenes GCA_013626145.1 +Listeria_monocytogenes GCA_014526935.1 +Listeria_seeligeri GCA_017363605.1 +Listeria_welshimeri GCA_002489005.1 +Photobacterium_damselae GCA_009665375.1 +Salmonella_bongori GCA_013588055.1 +Salmonella_enterica GCA_011388235.1 +Vibrio_alginolyticus GCA_009665435.1 +Vibrio_cholerae GCA_009665515.2 +Vibrio_cidicii GCA_009665415.1 +Vibrio_cincinnatiensis GCA_009665395.1 +Vibrio_fluvialis GCA_009665355.1 +Vibrio_furnissii GCA_009665335.1 +Vibrio_harveyi GCA_009665315.1 +Vibrio_metoecus GCA_009665255.1 +Vibrio_metoecus GCA_009665275.1 +Vibrio_metschnikovii GCA_009665235.1 +Vibrio_mimicus GCA_009665195.1 +Vibrio_navarrensis GCA_009665215.1 +Vibrio_parahaemolyticus GCA_009665495.1 +Vibrio_vulnificus GCA_009665455.1 +Vibrio_vulnificus GCA_009665475.1 diff --git a/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions.txt b/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions.txt new file mode 100644 index 000000000..bfde7610a --- /dev/null +++ b/fastani/1.34-RGDV2/RGDv2-NCBI-assembly-accessions.txt @@ -0,0 +1,43 @@ +GCA_008011635.1 +GCA_000015085.1 +GCA_000495505.1 +GCA_000759515.1 +GCA_001643955.1 +GCA_000017485.1 +GCA_008011525.1 +GCA_000816225.1 +GCA_000019205.1 +GCA_008011615.1 +GCA_000512125.1 
+GCA_002741475.1 +GCA_000026225.1 +GCA_009665295.1 +GCA_017363655.1 +GCA_017363615.1 +GCA_000252975.1 +GCA_017363645.1 +GCA_001466295.1 +GCA_014526935.1 +GCA_013626145.1 +GCA_013625995.1 +GCA_013625895.1 +GCA_017363605.1 +GCA_002489005.1 +GCA_009665375.1 +GCA_013588055.1 +GCA_011388235.1 +GCA_009665435.1 +GCA_009665515.2 +GCA_009665415.1 +GCA_009665395.1 +GCA_009665355.1 +GCA_009665335.1 +GCA_009665315.1 +GCA_009665275.1 +GCA_009665255.1 +GCA_009665235.1 +GCA_009665195.1 +GCA_009665215.1 +GCA_009665495.1 +GCA_009665475.1 +GCA_009665455.1 \ No newline at end of file diff --git a/fastani/1.34-RGDV2/RGDv2-metadata.tsv b/fastani/1.34-RGDV2/RGDv2-metadata.tsv new file mode 100644 index 000000000..ca1da71fc --- /dev/null +++ b/fastani/1.34-RGDV2/RGDv2-metadata.tsv @@ -0,0 +1,44 @@ +Species BioSample NCBI Assembly Strain ID +Campylobacter coli SAMN12323645 GCA_008011635.1 2013D-9606 +Campylobacter fetus SAMN02604050 GCA_000015085.1 82-40 +Campylobacter fetus SAMN02604287 GCA_000495505.1 03-427 +Campylobacter fetus SAMN02870596 GCA_000759515.1 97-608 +Campylobacter hyointestinalis SAMN03737973 GCA_001643955.1 LMG 9260 +Campylobacter jejuni SAMN02604056 GCA_000017485.1 NC_009707 +Campylobacter jejuni SAMN12323651 GCA_008011525.1 D0133 +Campylobacter lari SAMN02604025 GCA_000019205.1 RM2100 +Campylobacter lari SAMN03248542 GCA_000816225.1 LMG 11760 +Campylobacter upsaliensis SAMN12323647 GCA_008011615.1 D1914 +Escherichia albertii SAMN02641387 GCA_000512125.1 KF1 +Escherichia coli SAMN07731009 GCA_002741475.1 B4103-1 +Escherichia fergusonii SAMEA3138228 GCA_000026225.1 ATCC_35469 +Grimontia hollisae SAMN10812938 GCA_009665295.1 2013V-1029 +Listeria innocua SAMN10869157 GCA_017363615.1 2010L-2059 +Listeria innocua SAMN10869156 GCA_017363655.1 H0996 L +Listeria ivanovii SAMEA3138408 GCA_000252975.1 PAM55 +Listeria marthii SAMN10869158 GCA_017363645.1 FSL S4-696 +Listeria monocytogenes SAMN02944835 GCA_001466295.1 G4599 +Listeria monocytogenes SAMN02847829 GCA_013625895.1 2014L-6256 +Listeria monocytogenes SAMN03067768 GCA_013625995.1 J0099 +Listeria monocytogenes SAMN02950479 GCA_013626145.1 2014L-6393 +Listeria monocytogenes SAMN03761815 GCA_014526935.1 2011L-2626 +Listeria seeligeri SAMN10869159 GCA_017363605.1 F5761 +Listeria welshimeri SAMN03462185 GCA_002489005.1 SLCC5334 +Photobacterium damselae SAMN10702680 GCA_009665375.1 2012V-1072 +Salmonella bongori SAMN13207407 GCA_013588055.1 04-0440 +Salmonella enterica SAMN08167480 GCA_011388235.1 2010K-2370 +Vibrio alginolyticus SAMN10702675 GCA_009665435.1 2013V-1302 +Vibrio cholerae SAMN10863496 GCA_009665515.2 2010EL-1786 +Vibrio cidicii SAMN10863497 GCA_009665415.1 2423-01 +Vibrio cincinnatiensis SAMN10812936 GCA_009665395.1 2409-02 +Vibrio fluvialis SAMN10812937 GCA_009665355.1 2013V-1049 +Vibrio furnissii SAMN10702681 GCA_009665335.1 2419-04 +Vibrio harveyi SAMN10702676 GCA_009665315.1 2011V-1164 +Vibrio metoecus SAMN10702677 GCA_009665255.1 2011V-1169 +Vibrio metoecus SAMN10863498 GCA_009665275.1 08-2459 +Vibrio metschnikovii SAMN10702671 GCA_009665235.1 2012V-1020 +Vibrio mimicus SAMN10812939 GCA_009665195.1 2011V-1073 +Vibrio navarrensis SAMN10863499 GCA_009665215.1 08-2462 +Vibrio parahaemolyticus SAMN10702672 GCA_009665495.1 2012AW-0154 +Vibrio vulnificus SAMN10702674 GCA_009665455.1 2009V-1035 +Vibrio vulnificus SAMN10702673 GCA_009665475.1 2142-77 \ No newline at end of file diff --git a/fastani/1.34/Dockerfile b/fastani/1.34/Dockerfile new file mode 100644 index 000000000..6090fe610 --- /dev/null +++ b/fastani/1.34/Dockerfile @@ -0,0 
+1,51 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist at build time +ARG FASTANI_VER="v1.34" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="FastANI" +LABEL software.version=${FASTANI_VER} +LABEL description="Fast alignment-free computation of whole-genome Average Nucleotide Identity" +LABEL website="https://github.com/ParBLiSS/FastANI" +LABEL license="https://github.com/ParBLiSS/FastANI/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Kutluhan Incekara" +LABEL maintainer3.email="kutluhan.incekara@ct.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + unzip \ + libgomp1 && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# download pre-compiled binary; unzip; put binary in /usr/local/bin +# apt dependencies: libgomp1 unzip wget +RUN wget --no-check-certificate https://github.com/ParBLiSS/FastANI/releases/download/${FASTANI_VER}/fastANI-Linux64-${FASTANI_VER}.zip && \ + unzip fastANI-Linux64-${FASTANI_VER}.zip -d /usr/local/bin && \ + rm fastANI-Linux64-${FASTANI_VER}.zip + +# default run command +CMD fastANI -h + +# singularity compatibility +ENV LC_ALL=C + +# set working directory +WORKDIR /data + +## Test ## +FROM app as test + +# download 2 genomes from fastANI GitHub; compare the 2; cat the output file +RUN wget --no-check-certificate -P /data https://github.com/ParBLiSS/FastANI/raw/master/tests/data/Escherichia_coli_str_K12_MG1655.fna && \ + wget --no-check-certificate -P /data https://github.com/ParBLiSS/FastANI/raw/master/tests/data/Shigella_flexneri_2a_01.fna && \ + fastANI -q /data/Shigella_flexneri_2a_01.fna -r /data/Escherichia_coli_str_K12_MG1655.fna -o /data/fastANI-test-ShiglellaFlexneri-EcoliK12.tsv && \ + echo "output TSV from fastANI test:" && \ + cat fastANI-test-ShiglellaFlexneri-EcoliK12.tsv + diff --git a/fastani/1.34/README.md b/fastani/1.34/README.md new file mode 100644 index 000000000..2958b1056 --- /dev/null +++ b/fastani/1.34/README.md @@ -0,0 +1,19 @@ +# fastANI container + +Main tool : [fastANI](https://github.com/ParBLiSS/FastANI) + +Full documentation: https://github.com/ParBLiSS/FastANI + +FastANI was developed for fast alignment-free computation of whole-genome Average Nucleotide Identity (ANI). ANI is defined as mean nucleotide identity of orthologous gene pairs shared between two microbial genomes. + +This docker image contains no references. 
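+
+Since this build bundles no reference genomes (unlike the `1.34-RGDV2` image, which ships RGDv2 under `/RGDv2/`), you supply both the query and the reference FASTA files yourself. A minimal sketch of a one-query-versus-many-references run using fastANI's `--rl` file-of-filenames option; all file names below are illustrative:
+
+```bash
+# list your own reference FASTAs in a plain-text file, one path per line
+ls /path/to/references/*.fasta > reference-FOFN.txt
+
+# compute ANI of one query genome against every reference listed in the FOFN
+fastANI -t 8 -q query-genome.fasta --rl reference-FOFN.txt -o fastANI.one-to-many.out.tsv
+```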
+ +## Example Usage + +```bash +# query one genome against another genome +fastANI -t 8 -q bacterial-genome1.fasta -r bacterial-genome2.fasta -o fastANI.out.tsv + +# query one genome against the 43 genomes in RGDv2 (requires a File Of FileNames as input) +fastANI -t 8 -q bacterial-genome.fasta --rl /RGDv2/FOFN-RGDv2.txt -o fastANI.RGDv2.out.tsv +``` diff --git a/fastp/0.23.4/Dockerfile b/fastp/0.23.4/Dockerfile new file mode 100644 index 000000000..92f5fedd9 --- /dev/null +++ b/fastp/0.23.4/Dockerfile @@ -0,0 +1,50 @@ +FROM ubuntu:jammy as app + +ARG FASTP_VER="0.23.4" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="Fastp" +LABEL software.version=$FASTP_VER +LABEL description="An ultra-fast all-in-one FASTQ preprocessor" +LABEL website="https://github.com/OpenGene/fastp" +LABEL license="https://github.com/OpenGene/fastp/blob/master/LICENSE" +LABEL maintainer="Idowu Olawoye" +LABEL maintainer.email="olawoyei0303@run.edu.ng" +LABEL maintainer2="Erin Young" +LABEL maintainer2.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN mkdir fastp && \ + cd fastp && \ + wget http://opengene.org/fastp/fastp.${FASTP_VER} && \ + mv fastp.${FASTP_VER} fastp && \ + chmod a+x ./fastp && \ + mkdir /data + +ENV PATH="/fastp:$PATH" + +WORKDIR /data + +CMD [ "fastp", "--help" ] + +# A second FROM insruction creates a new stage +# We use `test` for the test image +FROM app as test + +# downloading sars-cov-2 fastq test files +RUN wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123_1.fastq.gz && \ +wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123_2.fastq.gz + +# running fastp on the files +RUN fastp \ + -i SRR13957123_1.fastq.gz \ + -I SRR13957123_2.fastq.gz \ + -o SRR13957123_PE1.fastq.gz \ + -O SRR13957123_PE2.fastq.gz \ + -h SRR13957123_fastp.html \ + -j SRR13957123_fastp.json diff --git a/fastp/0.23.4/README.md b/fastp/0.23.4/README.md new file mode 100644 index 000000000..c3674c358 --- /dev/null +++ b/fastp/0.23.4/README.md @@ -0,0 +1,13 @@ +# fastp container + +Main tool : [fastp](https://github.com/OpenGene/fastp) + +Full documentation: https://github.com/OpenGene/fastp + +> A tool designed to provide fast all-in-one preprocessing for FastQ files. + +## Example Usage + +```bash +fastp -i SRR13957123_1.fastq.gz -I SRR13957123_2.fastq.gz -o SRR13957123_PE1.fastq.gz -O SRR13957123_PE2.fastq.gz -h SRR13957123_fastp.html -j SRR13957123_fastp.json +``` diff --git a/fastqc/0.11.9/Dockerfile b/fastqc/0.11.9/Dockerfile index 225da25d5..30b1ad557 100644 --- a/fastqc/0.11.9/Dockerfile +++ b/fastqc/0.11.9/Dockerfile @@ -1,29 +1,39 @@ -FROM ubuntu:xenial - ARG FASTQC_VER="0.11.9" +# FROM defines the base docker image. +# The 'as' keyword lets you name the following stage. 
We use `app` for the production image + +FROM ubuntu:bionic as app + +#re-instantiate these variables +ARG FASTQC_VER + # metadata -LABEL base.image="ubuntu:xenial" -LABEL dockerfile.version="1" +LABEL base.image="ubuntu:bionic" +LABEL dockerfile.version="2" LABEL software="FASTQC" LABEL software.version="0.11.9" LABEL description="A quality control analysis tool for high throughput sequencing data" LABEL website="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" LABEL license="https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt" -LABEL maintainer1="Abigail Shockey" -LABEL maintainer1.email="abigail.shockey@slh.wisc.edu" +LABEL maintainer="Abigail Shockey" +LABEL maintainer.email="abigail.shockey@slh.wisc.edu" LABEL maintainer2="Curtis Kapsak" LABEL maintainer2.email="pjx8@cdc.gov" +LABEL maintainer3="Pooja Gupta" +LABEL maintainer3.email="biopooja@gmail.com" # install dependencies; cleanup apt garbage -RUN apt-get update && apt-get install -y \ - unzip \ - wget \ - perl \ - default-jre && \ - apt-get autoclean && rm -rf /var/lib/apt/lists/* - -# install fastqc +RUN apt-get update && apt-get install -y --no-install-recommends \ + unzip \ + wget \ + perl \ + default-jre \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install fastqc. Make /data for use as a working dir RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${FASTQC_VER}.zip && \ unzip fastqc_v${FASTQC_VER}.zip && \ rm fastqc_v${FASTQC_VER}.zip && \ @@ -33,3 +43,47 @@ RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${FAS # set PATH and working directory ENV PATH="${PATH}:/FastQC/" WORKDIR /data + +# TEST STAGE - Getting Test Data # +# getting fastq files with SRA tools - input data +FROM ncbi/sra-tools:3.0.1 as fastq + +# set working directory to /fastq_test +WORKDIR /fastq_test + +RUN fasterq-dump --threads 4 -A SRR6082043 && \ + gzip *.fastq && \ + mv SRR6082043_1.fastq.gz test_1.fastq.gz && mv SRR6082043_2.fastq.gz test_2.fastq.gz + +# new base for testing +FROM app as test + +#re-instantiate these variables +ARG FASTQC_VER + +# set working directory to /test +WORKDIR /test + +# Getting test output data for test validation. Just a note, it is not possible to compare the test HTML files to the test output directly as the report includes a date of run. +# Files are available if someone wants to verify the outputs externally or write another test. +RUN wget -P /test/data https://raw.githubusercontent.com/poojasgupta/docker-builds/master/fastqc/0.11.9/tests/test_1_fastqc.html && \ + wget -P /test/data https://raw.githubusercontent.com/poojasgupta/docker-builds/master/fastqc/0.11.9/tests/test_1_fastqc.zip && \ + wget -P /test/data https://raw.githubusercontent.com/poojasgupta/docker-builds/master/fastqc/0.11.9/tests/test_2_fastqc.html && \ + wget -P /test/data https://raw.githubusercontent.com/poojasgupta/docker-builds/master/fastqc/0.11.9/tests/test_2_fastqc.zip + +# Copy the test input data to /test +COPY --from=fastq /fastq_test/test*fastq.gz /test/ + +# Run fastqc +RUN echo "Running FastQC with the test fastq files" +RUN fastqc test_1.fastq.gz test_2.fastq.gz -o . + +# Check validity of outputs. The expected number of sequences should be '669558' in both the fastq files. +# If the test output is different, something went wrong. +RUN grep -o "Total Sequences669558" /test/test_1_fastqc.html && \ +grep -o "Total Sequences669558" /test/test_2_fastqc.html && \ +echo "Your test worked successfully!" 
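+
+# (Optional idea, not part of the original test: FastQC also writes a machine-readable summary inside
+# each output zip, so module statuses could be checked without parsing HTML, e.g.
+#   unzip -p test_1_fastqc.zip test_1_fastqc/summary.txt
+# unzip is already installed in the app stage, so this would add no extra dependencies.)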
+ +# Print version and help options +RUN fastqc --version && \ + fastqc --help diff --git a/fastqc/0.11.9/README.md b/fastqc/0.11.9/README.md new file mode 100755 index 000000000..0871a3c92 --- /dev/null +++ b/fastqc/0.11.9/README.md @@ -0,0 +1,19 @@ +# fastqc 0.11.9 docker image + +Main tool: [fastqc](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +Full documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + +Source code: https://github.com/s-andrews/FastQC + +> A tool designed to provide some simple quality control checks on high throughput sequence data. + +This docker image runs FASTQC on an input '.fastq.gz' file. + +## Example usage + +Example stats for a compressed (gzip) FASTQ: `$ docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/fastqc:0.11.9 fastqc /data/SRX0000000_R1.fastq.gz` + +Output will be a QC report (.html and .zip). + +View full fastqc help options: `$ docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/fastqc:0.11.9 fastqc -h` diff --git a/fastqc/0.11.9/tests/test_1_fastqc.html b/fastqc/0.11.9/tests/test_1_fastqc.html new file mode 100644 index 000000000..372882fe2 --- /dev/null +++ b/fastqc/0.11.9/tests/test_1_fastqc.html @@ -0,0 +1,187 @@
      [FastQC HTML report rendered as text; plot images omitted] FastQC Report, Tue 7 Feb 2023, test_1.fastq.gz
      Basic Statistics [OK]: Filename test_1.fastq.gz | File type Conventional base calls | Encoding Sanger / Illumina 1.9 | Total Sequences 669558 | Sequences flagged as poor quality 0 | Sequence length 35-251 | %GC 52
      Modules: Per base sequence quality [OK] | Per sequence quality scores [OK] | Per base sequence content [FAIL] | Per sequence GC content [WARN] | Per base N content [OK] | Sequence Length Distribution [WARN] | Sequence Duplication Levels [OK] | Overrepresented sequences [OK] (none) | Adapter Content [OK]
      \ No newline at end of file diff --git a/fastqc/0.11.9/tests/test_1_fastqc.zip b/fastqc/0.11.9/tests/test_1_fastqc.zip new file mode 100644 index 000000000..94fc5bfd2 Binary files /dev/null and b/fastqc/0.11.9/tests/test_1_fastqc.zip differ diff --git a/fastqc/0.11.9/tests/test_2_fastqc.html b/fastqc/0.11.9/tests/test_2_fastqc.html new file mode 100644 index 000000000..7be836638 --- /dev/null +++ b/fastqc/0.11.9/tests/test_2_fastqc.html @@ -0,0 +1,187 @@ +test_2.fastq.gz FastQC Report
      [FastQC HTML report rendered as text; plot images omitted] FastQC Report, Tue 7 Feb 2023, test_2.fastq.gz
      Basic Statistics [OK]: Filename test_2.fastq.gz | File type Conventional base calls | Encoding Sanger / Illumina 1.9 | Total Sequences 669558 | Sequences flagged as poor quality 0 | Sequence length 35-251 | %GC 52
      Modules: Per base sequence quality [WARN] | Per sequence quality scores [OK] | Per base sequence content [FAIL] | Per sequence GC content [WARN] | Per base N content [OK] | Sequence Length Distribution [WARN] | Sequence Duplication Levels [OK] | Overrepresented sequences [OK] (none) | Adapter Content [OK]
      \ No newline at end of file diff --git a/fastqc/0.11.9/tests/test_2_fastqc.zip b/fastqc/0.11.9/tests/test_2_fastqc.zip new file mode 100644 index 000000000..fc859858f Binary files /dev/null and b/fastqc/0.11.9/tests/test_2_fastqc.zip differ diff --git a/fastqc/0.12.1/Dockerfile b/fastqc/0.12.1/Dockerfile new file mode 100644 index 000000000..a51e89027 --- /dev/null +++ b/fastqc/0.12.1/Dockerfile @@ -0,0 +1,83 @@ +# FROM defines the base docker image. +# The 'as' keyword lets you name the following stage. We use `app` for the production image +FROM ubuntu:focal as app + +ARG FASTQC_VER="0.12.1" + +# metadata +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="FASTQC" +LABEL software.version="${FASTQC_VER}" +LABEL description="A quality control analysis tool for high throughput sequencing data" +LABEL website="https://www.bioinformatics.babraham.ac.uk/projects/fastqc/" +LABEL website.code.repo="https://github.com/s-andrews/FastQC/" +LABEL license="https://github.com/s-andrews/FastQC/blob/master/LICENSE.txt" +LABEL maintainer="Abigail Shockey" +LABEL maintainer.email="abigail.shockey@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Pooja Gupta" +LABEL maintainer3.email="biopooja@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + unzip \ + wget \ + perl \ + default-jre \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install fastqc. Make /data for use as a working dir +RUN wget https://www.bioinformatics.babraham.ac.uk/projects/fastqc/fastqc_v${FASTQC_VER}.zip && \ + unzip fastqc_v${FASTQC_VER}.zip && \ + rm fastqc_v${FASTQC_VER}.zip && \ + chmod +x FastQC/fastqc && \ + mkdir /data + +# set PATH and working directory +ENV PATH="${PATH}:/FastQC/" +WORKDIR /data + +# TEST STAGE - Getting Test Data # +# getting fastq files with SRA tools - input data +FROM ncbi/sra-tools:3.0.1 as fastq + +# set working directory to /fastq_test +WORKDIR /fastq_test + +RUN fasterq-dump --threads 4 -A SRR6082043 && \ + gzip *.fastq && \ + mv SRR6082043_1.fastq.gz test_1.fastq.gz && mv SRR6082043_2.fastq.gz test_2.fastq.gz + +# new base for testing +FROM app as test + +# set working directory to /test +WORKDIR /test + +# Getting test output data for test validation. Just a note, it is not possible to compare the test HTML files to the test output directly as the report includes a date of run. +# Files are available if someone wants to verify the outputs externally or write another test. +RUN wget -P /test/data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/fastqc/0.11.9/tests/test_1_fastqc.html && \ + wget -P /test/data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/fastqc/0.11.9/tests/test_1_fastqc.zip && \ + wget -P /test/data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/fastqc/0.11.9/tests/test_2_fastqc.html && \ + wget -P /test/data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/fastqc/0.11.9/tests/test_2_fastqc.zip + +# Copy the test input data to /test +COPY --from=fastq /fastq_test/test*fastq.gz /test/ + +# Run fastqc +RUN echo "Running FastQC with the test fastq files" +RUN fastqc test_1.fastq.gz test_2.fastq.gz -o . + +# Check validity of outputs. The expected number of sequences should be '669558' in both the fastq files. +# If the test output is different, something went wrong. 
+RUN grep -o "Total Sequences669558" /test/test_1_fastqc.html && \ +grep -o "Total Sequences669558" /test/test_2_fastqc.html && \ +echo "Your test worked successfully!" + +# Print version and help options +RUN fastqc --version && \ + fastqc --help diff --git a/fastqc/0.12.1/README.md new file mode 100755 index 000000000..fbbaf9d58 --- /dev/null +++ b/fastqc/0.12.1/README.md @@ -0,0 +1,28 @@ +# fastqc docker image + +Main tool & documentation: [fastqc](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +Source code: [https://github.com/s-andrews/FastQC/](https://github.com/s-andrews/FastQC) + +Additional tools: + +- perl 5.30.0 +- java (openjdk 11.0.18) + +> A tool designed to provide some simple quality control checks on high throughput sequence data. + +## Example usage + +Example stats for a compressed (gzip) FASTQ: + +```bash +docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/fastqc:latest fastqc /data/SRX0000000_R1.fastq.gz +``` + +Output will be a QC report (`.html` and `.zip`). + +View full fastqc help options: + +```bash +docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/fastqc:latest fastqc -h +``` diff --git a/fasttree/2.1.11/Dockerfile index 5189e25c4..7b8c5cd4f 100644 --- a/fasttree/2.1.11/Dockerfile +++ b/fasttree/2.1.11/Dockerfile @@ -1,5 +1,8 @@ # base image -FROM ubuntu:bionic +FROM ubuntu:bionic as app + +# add ARG here +ARG fasttree_ver="2.1.11" # metadata LABEL base.image="ubuntu:bionic" @@ -7,25 +10,43 @@ LABEL version="1" LABEL software="Fasttree" LABEL software.version="2.1.11" LABEL description="Infers approximately-maximum-likelihood phylogenetic trees from alignments of nucleotide or protein sequences" -LABEL website="http://www.microbesonline.org/fasttree" +LABEL website="http://www.microbesonline.org/fasttree/" LABEL license="http://www.microbesonline.org/fasttree/" -LABEL maintainer="Abigail Shockey" -LABEL maintainer.email="abigail.shockey@slh.wisc.edu" +LABEL dockerfile.version="1" +LABEL maintainer="Daniel Evans" +LABEL maintainer.email="evansdr95@gmail.com" # install ubuntu dependencies -RUN apt-get update && \ - apt-get -y install build-essential\ - wget && \ - apt-get clean +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* # install fasttree RUN wget http://www.microbesonline.org/fasttree/FastTree && \ chmod +x FastTree && \ mkdir fasttree && \ - mv FastTree fasttree/ + mv FastTree /fasttree/ # add to path ENV PATH="${PATH}:/fasttree" RUN mkdir /data + WORKDIR /data + +# test layer +# run FastTree on a two-genome alignment, then print the tree + +FROM app as test + +ARG fasttree_ver="2.1.11" + +COPY covid_alignment.fasta /data/covid_alignment.fasta + +RUN echo "Running command: $ FastTree -log fasttree_test_log -nt /data/covid_alignment.fasta > fasttree_test_tree.nwk" && \ +FastTree -log fasttree_test_log -nt /data/covid_alignment.fasta > fasttree_test_tree.nwk && \ +echo "Printing FastTree output tree..." && \ +cat fasttree_test_tree.nwk && \ +echo "Dockerfile test complete!" 
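The FastQC and FastTree Dockerfiles above share the same two-stage `app`/`test` layout, so the self-tests can also be exercised locally before CI runs. A minimal sketch of what that might look like from the root of a checkout (the image tags below are arbitrary local names, not part of this diff):

```bash
# Build only the production (app) stage of the new FastQC image.
docker build --target app -t fastqc:0.12.1-local fastqc/0.12.1/

# Build through the test stage instead; the build fails if the
# "Total Sequences669558" grep (FastQC) or the FastTree test run fails.
docker build --target test -t fastqc:0.12.1-test fastqc/0.12.1/
docker build --target test -t fasttree:2.1.11-test fasttree/2.1.11/
```

Building the FastQC `test` target also builds the intermediate `ncbi/sra-tools` stage, so the first run downloads and compresses the SRR6082043 reads used as test input.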
diff --git a/fasttree/2.1.11/README.md new file mode 100644 index 000000000..69eedf14e --- /dev/null +++ b/fasttree/2.1.11/README.md @@ -0,0 +1,84 @@ +Main tool: FastTree v2.1.11 +Full documentation: http://www.microbesonline.org/fasttree/ + +**Docker container information:** + +This Docker container installs and runs FastTree using Ubuntu Bionic as a base image. No other dependencies are needed to run FastTree. +The small sequence alignment (covid_alignment.fasta) also included in the container is used to test the Docker build. + +**Usage for FastTree version 2.1.11** + +FastTree infers approximately-maximum-likelihood phylogenetic trees from alignments of nucleotide or protein sequences. + +FastTree can handle alignments with up to a million sequences in a reasonable amount of time and memory. + +FastTree accepts alignments in fasta or phylip interleaved formats. + +``` +FastTree protein_alignment > tree + +FastTree < protein_alignment > tree + +FastTree -out tree protein_alignment + +FastTree -nt nucleotide_alignment > tree + +FastTree -nt -gtr < nucleotide_alignment > tree + +FastTree < nucleotide_alignment > tree + +Common options (must be before the alignment file): + + -quiet to suppress reporting information + + -nopr to suppress progress indicator + + -log logfile -- save intermediate trees, settings, and model details + + -fastest -- speed up the neighbor joining phase & reduce memory usage (recommended for >50,000 sequences) + + -n to analyze multiple alignments (phylip format only) (use for global bootstrap, with seqboot and CompareToBootstrap.pl) + + -nosupport to not compute support values + + -intree newick_file to set the starting tree(s) + + -intree1 newick_file to use this starting tree for all the alignments (for faster global bootstrap on huge alignments) + + -pseudo to use pseudocounts (recommended for highly gapped sequences) + + -gtr -- generalized time-reversible model (nucleotide alignments only) + + -lg -- Le-Gascuel 2008 model (amino acid alignments only) + + -wag -- Whelan-And-Goldman 2001 model (amino acid alignments only) + + -quote -- allow spaces and other restricted characters (but not ' ) in + sequence names and quote names in the output tree (fasta input only; + FastTree will not be able to read these trees back in + + -noml to turn off maximum-likelihood + + -nome to turn off minimum-evolution NNIs and SPRs + (recommended if running additional ML NNIs with -intree) + + -nome -mllen with -intree to optimize branch lengths for a fixed topology + + -cat # to specify the number of rate categories of sites (default 20) + + -nocat to use constant rates + + -gamma -- after optimizing the tree under the CAT approximation, rescale the lengths to optimize the Gamma20 likelihood + + -constraints constraintAlignment to constrain the topology search constraintAlignment should have 1s or 0s to indicates splits + + -expert -- see more options +``` + +**Example command-line input for basic FastTree phylogeny** + +``` +FastTree -log fasttree_test_log -nt sample.fasta > fasttree_test.nwk +``` + + For more information, see http://www.microbesonline.org/fasttree/ diff --git a/fasttree/2.1.11/covid_alignment.fasta new file mode 100644 index 000000000..9ff59b1c6 --- /dev/null +++ b/fasttree/2.1.11/covid_alignment.fasta @@ -0,0 +1,1000 @@ +>Wuhan-Hu-1_SARS-CoV-2 +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT +GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT
+CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC +TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT +CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG +AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG +CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA +ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT +CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG +TGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA +TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA +ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG +CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG +TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA +GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA +TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA +GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA +GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA +AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC +ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG +CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG +TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA +AATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA +GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA +AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC +TCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT +TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG +AATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTAC +TAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGA +AGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTAT +CTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAA +GGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTC +TATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCC +TCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT +AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGA +AGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGA +AATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGA +AGTGCAAGGTTACAAGAGTGTGAATATCACTTTTGAACTTGATGAAAGGATTGATAAAGT +ACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGC +CTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC +ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTCTACCCTCCAGATGAGGATGAAGA +AGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGA +AGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGA +AGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGA +CGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT +AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGT +AAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGC +AGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGC 
+TACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAA +GAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGG +TATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA +TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGA +AATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAAT +CAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAA +CTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAG +TGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCA +AGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTGGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCA +GGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGC +CTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTC +TTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTG +TGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGC +GTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA +TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCC +AGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTC +TTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGA +TAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCAC +CTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTAC +AACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA +ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTT +TGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCA +CACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAA +CAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCC +ACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT +GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTG +TAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGG +CACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACA +AGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCA +GTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTT +ACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG +TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAAT +TGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATG +TGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTC +AAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTA +TAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTG +GCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGA +CGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGT +GGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGT +AGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCA +CACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAG +TGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC +AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTT +CTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGC +ATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTG 
+GTTTTTACTATTAAGTGTTTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGT +TTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAA +CTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT +TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATAT +TCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAG +CTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGT +ACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTA +TGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG +GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTG +TGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGA +CTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGA +TAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC +TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACAC +TAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATC +ATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT +AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGA +TGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCCTTAGACAATGTCTTATCTAC +TTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGT +TGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAA +CTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTAT +TGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGC +TGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAA +TGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCA +GTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACC +TGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGC +TGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC +ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCAC +GATATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGT +TGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATA +TTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACAC +ACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTC +TGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC +AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTACTTACTAATATGTTTAC +ACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTAT +TGTAGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGG +TGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCACTGTACT +CTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT +CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCA +TTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTT +TAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAA +GTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTG +TCATCTCGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACC +ACCACAAACCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC +ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGG +TCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGACAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACA +GGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCT +TAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGG 
+ACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGC +TATGAGGCCCAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAAC +TGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCA +AACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTA +CGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGA +CTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAA +AGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA +TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGT +GAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTT +AGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAA +GCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATAT +GGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATAC +TAGTTTGTCTGGTTTTAAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT +AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT +GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTC +CATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCAT +GTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAAC +TGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTG +TTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA +TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA +GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTG +TATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTT +ACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGT +CCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT +TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGA +AGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCC +ATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAGCAGGCTGTTGCTAATGGTGA +TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGA +CCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT +GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAAT +GCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGC +AAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGT +TGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGC +ATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAG +GGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGAT +GTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTA +CAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAA +ATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATCTATACAGAACTGGAACCACC +TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAA +AGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT +ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTT +TGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCAC +TAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC +GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTG +CCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAAT +ACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGT +CTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA +GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA +CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGAT +AAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGAC +GAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTAC +CAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGAC +TTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT 
+AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC +ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAG +GACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAA +CGTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGT +ATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT +GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTG +TTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGAC +TTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA +AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAAC +TGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG +TTCCCACCTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTT +GTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAAC +TTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATG +CACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCA +CTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT +GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTC +TTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTA +CCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTT +GATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAA +TCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT +TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACT +CAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC +TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCC +GCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAAC +ATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT +AAATGTGATAGAGCCATGCCTAACATGCTTAGAATTATGGCCTCACTTGTTCTTGCTCGC +AAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCT +CAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACC +TCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC +ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC +CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGAC +TTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTCTCTGAC +GATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAG +AACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGG +ACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTT +AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC +GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTG +TCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTC +TTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTA +GACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT +TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGC +AATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAA +TGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT +GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATG +AGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAA +GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCA +ATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAA +AGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCT +TATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAA +GTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCT +GTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACA +TCACATACAGTAATGCCATTAAGTGCACCTACACTAGTGCCACAAGAGCACTATGTTAGA +ATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAAT +TATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG +AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCT +TGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT 
+AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTG +AATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCA +GATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT +GCCAGATTACGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCA +CGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTT +ATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATT +GTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA +GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT +AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACCCTGCTTGGAGAAAA +GCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTA +CCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAA +ACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCA +AAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA +AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC +TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTC +AGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACATACCTGGCATACCTAAG +GACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAAT +GGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT +GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTA +CAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACA +CCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA +CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTA +CAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA +CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGT +TGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGG +CATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGG +GGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCA +CATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCG +GCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCA +GTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAA +TGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTC +TATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC +AATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCT +AACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC +ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTAC +TCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCA +CTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT +GCTAATGAGTACAGATTGTATCTCGATGCTTATAACATGATGATCTCAGCTGGCTTTAGC +TTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAG +AGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGT +GAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA +GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG +CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCT +GCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGT +GTTTGTTCTATGACTGACATAGCCAAGAAACCAACTGAAACGATTTGTGCACCACTCACT +GTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGT +GTTCTTATTACAGAAGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT +AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG +AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTA +CAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAA +TTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTT +AGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA +TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATA +ACAGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGAT +GATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG +ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACA 
+TTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT +TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCA +ACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTA +AACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCT +GATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTG +CTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCT +AAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGT +GGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACAT +TCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTT +ACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA +CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACA +AATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA +AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTT +CTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTT +CTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG +TCAGTGTGTTAATCTTACAACCAGAACTCAATTACCCCCTGCATACACTAATTCTTTCAC +ACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGA +CTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTATACATGTCTCTGGGAC +CAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC +TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA +GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATT +TCAATTTTGTAATGATCCATTTTTGGGTGTTTATTACCACAAAAACAACAAAAGTTGGAT +GGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCA +GCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGT +GTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGT +GCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT +TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGA +TTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAG +GACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACT +TGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTA +TCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTAC +AAACTTGTGCCCTTTTGGTGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTG +GAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTCCGCATC +ATTTTCCACTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTAC +TAATGTCTATGCAGATTCATTTGTAATTAGAGGTGATGAAGTCAGACAAATCGCTCCAGG +GCAAACTGGAAAGATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGT +TATAGCTTGGAATTCTAACAATCTTGATTCTAAGGTTGGTGGTAATTATAATTACCTGTA +TAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTA +TCAGGCCGGTAGCACACCTTGTAATGGTGTTGAAGGTTTTAATTGTTACTTTCCTTTACA +ATCATATGGTTTCCAACCCACTAATGGTGTTGGTTACCAACCATACAGAGTAGTAGTACT +TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTT +GGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTAC +TGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTAC +TGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGG +TGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA +GGATGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTG +GCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC +TGAACATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAG +TTATCAGACTCAGACTAATTCTCCTCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCAT +TGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGC +CATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAA +GACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTT +GTTGCAATATGGCAGTTTTTGTACACAATTAAACCGTGCTTTAACTGGAATAGCTGTTGA +ACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC +AATTAAAGATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAG +CAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTT 
+CATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACA +AAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATA +CACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGC +ATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACA +GAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA +AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAA +CCAAAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAATTTTGGTGCAAT +TTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAAT +TGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAAT +TAGAGCTGCAGAAATCAGAGCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGT +ACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCC +TCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA +GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGG +TGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACA +AATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGT +CAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGACTCATTCAAGGAGGAGTTAGA +TAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAA +TGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTT +AAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC +ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTAT +GCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTG +CTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACAC +ATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAG +CAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCG +ATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTT +CAGAGCGCTTCCAAAATCATAACCCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT +GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTC +GTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAG +AGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAA +AACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTAT +TGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACA +AGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGA +GTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA +ACTCAATTGAGTACAGACACTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATT +GTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTT +AATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAA +GCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGACAGGTACGTTA +ATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCC +ATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA +AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT +CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAG +CCATGGCAGATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAACAAT +GGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATG +CCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG +TAACTTTAGCTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAA +TTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTT +TCAGACTGTTTGCGCGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC +TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAA +TCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG +ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACATCACGAACGCTTTCTTATTACA +AATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA +GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGC +TTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAG +ATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA +AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGAT +GAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTG +ATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTA 
+CTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTCATCCTCTA +GCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC +GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGA +CAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT +ATAACACTTTGCTTCACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACT +TCTATTTGTGCTTTTTAGCCTTTCTGCTATTCCTTGTTTTAATTATGCTTATTATCTTTT +GGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAACGAACATGAAAT +TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTAC +AGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATT +CTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGG +ATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT +GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT +CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAA +CGAACAAACTAAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACCCCGCATTAC +GTTTGGTGGACCCTCAGATTCAACTGGCAGTAACCAGAATGGAGAACGCAGTGGGGCGCG +ATCAAAACAACGTCGGCCCCAAGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCT +CACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACAC +CAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG +TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGG +GCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA +GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGC +AATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG +CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAA +TTCAACTCCAGGCAGCAGTAGGGGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGA +TGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAA +GAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAG +ACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAAC +TGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGG +AATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC +CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCA +TATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC +TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCC +TGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCAGTGCTGACTC +AACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC +TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGC +ACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTA +GGGAGGACTTGAAAGAGCCACCACATTTTCACCGAGGCCACGCGGAGTACGATCGAGTGT +ACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT +TTTAGTAGTGCTATCCCCATGTGATTTTAATAGCTTCTTAGGAGAATGACAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAAAAA +>SARS-CoV-2/human/Japan/SZ-NIG-Y222567/2022 +---------------------------------------------------TGTAGATCT +GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT +CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC +TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT +TGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG +AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG +CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA +ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT +CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG +TGGCCATAGGTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA +TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA +ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG +CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG 
+TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA +GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA +TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA +GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA +GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA +AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC +ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG +CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG +TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA +AATACTTCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA +GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA +AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC +TCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT +TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG +AATTTCACAGTATTCACTGAGACTCATTGATGCTATGATGTTCACATCTGATTTGGCTAC +TAACAATCTAGTTGTAATGGCCTACATTACAGGTGGTGTTGTTCAGTTGACTTCGCAGTG +GCTAACTAACATCTTTGGCACTGTTTATGAAAAACTCAAACCCGTCCTTGATTGGCTTGA +AGAGAAGTTTAAGGAAGGTGTAGAGTTTCTTAGAGACGGTTGGGAAATTGTTAAATTTAT +CTCAACCTGTGCTTGTGAAATTGTCGGTGGACAAATTGTCACCTGTGCAAAGGAAATTAA +GGAGAGTGTTCAGACATTCTTTAAGCTTGTAAATAAATTTTTGGCTTTGTGTGCTGACTC +TATCATTATTGGTGGAGCTAAACTTAAAGCCTTGAATTTAGGTGAAACATTTGTCACGCA +CTCAAAGGGATTGTACAGAAAGTGTGTTAAATCCAGAGAAGAAACTGGCCTACTCATGCC +TCTAAAAGCCCCAAAAGAAATTATCTTCTTAGAGGGAGAAACACTTCCCACAGAAGTGTT +AACAGAGGAAGTTGTCTTGAAAACTGGTGATTTACAACCATTAGAACAACCTACTAGTGA +AGCTGTTGAAGCTCCATTGGTTGGTACACCAGTTTGTATTAACGGGCTTATGTTGCTCGA +AATCAAAGACACAGAAAAGTACTGTGCCCTTGCACCTAATATGATGGTAACAAACAATAC +CTTCACACTCAAAGGCGGTGCACCAACAAAGGTTACTTTTGGTGATGACACTGTGATAGA +AGTGCAAGGTTACAAGAGTGTGAATATCATTTTTGAACTTGATGAAAGGATTGATAAAGT +ACTTAATGAGAAGTGCTCTGCCTATACAGTTGAACTCGGTACAGAAGTAAATGAGTTCGC +CTGTGTTGTGGCAGATGCTGTCATAAAAACTTTGCAACCAGTATCTGAATTACTTACACC +ACTGGGCATTGATTTAGATGAGTGGAGTATGGCTACATACTACTTATTTGATGAGTCTGG +TGAGTTTAAATTGGCTTCACATATGTATTGTTCTTTTTACCCTCCAGATGAGGATGAAGA +AGAAGGTGATTGTGAAGAAGAAGAGTTTGAGCCATCAACTCAATATGAGTATGGTACTGA +AGATGATTACCAAGGTAAACCTTTGGAATTTGGTGCCACTTCTGCTGCTCTTCAACCTGA +AGAAGAGCAAGAAGAAGATTGGTTAGATGATGATAGTCAACAAACTGTTGGTCAACAAGA +CGGCAGTGAGGACAATCAGACAACTACTATTCAAACAATTGTTGAGGTTCAACCTCAATT +AGAGATGGAACTTACACCAGTTGTTCAGACTATTGAAGTGAATAGTTTTAGTGGTTATTT +AAAACTTACTGACAATGTATACATTAAAAATGCAGACATTGTGGAAGAAGCTAAAAAGGT +AAAACCAACAGTGGTTGTTAATGCAGCCAATGTTTACCTTAAACATGGAGGAGGTGTTGC +AGGAGCCTTAAATAAGGCTACTAACAATGCCATGCAAGTTGAATCTGATGATTACATAGC +TACTAATGGACCACTTAAAGTGGGTGGTAGTTGTGTTTTAAGCGGACACAATCTTGCTAA +ACACTGTCTTCATGTTGTCGGCCCAAATGTTAACAAAGGTGAAGACATTCAACTTCTTAA +GAGTGCTTATGAAAATTTTAATCAGCACGAAGTTCTACTTGCACCATTATTATCAGCTGG +TATTTTTGGTGCTGACCCTATACATTCTTTAAGAGTTTGTGTAGATACTGTTCGCACAAA +TGTCTACTTAGCTGTCTTTGATAAAAATCTCTATGACAAACTTGTTTCAAGCTTTTTGGA +AATGAAGAGTGAAAAGCAAGTTGAACAAAAGATCGCTGAGATTCCTAAAGAGGAAGTTAA +GCCATTTATAACTGAAAGTAAACCTTCAGTTGAACAGAGAAAACAAGATGATAAGAAAAT +CAAAGCTTGTGTTGAAGAAGTTACAACAACTCTGGAAGAAACTAAGTTCCTCACAGAAAA +CTTGTTACTTTATATTGACATTAATGGCAATCTTCATCCAGATTCTGCCACTCTTGTTAG +TGACATTGACATCACTTTCTTAAAGAAAGATGCTCCATATATAGTGGGTGATGTTGTTCA +AGAGGGTGTTTTAACTGCTGTGGTTATACCTACTAAAAAGGCTAGTGGCACTACTGAAAT +GCTAGCGAAAGCTTTGAGAAAAGTGCCAACAGACAATTATATAACCACTTACCCGGGTCA +GGGTTTAAATGGTTACACTGTAGAGGAGGCAAAGACAGTGCTTAAAAAGTGTAAAAGTGC +TTTTTACATTCTACCATCTATTATCTCTAATGAGAAGCAAGAAATTCTTGGAACTGTTTC 
+TTGGAATTTGCGAGAAATGCTTGCACATGCAGAAGAAACACGCAAATTAATGCCTGTCTG +TGTGGAAACTAAAGCCATAGTTTCAACTATACAGCGTAAATATAAGGGTATTAAAATACA +AGAGGGTGTGGTTGATTATGGTGCTAGATTTTACTTTTACACCAGTAAAACAACTGTAGC +GTCACTTATCAACACACTTAACGATCTAAATGAAACTCTTGTTACAATGCCACTTGGCTA +TGTAACACATGGCTTAAATTTGGAAGAAGCTGCTCGGTATATGAGATCTCTCAAAGTGCC +AGCTACAGTTTCTGTTTCTTCACCTGATGCTGTTACAGCGTATAATGGTTATCTTACTTC +TTCTTCTAAAACACCTGAAGAACATTTTATTGAAACCATCTCACTTGCTGGTTCCTATAA +AGATTGGTCCTATTCTGGACAATCTACACAACTAGGTATAGAATTTCTTAAGAGAGGTGA +TAAAAGTGTATATTACACTAGTAATCCTACCACATTCCACCTAGATGGTGAAGTTATCAC +CTTTGACAATCTTAAGACACTTCTTTCTTTGAGAGAAGTGAGGACTATTAAGGTGTTTAC +AACAGTAGACAACATTAACCTCCACACGCAAGTTGTGGACATGTCAATGACATATGGACA +ACAGTTTGGTCCAACTTATTTGGATGGAGCTGATGTTACTAAAATAAAACCTCATAATTC +ACATGAAGGTAAAACATTTTATGTTTTACCTAATGATGACACTCTACGTGTTGAGGCTTT +TGAGTACTACCACACAACTGATCCTAGTTTTCTGGGTAGGTACATGTCAGCATTAAATCA +CACTAAAAAGTGGAAATACCCACAAGTTAATGGTTTAACTTCTATTAAATGGGCAGATAA +CAACTGTTATCTTGCCACTGCATTGTTAACACTCCAACAAATAGAGTTGAAGTTTAATCC +ACCTGCTCTACAAGATGCTTATTACAGAGCAAGGGCTGGTGAAGCTGCTAACTTTTGTGC +ACTTATCTTAGCCTACTGTAATAAGACAGTAGGTGAGTTAGGTGATGTTAGAGAAACAAT +GAGTTACTTGTTTCAACATGCCAATTTAGATTCTTGCAAAAGAGTCTTGAACGTGGTGTG +TAAAACTTGTGGACAACAGCAGACAACCCTTAAGGGTGTAGAAGCTGTTATGTACATGGG +CACACTTTCTTATGAACAATTTAAGAAAGGTGTTCAGATACCTTGTACGTGTGGTAAACA +AGCTACAAAATATCTAGTACAACAGGAGTCACCTTTTGTTATGATGTCAGCACCACCTGC +TCAGTATGAACTTAAGCATGGTACATTTACTTGTGCTAGTGAGTACACTGGTAATTACCA +GTGTGGTCACTATAAACATATAACTTCTAAAGAAACTTTGTATTGCATAGACGGTGCTTT +ACTTACAAAGTCCTCAGAATACAAAGGTCCTATTACGGATGTTTTCTACAAAGAAAACAG +TTACACAACAACCATAAAACCAGTTACTTATAAATTGGATGGTGTTGTTTGTACAGAAAT +TGACCCTAAGTTGGACAATTATTATAAGAAAGACAATTCTTATTTCACAGAGCAACCAAT +TGATCTTGTACCAAACCAACCATATCCAAACGCAAGCTTCGATAATTTTAAGTTTGTATG +TGATAATATCAAATTTGCTGATGATTTAAACCAGTTAACTGGTTATAAGAAACCTGCTTC +AAGAGAGCTTAAAGTTACATTTTTCCCTGACTTAAATGGTGATGTGGTGGCTATTGATTA +TAAACACTACACACCCTCTTTTAAGAAAGGAGCTAAATTGTTACATAAACCTATTGTTTG +GCATGTTAACAATGCAACTAATAAAGCCACGTATAAACCAAATACCTGGTGTATACGTTG +TCTTTGGAGCACAAAACCAGTTGAAACATCAAATTCGTTTGATGTACTGAAGTCAGAGGA +CGCGCAGGGAATGGATAATCTTGCCTGCGAAGATCTAAAACCAGTCTCTGAAGAAGTAGT +GGAAAATCCTACCATACAGAAAGACGTTCTTGAGTGTAATGTGAAAACTACCGAAGTTGT +AGGAGACATTATACTTAAACCAGCAAATAATAGTTTAAAAATTACAGAAGAGGTTGGCCA +CACAGATCTAATGGCTGCTTATGTAGACAATTCTAGTCTTACTATTAAGAAACCTAATGA +ATTATCTAGAGTATTAGGTTTGAAAACCCTTGCTACTCATGGTTTAGCTGCTGTTAATAG +TGTCCCTTGGGATACTATAGCTAATTATGCTAAGCCTTTTCTTAACAAAGTTGTTAGTAC +AACTACTAACATAGTTACACGGTGTTTAAACCGTGTTTGTACTAATTATATGCCTTATTT +CTTTACTTTATTGCTACAATTGTGTACTTTTACTAGAAGTACAAATTCTAGAATTAAAGC +ATCTATGCCGACTACTATAGCAAAGAATACTGTTAAGAGTGTCGGTAAATTTTGTCTAGA +GGCTTCATTTAATTATTTGAAGTCACCTAATTTTTCTAAACTGATAAATATTATAATTTG +GTTTTTACTATTAAGTGTGTGCCTAGGTTCTTTAATCTACTCAACCGCTGCTTTAGGTGT +TTTAATGTCTAATTTAGGCATGCCTTCTTACTGTACTGGTTACAGAGAAGGCTATTTGAA +CTCTACTAATGTCACTATTGCAACCTACTGTACTGGTTCTATACCTTGTAGTGTTTGTCT +TAGTGGTTTAGATTCTTTAGACACCTATCCTTCTTTAGAAACTATACAAATTACCATTTC +ATCTTTTAAATGGGATTTAACTGCTTTTGGCTTAGTTGCAGAGTGGTTTTTGGCATATAT +TCTTTTCACTAGGTTTTTCTATGTACTTGGATTGGCTGCAATCATGCAATTGTTTTTCAG +CTATTTTGCAGTACATTTTATTAGTAATTCTTGGCTTATGTGGTTAATAATTAATCTTGT +ACAAATGGCCCCGATTTCAGCTATGGTTAGAATGTACATCTTCTTTGCATCATTTTATTA +TGTATGGAAAAGTTATGTGCATGTTGTAGACGGTTGTAATTCATCAACTTGTATGATGTG +TTACAAACGTAATAGAGCAACAAGAGTCGAATGTACAACTATTGTTAATGGTGTTAGAAG +GTCCTTTTATGTCTATGCTAATGGAGGTAAAGGCTTTTGCAAACTACACAATTGGAATTG +TGTTAATTGTGATACATTCTGTGCTGGTAGTACATTTATTAGTGATGAAGTTGCGAGAGA +CTTGTCACTACAGTTTAAAAGACCAATAAATCCTACTGACCAGTCTTCTTACATCGTTGA +TAGTGTTACAGTGAAGAATGGTTCCATCCATCTTTACTTTGATAAAGCTGGTCAAAAGAC 
+TTATGAAAGACATTCTCTCTCTCATTTTGTTAACTTAGACAACCTGAGAGCTAATAACAC +TAAAGGTTCATTGCCTATTAATGTTATAGTTTTTGATGGTAAATCAAAATGTGAAGAATC +ATCTGCAAAATCAGCGTCTGTTTACTACAGTCAGCTTATGTGTCAACCTATACTGTTACT +AGATCAGGCATTAGTGTCTGATGTTGGTGATAGTGCGGAAGTTGCAGTTAAAATGTTTGA +TGCTTACGTTAATACGTTTTCATCAACTTTTAACGTACCAATGGAAAAACTCAAAACACT +AGTTGCAACTGCAGAAGCTGAACTTGCAAAGAATGTGTCTTTAGACAATGTCTTATCTAC +TTTTATTTCAGCAGCTCGGCAAGGGTTTGTTGATTCAGATGTAGAAACTAAAGATGTTGT +TGAATGTCTTAAATTGTCACATCAATCTGACATAGAAGTTACTGGCGATAGTTGTAATAA +CTATATGCTCACCTATAACAAAGTTGAAAACATGACACCCCGTGACCTTGGTGCTTGTAT +TGACTGTAGTGCGCGTCATATTAATGCGCAGGTAGCAAAAAGTCACAACATTGCTTTGAT +ATGGAACGTTAAAGATTTCATGTCATTGTCTGAACAACTACGAAAACAAATACGTAGTGC +TGCTAAAAAGAATAACTTACCTTTTAAGTTGACATGTGCAACTACTAGACAAGTTGTTAA +TGTTGTAACAACAAAGATAGCACTTAAGGGTGGTAAAATTGTTAATAATTGGTTGAAGCA +GTTAATTAAAGTTACACTTGTGTTCCTTTTTGTTGCTGCTATTTTCTATTTAATAACACC +TGTTCATGTCATGTCTAAACATACTGACTTTTCAAGTGAAATCATAGGATACAAGGCTAT +TGATGGTGGTGTCACTCGTGACATAGCATCTACAGATACTTGTTTTGCTAACAAACATGC +TGATTTTGACACATGGTTTAGCCAGCGTGGTGGTAGTTATACTAATGACAAAGCTTGCCC +ATTGATTGCTGCAGTCATAACAAGAGAAGTGGGTTTTGTCGTGCCTGGTTTGCCTGGCAC +GGTATTACGCACAACTAATGGTGACTTTTTGCATTTCTTACCTAGAGTTTTTAGTGCAGT +TGGTAACATCTGTTACACACCATCAAAACTTATAGAGTACACTGACTTTGCAACATCAGC +TTGTGTTTTGGCTGCTGAATGTACAATTTTTAAAGATGCTTCTGGTAAGCCAGTACCATA +TTGTTATGATACCAATGTACTAGAAGGTTCTGTTGCTTATGAAAGTTTACGCCCTGACAC +ACGTTATGTGCTCATGGATGGCTCTATTATTCAATTTCCTAACACCTACCTTGAAGGTTC +TGTTAGAGTGGTAACAACTTTTGATTCTGAGTACTGTAGGCACGGCACTTGTGAAAGATC +AGAAGCTGGTGTTTGTGTATCTACTAGTGGTAGATGGGTACTTAACAATGATTATTACAG +ATCTTTACCAGGAGTTTTCTGTGGTGTAGATGCTGTAAATTTATTTACTAATATGTTTAT +ACCACTAATTCAACCTATTGGTGCTTTGGACATATCAGCATCTATAGTAGCTGGTGGTAT +TGTGGCTATCGTAGTAACATGCCTTGCCTACTATTTTATGAGGTTTAGAAGAGCTTTTGG +TGAATACAGTCATGTAGTTGCCTTTAATACTTTACTATTCCTTATGTCATTCATTGTACT +CTGTTTAACACCAGTTTACTCATTCTTACCTGGTGTTTATTCTGTTATTTACTTGTACTT +GACATTTTATCTTACTAATGATGTTTCTTTTTTAGCACATATTCAGTGGATGGTTATGTT +CACACCTTTAGTACCTTTCTGGATAACAATTGCTTATATCATTTGTATTTCCACAAAGCA +TTTCTATTGGTTCTTTAGTAATTACCTAAAGAGACGTGTAGTCTTTAATGGTGTTTCCTT +TAGTACTTTTGAAGAAGCTGCGCTGTGCACCTTTTTGTTAAATAAAGAAATGTATCTAAA +GTTGCGTAGTGATGTGCTATTACCTCTTACGCAATATAATAGATACTTAGCTCTTTATAA +TAAGTACAAGTATTTTAGTGGAGCAATGGATACAACTAGCTACAGAGAAGCTGCTTGTTG +TCATCTTGCAAAGGCTCTCAATGACTTCAGTAACTCAGGTTCTGATGTTCTTTACCAACC +ACCACAAATCTCTATCACCTCAGCTGTTTTGCAGAGTGGTTTTAGAAAAATGGCATTCCC +ATCTGGTAAAGTTGAGGGTTGTATGGTACAAGTAACTTGTGGTACAACTACACTTAACGG +TCTTTGGCTTGATGACGTAGTTTACTGTCCAAGACATGTGATCTGCACCTCTGAAGATAT +GCTTAACCCTAATTATGAAGATTTACTCATTCGTAAGTCTAATCATAATTTCTTGGTACA +GGCTGGTAATGTTCAACTCAGGGTTATTGGACATTCTATGCAAAATTGTGTACTTAAGCT +TAAGGTTGATACAGCCAATCCTAAGACACCTAAGTATAAGTTTGTTCGCATTCAACCAGG +ACAGACTTTTTCAGTGTTAGCTTGTTACAATGGTTCACCATCTGGTGTTTACCAATGTGC +TATGAGACACAATTTCACTATTAAGGGTTCATTCCTTAATGGTTCATGTGGTAGTGTTGG +TTTTAACATAGATTATGACTGTGTCTCTTTTTGTTACATGCACCATATGGAATTACCAAC +TGGAGTTCATGCTGGCACAGACTTAGAAGGTAACTTTTATGGACCTTTTGTTGACAGGCA +AACAGCACAAGCAGCTGGTACGGACACAACTATTACAGTTAATGTTTTAGCTTGGTTGTA +TGCTGCTGTTATAAATGGAGACAGGTGGTTTCTCAATCGATTTACCACAACTCTTAATGA +CTTTAACCTTGTGGCTATGAAGTACAATTATGAACCTCTAACACAAGACCATGTTGACAT +ACTAGGACCTCTTTCTGCTCAAACTGGAATTGCCGTTTTAGATATGTGTGCTTCATTAAA +AGAATTACTGCAAAATGGTATGAATGGACGTACCATATTGGGTAGTGCTTTATTAGAAGA +TGAATTTACACCTTTTGATGTTGTTAGACAATGCTCAGGTGTTACTTTCCAAAGTGCAGT +GAAAAGAACAATCAAGGGTACACACCACTGGTTGTTACTCACAATTTTGACTTCACTTTT +AGTTTTAGTCCAGAGTACTCAATGGTCTTTGTTCTTTTTTTTGTATGAAAATGCCTTTTT +ACCTTTTGCTATGGGTATTATTGCTATGTCTGCTTTTGCAATGATGTTTGTCAAACATAA +GCATGCATTTCTCTGTTTGTTTTTGTTACCTTCTCTTGCCACTGTAGCTTATTTTAATAT 
+GGTCTATATGCCTGCTAGTTGGGTGATGCGTATTATGACATGGTTGGATATGGTTGATAC +TAGTTTG---------AAGCTAAAAGACTGTGTTATGTATGCATCAGCTGTAGTGTTACT +AATCCTTATGACAGCAAGAACTGTGTATGATGATGGTGCTAGGAGAGTGTGGACACTTAT +GAATGTCTTGACACTCGTTTATAAAGTTTATTATGGTAATGCTTTAGATCAAGCCATTTC +CATGTGGGCTCTTATAATCTCTGTTACTTCTAACTACTCAGGTGTAGTTACAACTGTCAT +GTTTTTGGCCAGAGGTATTGTTTTTATGTGTGTTGAGTATTGCCCTATTTTCTTCATAAC +TGGTAATACACTTCAGTGTATAATGCTAGTTTATTGTTTCTTAGGCTATTTTTGTACTTG +TTACTTTGGCCTCTTTTGTTTACTCAACCGCTACTTTAGACTGACTCTTGGTGTTTATGA +TTACTTAGTTTCTACACAGGAGTTTAGATATATGAATTCACAGGGACTACTCCCACCCAA +GAATAGCATAGATGCCTTCAAACTCAACATTAAATTGTTGGGTGTTGGTGGCAAACCTTG +TATCAAAGTAGCCACTGTACAGTCTAAAATGTCAGATGTAAAGTGCACATCAGTAGTCTT +ACTCTCAGTTTTGCAACAACTCAGAGTAGAATCATCATCTAAATTGTGGGCTCAATGTGT +CCAGTTACACAATGACATTCTCTTAGCTAAAGATACTACTGAAGCCTTTGAAAAAATGGT +TTCACTACTTTCTGTTTTGCTTTCCATGCAGGGTGCTGTAGACATAAACAAGCTTTGTGA +AGAAATGCTGGACAACAGGGCAACCTTACAAGCTATAGCCTCAGAGTTTAGTTCCCTTCC +ATCATATGCAGCTTTTGCTACTGCTCAAGAAGCTTATGAACAGGCTGTTGCTAATGGTGA +TTCTGAAGTTGTTCTTAAAAAGTTGAAGAAGTCTTTGAATGTGGCTAAATCTGAATTTGA +CCGTGATGCAGCCATGCAACGTAAGTTGGAAAAGATGGCTGATCAAGCTATGACCCAAAT +GTATAAACAGGCTAGATCTGAGGACAAGAGGGCAAAAGTTACTAGTGCTATGCAGACAAT +GCTTTTCACTATGCTTAGAAAGTTGGATAATGATGCACTCAACAACATTATCAACAATGC +AAGAGATGGTTGTGTTCCCTTGAACATAATACCTCTTACAACAGCAGCCAAACTAATGGT +TGTCATACCAGACTATAACACATATAAAAATACGTGTGATGGTACAACATTTACTTATGC +ATCAGCATTGTGGGAAATCCAACAGGTTGTAGATGCAGATAGTAAAATTGTTCAACTTAG +TGAAATTAGTATGGACAATTCACCTAATTTAGCATGGCCTCTTATTGTAACAGCTTTAAG +GGCCAATTCTGCTGTCAAATTACAGAATAATGAGCTTAGTCCTGTTGCACTACGACAGAT +GTCTTGTGCTGCCGGTACTACACAAACTGCTTGCACTGATGACAATGCGTTAGCTTACTA +CAACACAACAAAGGGAGGTAGGTTTGTACTTGCACTGTTATCCGATTTACAGGATTTGAA +ATGGGCTAGATTCCCTAAGAGTGATGGAACTGGTACTATTTATACAGAACTGGAACCACC +TTGTAGGTTTGTTACAGACACACCTAAAGGTCCTAAAGTGAAGTATTTATACTTTATTAA +AGGATTAAACAACCTAAATAGAGGTATGGTACTTGGTAGTTTAGCTGCCACAGTACGTCT +ACAAGCTGGTAATGCAACAGAAGTGCCTGCCAATTCAACTGTATTATCTTTCTGTGCTTT +TGCTGTAGATGCTGCTAAAGCTTACAAAGATTATCTAGCTAGTGGGGGACAACCAATCAC +TAATTGTGTTAAGATGTTGTGTACACACACTGGTACTGGTCAGGCAATAACAGTTACACC +GGAAGCCAATATGGATCAAGAATCCTTTGGTGGTGCATCGTGTTGTCTGTACTGCCGTTG +CCACATAGATCATCCAAATCCTAAAGGATTTTGTGACTTAAAAGGTAAGTATGTACAAAT +ACCTACAACTTGTGCTAATGACCCTGTGGGTTTTACACTTAAAAACACAGTCTGTACCGT +CTGCGGTATGTGGAAAGGTTATGGCTGTAGTTGTGATCAACTCCGCGAACCCATGCTTCA +GTCAGCTGATGCACAATCGTTTTTAAACGGGTTTGCGGTGTAAGTGCAGCCCGTCTTACA +CCGTGCGGCACAGGCACTAGTACTGATGTCGTATACAGGGCTTTTGACATCTACAATGAT +AAAGTAGCTGGTTTTGCTAAATTCCTAAAAACTAATTGTTGTCGCTTCCAAGAAAAGGAC +GAAGATGACAATTTAATTGATTCTTACTTTGTAGTTAAGAGACACACTTTCTCTAACTAC +CAACATGAAGAAACAATTTATAATTTACTTAAGGATTGTCCAGCTGTTGCTAAACATGAC +TTCTTTAAGTTTAGAATAGACGGTGACATGGTACCACATATATCACGTCAACGTCTTACT +AAATACACAATGGCAGACCTCGTCTATGCTTTAAGGCATTTTGATGAAGGTAATTGTGAC +ACATTAAAAGAAATACTTGTCACATACAATTGTTGTGATGATGATTATTTCAATAAAAAG +GACTGGTATGATTTTGTAGAAAACCCAGATATATTACGCGTATACGCCAACTTAGGTGAA +CGTGTACGCCAAGCTTTGTTAAAAACAGTACAATTCTGTGATGCCATGCGAAATGCTGGT +ATTGTTGGTGTACTGACATTAGATAATCAAGATCTCAATGGTAACTGGTATGATTTCGGT +GATTTCATACAAACCACGCCAGGTAGTGGAGTTCCTGTTGTAGATTCTTATTATTCATTG +TTAATGCCTATATTAACCTTGACCAGGGCTTTAACTGCAGAGTCACATGTTGACACTGAC +TTAACAAAGCCTTACATTAAGTGGGATTTGTTAAAATATGACTTCACGGAAGAGAGGTTA +AAACTCTTTGACCGTTATTTTAAATATTGGGATCAGACATACCACCCAAATTGTGTTAAC +TGTTTGGATGACAGATGCATTCTGCATTGTGCAAACTTTAATGTTTTATTCTCTACAGTG +TTCCCACTTACAAGTTTTGGACCACTAGTGAGAAAAATATTTGTTGATGGTGTTCCATTT +GTAGTTTCAACTGGATACCACTTCAGAGAGCTAGGTGTTGTACATAATCAGGATGTAAAC +TTACATAGCTCTAGACTTAGTTTTAAGGAATTACTTGTGTATGCTGCTGACCCTGCTATG +CACGCTGCTTCTGGTAATCTATTACTAGATAAACGCACTACGTGCTTTTCAGTAGCTGCA 
+CTTACTAACAATGTTGCTTTTCAAACTGTCAAACCCGGTAATTTTAACAAAGACTTCTAT +GACTTTGCTGTGTCTAAGGGTTTCTTTAAGGAAGGAAGTTCTGTTGAATTAAAACACTTC +TTCTTTGCTCAGGATGGTAATGCTGCTATCAGCGATTATGACTACTATCGTTATAATCTA +CCAACAATGTGTGATATCAGACAACTACTATTTGTAGTTGAAGTTGTTGATAAGTACTTT +GATTGTTACGATGGTGGCTGTATTAATGCTAACCAAGTCATCGTCAACAACCTAGACAAA +TCAGCTGGTTTTCCATTTAATAAATGGGGTAAGGCTAGACTTTATTATGATTCAATGAGT +TATGAGGATCAAGATGCACTTTTCGCATATACAAAACGTAATGTCATCCCTACTATAACT +CAAATGAATCTTAAGTATGCCATTAGTGCAAAGAATAGAGCTCGCACCGTAGCTGGTGTC +TCTATCTGTAGTACTATGACCAATAGACAGTTTCATCAAAAATTATTGAAATCAATAGCC +GCCACTAGAGGAGCTACTGTAGTAATTGGAACAAGCAAATTCTATGGTGGTTGGCACAAC +ATGTTAAAAACTGTTTATAGTGATGTAGAAAACCCTCACCTTATGGGTTGGGATTATCCT +AAATGTGATAGAGCCATGCCTAACATGCTCAGAATTATGGCCTCACTTGTTCTTGCTCGC +AAACATACAACGTGTTGTAGCTTGTCACACCGTTTCTATAGATTAGCTAATGAGTGTGCT +CAAGTATTGAGTGAAATGGTCATGTGTGGCGGTTCACTATATGTTAAACCAGGTGGAACC +TCATCAGGAGATGCCACAACTGCTTATGCTAATAGTGTTTTTAACATTTGTCAAGCTGTC +ACGGCCAATGTTAATGCACTTTTATCTACTGATGGTAACAAAATTGCCGATAAGTATGTC +CGCAATTTACAACACAGACTTTATGAGTGTCTCTATAGAAATAGAGATGTTGACACAGAC +TTTGTGAATGAGTTTTACGCATATTTGCGTAAACATTTCTCAATGATGATACTTTCTGAC +GATGCTGTTGTGTGTTTCAATAGCACTTATGCATCTCAAGGTCTAGTGGCTAGCATAAAG +AACTTTAAGTCAGTTCTTTATTATCAAAACAATGTTTTTATGTCTGAAGCAAAATGTTGG +ACTGAGACTGACCTTACTAAAGGACCTCATGAATTTTGCTCTCAACATACAATGCTAGTT +AAACAGGGTGATGATTATGTGTACCTTCCTTACCCAGATCCATCAAGAATCCTAGGGGCC +GGCTGTTTTGTAGATGATATCGTAAAAACAGATGGTACACTTATGATTGAACGGTTCGTG +TCTTTAGCTATAGATGCTTACCCACTTACTAAACATCCTAATCAGGAGTATGCTGATGTC +TTTCATTTGTACTTACAATACATAAGAAAGCTACATGATGAGTTAACAGGACACATGTTA +GACATGTATTCTGTTATGCTTACTAATGATAACACTTCAAGGTATTGGGAACCTGAGTTT +TATGAGGCTATGTACACACCGCATACAGTCTTACAGGCTGTTGGGGCTTGTGTTCTTTGC +AATTCACAGACTTCATTAAGATGTGGTGCTTGCATACGTAGACCATTCTTATGTTGTAAA +TGCTGTTACGACCATGTCATATCAACATCACATAAATTAGTCTTGTCTGTTAATCCGTAT +GTTTGCAATGCTCCAGGTTGTGATGTCACAGATGTGACTCAACTTTACTTAGGAGGTATG +AGCTATTATTGTAAATCACATAAACCACCCATTAGTTTTCCATTGTGTGCTAATGGACAA +GTTTTTGGTTTATATAAAAATACATGTGTTGGTAGCGATAATGTTACTGACTTTAATGCA +ATTGCAACATGTGACTGGACAAATGCTGGTGATTACATTTTAGCTAACACCTGTACTGAA +AGACTCAAGCTTTTTGCAGCAGAAACGCTCAAAGCTACTGAGGAGACATTTAAACTGTCT +TATGGTATTGCTACTGTACGTGAAGTGCTGTCTGACAGAGAATTACATCTTTCATGGGAA +GTTGGTAAACCTAGACCACCACTTAACCGAAATTATGTCTTTACTGGTTATCGTGTAACT +AAAAACAGTAAAGTACAAATAGGAGAGTACACCTTTGAAAAAGGTGACTATGGTGATGCT +GTTGTTTACCGAGGTACAACAACTTACAAATTAAATGTTGGTGATTATTTTGTGCTGACA +TCACATACAGTAATGCCATTAAGTGCACCTACATTAGTGCCACAAGAGCACTATGTTAGA +ATTACTGGCTTATACCCAACACTCAATATCTCAGATGAGTTTTCTAGCAATGTTGCAAAT +TATCAAAAGGTTGGTATGCAAAAGTATTCTACACTCCAGGGACCACCTGGTACTGGTAAG +AGTCATTTTGCTATTGGCCTAGCTCTCTACTACCCTTCTGCTCGCATAGTGTATACAGCT +TGCTCTCATGCCGCTGTTGATGCACTATGTGAGAAGGCATTAAAATATTTGCCTATAGAT +AAATGTAGTAGAATTATACCTGCACGTGCTCGTGTAGAGTGTTTTGATAAATTCAAAGTG +AATTCAACATTAGAACAGTATGTCTTTTGTACTGTAAATGCATTGCCTGAGACGACAGCA +GATATAGTTGTCTTTGATGAAATTTCAATGGCCACAAATTATGATTTGAGTGTTGTCAAT +GCCAGATTATGTGCTAAGCACTATGTGTACATTGGCGACCCTGCTCAATTACCTGCACCA +CGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTT +ATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATT +GTTGACACTGTGAGTGCTTTGGTTTATGATAATAAGCTTAAAGCACATAAAGACAAATCA +GCTCAATGCTTTAAAATGTTTTATAAGGGTGTTATCACGCATGATGTTTCATCTGCAATT +AACAGGCCACAAATAGGCGTGGTAAGAGAATTCCTTACACGTAACTCTGCTTGGAGAAAA +GCTGTCTTTATTTCACCTTATAATTCACAGAATGCTGTAGCCTCAAAGATTTTGGGACTA +CCAACTCAAACTGTTGATTCATCACAGGGCTCAGAATATGACTATGTCATATTCACTCAA +ACCACTGAAACAGCTCACTCTTGTAATGTAAACAGATTTAATGTTGCTATTACCAGAGCA +AAAGTAGGCATACTTTGCATAATGTCTGATAGAGACCTTTATGACAAGTTGCAATTTACA +AGTCTTGAAATTCCACGTAGGAATGTGGCAACTTTACAAGCTGAAAATGTAACAGGACTC 
+TTTAAAGATTGTAGTAAGGTAATCACTGGGTTACATCCTACACAGGCACCTACACACCTC +AGTGTTGACACTAAATTCAAAACTGAAGGTTTATGTGTTGACGTACCTGGCATACCTAAG +GACATGACCTATAGAAGACTCATCTCTATGATGGGTTTTAAAATGAATTATCAAGTTAAT +GGTTACCCTAACATGTTTATCACCCGCGAAGAAGCTATAAGACATGTACGTGCATGGATT +GGCTTCGATGTCGAGGGGTGTCATGCTACTAGAGAAGCTGTTGGTACCAATTTACCTTTA +CAGCTAGGTTTTTCTACAGGTGTTAACCTAGTTGCTGTACCTACAGGTTATGTTGATACA +CCTAATAATACAGATTTTTCCAGAGTTAGTGCTAAACCACCGCCTGGAGATCAATTTAAA +CACCTCATACCACTTATGTACAAAGGACTTCCTTGGAATGTAGTGCGTATAAAGATTGTA +CAAATGTTAAGTGACACACTTAAAAATCTCTCTGACAGAGTCGTATTTGTCTTATGGGCA +CATGGCTTTGAGTTGACATCTATGAAGTATTTTGTGAAAATAGGACCTGAGCGCACCTGT +TGTCTATGTGATAGACGTGCCACATGCTTTTCCACTGCTTCAGACACTTATGCCTGTTGG +CATCATTCTATTGGATTTGATTACGTCTATAATCCGTTTATGATTGATGTTCAACAATGG +GGTTTTACAGGTAACCTACAAAGCAACCATGATCTGTATTGTCAAGTCCATGGTAATGCA +CATGTAGCTAGTTGTGATGCAATCATGACTAGGTGTCTAGCTGTCCACGAGTGCTTTGTT +AAGCGTGTTGACTGGACTATTGAATATCCTATAATTGGTGATGAACTGAAGATTAATGCG +GCTTGTAGAAAGGTTCAACACATGGTTGTTAAAGCTGCATTATTAGCAGACAAATTCCCA +GTTCTTCACGACATTGGTAACCCTAAAGCTATTAAGTGTGTACCTCAAGCTGATGTAGAA +TGGAAGTTCTATGATGCACAGCCTTGTAGTGACAAAGCTTATAAAATAGAAGAATTATTC +TATTCTTATGCCACACATTCTGACAAATTCACAGATGGTGTATGCCTATTTTGGAATTGC +AATGTCGATAGATATCCTGCTAATTCCATTGTTTGTAGATTTGACACTAGAGTGCTATCT +AACCTTAACTTGCCTGGTTGTGATGGTGGCAGTTTGTATGTAAATAAACATGCATTCCAC +ACACCAGCTTTTGATAAAAGTGCTTTTGTTAATTTAAAACAATTACCATTTTTCTATTAC +TCTGACAGTCCATGTGAGTCTCATGGAAAACAAGTAGTGTCAGATATAGATTATGTACCA +CTAAAGTCTGCTACGTGTATAACACGTTGCAATTTAGGTGGTGCTGTCTGTAGACATCAT +GCTAATGAGTACAGATTGTATCTTGATGCTTATAACATGATGATCTCAGCTGGCTTTAGC +TTGTGGGTTTACAAACAATTTGATACTTATAACCTCTGGAACACTTTTACAAGACTTCAG +AGTTTAGAAAATGTGGCTTTTAATGTTGTAAATAAGGGACACTTTGATGGACAACAGGGT +GAAGTACCAGTTTCTATCATTAATAACACTGTTTACACAAAAGTTGATGGTGTTGATGTA +GAATTGTTTGAAAATAAAACAACATTACCTGTTAATGTAGCATTTGAGCTTTGGGCTAAG +CGCAACATTAAACCAGTACCAGAGGTGAAAATACTCAATAATTTGGGTGTGGACATTGCT +GCTAATACTGTGATCTGGGACTACAAAAGAGATGCTCCAGCACATATATCTACTATTGGT +GTTTGTTCTATGACTGACATAGCCAAGAAACCAATTGAAACGATTTGTGCACCACTCACT +GTCTTTTTTGATGGTAGAGTTGATGGTCAAGTAGACTTATTTAGAAATGCCCGTAATGGT +GTTCTTATTACAGAGGGTAGTGTTAAAGGTTTACAACCATCTGTAGGTCCCAAACAAGCT +AGTCTTAATGGAGTCACATTAATTGGAGAAGCCGTAAAAACACAGTTCAATTATTATAAG +AAAGTTGATGGTGTTGTCCAACAATTACCTGAAACTTACTTTACTCAGAGTAGAAATTTA +CAAGAATTTAAACCCAGGAGTCAAATGGAAATTGATTTCTTAGAATTAGCTATGGATGAA +TTCATTGAACGGTATAAATTAGAAGGCTATGCCTTCGAACATATCGTTTATGGAGATTTT +AGTCATAGTCAGTTAGGTGGTTTACATCTACTGATTGGACTAGCTAAACGTTTTAAGGAA +TCACCTTTTGAATTAGAAGATTTTATTCCTATGGACAGTACAGTTAAAAACTATTTCATA +ACGGATGCGCAAACAGGTTCATCTAAGTGTGTGTGTTCTGTTATTGATTTATTACTTGAT +GATTTTGTTGAAATAATAAAATCCCAAGATTTATCTGTAGTTTCTAAGGTTGTCAAAGTG +ACTATTGACTATACAGAAATTTCATTTATGCTTTGGTGTAAAGATGGCCATGTAGAAACA +TTTTACCCAAAATTACAATCTAGTCAAGCGTGGCAACCGGGTGTTGCTATGCCTAATCTT +TACAAAATGCAAAGAATGCTATTAGAAAAGTGTGACCTTCAAAATTATGGTGATAGTGCA +ACATTACCTAAAGGCATAATGATGAATGTCGCAAAATATACTCAACTGTGTCAATATTTA +AACACATTAACATTAGCTGTACCCTATAATATGAGAGTTATACATTTTGGTGCTGGTTCT +GATAAAGGAGTTGCACCAGGTACAGCTGTTTTAAGACAGTGGTTGCCTACGGGTACGCTG +CTTGTCGATTCAGATCTTAATGACTTTGTCTCTGATGCAGATTCAACTTTGATTGGTGAT +TGTGCAACTGTACATACAGCTAATAAATGGGATCTCATTATTAGTGATATGTACGACCCT +AAGACTAAAAATGTTACAAAAGAAAATGACTCTAAAGAGGGTTTTTTCACTTACATTTGT +GGGTTTATACAACAAAAGCTAGCTCTTGGAGGTTCCGTGGCTATAAAGATAACAGAACAT +TCTTGGAATGCTGATCTTTATAAGCTCATGGGACACTTCGCATGGTGGACAGCCTTTGTT +ACTAATGTGAATGCGTCATCATCTGAAGCATTTTTAATTGGATGTAATTATCTTGGCAAA +CCACGCGAACAAATAGATGGTTATGTCATGCATGCAAATTACATATTTTGGAGGAATACA +AATCCAATTCAGTTGTCTTCCTATTCTTTATTTGACATGAGTAAATTTCCCCTTAAATTA +AGGGGTACTGCTGTTATGTCTTTAAAAGAAGGTCAAATCAATGATATGATTTTATCTCTT 
+CTTAGTAAAGGTAGACTTATAATTAGAGAAAACAACAGAGTTGTTATTTCTAGTGATGTT +CTTGTTAACAACTAAACGAACAATGTTTGTTTTTCTTGTTTTATTGCCACTAGTCTCTAG +TCAGTGTGTTAATCTTATAACCAGAACTCAAT---------CATACACTAATTCTTTCAC +ACGTGGTGTTTATTACCCTGACAAAGTTTTCAGATCCTCAGTTTTACATTCAACTCAGGA +CTTGTTCTTACCTTTCTTTTCCAATGTTACTTGGTTCCATGCTA------TCTCTGGGAC +CAATGGTACTAAGAGGTTTGATAACCCTGTCCTACCATTTAATGATGGTGTTTATTTTGC +TTCCACTGAGAAGTCTAACATAATAAGAGGCTGGATTTTTGGTACTACTTTAGATTCGAA +GACCCAGTCCCTACTTATTGTTAATAACGCTACTAATGTTGTTATTAAAGTCTGTGAATT +TCAATTTTGTAATGATCCATTTTTGGATGTTTATTACCACAAAAACAACAAAAGTTGGAT +GGAAAGTGAGTTCAGAGTTTATTCTAGTGCGAATAATTGCACTTTTGAATATGTCTCTCA +GCCTTTTCTTATGGACCTTGAAGGAAAACAGGGTAATTTCAAAAATCTTAGGGAATTTGT +GTTTAAGAATATTGATGGTTATTTTAAAATATATTCTAAGCACACGCCTATTAATTTAGG +GCGTGATCTCCCTCAGGGTTTTTCGGCTTTAGAACCATTGGTAGATTTGCCAATAGGTAT +TAACATCACTAGGTTTCAAACTTTACTTGCTTTACATAGAAGTTATTTGACTCCTGGTGA +TTCTTCTTCAGGTTGGACAGCTGGTGCTGCAGCTTATTATGTGGGTTATCTTCAACCTAG +GACTTTTCTATTAAAATATAATGAAAATGGAACCATTACAGATGCTGTAGACTGTGCACT +TGACCCTCTCTCAGAAACAAAGTGTACGTTGAAATCCTTCACTGTAGAAAAAGGAATCTA +CCAAACTTCTAACTTTAGAGTCCAACCAACAGAATCTATTGTTAGATTTCCTAATATTAC +AAACTTGTGCCCTTTTGATGAAGTTTTTAACGCCACCAGATTTGCATCTGTTTATGCTTG +GAACAGGAAGAGAATCAGCAACTGTGTTGCTGATTATTCTGTCCTATATAATTTCGCACC +ATTTTTCGCTTTTAAGTGTTATGGAGTGTCTCCTACTAAATTAAATGATCTCTGCTTTAC +TAATGTCTATGCAGATTCATTTGTAATTAGAGGTAATGAAGTCAGCCAAATCGCTCCAGG +GCAAACTGGAAATATTGCTGATTATAATTATAAATTACCAGATGATTTTACAGGCTGCGT +TATAGCTTGGAATTCTAACAAGCTTGATTCTAAGGTTGGTGGTAATTATAATTACCGGTA +TAGATTGTTTAGGAAGTCTAATCTCAAACCTTTTGAGAGAGATATTTCAACTGAAATCTA +TCAGGCCGGTAACAAACCTTGTAATGGTGTTGCAGGTGTTAATTGTTACTTTCCTTTACA +ATCATATGGTTTCCGACCCACTTATGGTGTTGGTCACCAACCATACAGAGTAGTAGTACT +TTCTTTTGAACTTCTACATGCACCAGCAACTGTTTGTGGACCTAAAAAGTCTACTAATTT +GGTTAAAAACAAATGTGTCAATTTCAACTTCAATGGTTTAACAGGCACAGGTGTTCTTAC +TGAGTCTAACAAAAAGTTTCTGCCTTTCCAACAATTTGGCAGAGACATTGCTGACACTAC +TGATGCTGTCCGTGATCCACAGACACTTGAGATTCTTGACATTACACCATGTTCTTTTGG +TGGTGTCAGTGTTATAACACCAGGAACAAATACTTCTAACCAGGTTGCTGTTCTTTATCA +GGGTGTTAACTGCACAGAAGTCCCTGTTGCTATTCATGCAGATCAACTTACTCCTACTTG +GCGTGTTTATTCTACAGGTTCTAATGTTTTTCAAACACGTGCAGGCTGTTTAATAGGGGC +TGAATATGTCAACAACTCATATGAGTGTGACATACCCATTGGTGCAGGTATATGCGCTAG +TTATCAGACTCAGACTAAGTCTCATCGGCGGGCACGTAGTGTAGCTAGTCAATCCATCAT +TGCCTACACTATGTCACTTGGTGCAGAAAATTCAGTTGCTTACTCTAATAACTCTATTGC +CATACCCACAAATTTTACTATTAGTGTTACCACAGAAATTCTACCAGTGTCTATGACCAA +GACATCAGTAGATTGTACAATGTACATTTGTGGTGATTCAACTGAATGCAGCAATCTTTT +GTTGCAATATGGCAGTTTTTGTACACAATTAAAACGTGCTTTAACTGGAATAGCTGTTGA +ACAAGACAAAAACACCCAAGAAGTTTTTGCACAAGTCAAACAAATTTACAAAACACCACC +AATTAAATATTTTGGTGGTTTTAATTTTTCACAAATATTACCAGATCCATCAAAACCAAG +CAAGAGGTCATTTATTGAAGATCTACTTTTCAACAAAGTGACACTTGCAGATGCTGGCTT +CATCAAACAATATGGTGATTGCCTTGGTGATATTGCTGCTAGAGACCTCATTTGTGCACA +AAAGTTTAACGGCCTTACTGTTTTGCCACCTTTGCTCACAGATGAAATGATTGCTCAATA +CACTTCTGCACTGTTAGCGGGTACAATCACTTCTGGTTGGACCTTTGGTGCAGGTGCTGC +ATTACAAATACCATTTGCTATGCAAATGGCTTATAGGTTTAATGGTATTGGAGTTACACA +GAATGTTCTCTATGAGAACCAAAAATTGATTGCCAACCAATTTAATAGTGCTATTGGCAA +AATTCAAGACTCACTTTCTTCCACAGCAAGTGCACTTGGAAAACTTCAAGATGTGGTCAA +CCATAATGCACAAGCTTTAAACACGCTTGTTAAACAACTTAGCTCCAAATTTGGTGCAAT +TTCAAGTGTTTTAAATGATATCCTTTCACGTCTTGACAAAGTTGAGGCTGAAGTGCAAAT +TGATAGGTTGATCACAGGCAGACTTCAAAGTTTGCAGACATATGTGACTCAACAATTAAT +TAGAGCTGCAGAAATCAGATCTTCTGCTAATCTTGCTGCTACTAAAATGTCAGAGTGTGT +ACTTGGACAATCAAAAAGAGTTGATTTTTGTGGAAAGGGCTATCATCTTATGTCCTTCCC +TCAGTCAGCACCTCATGGTGTAGTCTTCTTGCATGTGACTTATGTCCCTGCACAAGAAAA +GAACTTCACAACTGCTCCTGCCATTTGTCATGATGGAAAAGCACACTTTCCTCGTGAAGG +TGTCTTTGTTTCAAATGGCACACACTGGTTTGTAACACAAAGGAATTTTTATGAACCACA 
+AATCATTACTACAGACAACACATTTGTGTCTGGTAACTGTGATGTTGTAATAGGAATTGT +CAACAACACAGTTTATGATCCTTTGCAACCTGAATTAGATTCATTCAAGGAGGAGTTAGA +TAAATATTTTAAGAATCATACATCACCAGATGTTGATTTAGGTGACATCTCTGGCATTAA +TGCTTCAGTTGTAAACATTCAAAAAGAAATTGACCGCCTCAATGAGGTTGCCAAGAATTT +AAATGAATCTCTCATCGATCTCCAAGAACTTGGAAAGTATGAGCAGTATATAAAATGGCC +ATGGTACATTTGGCTAGGTTTTATAGCTGGCTTGATTGCCATAGTAATGGTGACAATTAT +GCTTTGCTGTATGACCAGTTGCTGTAGTTGTCTCAAGGGCTGTTGTTCTTGTGGATCCTG +CTGCAAATTTGATGAAGACGACTCTGAGCCAGTGCTCAAAGGAGTCAAATTACATTACAC +ATAAACGAACTTATGGATTTGTTTATGAGAATCTTCACAATTGGAACTGTAACTTTGAAG +CAAGGTGAAATCAAGGATGCTACTCCTTCAGATTTTGTTCGCGCTACTGCAACGATACCG +ATACAAGCCTCACTCCCTTTCGGATGGCTTATTGTTGGCGTTGCACTTCTTGCTGTTTTT +CAGAGCGCTTCCAAAATCATAACTCTCAAAAAGAGATGGCAACTAGCACTCTCCAAGGGT +GTTCACTTTGTTTGCAACTTGCTGTTGTTGTTTGTAACAGTTTACTCACACCTTTTGCTC +GTTGCTGCTGGCCTTGAAGCCCCTTTTCTCTATCTTTATGCTTTAGTCTACTTCTTGCAG +AGTATAAACTTTGTAAGAATAATAATGAGGCTTTGGCTTTGCTGGAAATGCCGTTCCAAA +AACCCATTACTTTATGATGCCAACTATTTTCTTTGCTGGCATACTAATTGTTACGACTAT +TGTATACCTTACAATAGTGTAACTTCTTCAATTGTCATTACTTCAGGTGATGGCACAACA +AGTCCTATTTCTGAACATGACTACCAGATTGGTGGTTATACTGAAAAATGGGAATCTGGA +GTAAAAGACTGTGTTGTATTACACAGTTACTTCACTTCAGACTATTACCAGCTGTACTCA +ACTCAATTGAGTACAGACATTGGTGTTGAACATGTTACCTTCTTCATCTACAATAAAATT +GTTGATGAGCCTGAAGAACATGTCCAAATTCACACAATCGACGGTTCATCCGGAGTTGTT +AATCCAGTAATGGAACCAATTTATGATGAACCGACGACGACTACTAGCGTGCCTTTGTAA +GCACAAGCTGATGAGTACGAACTTATGTACTCATTCGTTTCGGAAGAGATAGGTACGTTA +ATAGTTAATAGCGTACTTCTTTTTCTTGCTTTCGTGGTATTCTTGCTAGTTACACTAGCC +ATCCTTACTGCGCTTCGATTGTGTGCGTACTGCTGCAATATTGTTAACGTGAGTCTTGTA +AAACCTTCTTTTTACGTTTACTCTCGTGTTAAAAATCTGAATTCTTCTAGAGTTCCTGAT +CTTCTGGTCTAAACGAACTAAATATTATATTAGTTTTTCTGTTTGGAACTTTAATTTTAG +CCATGGCAAATTCCAACGGTACTATTACCGTTGAAGAGCTTAAAAAGCTCCTTGAAGAAT +GGAACCTAGTAATAGGTTTCCTATTCCTTACATGGATTTGTCTTCTACAATTTGCCTATG +CCAACAGGAATAGGTTTTTGTATATAATTAAGTTAATTTTCCTCTGGCTGTTATGGCCAG +TAACTTTAACTTGTTTTGTGCTTGCTGCTGTTTACAGAATAAATTGGATCACCGGTGGAA +TTGCTATCGCAATGGCTTGTCTTGTAGGCTTGATGTGGCTCAGCTACTTCATTGCTTCTT +TCAGACTGTTTGCACGTACGCGTTCCATGTGGTCATTCAATCCAGAAACTAACATTCTTC +TCAACGTGCCACTCCATGGCACTATTCTGACCAGACCGCTTCTAGAAAGTGAACTCGTAA +TCGGAGCTGTGATCCTTCGTGGACATCTTCGTATTGCTGGACACCATCTAGGACGCTGTG +ACATCAAGGACCTGCCTAAAGAAATCACTGTTGCTACGTCACGAACGCTTTCTTATTACA +AATTGGGAGCTTCGCAGCGTGTAGCAGGTGACTCAGGTTTTGCTGCATACAGTCGCTACA +GGATTGGCAACTATAAATTAAACACAGACCATTCCAGTAGCAGTGACAATATTGCTTTGC +TTGTACAGTAAGTGACAACAGATGTTTCATCTCGTTGACTTTCAGGTTACTATAGCAGAG +ATATTACTAATTATTATGAGGACTTTTAAAGTTTCCATTTGGAATCTTGATTACATCATA +AACCTCATAATTAAAAATTTATCTAAGTCACTAACTGAGAATAAATATTCTCAATTAGAT +GAAGAGCAACCAATGGAGATTGATTAAACGAACATGAAAATTATTCTTTTCTTGGCACTG +ATAACACTCGCTACTTGTGAGCTTTATCACTACCAAGAGTGTGTTAGAGGTACAACAGTA +CTTTTAAAAGAACCTTGCTCTTCTGGAACATACGAGGGCAATTCACCATTTTATCCTCTA +GCTGATAACAAATTTGCACTGACTTGCTTTAGCACTCAATTTGCTTTTGCTTGTCCTGAC +GGCGTAAAACACGTCTATCAGTTACGTGCCAGATCAGTTTCACCTAAACTGTTCATCAGA +CAAGAGGAAGTTCAAGAACTTTACTCTCCAATTTTTCTTATTGTTGCGGCAATAGTGTTT +ATAACACTTTGCTTTACACTCAAAAGAAAGACAGAATGATTGAACTTTCATTAATTGACT +TCTATTTGTGCTTTTTAGCCTTTCTGTTATTCCTTGTTTTAATTATGCTTATTATCTTTT +GGTTCTCACTTGAACTGCAAGATCATAATGAAACTTGTCACGCCTAAATGAACATGAAAT +TTCTTGTTTTCTTAGGAATCATCACAACTGTAGCTGCATTTCACCAAGAATGTAGTTTAC +AGTCATGTACTCAACATCAACCATATGTAGTTGATGACCCGTGTCCTATTCACTTCTATT +CTAAATGGTATATTAGAGTAGGAGCTAGAAAATCAGCACCTTTAATTGAATTGTGCGTGG +ATGAGGCTGGTTCTAAATCACCCATTCAGTACATCGATATCGGTAATTATACAGTTTCCT +GTTTACCTTTTACAATTAATTGCCAGGAACCTAAATTGGGTAGTCTTGTAGTGCGTTGTT +CGTTCTATGAAGACTTTTTAGAGTATCATGACGTTCGTGTTGTTTTAGATTTCATCTAAA +CGAACAAACTTAAATGTCTGATAATGGACCCCAAAATCAGCGAAATGCACTCCGCATTAC 
+GTTTGGTGGGCCCTCAGATTCAACTGGCAGTAACCAGAATG---------GTGGGGCGCG +ATCAAAACAACGTCGGCCCCAGGGTTTACCCAATAATACTGCGTCTTGGTTCACCGCTCT +CACTCAACATGGCAAGGAAGACCTTAAATTCCCTCGAGGACAAGGCGTTCCAATTAACAC +CAATAGCAGTCCAGATGACCAAATTGGCTACTACCGAAGAGCTACCAGACGAATTCGTGG +TGGTGACGGTAAAATGAAAGATCTCAGTCCAAGATGGTATTTCTACTACCTAGGAACTGG +GCCAGAAGCTGGACTTCCCTATGGTGCTAACAAAGACGGCATCATATGGGTTGCAACTGA +GGGAGCCTTGAATACACCAAAAGATCACATTGGCACCCGCAATCCTGCTAACAATGCTGC +AATCGTGCTACAACTTCCTCAAGGAACAACATTGCCAAAAGGCTTCTACGCAGAAGGGAG +CAGAGGCGGCAGTCAAGCCTCTTCTCGTTCCTCATCACGTAGTCGCAACAGTTCAAGAAA +TTCAACTCCAGGCAGCAGTAAACGAACTTCTCCTGCTAGAATGGCTGGCAATGGCGGTGA +TGCTGCTCTTGCTTTGCTGCTGCTTGACAGATTGAACCAGCTTGAGAGCAAAATGTCTGG +TAAAGGCCAACAACAACAAGGCCAAACTGTCACTAAGAAATCTGCTGCTGAGGCTTCTAA +GAAGCCTCGGCAAAAACGTACTGCCACTAAAGCATACAATGTAACACAAGCTTTCGGCAG +ACGTGGTCCAGAACAAACCCAAGGAAATTTTGGGGACCAGGAACTAATCAGACAAGGAAC +TGATTACAAACATTGGCCGCAAATTGCACAATTTGCCCCCAGCGCTTCAGCGTTCTTCGG +AATGTCGCGCATTGGCATGGAAGTCACACCTTCGGGAACGTGGTTGACCTACACAGGTGC +CATCAAATTGGATGACAAAGATCCAAATTTCAAAGATCAAGTCATTTTGCTGAATAAGCA +TATTGACGCATACAAAACATTCCCACCAACAGAGCCTAAAAAGGACAAAAAGAAGAAGGC +TGATGAAACTCAAGCCTTACCGCAGAGACAGAAGAAACAGCAAACTGTGACTCTTCTTCC +TGCTGCAGATTTGGATGATTTCTCCAAACAATTGCAACAATCCATGAGCCGTGCTGACTC +AACTCAGGCCTAAACTCATGCAGACCACACAAGGCAGATGGGCTATATAAACGTTTTCGC +TTTTCCGTTTACGATATATAGTCTACTCTTGTGCAGAATGAATTCTCGTAACTACATAGC +ACAAGTAGATGTAGTTAACTTTAATCTCACATAGCAATCTTTAATCAGTGTGTAACATTA +GGGAGGACTTGAAAGAGCCACCACATTTTCACC--------------------------T +ACAGTGAACAATGCTAGGGAGAGCTGCCTATATGGAAGAGCCCTAATGTGTAAAATTAAT +TTTAGTAGTGCTATCCCCATGTGATTTTAAT----------------------------- +----------------------- diff --git a/filtlong/0.2.1/Dockerfile b/filtlong/0.2.1/Dockerfile index 3cc423ff0..fa40d3b3a 100644 --- a/filtlong/0.2.1/Dockerfile +++ b/filtlong/0.2.1/Dockerfile @@ -1,22 +1,27 @@ -FROM ubuntu:xenial +ARG FILTLONG_VER="0.2.1" +# setting as global variable for persistence across all build layers -# for easy upgrade later. ARG variables only persist during image build time -ARG FILTLONG_VER=0.2.1 +FROM ubuntu:jammy as app -LABEL base.image="ubuntu:xenial" -LABEL dockerfile.version="2" +# re-instantiating for the app build layer if using ARG as a global variable above +ARG FILTLONG_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="3" LABEL software="Filtlong" -LABEL software.version="0.2.1" -LABEL description="Filter long reads by quality and length. Can use ILMN reads as reference." +LABEL software.version="${FILTLONG_VER}" +LABEL description="Filters long reads by quality and length. Can use Illumina reads as reference." LABEL website="https://github.com/rrwick/Filtlong" LABEL license="https://github.com/rrwick/Filtlong/blob/master/LICENSE" LABEL maintainer="Curtis Kapsak" LABEL maintainer.email="kapsakcj@gmail.com" -LABEL maintainer="Thomas A. Christensen II" -LABEL maintainer.email="25492070+MillironX@users.noreply.github.com" +LABEL maintainer2="Thomas A. 
Christensen II" +LABEL maintainer2.email="25492070+MillironX@users.noreply.github.com" +LABEL maintainer3="Stephen Beckstrom-Sternberg" +LABEL maintainer3.email="stephen.beckstrom-sternberg@azdhs.gov" -# install deps; cleanup apt garbage -RUN apt-get update && apt-get install -y \ +# Install dependencies and cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ g++ \ zlib1g-dev \ @@ -24,21 +29,42 @@ RUN apt-get update && apt-get install -y \ python3 \ pigz \ less \ - bc && \ + bc \ + gzip \ + ca-certificates \ + procps && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 10 && \ apt-get autoclean && rm -rf /var/lib/apt/lists/* -# install filtlong; make /data +# installing filtlong RUN wget https://github.com/rrwick/Filtlong/archive/v${FILTLONG_VER}.tar.gz && \ tar -xzf v${FILTLONG_VER}.tar.gz && \ rm -r v${FILTLONG_VER}.tar.gz && \ - cd Filtlong-${FILTLONG_VER}/ && \ + cd /Filtlong-${FILTLONG_VER}/ && \ make -j && \ sed -i 's/\$hist/\$hist --dot "*"/g' scripts/read_info_histograms.sh && \ mkdir /data -# required for singularity compatibility; update PATH -ENV LC_ALL=C \ - PATH="${PATH}:/Filtlong-${FILTLONG_VER}/bin:/Filtlong-${FILTLONG_VER}/scripts" +ENV PATH="$PATH:/Filtlong-$FILTLONG_VER/bin:/Filtlong-$FILTLONG_VER/scripts:/Filtlong-$FILTLONG_VER/test" LC_ALL=C +# set workdir to default for building; set to /data at the end WORKDIR /data + +# new base for testing +FROM app as test + +ARG FILTLONG_VER + +RUN filtlong -h && filtlong --version + +# unittest +RUN cd /Filtlong-${FILTLONG_VER} && python3 -m unittest -h + +WORKDIR /test +RUN wget --no-verbose "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR190/056/SRR19008456/SRR19008456_1.fastq.gz" && \ + wget --no-verbose "ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR190/056/SRR19008456/SRR19008456_2.fastq.gz" && \ + read_info_histograms.sh /data/*fastq.gz + +RUN echo "Run filtlong with options on a pair of long reads in /data dir: " && \ + filtlong --min_length 250 --min_mean_q 40 SRR19008456_1.fastq.gz|gzip > SRR19008456_1_filt.fastq.gz && \ + filtlong --min_length 250 --min_mean_q 40 SRR19008456_2.fastq.gz|gzip > SRR19008456_2_filt.fastq.gz diff --git a/filtlong/0.2.1/README.md b/filtlong/0.2.1/README.md new file mode 100644 index 000000000..721f064ad --- /dev/null +++ b/filtlong/0.2.1/README.md @@ -0,0 +1,16 @@ +# filtlong container + +Main tool : [filtlong](https://github.com/rrwick/Filtlong) + +Additional tools: +- python 3.10.6 + +Full documentation: [https://github.com/rrwick/Filtlong](https://github.com/rrwick/Filtlong) + +Filtlong filters long-read fastq files by quality & read lengths. 
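+
+Filtlong can also score long reads against an Illumina read set supplied with `-1`/`-2` (the "can use Illumina reads as reference" mode mentioned in the Dockerfile labels). A minimal sketch, with placeholder file names:
+
+```bash
+# score long reads against paired Illumina reads, then keep the best 90% of bases
+filtlong -1 illumina_R1.fastq.gz -2 illumina_R2.fastq.gz \
+  --min_length 1000 --keep_percent 90 \
+  nanopore.fastq.gz | gzip > nanopore_filt.fastq.gz
+```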
+ +## Example Usage + +```bash +filtlong --min_length 1000 --keep_percent 90 --target_bases 500000000 input.fastq.gz | gzip > output.fastq.gz +``` diff --git a/flye/2.9.2/Dockerfile b/flye/2.9.2/Dockerfile new file mode 100644 index 000000000..4d63af19d --- /dev/null +++ b/flye/2.9.2/Dockerfile @@ -0,0 +1,69 @@ +# for easy upgrade later +ARG FLYE_VER="2.9.2" + +FROM ubuntu:focal as app + +# have to re-instantiate variable within app stage +ARG FLYE_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Flye" +LABEL software.version="${FLYE_VER}" +LABEL description="Fast and accurate de novo assembler for single molecule sequencing reads" +LABEL website="https://github.com/fenderglass/Flye" +LABEL license="https://github.com/fenderglass/Flye/blob/flye/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +RUN apt-get update && apt-get install --no-install-recommends -y \ + make \ + gcc \ + g++ \ + python3 \ + python3-pip \ + python3-setuptools \ + zlib1g-dev \ + wget \ + ca-certificates \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# install flye +RUN wget https://github.com/fenderglass/Flye/archive/${FLYE_VER}.tar.gz && \ + tar -xf ${FLYE_VER}.tar.gz && \ + rm -v ${FLYE_VER}.tar.gz && \ + cd Flye-${FLYE_VER} && \ + python3 setup.py build && \ + python3 setup.py install && \ + mkdir /data + +# set final working directory +WORKDIR /data + +# for singularity compatibility +ENV LC_ALL=C + +#### begin the test stage #### +FROM app as test + +# have to re-instantiate variable within app stage +ARG FLYE_VER + +# run flye toy dataset test +RUN python3 /Flye-${FLYE_VER}/flye/tests/test_toy.py + +# download test data (ONT and ILMN FASTQs) and run through flye +RUN echo "downloading ONT test data from bactopia/bactopia-tests on GitHub..." && \ + wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz && \ + flye --nano-raw ERR3772599.fastq.gz -o flye-test-portiera -t 2 && \ + flye --version + +# the following takes too long, but this would be the test I would recommend if testing this locally. +# from https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md +# RUN wget https://zenodo.org/record/1172816/files/E.coli_PacBio_40x.fasta && \ +# flye --pacbio-raw E.coli_PacBio_40x.fasta --out-dir out_pacbio --threads 4 && \ +# wget https://zenodo.org/record/1172816/files/Loman_E.coli_MAP006-1_2D_50x.fasta && \ +# flye --nano-raw Loman_E.coli_MAP006-1_2D_50x.fasta --out-dir out_nano --threads 4 diff --git a/flye/2.9.2/README.md b/flye/2.9.2/README.md new file mode 100644 index 000000000..272411118 --- /dev/null +++ b/flye/2.9.2/README.md @@ -0,0 +1,19 @@ +# Flye container + +Main tool : [flye](https://github.com/fenderglass/Flye) 2.9.2 + +Full documentation: [https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md](https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md) + +> Flye is a de novo assembler for single-molecule sequencing reads, such as those produced by PacBio and Oxford Nanopore Technologies. 
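+
+The Dockerfile's test stage runs Flye on a small Nanopore read set from the bactopia-tests repository; a minimal sketch of reproducing that check by hand (output directory and thread count are illustrative):
+
+```bash
+# download the portiera ONT test reads used in the test stage and assemble them
+wget https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz
+flye --nano-raw ERR3772599.fastq.gz --out-dir flye-test-portiera --threads 2
+```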
+ +Additional included tools: + +- [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 +- python 3.8.10 + +## Example Usage + +```bash +# sample.fastq.gz is compressed with bgzip +flye --nano-raw sample.fastq.gz --threads 12 --out-dir flye/sample +``` diff --git a/freyja/1.3.12/Dockerfile b/freyja/1.3.12/Dockerfile new file mode 100644 index 000000000..32035e7bf --- /dev/null +++ b/freyja/1.3.12/Dockerfile @@ -0,0 +1,66 @@ +FROM mambaorg/micromamba:0.24.0 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.3.12" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:0.24.0" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# set working directory to /data +WORKDIR /data + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true -O /data/Freyja_WWSC2.bam && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv --depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv diff --git a/freyja/1.3.12/README.md b/freyja/1.3.12/README.md new file mode 100644 index 000000000..bb16ff3d8 --- /dev/null +++ b/freyja/1.3.12/README.md @@ -0,0 +1,16 @@ +# freyja container + +Main tool : 
[freyja](https://github.com/andersen-lab/Freyja) + +Full documentation: https://github.com/andersen-lab/Freyja/blob/main/README.md + +Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +# Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` \ No newline at end of file diff --git a/freyja/1.4.2/Dockerfile b/freyja/1.4.2/Dockerfile new file mode 100644 index 000000000..16df90b53 --- /dev/null +++ b/freyja/1.4.2/Dockerfile @@ -0,0 +1,70 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.4.2" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# set working directory to /data +WORKDIR /data + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true -O /data/Freyja_WWSC2.bam && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv 
--depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv + +# test new Freyja demix version option +RUN freyja demix --version && \ +freyja --version \ No newline at end of file diff --git a/freyja/1.4.2/README.md b/freyja/1.4.2/README.md new file mode 100644 index 000000000..84308fb49 --- /dev/null +++ b/freyja/1.4.2/README.md @@ -0,0 +1,14 @@ +# freyja container + +Main tool & documentation: [freyja](https://github.com/andersen-lab/Freyja) + +Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +## Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` diff --git a/freyja/1.4.2_update/Dockerfile b/freyja/1.4.2_update/Dockerfile new file mode 100644 index 000000000..b92a3a73b --- /dev/null +++ b/freyja/1.4.2_update/Dockerfile @@ -0,0 +1,71 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.4.2" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +RUN freyja update + +# set working directory to /data +WORKDIR /data + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true -O 
/data/Freyja_WWSC2.bam && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv --depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv + +# test new Freyja demix version option +RUN freyja demix --version && freyja --version \ No newline at end of file diff --git a/freyja/1.4.2_update/README.md b/freyja/1.4.2_update/README.md new file mode 100644 index 000000000..5b97378d9 --- /dev/null +++ b/freyja/1.4.2_update/README.md @@ -0,0 +1,16 @@ +# freyja container + +Main tool & documentation: [freyja](https://github.com/andersen-lab/Freyja) + +Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +## Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` + +Warning: `freyja update` does not work under all conditions. 
\ No newline at end of file diff --git a/freyja/1.4.3/Dockerfile b/freyja/1.4.3/Dockerfile new file mode 100644 index 000000000..8087604e9 --- /dev/null +++ b/freyja/1.4.3/Dockerfile @@ -0,0 +1,74 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.4.3" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +RUN freyja update + +# set working directory to /data +WORKDIR /data + +# default command is to pull up help options +CMD [ "freyja", "--help" ] + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true -O /data/Freyja_WWSC2.bam && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv --depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv + +# test new Freyja demix version option +RUN freyja demix --version && freyja --version diff --git a/freyja/1.4.3/README.md b/freyja/1.4.3/README.md new file mode 100644 index 000000000..1cc676daf --- /dev/null +++ b/freyja/1.4.3/README.md @@ -0,0 +1,16 @@ +# freyja container + +Main tool & documentation: [freyja](https://github.com/andersen-lab/Freyja) + +Freyja is a tool to recover 
relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +## Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` + +Warning: `freyja update` does not work under all conditions. diff --git a/freyja/1.4.4/Dockerfile b/freyja/1.4.4/Dockerfile new file mode 100644 index 000000000..43b07d0e8 --- /dev/null +++ b/freyja/1.4.4/Dockerfile @@ -0,0 +1,76 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.4.4" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# update barcodes +# NOTE: this will download the latest version of the `freyja/data/usher_barcodes.csv` file from GitHub +RUN freyja update + +# set working directory to /data +WORKDIR /data + +# default command is to pull up help options +CMD [ "freyja", "--help" ] + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja 
variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv --depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv + +# print barcode version and freyja version +RUN freyja demix --version && freyja --version diff --git a/freyja/1.4.4/README.md b/freyja/1.4.4/README.md new file mode 100644 index 000000000..8bb48ce63 --- /dev/null +++ b/freyja/1.4.4/README.md @@ -0,0 +1,34 @@ +# freyja container + +Main tool & documentation: [freyja](https://github.com/andersen-lab/Freyja) + +Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +## Additional tools + +- biopython 1.81 +- ivar 1.4.2 +- mafft 7.520 +- matplotlib-base 3.7.1 +- pandas 2.0.2 +- samtools 1.17 +- scipy 1.10.1 +- seaborn 0.12.2 +- ucsc-fatovcf 426 +- usher 0.6.2 + +## freyja barcodes + +This docker image was built on **2023-06-22** and the command `freyja update` is run as part of the build to retrieve the most up-to-date barcode file `freyja/data/usher_barcodes.csv` from Freyja's GitHub repo. The barcode version included in this docker image is **`06_22_2023-00-14`** as reported by `freyja demix --version`. + +## Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] + +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` + +Warning: `freyja update` does not work under all conditions. You may need to specify an output directory (`freyja update --outdir /path/to/outdir`) for which your user has write privileges, such as a mounted volume.
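+
+A minimal sketch of working around that limitation at container runtime (paths are placeholders; `--barcodes` assumes your Freyja release exposes that option on `demix`):
+
+```bash
+# refresh barcodes into a writable, mounted directory instead of the conda env
+freyja update --outdir /data/freyja_refs
+
+# point demix at the freshly downloaded barcode file
+freyja demix test_variants.tsv test_depths.tsv \
+  --barcodes /data/freyja_refs/usher_barcodes.csv \
+  --output test_demix.tsv
+```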
diff --git a/freyja/1.4.5/Dockerfile b/freyja/1.4.5/Dockerfile new file mode 100644 index 000000000..13ee91f51 --- /dev/null +++ b/freyja/1.4.5/Dockerfile @@ -0,0 +1,76 @@ +FROM mambaorg/micromamba:1.4.9 as app + +# Version arguments +# ARG variables only persist during build time +ARG FREYJA_SOFTWARE_VERSION="1.4.5" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.4.9" +LABEL dockerfile.version="1" +LABEL software="Freyja" +LABEL software.version=${FREYJA_SOFTWARE_VERSION} +LABEL description="Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference)" +LABEL website="https://github.com/andersen-lab/Freyja" +LABEL license="https://github.com/andersen-lab/Freyja/blob/main/LICENSE" +LABEL maintainer="Kevin Libuit" +LABEL maintainer.email="kevin.libuit@theiagen.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create Freyja conda environment called freyja-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n freyja-env -c conda-forge -c bioconda -c defaults freyja=${FREYJA_SOFTWARE_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/freyja-env/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# update barcodes +# NOTE: this will download the latest version of the `freyja/data/usher_barcodes.csv` file from GitHub +RUN freyja update + +# set working directory to /data +WORKDIR /data + +# default command is to pull up help options +CMD [ "freyja", "--help" ] + +# new base for testing +FROM app as test + +# Grab test data from Freyja version 1.3.4 +RUN wget -O /data/Freyja_WWSC2.bam https://github.com/StaPH-B/docker-builds/blob/master/freyja/1.3.4/tests/Freyja_WWSC2.bam?raw=true && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_depths.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/Freyja_variants.tsv && \ + wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/freyja/1.3.4/tests/nCoV-2019.reference.fasta + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="freyja-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Run Freyja +RUN freyja variants /data/Freyja_WWSC2.bam --variants /data/test_variants.tsv --depths /data/test_depths.tsv --ref /data/nCoV-2019.reference.fasta && \ + freyja demix /data/test_variants.tsv /data/test_depths.tsv --output /data/test_demix.tsv + +# Check validity of outputs +RUN head /data/test_variants.tsv && \ + head /data/test_depths.tsv && \ + head /data/test_demix.tsv && \ + grep "Omicron" /data/test_demix.tsv + +# print barcode version and freyja version +RUN freyja demix --version && freyja --version diff --git a/freyja/1.4.5/README.md b/freyja/1.4.5/README.md new file mode 100644 index 000000000..0170f4236 --- /dev/null +++ b/freyja/1.4.5/README.md @@ -0,0 +1,34 @@ +# freyja container + +Main tool & documentation: 
[freyja](https://github.com/andersen-lab/Freyja) + +Freyja is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the UShER global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +## Additional tools + +- biopython 1.81 +- ivar 1.4.2 +- mafft 7.520 +- matplotlib-base 3.7.2 +- pandas 2.0.3 +- samtools 1.17 +- scipy 1.11.1 +- seaborn 0.12.2 +- ucsc-fatovcf 448 +- usher 0.6.2 + +## freyja barcodes + +This docker image was built on **2023-08-17** and the command `freyja update` is run as part of the build to retrieve the most up-to-date barcode file `freyja/data/usher_barcodes.csv` from Freyja's GitHub repo. The barcode version included in this docker image is **`08_17_2023-00-15`** as reported by `freyja demix --version`. + +## Example Usage + +```bash +# run freyja variants to call variants from an aligned SC2 bam file +freyja variants [bamfile] --variants [variant outfile name] --depths [depths outfile name] --ref [reference.fa] + +# run freyja demix to identify lineages based on called variants +freyja demix [variants-file] [depth-file] --output [output-file] +``` + +Warning: `freyja update` does not work under all conditions. You may need to specify an output directory (`freyja update --outdir /path/to/outdir`) for which your user has write privileges, such as a mounted volume. diff --git a/gambit/1.0.0/Dockerfile b/gambit/1.0.0/Dockerfile new file mode 100644 index 000000000..a17914b83 --- /dev/null +++ b/gambit/1.0.0/Dockerfile @@ -0,0 +1,63 @@ +# Software installation, no database files +FROM mambaorg/micromamba:0.27.0 as app_base + +ARG GAMBIT_SOFTWARE_VERSION="1.0.0" +ARG GAMBIT_GIT_TAG=v${GAMBIT_SOFTWARE_VERSION} +ARG GAMBIT_SRC_URL=https://github.com/jlumpe/gambit/archive/refs/tags/${GAMBIT_GIT_TAG}.tar.gz + +LABEL base.image="mambaorg/micromamba:0.27.0" +LABEL dockerfile.version="1" +LABEL software="GAMBIT" +LABEL software.version=${GAMBIT_SOFTWARE_VERSION} +LABEL description="Rapid genomic-distance comparison for taxonomic identification of microbial pathogens" +LABEL website="https://github.com/jlumpe/gambit" +LABEL license="https://github.com/jlumpe/gambit/blob/master/LICENSE" +LABEL maintainer1="Kevin Libuit" +LABEL maintainer.email1="kevin.libuit@theiagen.com" +LABEL maintainer2="Michelle Scribner" +LABEL maintainer.email2="michelle.scribner@theiagen.com" +LABEL maintainer3="Jared Lumpe" +LABEL maintainer.email3="jared@jaredlumpe.com" + +# Environment +ENV GAMBIT_DB_PATH=/gambit-db +ENV LC_ALL=C.UTF-8 + +# Install mamba environment +COPY --chown=$MAMBA_USER:$MAMBA_USER env.yaml /tmp/env.yaml +RUN micromamba install -y -n base -f /tmp/env.yaml && \ + micromamba clean --all --yes + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 # Subsequent RUN commands use environment + +# Install GAMBIT package +RUN pip install ${GAMBIT_SRC_URL} && \ + micromamba clean -a -y + +USER root +RUN mkdir $GAMBIT_DB_PATH /data && \ + chown $MAMBA_USER:$MAMBA_USER $GAMBIT_DB_PATH /data +USER $MAMBA_USER +WORKDIR /data + +# Make sure conda, python, and GAMBIT are in the path +ENV PATH="/opt/conda/bin:${PATH}" + + +# With database files added +FROM app_base AS app + +ARG GAMBIT_DB_BASE_URL=https://storage.googleapis.com/jlumpe-gambit/public/databases/refseq-curated/1.0 +ARG GAMBIT_DB_GENOMES_URL=$GAMBIT_DB_BASE_URL/gambit-refseq-curated-1.0.gdb +ARG
GAMBIT_DB_SIGNATURES_URL=$GAMBIT_DB_BASE_URL/gambit-refseq-curated-1.0.gs + +ADD --chown=$MAMBA_USER:$MAMBA_USER $GAMBIT_DB_GENOMES_URL $GAMBIT_DB_PATH/ +ADD --chown=$MAMBA_USER:$MAMBA_USER $GAMBIT_DB_SIGNATURES_URL $GAMBIT_DB_PATH/ + + +# Run test +FROM app as test + +COPY test.sh . +RUN bash test.sh + diff --git a/gambit/1.0.0/README.md b/gambit/1.0.0/README.md new file mode 100644 index 000000000..7bd2a8934 --- /dev/null +++ b/gambit/1.0.0/README.md @@ -0,0 +1,14 @@ +# gambit container + +Main tool : [gambit](github.com/jlumpe/gambit) + +Full documentation: https://gambit-genomics.readthedocs.io/en/latest/ + +GAMBIT (Genomic Approximation Method for Bacterial Identification and Tracking) is a tool for rapid taxonomic identification of microbial pathogens. It uses an extremely efficient genomic distance metric along with a curated database of approximately 50,000 reference genomes (derived from NCBI RefSeq) to identify unknown bacterial genomes within seconds. + +## Example Usage + +```bash +# run gambit on assembly +gambit query GCF_000240185.1_ASM24018v2_genomic.fna +``` diff --git a/gambit/1.0.0/env.yaml b/gambit/1.0.0/env.yaml new file mode 100644 index 000000000..a275821bf --- /dev/null +++ b/gambit/1.0.0/env.yaml @@ -0,0 +1,28 @@ +name: base +channels: + - conda-forge +dependencies: + - python ==3.9 + - pip + + # Build requirements + - c-compiler + - cython >=0.29 + - numpy >=1.13 + + # Runtime requirements + - sqlalchemy >=1.1 + - biopython >=1.69 + - alembic >=1.0 + - attrs >=20 + - cattrs >=1.0 + - click >=7.0 + - h5py >=3.0 + - setuptools + - scipy >=1.7 + + # For test + - wget + + # For scripts that parse output + - pandas >=1.4 diff --git a/gambit/1.0.0/test.sh b/gambit/1.0.0/test.sh new file mode 100755 index 000000000..2db96b4f3 --- /dev/null +++ b/gambit/1.0.0/test.sh @@ -0,0 +1,12 @@ +#!/bin/bash + +set -euo pipefail + +FASTA=GCF_000240185.1_ASM24018v2_genomic.fna.gz +FASTA_URL=https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/240/185/GCF_000240185.1_ASM24018v2/$FASTA + +gambit --version + +wget $FASTA_URL +gambit query $FASTA -o test_output.csv +grep "Klebsiella pneumoniae" test_output.csv diff --git a/gfastats/1.3.6/Dockerfile b/gfastats/1.3.6/Dockerfile new file mode 100644 index 000000000..63e9d3851 --- /dev/null +++ b/gfastats/1.3.6/Dockerfile @@ -0,0 +1,44 @@ +FROM ubuntu:jammy as app + +ARG GFASTATS_VER=1.3.6 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="gfastats" +LABEL software.version=${GFASTATS_VER} +LABEL description="The swiss army knife for genome assembly." 
+LABEL website="https://github.com/vgl-hub/gfastats" +LABEL license="https://github.com/vgl-hub/gfastats/blob/main/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + procps \ + wget \ + make \ + g++ \ + zlib1g-dev && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN wget -q https://github.com/vgl-hub/gfastats/releases/download/v${GFASTATS_VER}/gfastats.v${GFASTATS_VER}.tar.gz && \ + tar -xf gfastats.v${GFASTATS_VER}.tar.gz && \ + rm gfastats.v${GFASTATS_VER}.tar.gz && \ + cd /gfastats && \ + make -j + +ENV PATH="/gfastats/build/bin:${PATH}" \ + LC_ALL=C + +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN gfastats --help && gfastats --version + +RUN gfastats /gfastats/testFiles/random1.fasta -o gfa && \ + gfastats /gfastats/testFiles/random2.gfa2 && \ + gfastats /gfastats/testFiles/random1.fasta -k /gfastats/testFiles/random1.instructions.sak -o gfa && \ + gfastats --tabular --seq-report /gfastats/testFiles/random6.circular.gfa \ No newline at end of file diff --git a/gfastats/1.3.6/README.md b/gfastats/1.3.6/README.md new file mode 100644 index 000000000..3d9025fd7 --- /dev/null +++ b/gfastats/1.3.6/README.md @@ -0,0 +1,18 @@ +# gfastats container + +Main tool : [gfastats](https://github.com/vgl-hub/gfastats) + +Full documentation: [https://github.com/vgl-hub/gfastats](https://github.com/vgl-hub/gfastats) + +> The swiss army knife for genome assembly. +> gfastats is a single fast and exhaustive tool for summary statistics and simultaneous *fa* (fasta, fastq, gfa [.gz]) genome assembly file manipulation. gfastats also allows seamless fasta<>fastq<>gfa[.gz] conversion. + +## Example Usage + +```bash +# from readme +gfastats input.[fasta|fastq|gfa][.gz] [expected genome size] [header[:start-end]] + +# sample usage +gfastats input.gfa --threads 4 --tabular --seq-report +``` \ No newline at end of file diff --git a/hmmer/3.3.2/Dockerfile b/hmmer/3.3.2/Dockerfile new file mode 100644 index 000000000..9e5c68ab5 --- /dev/null +++ b/hmmer/3.3.2/Dockerfile @@ -0,0 +1,57 @@ +# base image: Ubuntu +FROM ubuntu:jammy as app + +# File Author / Maintainer +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="hmmer" +LABEL software.version="3.3.2" +LABEL description="This software provides HMMer for hmmsearch etc.!" +LABEL website="http://hmmer.org/" +LABEL license="http://eddylab.org/software/hmmer/Userguide.pdf" +LABEL maintainer="Neranjan Perera" +LABEL maintainer.email="neranjan007@gmail.com" + + +ARG HMMERVERSION="3.3.2" + + +RUN apt-get update --fix-missing \ + && apt-get install -y wget make gcc \ + && wget http://eddylab.org/software/hmmer/hmmer-${HMMERVERSION}.tar.gz \ + && tar -xzvf hmmer-${HMMERVERSION}.tar.gz \ + && rm -f hmmer-${HMMERVERSION}.tar.gz \ + && cd hmmer-${HMMERVERSION}/ \ + && ./configure \ + && make \ + && make install \ + && apt-get remove -y wget make gcc \ + && apt-get autoremove -y \ + && apt-get autoclean \ + && rm -rf /var/lib/apt/lists/* \ + && mkdir data + +# set environmental variables e.g. 
$PATH and locale settings for singularity compatibility +ENV PATH="$PATH:/hmmer-${HMMERVERSION}/src" \ + LC_ALL=C + +# set working directory +WORKDIR /data + +# default command is to print help options +CMD [ "hmmscan", "-h" ] + +# test layer +FROM app as test + +RUN apt-get update && \ + apt-get install make gcc perl python3 -y + + +# print version and run help flag +RUN hmmscan -h && hmmbuild -h + +WORKDIR /hmmer-${HMMERVERSION} + +# compile and run a test suite +RUN make check \ No newline at end of file diff --git a/hmmer/3.3.2/Readme.md b/hmmer/3.3.2/Readme.md new file mode 100644 index 000000000..c453bd248 --- /dev/null +++ b/hmmer/3.3.2/Readme.md @@ -0,0 +1,21 @@ +# HMMER container + +Main tool : [HMMER3](http://hmmer.org/) + +HMMER: biosequence analysis using profile hidden Markov models +(http://eddylab.org/software/hmmer) + +## Example commands + +```bash +# Test hmmscan with help options +docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/hmmer:3.3.2 hmmscan -h +``` + + +```bash +# Test hmmsearch with help options +docker run --rm -u $(id -u):$(id -g) -v ${PWD}:/data staphb/hmmer:3.3.2 hmmsearch -h +``` + + diff --git a/homopolish/0.4.1/Dockerfile b/homopolish/0.4.1/Dockerfile new file mode 100644 index 000000000..c85b2cd92 --- /dev/null +++ b/homopolish/0.4.1/Dockerfile @@ -0,0 +1,54 @@ +FROM mambaorg/micromamba:1.3.0 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + + +# ARG sets environment variables during the build stage +ARG HOMOPOLISH_VER="0.4.1" + + +LABEL base.image="mambaorg/micromamba:1.3.0" +LABEL dockerfile.version="1" +LABEL software="homopolish" +LABEL software.version=${HOMOPOLISH_VER} +LABEL description="Genome polisher developed for Nanopore and extended for PacBioCLR" +LABEL website="https://github.com/ythuang0522/homopolish" +LABEL license="https://github.com/ythuang0522/homopolish/blob/master/LICENSE" +LABEL maintainer="Shelby Bennett" +LABEL maintainer.email="shelby.bennett@dgs.virginia.com" + +# RUN executes code during the build +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get -y --no-install-recommends install \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# create conda environment called homopolish from the bioconda recipe; clean up conda garbage +RUN micromamba create -n homopolish -c conda-forge -c bioconda -c defaults homopolish=${HOMOPOLISH_VER} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default +ENV PATH="/opt/conda/envs/homopolish/bin:/opt/conda/envs/env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# set working directory to /data +WORKDIR /data + +# create test layer to ensure program was successfully installed +FROM app as test + +# ensure mamba/conda environment is activated during the test + +ENV ENV_NAME="homopolish" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Grab test S.enterica genome from NCBI and run homopolish +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/006/945/GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz && \ +gzip -d GCF_000006945.2_ASM694v2_genomic.fna.gz && \ +homopolish polish -a GCF_000006945.2_ASM694v2_genomic.fna -g Salmonella_enterica -m R9.4.pkl -o test1 && \ +cat test1/*.fasta diff --git a/homopolish/0.4.1/README.md b/homopolish/0.4.1/README.md new file mode 100644 index 000000000..159b75067 --- /dev/null +++
b/homopolish/0.4.1/README.md @@ -0,0 +1,15 @@ +# homopolish container + +Main tool: [homopolish](https://github.com/ythuang0522/homopolish) + +Full documentation: https://github.com/ythuang0522/homopolish/blob/master/README.md + +Homopolish is a genome polisher originally developed for Nanopore and subsequently extended for PacBio CLR. It generates a high-quality genome (>Q50) for viruses, bacteria, and fungi. Systematic Nanopore/PacBio errors are corrected by retrieving homologs from closely related genomes and polishing with an SVM. When paired with Racon and Medaka, the genome quality can reach Q50-90 (>99.999%) on Nanopore R9.4/10.3 flowcells (Guppy >3.4). For PacBio CLR, Homopolish also improves the majority of Flye-assembled genomes to Q90 (see Accuracy). + +## Example Usage + +``` bash +homopolish polish -a yourgenome.fasta -s bacteria.msh -m R9.4.pkl -o youroutput + +homopolish polish -a yourgenome.fasta -l path_to_your_genomes.fasta -m R9.4.pkl -o youroutput +``` diff --git a/htslib/1.17/Dockerfile b/htslib/1.17/Dockerfile new file mode 100644 index 000000000..9eac10955 --- /dev/null +++ b/htslib/1.17/Dockerfile @@ -0,0 +1,63 @@ +# for easy upgrade later. ARG variables only persist during build time +ARG HTSLIBVER="1.17" + +FROM ubuntu:focal as app + +ARG HTSLIBVER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="htslib" +LABEL software.version="${HTSLIBVER}" +LABEL description="A C library for reading/writing high-throughput sequencing data" +LABEL website="https://github.com/samtools/htslib" +LABEL license="https://github.com/samtools/htslib/blob/develop/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install dependencies, cleanup apt garbage +# It's helpful when they're all listed on https://github.com/samtools/htslib/blob/develop/INSTALL +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + ca-certificates \ + make \ + bzip2 \ + autoconf \ + automake \ + make \ + gcc \ + perl \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# get htslib and make /data +RUN wget https://github.com/samtools/htslib/releases/download/${HTSLIBVER}/htslib-${HTSLIBVER}.tar.bz2 && \ + tar -vxjf htslib-${HTSLIBVER}.tar.bz2 && \ + rm htslib-${HTSLIBVER}.tar.bz2 && \ + cd htslib-${HTSLIBVER} && \ + make && \ + make install && \ + mkdir /data + +# set $PATH (not strictly necessary here) and locale settings for singularity compatibility +ENV PATH="$PATH" \ + LC_ALL=C + +# set working directory +WORKDIR /data + +# testing +FROM app as test + +ARG HTSLIBVER + +WORKDIR /htslib-${HTSLIBVER}/test + +RUN perl /htslib-${HTSLIBVER}/test/test.pl diff --git a/htslib/1.17/README.md b/htslib/1.17/README.md new file mode 100644 index 000000000..f276a37aa --- /dev/null +++ b/htslib/1.17/README.md @@ -0,0 +1,26 @@ +# htslib container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/htslib) + +Additional tools: + +* perl 5.30.0 + +## Example Usage + +```bash +# determine file formats for various BAM and SAM files +$ htsfile tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam +tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam: BAM version 1 compressed sequence data + +$ htsfile ce_tag_padded.sam +ce_tag_padded.sam: SAM version 1.4 sequence text + +# compresses sample.fastq to
sample.fastq.gz in BGZF format (blocked GNU Zip Format) +$ bgzip sample.fastq +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) diff --git a/htslib/1.18/Dockerfile b/htslib/1.18/Dockerfile new file mode 100644 index 000000000..59c61ec4b --- /dev/null +++ b/htslib/1.18/Dockerfile @@ -0,0 +1,85 @@ +# for easy upgrade later. ARG variables only persist during build time +ARG HTSLIB_VER="1.18" + +FROM ubuntu:jammy as builder + +ARG HTSLIB_VER + +# install dependencies, cleanup apt garbage +# It's helpful when they're all listed on https://github.com/samtools/htslib/blob/develop/INSTALL +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + ca-certificates \ + make \ + bzip2 \ + autoconf \ + automake \ + make \ + gcc \ + perl \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libssl-dev \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# get htslib, compile, install, run test suite +RUN wget -q https://github.com/samtools/htslib/releases/download/${HTSLIB_VER}/htslib-${HTSLIB_VER}.tar.bz2 && \ + tar -vxjf htslib-${HTSLIB_VER}.tar.bz2 && \ + rm -v htslib-${HTSLIB_VER}.tar.bz2 && \ + cd htslib-${HTSLIB_VER} && \ + make && \ + make install && \ + make test + +### start of app stage ### +FROM ubuntu:jammy as app + +ARG HTSLIB_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="htslib" +LABEL software.version="${HTSLIB_VER}" +LABEL description="A C library for reading/writing high-throughput sequencing data" +LABEL website="https://github.com/samtools/htslib" +LABEL license="https://github.com/samtools/htslib/blob/develop/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install runtime dependencies & cleanup apt garbage +# installed as recommend here: https://github.com/samtools/htslib/blob/develop/INSTALL#L31 +RUN apt-get update && apt-get install --no-install-recommends -y \ + bzip2 \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + ca-certificates \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# copy in htslib executables from builder stage +COPY --from=builder /usr/local/bin/* /usr/local/bin/ + +# set locale settings for singularity compatibility +ENV LC_ALL=C + +# set working directory +WORKDIR /data + +# default command is to show help options +CMD ["htsfile", "--help"] + +### start of test stage ### +FROM app as test + +# check that these three executables are available +RUN bgzip --help && tabix --help && htsfile --help + +# FYI Test suite "make test" now performed in the builder stage since app and +# test stages do not include htslib source code. 
+# This is to avoid having to re-download source code simply to run test suite \ No newline at end of file diff --git a/htslib/1.18/README.md b/htslib/1.18/README.md new file mode 100644 index 000000000..7d03b6333 --- /dev/null +++ b/htslib/1.18/README.md @@ -0,0 +1,26 @@ +# htslib container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/htslib) + +Additional tools: + +* perl 5.34.0 + +## Example Usage + +```bash +# determine file formats for various BAM and SAM files +$ htsfile tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam +tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam: BAM version 1 compressed sequence data + +$ htsfile ce_tag_padded.sam +ce_tag_padded.sam: SAM version 1.4 sequence text + +# compresses sample.fastq to sample.fastq.gz in BGZF format (blocked GNU Zip Format) +$ bgzip sample.fastq +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) diff --git a/iqtree2/2.2.2.2/Dockerfile b/iqtree2/2.2.2.2/Dockerfile new file mode 100644 index 000000000..bb01f44b2 --- /dev/null +++ b/iqtree2/2.2.2.2/Dockerfile @@ -0,0 +1,39 @@ +FROM ubuntu:xenial as app + +# for easy upgrade later. ARG variables only persist during build time. +ARG IQTREE2_VER="2.2.2.2" + +# metadata +LABEL base.image="ubuntu:xenial" +LABEL dockerfile.version="1" +LABEL software="IQ-Tree2" +LABEL software.version="2.2.2.2" +LABEL description="Efficient software for phylogenomic inference" +LABEL website="http://www.iqtree.org/" +LABEL source.code.website="https://github.com/iqtree/iqtree2" +LABEL license="https://github.com/iqtree/iqtree2/blob/master/LICENSE" +LABEL maintainer="Jade Wang" +LABEL maintainer.email="jwang7@health.nyc.gov" + +#install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget ca-certificates procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download, uncompress iqtree2 tarball; make /data +RUN wget https://github.com/iqtree/iqtree2/releases/download/v${IQTREE2_VER}/iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + tar -xvf iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + rm -v iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + mkdir /data + +# set PATH and locale settings for singularity compatibility +ENV PATH="/iqtree-${IQTREE2_VER}-Linux/bin:${PATH}"\ + LC_ALL=C +WORKDIR /data + +FROM app as test +###TEST PATH +RUN iqtree2 --version +###TEST TREE TOPOLOGY +RUN iqtree2 -s /iqtree-${IQTREE2_VER}-Linux/example.phy --rate diff --git a/iqtree2/2.2.2.2/README.md b/iqtree2/2.2.2.2/README.md new file mode 100644 index 000000000..49a52b63d --- /dev/null +++ b/iqtree2/2.2.2.2/README.md @@ -0,0 +1,16 @@ +Tools installed in image: wget, iqtree2 version 2.2.2.2 +ENV path set to: /iqtree-2.2.2.2-Linux/bin +Working directory set in /data + +To use this image: + +1) Build docker image +``` +docker build -t : /path/to/iqtree2/Dockerfile +``` +2) Run docker image with iqtree2 commands +``` +docker run -i -t : iqtree2 [OPTIONS] -s FILE[,...,FILE] +``` + +For more details on how to run iqtree2 v2.2.2.2, check out iqtree2 documentation: http://www.iqtree.org/doc/ diff --git a/iqtree2/2.2.2.6/Dockerfile b/iqtree2/2.2.2.6/Dockerfile new file mode 100644 index 000000000..6fb80e0b3 --- /dev/null +++ b/iqtree2/2.2.2.6/Dockerfile @@ -0,0 +1,52 @@ +ARG IQTREE2_VER="2.2.2.6" + +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during build time. 
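+# note: an ARG declared before the first FROM is global to the build but goes out of scope inside each build stage, +# which is why IQTREE2_VER is re-declared (without a value) below and again in the test stage to inherit the default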
+ARG IQTREE2_VER + +# metadata +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="IQ-Tree2" +LABEL software.version="${IQTREE2_VER}" +LABEL description="Efficient software for phylogenomic inference" +LABEL website="http://www.iqtree.org/" +LABEL source.code.website="https://github.com/iqtree/iqtree2" +LABEL license="https://github.com/iqtree/iqtree2/blob/master/LICENSE" +LABEL maintainer="Jade Wang" +LABEL maintainer.email="jwang7@health.nyc.gov" +LABEL maintainer2="Erin Young" +LABEL maintainer2.email="eriny@utah.gov" + +# install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget ca-certificates procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download, uncompress iqtree2 tarball; make /data +RUN wget https://github.com/iqtree/iqtree2/releases/download/v${IQTREE2_VER}/iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + tar -xvf iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + rm -v iqtree-${IQTREE2_VER}-Linux.tar.gz && \ + mkdir /data + +# set PATH and locale settings for singularity compatibility +ENV PATH="/iqtree-${IQTREE2_VER}-Linux/bin:${PATH}"\ + LC_ALL=C + +# final working directory is /data +WORKDIR /data + +# default command is to pull up help options +CMD [ "iqtree2", "--help" ] + +FROM app as test + +ARG IQTREE2_VER + +# print version +RUN iqtree2 --version + +###TEST TREE TOPOLOGY +RUN iqtree2 -s /iqtree-${IQTREE2_VER}-Linux/example.phy --rate diff --git a/iqtree2/2.2.2.6/README.md b/iqtree2/2.2.2.6/README.md new file mode 100644 index 000000000..98f9cd634 --- /dev/null +++ b/iqtree2/2.2.2.6/README.md @@ -0,0 +1,28 @@ +# IQ-TREE2 container + +Main tool : [iqtree2](https://github.com/iqtree/iqtree2) + +Full documentation: [http://www.iqtree.org/doc/](http://www.iqtree.org/doc/) + +> Efficient and versatile phylogenomic software by maximum likelihood + +## Example Usage + +An example phylip file (IQ-TREE2 also supports other file formats such as FASTA, NEXUS, CLUSTALW): + +``` +7 28 +Frog AAATTTGGTCCTGTGATTCAGCAGTGAT +Turtle CTTCCACACCCCAGGACTCAGCAGTGAT +Bird CTACCACACCCCAGGACTCAGCAGTAAT +Human CTACCACACCCCAGGAAACAGCAGTGAT +Cow CTACCACACCCCAGGAAACAGCAGTGAC +Whale CTACCACGCCCCAGGACACAGCAGTGAT +Mouse CTACCACACCCCAGGACTCAGCAGTGAT +``` + +A maximum likelihood tree can be inferred from it with the following command: + +```bash +iqtree2 -s /iqtree-2.2.2.6-Linux/example.phy --redo +``` diff --git a/ivar/1.3.2/Dockerfile b/ivar/1.3.2/Dockerfile new file mode 100644 index 000000000..17cd91c6a --- /dev/null +++ b/ivar/1.3.2/Dockerfile @@ -0,0 +1,101 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during image build +ARG SAMTOOLSVER=1.16 +ARG HTSLIBVER=1.16 +ARG IVARVER=1.3.2 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="iVar" +LABEL software.version=${IVARVER} +LABEL description="Computational package that contains functions broadly useful for viral amplicon-based sequencing." +LABEL website="https://github.com/andersen-lab/ivar" +LABEL license="https://github.com/andersen-lab/ivar/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL modified.from="https://github.com/andersen-lab/ivar/blob/master/Dockerfile" +LABEL original.maintainer="Kathik G" +LABEL original.maintainer.email="gkarthik@scripps.edu" + +# install dependencies, cleanup apt garbage.
+# bedtools version=2.26.0 +# keeping vim and nano for legacy reasons +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + procps \ + autoconf \ + autotools-dev \ + automake \ + zlib1g-dev \ + python3 \ + wget \ + libbz2-dev \ + liblzma-dev \ + libncurses-dev \ + git \ + bedtools \ + python3-pip \ + vim \ + nano && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# installing htslib +RUN wget https://github.com/samtools/htslib/releases/download/${HTSLIBVER}/htslib-${HTSLIBVER}.tar.bz2 && \ + tar xvf htslib-${HTSLIBVER}.tar.bz2 && \ + rm htslib-${HTSLIBVER}.tar.bz2 && \ + cd htslib-${HTSLIBVER}/ && \ + ./configure && \ + make && \ + make install + +# installing samtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# installing iVar; make /data +RUN wget https://github.com/andersen-lab/ivar/archive/v${IVARVER}.tar.gz && \ + tar -xf v${IVARVER}.tar.gz && \ + rm -rf v${IVARVER}.tar.gz && \ + cd ivar-${IVARVER} && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" + +# set /data as working directory +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN ivar version && samtools --version + +# getting files +RUN wget https://github.com/StaPH-B/docker-builds/blob/master/tests/SARS-CoV-2/SRR13957123.sorted.bam?raw=true && mv SRR13957123.sorted.bam?raw=true SRR13957123.sorted.bam && \ + samtools view -s 0.25 -b SRR13957123.sorted.bam | samtools sort -o SRR13957123.subsampled.bam - && samtools index SRR13957123.subsampled.bam && \ + wget https://raw.githubusercontent.com/UPHL-BioNGS/Cecret/master/configs/MN908947.3.fasta?raw=true && mv MN908947.3.fasta?raw=true MN908947.3.fasta && \ + wget https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.primer.bed && \ + ls + +# primer trimming +RUN ivar trim -e -i SRR13957123.subsampled.bam -b nCoV-2019.primer.bed -p SRR13957123.primertrim && \ + samtools sort SRR13957123.primertrim.bam -o SRR13957123.primertrim.sorted.bam + +# creating a consensus - using smaller -d for github actions +RUN samtools mpileup -A -d 40 -B -Q 0 --reference MN908947.3.fasta SRR13957123.primertrim.sorted.bam | \ + ivar consensus -q 20 -t 0.6 -n N -m 20 -p SRR13957123.consensus && \ + wc -c SRR13957123.consensus* + diff --git a/ivar/1.3.2/README.md b/ivar/1.3.2/README.md new file mode 100644 index 000000000..09a281eeb --- /dev/null +++ b/ivar/1.3.2/README.md @@ -0,0 +1,28 @@ +# iVar container + +Main tool : [iVar](https://andersen-lab.github.io/ivar/html/manualpage.html) + +>iVar is a computational package that contains functions broadly useful for viral amplicon-based sequencing + +Additional tools (required): + +* [HTSlib](https://github.com/samtools/htslib) 1.16 +* [samtools](http://www.htslib.org/) 1.16 +* bedtools 2.26.0 +* python 3.10.6 + +## Example Usage + +```bash +ivar trim -e -i {bam} -b {primer bed} -p {sample}.primertrim +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar variants -p {sample}.variants -q 20 -t 0.6 -r {reference.fasta} -g {reference.gff} +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar consensus -t 0.6 -p 
{sample}.consensus -n N +``` diff --git a/ivar/1.4.1/Dockerfile b/ivar/1.4.1/Dockerfile new file mode 100644 index 000000000..a151ce951 --- /dev/null +++ b/ivar/1.4.1/Dockerfile @@ -0,0 +1,98 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during image build +ARG SAMTOOLSVER=1.17 +ARG HTSLIBVER=$SAMTOOLSVER +ARG IVARVER=1.4.1 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="iVar" +LABEL software.version=${IVARVER} +LABEL description="Computational package that contains functions broadly useful for viral amplicon-based sequencing." +LABEL website="https://github.com/andersen-lab/ivar" +LABEL license="https://github.com/andersen-lab/ivar/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL modified.from="https://github.com/andersen-lab/ivar/blob/master/Dockerfile" +LABEL original.maintainer="Kathik G" +LABEL original.maintainer.email="gkarthik@scripps.edu" + +# install dependencies, cleanup apt garbage. +# bedtools version=2.30.0 +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + procps \ + autoconf \ + autotools-dev \ + automake \ + zlib1g-dev \ + python3 \ + wget \ + libbz2-dev \ + liblzma-dev \ + libncurses-dev \ + git \ + bedtools \ + python3-pip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# installing htslib +RUN wget -q https://github.com/samtools/htslib/releases/download/${HTSLIBVER}/htslib-${HTSLIBVER}.tar.bz2 && \ + tar xvf htslib-${HTSLIBVER}.tar.bz2 && \ + rm htslib-${HTSLIBVER}.tar.bz2 && \ + cd htslib-${HTSLIBVER}/ && \ + ./configure && \ + make && \ + make install + +# installing samtools +RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# installing iVar; make /data +RUN wget -q https://github.com/andersen-lab/ivar/archive/v${IVARVER}.tar.gz && \ + tar -xf v${IVARVER}.tar.gz && \ + rm -rf v${IVARVER}.tar.gz && \ + cd ivar-${IVARVER} && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" + +# set /data as working directory +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN ivar version && samtools --version + +# getting files +RUN wget -q https://github.com/StaPH-B/docker-builds/blob/master/tests/SARS-CoV-2/SRR13957123.sorted.bam?raw=true -O SRR13957123.sorted.bam && \ + samtools view -s 0.25 -b SRR13957123.sorted.bam | samtools sort -o SRR13957123.subsampled.bam - && samtools index SRR13957123.subsampled.bam && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.reference.fasta -O MN908947.3.fasta && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.primer.bed && \ + wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_1.fastq.gz?raw=true -O SRR13957123_1.fastq.gz && \ + wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_2.fastq.gz?raw=true -O SRR13957123_2.fastq.gz + +# primer trimming +RUN ivar 
trim -e -i SRR13957123.subsampled.bam -b nCoV-2019.primer.bed -p SRR13957123.primertrim && \ + samtools sort SRR13957123.primertrim.bam -o SRR13957123.primertrim.sorted.bam + +# creating a consensus - using smaller -d for github actions +RUN samtools mpileup -A -d 40 -B -Q 0 --reference MN908947.3.fasta SRR13957123.primertrim.sorted.bam | \ + ivar consensus -q 20 -t 0.6 -n N -m 20 -p SRR13957123.consensus && \ + wc -c SRR13957123.consensus* diff --git a/ivar/1.4.1/README.md b/ivar/1.4.1/README.md new file mode 100644 index 000000000..8cfc1e989 --- /dev/null +++ b/ivar/1.4.1/README.md @@ -0,0 +1,28 @@ +# iVar container + +Main tool : [iVar](https://andersen-lab.github.io/ivar/html/manualpage.html) + +> iVar is a computational package that contains functions broadly useful for viral amplicon-based sequencing + +Additional tools (required): + +* [HTSlib](https://github.com/samtools/htslib) 1.17 +* [samtools](http://www.htslib.org/) 1.17 +* [bedtools](https://bedtools.readthedocs.io/en/latest/) 2.30.0 +* python 3.10.6 + +## Example Usage + +```bash +ivar trim -e -i {bam} -b {primer bed} -p {sample}.primertrim +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar variants -p {sample}.variants -q 20 -t 0.6 -r {reference.fasta} -g {reference.gff} +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar consensus -t 0.6 -p {sample}.consensus -n N +``` diff --git a/ivar/1.4.2/Dockerfile b/ivar/1.4.2/Dockerfile new file mode 100644 index 000000000..09522bb67 --- /dev/null +++ b/ivar/1.4.2/Dockerfile @@ -0,0 +1,109 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during image build +ARG SAMTOOLSVER=1.17 +ARG HTSLIBVER=$SAMTOOLSVER +ARG IVARVER=1.4.2 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="iVar" +LABEL software.version=${IVARVER} +LABEL description="Computational package that contains functions broadly useful for viral amplicon-based sequencing." +LABEL website="https://github.com/andersen-lab/ivar" +LABEL license="https://github.com/andersen-lab/ivar/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL modified.from="https://github.com/andersen-lab/ivar/blob/master/Dockerfile" +LABEL original.maintainer="Kathik G" +LABEL original.maintainer.email="gkarthik@scripps.edu" + +# install dependencies, cleanup apt garbage. 
+# bedtools version=2.30.0 +# bwa version=0.7.17-6 +# minimap2 version=2.24+dfsg-2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + procps \ + autoconf \ + autotools-dev \ + automake \ + zlib1g-dev \ + python3 \ + wget \ + libbz2-dev \ + liblzma-dev \ + libncurses-dev \ + git \ + bedtools \ + python3-pip \ + bwa \ + minimap2 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# installing htslib +RUN wget -q https://github.com/samtools/htslib/releases/download/${HTSLIBVER}/htslib-${HTSLIBVER}.tar.bz2 && \ + tar xvf htslib-${HTSLIBVER}.tar.bz2 && \ + rm htslib-${HTSLIBVER}.tar.bz2 && \ + cd htslib-${HTSLIBVER}/ && \ + ./configure && \ + make && \ + make install + +# installing samtools +RUN wget -q https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# installing iVar; make /data +RUN wget -q https://github.com/andersen-lab/ivar/archive/v${IVARVER}.tar.gz && \ + tar -xf v${IVARVER}.tar.gz && \ + rm -rf v${IVARVER}.tar.gz && \ + cd ivar-${IVARVER} && \ + ./autogen.sh && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib" + +# set /data as working directory +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN ivar version && samtools --version + +# getting files +RUN wget -q https://github.com/StaPH-B/docker-builds/blob/master/tests/SARS-CoV-2/SRR13957123.sorted.bam?raw=true -O SRR13957123.sorted.bam && \ + samtools view -s 0.25 -b SRR13957123.sorted.bam | samtools sort -o SRR13957123.subsampled.bam - && samtools index SRR13957123.subsampled.bam && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.reference.fasta -O MN908947.3.fasta && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.primer.bed && \ + wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_1.fastq.gz?raw=true -O SRR13957123_1.fastq.gz && \ + wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_2.fastq.gz?raw=true -O SRR13957123_2.fastq.gz + +# primer trimming +RUN ivar trim -e -i SRR13957123.subsampled.bam -b nCoV-2019.primer.bed -p SRR13957123.primertrim && \ + samtools sort SRR13957123.primertrim.bam -o SRR13957123.primertrim.sorted.bam + +# creating a consensus - using smaller -d for github actions +RUN samtools mpileup -A -d 40 -B -Q 0 --reference MN908947.3.fasta SRR13957123.primertrim.sorted.bam | \ + ivar consensus -q 20 -t 0.6 -n N -m 20 -p SRR13957123.consensus && \ + wc -c SRR13957123.consensus* + +# piping into ivar takes too long, but here's what the test would be +RUN bwa index MN908947.3.fasta && \ + bwa mem MN908947.3.fasta SRR13957123_1.fastq.gz SRR13957123_2.fastq.gz | \ + ivar trim -b nCoV-2019.primer.bed -x 3 -m 30 | \ + samtools sort | samtools mpileup -aa -A -Q 0 -B -d 200 --reference MN908947.3.fasta - | \ + ivar consensus -p test_consensus -m 10 -n N -t 0.5 diff --git a/ivar/1.4.2/README.md b/ivar/1.4.2/README.md new file mode 100644 index 000000000..ddf978212 --- /dev/null +++ b/ivar/1.4.2/README.md @@ -0,0 +1,45 @@ +# iVar container + +Main tool : 
[iVar](https://andersen-lab.github.io/ivar/html/manualpage.html) + +> iVar is a computational package that contains functions broadly useful for viral amplicon-based sequencing + +Additional tools (required): + +* [HTSlib](https://github.com/samtools/htslib) 1.17 +* [samtools](http://www.htslib.org/) 1.17 +* [bedtools](https://bedtools.readthedocs.io/en/latest/) 2.30.0 +* python 3.10.6 + +Additional tools (optional): +* [minimap2](https://github.com/lh3/minimap2) 2.24+dfsg-2 +* [bwa](https://bio-bwa.sourceforge.net/) 0.7.17-6 + +## Example Usage + +```bash +ivar trim -e -i {bam} -b {primer bed} -p {sample}.primertrim +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar variants -p {sample}.variants -q 20 -t 0.6 -r {reference.fasta} -g {reference.gff} +``` + +```bash +samtools mpileup -A -d 8000 -B -Q 0 --reference {reference.fasta} {bam} | \ + ivar consensus -t 0.6 -p {sample}.consensus -n N +``` + +Starting with iVar version 1.4.1, the output of an aligner such as minimap2 and bwa (both included) can be piped into ivar trim directly +```bash +# index reference +bwa index reference.fasta + +# run bwa and pipe into ivar (single line is split with \ for clarity) +bwa mem reference.fasta read_1.fastq.gz read_2.fastq.gz | \ + ivar trim -b primer.bed -x 3 -m 30 | \ + samtools sort | \ + samtools mpileup -aa -A -Q 0 -B -d 2000 - | \ + ivar consensus -p output_prefix -m 10 -n N -t 0.5 +``` diff --git a/kaptive/2.0.5/Dockerfile b/kaptive/2.0.5/Dockerfile new file mode 100644 index 000000000..cfabae016 --- /dev/null +++ b/kaptive/2.0.5/Dockerfile @@ -0,0 +1,71 @@ +FROM ubuntu:focal as app + +ARG KAPTIVE_VER="2.0.5" +ARG blastVer="2.3.0" +# so that apt doesn't try to ask for user input +ARG DEBIAN_FRONTEND=noninteractive + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Kaptive" +LABEL software.version="${KAPTIVE_VER}" +LABEL description="Report information about surface polysaccharide loci for Klebsiella pneumoniae species complex and Acinetobacter baumannii genome assemblies" +LABEL website="https://github.com/katholt/Kaptive" +LABEL license="https://github.com/katholt/Kaptive/blob/master/LICENSE" +LABEL maintainer="Tamas Stirling" +LABEL maintainer.email="stirling.tamas@gmail.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install some dependencies via apt; cleanup apt garbage +RUN apt-get update && apt-get install -y \ + locales \ + python3 \ + python3-pip \ + python3-setuptools \ + software-properties-common \ + wget \ + --no-install-recommends && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# set locale +RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8 +ENV LANG=en_US.UTF-8 + +# ncbi-blast+ +RUN wget "ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${blastVer}/ncbi-blast-${blastVer}+-x64-linux.tar.gz" && \ + tar zxvpf ncbi-blast-${blastVer}+-x64-linux.tar.gz && \ + rm ncbi-blast-${blastVer}+-x64-linux.tar.gz + +# kaptive +RUN wget "https://github.com/katholt/Kaptive/archive/refs/tags/v${KAPTIVE_VER}.tar.gz" && \ + tar -xzf v${KAPTIVE_VER}.tar.gz && \ + rm -f v${KAPTIVE_VER}.tar.gz && \ + mv /Kaptive-${KAPTIVE_VER} /kaptive + +RUN pip3 install biopython + +# set $PATH, with /kaptive at the highest priority +ENV PATH="/kaptive:/ncbi-blast-${blastVer}+/bin:${PATH}" + +# final working directory is /data +WORKDIR /data + +# test layer +FROM app as test + +# test with A. 
baumannii +RUN wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/016/486/705/GCA_016486705.1_PDT000751301.1/GCA_016486705.1_PDT000751301.1_genomic.fna.gz" && \ + gunzip GCA_016486705.1_PDT000751301.1_genomic.fna.gz +# k locus +RUN kaptive.py -a GCA_016486705.1_PDT000751301.1_genomic.fna -k /kaptive/reference_database/Acinetobacter_baumannii_k_locus_primary_reference.gbk +# o locus +RUN kaptive.py -a GCA_016486705.1_PDT000751301.1_genomic.fna -k /kaptive/reference_database/Acinetobacter_baumannii_OC_locus_primary_reference.gbk + +# test with K. pneumoniae +RUN wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/022/268/055/GCA_022268055.1_PDT000434809.1/GCA_022268055.1_PDT000434809.1_genomic.fna.gz" && \ + gunzip GCA_022268055.1_PDT000434809.1_genomic.fna.gz +# k locus +RUN kaptive.py -a GCA_022268055.1_PDT000434809.1_genomic.fna -k /kaptive/reference_database/Klebsiella_k_locus_primary_reference.gbk +# o locus +RUN kaptive.py -a GCA_022268055.1_PDT000434809.1_genomic.fna -k /kaptive/reference_database/Klebsiella_o_locus_primary_reference.gbk diff --git a/kaptive/2.0.5/README.md b/kaptive/2.0.5/README.md new file mode 100644 index 000000000..563c8ab2e --- /dev/null +++ b/kaptive/2.0.5/README.md @@ -0,0 +1,24 @@ +# Kaptive + +Main tool: [Kaptive](https://github.com/katholt/Kaptive) + +Additionall tools: + +- ncbi-blast+ 2.3.0 +- python 3.8.10 +- biopython 1.81 + +## Example Usage + +```bash +# K locus, A. baumannii +kaptive.py -a assembly.fasta -k /kaptive/reference_database/Acinetobacter_baumannii_k_locus_primary_reference.gbk +# O locus, A. baumannii +kaptive.py -a assembly.fasta -k /kaptive/reference_database/Acinetobacter_baumannii_OC_locus_primary_reference.gbk + +# K locus, K. pneumoniae +kaptive.py -a assembly.fasta -k /kaptive/reference_database/Klebsiella_k_locus_primary_reference.gbk +# O locus, K. pneumoniae +kaptive.py -a assembly.fasta -k /kaptive/reference_database/Klebsiella_o_locus_primary_reference.gbk +``` + diff --git a/kleborate/2.3.2-2023-05/Dockerfile b/kleborate/2.3.2-2023-05/Dockerfile new file mode 100644 index 000000000..bb33b5267 --- /dev/null +++ b/kleborate/2.3.2-2023-05/Dockerfile @@ -0,0 +1,87 @@ +FROM ubuntu:focal as app + +# for easy upgrade later. ARG variables only persist during image build time +ARG KLEBORATE_VER="2.3.2" +ARG BLAST_VER="2.9.0" +ARG MASH_VER="2.3" +ARG KAPTIVE_VER="2.0.6" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Kleborate" +LABEL software.version="${KLEBORATE_VER}" +LABEL description="tool to screen genome assemblies of Klebsiella pneumoniae and the Klebsiella pneumoniae species complex (KpSC)" +LABEL website="https://github.com/katholt/Kleborate" +LABEL license="https://github.com/katholt/Kleborate/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Frank Ambrosio" +LABEL maintainer2.email="frank.ambrosio@theiagen.com" + +# install prerequisites. 
Cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-setuptools \ + wget \ + ca-certificates \ + bzip2 \ + locales \ + git \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# to avoid encoding issues during kleborate setup.py install step +RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8 +ENV LANG=en_US.UTF-8 + +# so that we get a more recent version of biopython instead of what is available via apt +# trying to get rid of biopython warning "BiopythonExperimentalWarning: Bio.Align.substitution_matrices ......" +RUN python3 -m pip install biopython + +# mash; update UID and GID of mash files; make /data +# UID and GID changes because the original owner is UID: 1081147385 and GID: 1360859114 which does NOT play well with systems that limits GIDs and UIDs +RUN wget https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar && \ + tar -xvf mash-Linux64-v${MASH_VER}.tar && \ + rm -rf mash-Linux64-v${MASH_VER}.tar && \ + chown root:root /mash-Linux64-v${MASH_VER}/* && \ + mkdir /data + +# ncbi-blast+ +RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and TERM to avoid warnings +ENV PATH="/mash-Linux64-v${MASH_VER}:\ +/ncbi-blast-${BLAST_VER}+/bin/:\ +${PATH}" \ + TERM=xterm-256color + +# kleborate install +# as per author instructions. Have to use git clone and git checkout since kaptive is a git submodule. cannot use tarballs +RUN git clone --recursive https://github.com/katholt/Kleborate.git && \ + cd Kleborate && \ + git checkout v${KLEBORATE_VER} && \ + cd kaptive && \ + git pull https://github.com/katholt/Kaptive v${KAPTIVE_VER} && \ + cd ../ && \ + python3 setup.py install + +# set working directory +WORKDIR /data + +FROM app as test + +# run Kleborate on assembly supplied with Kleborate code +RUN kleborate --resistance --kaptive --all \ + --assemblies /Kleborate/test/test_genomes/172.fasta.gz \ + --outfile /data/strain172-kleborate-test-out.tsv + +# install pytest; run unit tests included with Kleborate +RUN apt-get update && apt-get install -y --no-install-recommends python3-pytest && \ + cd /Kleborate && \ + python3 -m pytest + +# print help and version info +RUN kleborate --help && kleborate --version \ No newline at end of file diff --git a/kleborate/2.3.2-2023-05/README.md b/kleborate/2.3.2-2023-05/README.md new file mode 100644 index 000000000..beaaa12b9 --- /dev/null +++ b/kleborate/2.3.2-2023-05/README.md @@ -0,0 +1,53 @@ +# Kleborate container + +Main tool : [Kleborate](https://github.com/katholt/Kleborate) 2.3.2 + +Additional tools: + +- [kaptive](https://github.com/katholt/Kaptive) 2.0.6 +- ncbi-blast+ 2.9.0 +- [mash](https://github.com/marbl/Mash) 2.3 +- python 3.8.2 +- biopython 1.81 + +Full documentation: [https://github.com/katholt/Kleborate/wiki](https://github.com/katholt/Kleborate/wiki) + +Kleborate is a tool to screen genome assemblies of Klebsiella pneumoniae and the Klebsiella pneumoniae species complex (KpSC) for: + +- MLST sequence type +- species (e.g. K. pneumoniae, K. quasipneumoniae, K. variicola, etc.) 
+- ICEKp associated virulence loci: yersiniabactin (ybt), colibactin (clb), salmochelin (iro), hypermucoidy (rmpA) +- virulence plasmid associated loci: salmochelin (iro), aerobactin (iuc), hypermucoidy (rmpA, rmpA2) +- antimicrobial resistance determinants: acquired genes, SNPs, gene truncations and intrinsic β-lactamases +- K (capsule) and O antigen (LPS) serotype prediction, via wzi alleles and Kaptive + +## Why is there `staphb/kleborate:2.3.2` and `staphb/kleborate:2.3.2-2023-05`? What is the difference? + +A small change was made to the Kleborate v2.3.2 dockerfile to account for an odd UID (1081147385) and GID (1360859114) that owns the `mash` executable and license file. This can cause problems in "rootless" environments or environments where `docker` is run inside of a docker container such as in GitPod. These environments have more limits placed on UIDs and GIDs than your typical linux computers. Error shown below: + +``` +$ docker pull staphb/kleborate:2.3.2 +2.3.2: Pulling from staphb/kleborate +47c764472391: Pull complete +f1582e59f07e: Pull complete +a2c96ce05542: Pull complete +213b944f2044: Pull complete +e136842d98e3: Extracting [==================================================>] 1.847MB/1.847MB +d99354f29fdd: Download complete +499193a95fe3: Download complete +4f4fb700ef54: Download complete +failed to register layer: ApplyLayer exit status 1 stdout: stderr: failed to Lchown "/mash-Linux64-v2.3/LICENSE.txt" for UID 1081147385, GID 1360859114 (try increasing the number of subordinate IDs in /etc/subuid and /etc/subgid): lchown /mash-Linux64-v2.3/LICENSE.txt: invalid argument +``` + +The dockerfile now has a step to `chown` these files so that the `root` user owns them, which prevents these odd errors. + +The updated docker image is available under the docker image name: `staphb/kleborate:2.3.2-2023-05` + +## Example Usage + +```bash +# run Kleborate on assembly supplied with Kleborate code +kleborate --resistance --kaptive --all \ + --assemblies /Kleborate/test/test_genomes/172.fasta.gz \ + --outfile /data/strain172-kleborate-test-out.tsv +``` diff --git a/kleborate/2.3.2/Dockerfile b/kleborate/2.3.2/Dockerfile new file mode 100644 index 000000000..354c9a3d1 --- /dev/null +++ b/kleborate/2.3.2/Dockerfile @@ -0,0 +1,85 @@ +FROM ubuntu:focal as app + +# for easy upgrade later. ARG variables only persist during image build time +ARG KLEBORATE_VER="2.3.2" +ARG BLAST_VER="2.9.0" +ARG MASH_VER="2.3" +ARG KAPTIVE_VER="2.0.6" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Kleborate" +LABEL software.version="${KLEBORATE_VER}" +LABEL description="tool to screen genome assemblies of Klebsiella pneumoniae and the Klebsiella pneumoniae species complex (KpSC)" +LABEL website="https://github.com/katholt/Kleborate" +LABEL license="https://github.com/katholt/Kleborate/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Frank Ambrosio" +LABEL maintainer2.email="frank.ambrosio@theiagen.com" + +# install prerequisites. 
Cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + python3-setuptools \ + wget \ + ca-certificates \ + bzip2 \ + locales \ + git \ + procps && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# to avoid encoding issues during kleborate setup.py install step +RUN locale-gen en_US.UTF-8 && update-locale LANG=en_US.UTF-8 +ENV LANG=en_US.UTF-8 + +# so that we get a more recent version of biopython instead of what is available via apt +# trying to get rid of biopython warning "BiopythonExperimentalWarning: Bio.Align.substitution_matrices ......" +RUN python3 -m pip install biopython + +# mash; make /data +RUN wget https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar && \ + tar -xvf mash-Linux64-v${MASH_VER}.tar && \ + rm -rf mash-Linux64-v${MASH_VER}.tar && \ + mkdir /data + +# ncbi-blast+ +RUN wget ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and TERM to avoid warnings +ENV PATH="/mash-Linux64-v${MASH_VER}:\ +/ncbi-blast-${BLAST_VER}+/bin/:\ +${PATH}" \ + TERM=xterm-256color + +# kleborate install +# as per author instructions. Have to use git clone and git checkout since kaptive is a git submodule. cannot use tarballs +RUN git clone --recursive https://github.com/katholt/Kleborate.git && \ + cd Kleborate && \ + git checkout v${KLEBORATE_VER} && \ + cd kaptive && \ + git pull https://github.com/katholt/Kaptive v${KAPTIVE_VER} && \ + cd ../ && \ + python3 setup.py install + +# set working directory +WORKDIR /data + +FROM app as test + +# run Kleborate on assembly supplied with Kleborate code +RUN kleborate --resistance --kaptive --all \ + --assemblies /Kleborate/test/test_genomes/172.fasta.gz \ + --outfile /data/strain172-kleborate-test-out.tsv + +# install pytest; run unit tests included with Kleborate +RUN apt-get update && apt-get install -y --no-install-recommends python3-pytest && \ + cd /Kleborate && \ + python3 -m pytest + +# print help and version info +RUN kleborate --help && kleborate --version \ No newline at end of file diff --git a/kleborate/2.3.2/README.md b/kleborate/2.3.2/README.md new file mode 100644 index 000000000..dd4d0b75c --- /dev/null +++ b/kleborate/2.3.2/README.md @@ -0,0 +1,31 @@ +# Kleborate container + +Main tool : [Kleborate](https://github.com/katholt/Kleborate) 2.3.2 + +Additional tools: + +- [kaptive](https://github.com/katholt/Kaptive) 2.0.6 +- ncbi-blast+ 2.9.0 +- [mash](https://github.com/marbl/Mash) 2.3 +- python 3.8.2 +- biopython 1.81 + +Full documentation: [https://github.com/katholt/Kleborate/wiki](https://github.com/katholt/Kleborate/wiki) + +Kleborate is a tool to screen genome assemblies of Klebsiella pneumoniae and the Klebsiella pneumoniae species complex (KpSC) for: + +- MLST sequence type +- species (e.g. K. pneumoniae, K. quasipneumoniae, K. variicola, etc.) 
+- ICEKp associated virulence loci: yersiniabactin (ybt), colibactin (clb), salmochelin (iro), hypermucoidy (rmpA) +- virulence plasmid associated loci: salmochelin (iro), aerobactin (iuc), hypermucoidy (rmpA, rmpA2) +- antimicrobial resistance determinants: acquired genes, SNPs, gene truncations and intrinsic β-lactamases +- K (capsule) and O antigen (LPS) serotype prediction, via wzi alleles and Kaptive + +## Example Usage + +```bash +# run Kleborate on assembly supplied with Kleborate code +kleborate --resistance --kaptive --all \ + --assemblies /Kleborate/test/test_genomes/172.fasta.gz \ + --outfile /data/strain172-kleborate-test-out.tsv +``` diff --git a/kma/1.4.10/Dockerfile b/kma/1.4.10/Dockerfile new file mode 100644 index 000000000..fec71b4a3 --- /dev/null +++ b/kma/1.4.10/Dockerfile @@ -0,0 +1,57 @@ +# base image +FROM ubuntu:jammy as app + +ARG KMAVER="1.4.10" + +# metadata +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="kma" +LABEL software.version="$KMAVER" +LABEL description="K-mer alignment of raw reads against a database" +LABEL website="https://bitbucket.org/genomicepidemiology/kma/src/master/" +LABEL license="https://bitbucket.org/genomicepidemiology/kma/src/master/" +LABEL license.type="Apache License, V2.0" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Eetu Eklund" +LABEL maintainer2.email="eetu.eklund@maryland.gov" + +# install dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libz-dev \ + make \ + wget \ + ca-certificates \ + git && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +RUN git clone https://bitbucket.org/genomicepidemiology/kma.git && \ + cd kma && \ + git fetch && \ + git checkout $KMAVER && \ + make && \ + mkdir data + +ENV PATH="$PATH:/kma" \ + LC_ALL=C + +WORKDIR data + +# test stage +FROM app as test + +# set /test as working directory for test below +WORKDIR /test + +# Downloads E.coli genome and runs kma index on it +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + kma index -i GCA_012224845.2_ASM1222484v2_genomic.fna -o /test/database + +# Downloads read files and uses them for kma mapping against database; print kma version +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_2.fastq.gz && \ + kma -ipe SRR6903006_1.fastq.gz SRR6903006_2.fastq.gz -o /test/test_output -t_db /test/database -tmp /tmp/ && \ + kma -v diff --git a/kma/1.4.10/README.md b/kma/1.4.10/README.md new file mode 100644 index 000000000..5a0e0f322 --- /dev/null +++ b/kma/1.4.10/README.md @@ -0,0 +1,22 @@ +# kma container + +### Main tool : kma + +kma : k-mer mapping of raw reads to a redundant database + + +## Example Usage + +Database needs to be indexed to use kma for mapping: + +`kma index -i templates.fsa.gz -o database/name` + +kma mapping of paired and unpaired read files: + +`kma -ipe SRR13957123_*.fastq.gz -o test.output -t_db DB_name` + +`kma -i singleEndReads.fq.gz -ipe pairedEnd_*.fq.gz -o output/name -t_db database/name -1t1` + + +### More information about kma: +https://bitbucket.org/genomicepidemiology/kma/src/master/ diff --git a/kraken2/2.1.3/Dockerfile b/kraken2/2.1.3/Dockerfile new file mode 100644 index 000000000..22ccadfcd --- /dev/null +++ 
b/kraken2/2.1.3/Dockerfile @@ -0,0 +1,87 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during build time. +ARG K2VER="2.1.3" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="2" +LABEL software="Kraken2" +LABEL software.version="${K2VER}" +LABEL description="Taxonomic sequence classifier" +LABEL website="https://github.com/DerrickWood/kraken2" +LABEL license="https://github.com/DerrickWood/kraken2/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Erin Young" +LABEL maintainer2.email="eriny@utah.gov" + +# install dependencies and cleanup apt garbage +RUN apt-get update && apt-get -y --no-install-recommends install \ + wget \ + ca-certificates \ + zlib1g-dev \ + make \ + g++ \ + rsync \ + cpanminus \ + ncbi-blast+ && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# perl module required for kraken2-build +RUN cpanm Getopt::Std + +# DL Kraken2, unpack, and install +RUN wget https://github.com/DerrickWood/kraken2/archive/v${K2VER}.tar.gz && \ + tar -xzf v${K2VER}.tar.gz && \ + rm -rf v${K2VER}.tar.gz && \ + cd kraken2-${K2VER} && \ + ./install_kraken2.sh . && \ + mkdir /data /kraken2-db + +ENV PATH="/kraken2-${K2VER}/:$PATH" \ + LC_ALL=C + +# default command is to pull up help options +CMD [ "kraken2", "--help" ] + +WORKDIR /data + +### start of test stage ### +FROM app as test + +# print help and versions +RUN kraken2 --help && \ + kraken2-build --help && \ + kraken2 --version && \ + blastn -version && \ + dustmasker -version && \ + segmasker -version + +# set working dir and download test k2 database inside +WORKDIR /kraken2-db +RUN wget https://genome-idx.s3.amazonaws.com/kraken/k2_viral_20230605.tar.gz && \ + tar -zxf k2_viral_20230605.tar.gz + +# set working dir for running test commands below +WORKDIR /test + +# download and classify SARS-CoV-2 reads +RUN wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_1.fastq.gz?raw=true -O SRR13957123_1.fastq.gz && \ + wget -q https://github.com/StaPH-B/docker-builds/blob/83ee344304794f4ffa162d1c082bb35f916badcf/tests/SARS-CoV-2/SRR13957123_2.fastq.gz?raw=true -O SRR13957123_2.fastq.gz && \ + kraken2 --paired --classified-out cseqs#.fq --db /kraken2-db SRR13957123_1.fastq.gz SRR13957123_2.fastq.gz --report kraken2_report.txt && \ + cat kraken2_report.txt + + + +##### NO DATABASE INCLUDED WITH THIS DOCKER IMAGE ##### +## User will need to mount a directory from their host machine that contains kraken2 database files +## to a directory in the container (/kraken2-db exists for this purpose, but feel free to use another location) +## A list of created Kraken2 databases can be found at https://benlangmead.github.io/aws-indexes/k2 + +# DL MiniKraken2_8GB database. Built from RefSeq bacteria, archaea, viral, and human libraries. 
+# --strip-components=1 used so that the *.k2d files end up inside /kraken2-db and not another directory +#RUN mkdir /kraken2-db && \ +# cd /kraken2-db && \ +# wget --no-check-certificate https://genome-idx.s3.amazonaws.com/kraken/minikraken2_v2_8GB_201904.tgz && \ +# tar -zxf --strip-components=1 minikraken2_v2_8GB_201904.tgz && \ +# rm -rf minikraken2_v2_8GB_201904.tgz diff --git a/kraken2/2.1.3/README.md b/kraken2/2.1.3/README.md new file mode 100644 index 000000000..de031c1ba --- /dev/null +++ b/kraken2/2.1.3/README.md @@ -0,0 +1,26 @@ +# Kraken2 2.1.3 container + +Main tool : [Kraken2](https://github.com/DerrickWood/kraken2/) + +Additional tools: + +- [Pre-built kraken2 databases can be found here](https://benlangmead.github.io/aws-indexes/k2) + +Full documentation: [link to documentation](https://github.com/DerrickWood/kraken2/wiki) + +Kraken 2 is a fast and memory efficient tool for taxonomic assignment of metagenomics sequencing reads. + +## Example Usage + +```bash +# query Illumina paired-end reads against kraken2 standard 8GB database +kraken2 --report test.kraken2.1.2.salmonella.report \ + --output test.kraken2.1.2.salmonella.output \ + --paired \ + --db ./k2_standard_8gb_20210517 \ + --threads 4 \ + SRR10992628_1.gz SRR10992628_2.gz + +# inspect a kraken2 database +kraken2-inspect --db ./k2_standard_8gb_20210517 --threads 4 +``` diff --git a/lyveset/1.1.4f/Dockerfile b/lyveset/1.1.4f/Dockerfile index db5773c26..05dcc9626 100644 --- a/lyveset/1.1.4f/Dockerfile +++ b/lyveset/1.1.4f/Dockerfile @@ -1,69 +1,83 @@ -FROM ubuntu:xenial - +FROM ubuntu:xenial as app +ARG LYVESET_VER="1.1.4f" LABEL base.image="ubuntu:xenial" -LABEL version="3" +LABEL dockerfile.version="4" LABEL software="Lyve-SET" LABEL software.version="1.1.4f" LABEL description="LYVE-SET, a method of using hqSNPs to create a phylogeny, especially for outbreak investigations" LABEL website="https://github.com/lskatz/lyve-SET" LABEL license="https://github.com/lskatz/lyve-SET/blob/master/LICENSE" -LABEL maintainer1="Kelsey Florek" -LABEL maintainer1.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" LABEL maintainer2="Curtis Kapsak" -LABEL maintainer2.email="pjx8@cdc.gov" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Erik Wolfsohn" +LABEL maintainer3.email="ewolfsohn@gmail.com" + +#run apt in noninteractive mode so it can't prompt for input +ENV DEBIAN_FRONTEND noninteractive + +#update apt from closest mirrors +RUN echo 'deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-security main restricted universe multiverse' | cat - /etc/apt/sources.list > temp && mv temp /etc/apt/sources.list &&\ + echo 'deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-backports main restricted universe multiverse' | cat - /etc/apt/sources.list > temp && mv temp /etc/apt/sources.list &&\ + echo 'deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-updates main restricted universe multiverse' | cat - /etc/apt/sources.list > temp && mv temp /etc/apt/sources.list &&\ + echo 'deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe multiverse' | cat - /etc/apt/sources.list > temp && mv temp /etc/apt/sources.list -# install dependencies -RUN apt-get update && apt-get install -y\ - perl\ - libfile-slurp-perl\ - openjdk-9-jre\ - bioperl\ - wget\ - libz-dev\ - git\ - libncurses5-dev\ - libncursesw5-dev\ - build-essential\ - ncbi-blast+\ - libsvn-perl\ - subversion\ - libsvn1\ - automake1.11\ - libpthread-stubs0-dev\ - cpanminus\ - mpich\ - 
clang\ - smalt && \ - apt-get clean && apt-get autoclean && \ - rm -rf /var/lib/apt/lists/* +RUN apt update && apt-get install -y \ + curl \ + perl \ + libfile-slurp-perl \ + default-jre \ + bioperl \ + wget \ + libz-dev \ + git \ + libncurses5-dev \ + libncursesw5-dev \ + build-essential \ + ncbi-blast+ \ + libsvn-perl \ + subversion \ + libsvn1 \ + automake1.11 \ + libpthread-stubs0-dev \ + cpanminus \ + mpich \ + clang \ + libssl-dev \ + libio-socket-ssl-perl \ + libxml-simple-perl \ + unzip \ + smalt \ + python3 &&\ + apt-get clean && apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* -# install edirect -RUN wget ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/edirect.tar.gz &&\ - tar -xzvf edirect.tar.gz &&\ - rm edirect.tar.gz &&\ - cd edirect &&\ - ./setup.sh +RUN mkdir /edirect && \ + perl -MNet::FTP -e '$ftp = new Net::FTP("ftp.ncbi.nlm.nih.gov", Passive => 1);$ftp->login; $ftp->binary;$ftp->get("/entrez/entrezdirect/edirect.tar.gz");' && \ + gunzip -c edirect.tar.gz | tar xf - && \ + mv -v /edirect/* /usr/local/bin && \ + rm edirect.tar.gz -#install smalt (at the time of this build lyve-SET was not building correctly) -#RUN wget --max-redirect 50 --continue 'https://downloads.sourceforge.net/project/smalt/smalt-0.7.6-static.tar.gz' -O smalt-0.7.6-static.tar.gz &&\ -# tar -zxvf smalt-0.7.6-static.tar.gz &&\ -# rm smalt-0.7.6-static.tar.gz &&\ -# cd smalt-0.7.6 &&\ -# ./configure &&\ -# make &&\ -# make install +# install missing xtract executable required by efetch +RUN wget https://ftp.ncbi.nlm.nih.gov/entrez/entrezdirect/xtract.Linux.gz && \ + gunzip -f xtract.Linux.gz && \ + mv -v xtract.Linux /usr/local/bin/xtract && \ + chmod +x /usr/local/bin/xtract #get lyveset files -RUN wget https://github.com/lskatz/lyve-SET/archive/v1.1.4f.tar.gz &&\ - tar -xzvf v1.1.4f.tar.gz &&\ - rm v1.1.4f.tar.gz &&\ - mv lyve-SET-1.1.4f lyve-SET +RUN wget https://github.com/lskatz/lyve-SET/archive/v${LYVESET_VER}.tar.gz &&\ + tar -xzvf v${LYVESET_VER}.tar.gz &&\ + rm v${LYVESET_VER}.tar.gz &&\ + mv lyve-SET-${LYVESET_VER} lyve-SET # download perl modules, setup the lyveset filesystem RUN cpanm Test::Most Bio::FeatureIO String::Escape File::Slurp URI::Escape Math::Round Schedule::SGE --force RUN make -C /lyve-SET install -e PREFIX=/lyve-SET &&\ make -C /lyve-SET env -e PREFIX=/lyve-SET +RUN make -C /lyve-SET clean-edirect + # set PATH and locale settings for singularity compatibility ENV PATH="/lyve-SET:\ /lyve-SET/scripts:\ @@ -72,3 +86,7 @@ ${PATH}"\ LC_ALL=C WORKDIR /data + +FROM app as test + +RUN set_test.pl --numcpus 4 lambda lambda diff --git a/lyveset/1.1.4f/README.md b/lyveset/1.1.4f/README.md new file mode 100644 index 000000000..9af6a6141 --- /dev/null +++ b/lyveset/1.1.4f/README.md @@ -0,0 +1,41 @@ +# LYVE version of the Snp Extraction Tool (SET), a method of using hqSNPs to create a phylogeny. + +Main tool: [Lyve-SET](https://github.com/lskatz/Lyve-SET) + +Additional tools: + +- ncbi-blast+ 2.2.31 +- python 2.7.12 +- python 3.5.2 +- bioperl 1.6.924 +- cpanminus 1.7040 +- NCBI E-utilities +- Git 2.7.4 +- VCFTools 0.1.12b +- SAMTools 1.2 +- VarScan 2.3.7 +- BCFTools 1.3.1 +- SMALT 0.7.6 +- SNAP 1.0beta.18 +- RAxML 8.1.16 +- Quake 0.3.5 + +## Example Usage + +### Help: + To see the help for any script, run it without options or with --help. For example, set_test.pl -h. 
The following is the help for the main script, launch_set.pl + +### Analyze test data: + set_test.pl --numcpus 8 lambda lambda + +### Analyze your data: + + set_manage.pl --create yourProject + # paired end reads have to be shuffled into one file per sample + shuffleSplitReads.pl --numcpus 8 -o interleaved *.fastq.gz + # then moved into your project dir + mv interleaved/*.fastq.gz yourProject/reads/ + # cleanup + rmdir interleaved + cp reference.fasta yourProject/ref/ + launch_set.pl --numcpus 8 -ref yourProject/ref/reference.fasta yourProject \ No newline at end of file diff --git a/mafft/7.505/Dockerfile b/mafft/7.505/Dockerfile new file mode 100644 index 000000000..ea349ed88 --- /dev/null +++ b/mafft/7.505/Dockerfile @@ -0,0 +1,48 @@ +# base image +FROM ubuntu:bionic as app + +# For easy upgrade in the future. ARG variables only persist during build time. +ARG MAFFT_VER="7.505" + +# metadata +LABEL base.image="ubuntu:bionic" +LABEL dockerfile.version="1" +LABEL software="MAFFT" +LABEL software.version=${MAFFT_VER} +LABEL description="Multiple alignment program for amino acid or nucleotide sequences" +LABEL website="https://mafft.cbrc.jp/alignment/software/" +LABEL license="https://mafft.cbrc.jp/alignment/software/license.txt" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# install wget, remove apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install mafft binary and make /data +RUN wget https://mafft.cbrc.jp/alignment/software/mafft_${MAFFT_VER}-1_amd64.deb && \ + dpkg -i mafft_${MAFFT_VER}-1_amd64.deb && \ + rm mafft_${MAFFT_VER}-1_amd64.deb && \ + mkdir /data + +WORKDIR /data + +FROM staphb/ncbi-datasets:14.3.0 as datasets + +WORKDIR /test + +RUN datasets download virus genome accession OQ344199.1 OQ344200.1 OQ344201.1 --filename sarscov2.zip && unzip sarscov2.zip + +FROM app as test + +WORKDIR /test + +COPY --from=datasets /test/ncbi_dataset/data/genomic.fna /test/genomic.fna + +# this works, but it writes to stderr +# RUN maff --help && mafft --version + +RUN mafft --auto genomic.fna > aligned.fna && wc -l *fna diff --git a/mafft/7.505/README.md b/mafft/7.505/README.md new file mode 100644 index 000000000..46c2a8f36 --- /dev/null +++ b/mafft/7.505/README.md @@ -0,0 +1,20 @@ +# MAFFT container + +Main tool : [MAFFT](https://mafft.cbrc.jp/alignment/software/) + +MAFFT is a multiple sequence alignment program for unix-like operating systems. 
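If you are calling MAFFT through this container rather than a local install, a typical invocation mounts the current directory to `/data`, the image's default working directory. The `staphb/mafft:7.505` tag below is an assumption based on this repository's usual image naming; adjust it to however you pull or tag the image.

```
# align sequences from the mounted host directory; the shell redirect writes the alignment on the host
docker run --rm -v $PWD:/data staphb/mafft:7.505 mafft --auto input.fasta > aligned.fasta
```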
+ +# Example Usage + +``` +# have mafft detect what to do +mafft --auto input > output +# filter out sequences with too many ambiguous values with --maxambiguous (only available in version greater than 7.473) and compare to reference +mafft --auto --maxambiguous 0.05 --addfragments othersequences referencesequence > output +``` + +## Other noteworthy options include +``` +--thread # allows parallelization +--large # reduces memory burden +``` diff --git a/masurca/4.1.0/Dockerfile b/masurca/4.1.0/Dockerfile new file mode 100644 index 000000000..faa605027 --- /dev/null +++ b/masurca/4.1.0/Dockerfile @@ -0,0 +1,75 @@ +FROM ubuntu:focal as app + +ARG MASURCA_VER="4.1.0" +ARG MINIMAP2_VER="2.24" +ARG BWA_VER="0.7.17" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="masurca" +LABEL software.version="${MASURCA_VER}" +LABEL description="Genome Assembly and Analysis" +LABEL website="https://github.com/alekseyzimin/masurca" +LABEL license="https://github.com/alekseyzimin/masurca/blob/master/LICENSE.txt" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + python \ + build-essential \ + curl \ + wget \ + gcc \ + zlib1g-dev \ + bzip2 \ + libboost-dev \ + libbz2-dev \ + liblzma-dev \ + numactl \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install minimap2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxvf - + +# install bwa +RUN mkdir /bwa && \ + cd /bwa && \ + wget https://github.com/lh3/bwa/releases/download/v$BWA_VER/bwa-$BWA_VER.tar.bz2 && \ + tar -xjf bwa-$BWA_VER.tar.bz2 && \ + rm bwa-$BWA_VER.tar.bz2 && \ + cd bwa-$BWA_VER && \ + make + +# install masurca suite +RUN mkdir /data && \ + wget https://github.com/alekseyzimin/masurca/releases/download/v$MASURCA_VER/MaSuRCA-$MASURCA_VER.tar.gz && \ + tar -xvf MaSuRCA-$MASURCA_VER.tar.gz && \ + rm MaSuRCA-$MASURCA_VER.tar.gz + +WORKDIR /MaSuRCA-$MASURCA_VER + +# even in 4.0.9 there's the evil cp at the end of .install.sh +RUN bash /MaSuRCA-$MASURCA_VER/install.sh 2>/dev/null ; exit 0 + +ENV PATH="${PATH}:/MaSuRCA-${MASURCA_VER}/Flye/bin:/MaSuRCA-${MASURCA_VER}/bin:/minimap2-${MINIMAP2_VER}_x64-linux:/bwa/bwa-${BWA_VER}" \ + LC_ALL=C + +# WORKDIR sets working directory +WORKDIR /data + +FROM app as test + +# testing POLCA +RUN wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123_1.fastq.gz && \ + wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123_2.fastq.gz && \ + wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + polca.sh -a SRR13957123.consensus.fa -r 'SRR13957123_1.fastq.gz SRR13957123_2.fastq.gz' && \ + test -f SRR13957123.consensus.fa.PolcaCorrected.fa + +# testing hybrid assembly +RUN wget -q https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_1.fastq.gz && \ + wget -q https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/short_reads_2.fastq.gz && \ + wget -q https://github.com/rrwick/Unicycler/raw/69e712eb95c4b9f8a46aade467260260a9ce7a91/sample_data/long_reads_low_depth.fastq.gz && \ + masurca -t 2 -i short_reads_1.fastq.gz,short_reads_2.fastq.gz -r long_reads_low_depth.fastq.gz diff --git a/masurca/4.1.0/README.md b/masurca/4.1.0/README.md new 
file mode 100644 index 000000000..ff84a5179 --- /dev/null +++ b/masurca/4.1.0/README.md @@ -0,0 +1,22 @@ +# masurca container + +Main tools: + +- [masurca](https://github.com/alekseyzimin/masurca) 4.1.0 +- [polca](https://github.com/alekseyzimin/masurca#polca) +- [Chromosome scaffolder](https://github.com/alekseyzimin/masurca#chromosome-scaffolder) +- [SAMBA](https://github.com/alekseyzimin/masurca#samba-scaffolder) + +Other tools: + +- [bwa](https://github.com/lh3/bwa) (used by polca) 0.7.17-r1188 +- [minimap2](https://github.com/lh3/minimap2) (used by samba) 2.24-r1122 + +## Example Usage + +```bash +# polishes consensus genome with paired-end Illumina reads +polca.sh -a genome.fasta -r 'reads1.fastq reads2.fastq.gz' -t 16 -m 1G +``` + +Better documentation can be found at [https://github.com/alekseyzimin/masurca](https://github.com/alekseyzimin/masurca) diff --git a/midas/1.3.2/Dockerfile b/midas/1.3.2/Dockerfile new file mode 100644 index 000000000..f25c6db1e --- /dev/null +++ b/midas/1.3.2/Dockerfile @@ -0,0 +1,88 @@ +## Builder ## +ARG MIDAS_VER="1.3.2" + +FROM ubuntu:jammy as builder + +RUN apt update && apt-get install -y \ + build-essential \ + wget \ + python2-dev \ + zlib1g-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-openssl-dev \ + libncurses5-dev \ + curl + +# build samtools 1.4 +RUN wget --no-check-certificate https://github.com/samtools/samtools/releases/download/1.4/samtools-1.4.tar.bz2 &&\ + tar -xvf samtools-1.4.tar.bz2 &&\ + cd samtools-1.4 &&\ + ./configure && make && make install + +# get python/pip dependencies +RUN update-alternatives --install /usr/bin/python python /usr/bin/python2 1 &&\ + curl https://bootstrap.pypa.io/pip/2.7/get-pip.py --output get-pip.py &&\ + python get-pip.py &&\ + pip install --user --no-cache-dir pandas==0.20.3 biopython==1.70 numpy==1.8.2 pysam==0.13 + +## APP ## +FROM ubuntu:jammy as app + +ARG MIDAS_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="MIDAS" +LABEL software.version="${MIDAS_VER}" +LABEL description="Metagenomic Intra-Species Diversity Analysis System" +LABEL website="https://github.com/snayfach/MIDAS" +LABEL license="https://github.com/snayfach/MIDAS/blob/master/LICENSE" +LABEL maintainer="Kutluhan Incekara" +LABEL maintainer.email="kutluhan.incekara@ct.gov" + +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + ca-certificates \ + python2 \ + zlib1g-dev \ + libcurl4 \ + bowtie2 \ + hmmer \ + vsearch &&\ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install MIDAS +RUN wget --no-check-certificate https://github.com/snayfach/MIDAS/archive/refs/tags/v${MIDAS_VER}.tar.gz &&\ + tar -xvf v${MIDAS_VER}.tar.gz && rm v${MIDAS_VER}.tar.gz &&\ + rm /MIDAS-${MIDAS_VER}/bin/Linux/bowtie2* &&\ + rm /MIDAS-${MIDAS_VER}/bin/Linux/samtools &&\ + ln -s /usr/bin/bowtie2* /MIDAS-${MIDAS_VER}/bin/Linux/ &&\ + update-alternatives --install /usr/bin/python python /usr/bin/python2 1 + +# collect dependencies from builder +COPY --from=builder /usr/local/bin/* /MIDAS-${MIDAS_VER}/bin/Linux/ +COPY --from=builder /root/.local /root/.local + +ENV PYTHONPATH="/MIDAS-${MIDAS_VER}:$PYTHONPATH" +ENV PATH="/MIDAS-${MIDAS_VER}/scripts:/MIDAS-${MIDAS_VER}/bin/Linux:/root/.local/bin:$PATH" +ENV LC_ALL=C + +WORKDIR /data + +## TEST ## +FROM app as test + +ARG MIDAS_VER + +# change workdir for test +WORKDIR /MIDAS-${MIDAS_VER}/test/ + +# create a test database +RUN tar --no-same-owner -xvf genomes.tar.gz &&\ + build_midas_db.py genomes genomes.mapfile /midas_database + +# test runs with 
test db +RUN run_midas.py species ./sample -1 ./test.fq.gz -n 100 -d /midas_database &&\ + run_midas.py genes ./sample -1 ./test.fq.gz -n 100 --species_id Bacteroides_vulgatus -d /midas_database &&\ + run_midas.py snps ./sample -1 ./test.fq.gz -n 100 --species_id Bacteroides_vulgatus -d /midas_database \ No newline at end of file diff --git a/midas/1.3.2/README.md b/midas/1.3.2/README.md new file mode 100644 index 000000000..36821d51c --- /dev/null +++ b/midas/1.3.2/README.md @@ -0,0 +1,21 @@ +# Metagenomic Intra-Species Diversity Analysis System (MIDAS) container + +Main tool: [MIDAS](https://github.com/snayfach/MIDAS) + +Additional tools: +- samtools v1.4 +- bowtie2 v2.4.4 +- hmmer v3.3.2 +- vsearch v2.21.1 + +Full documentation: https://github.com/snayfach/MIDAS + +## Note +This docker image does not contain reference database. Users should download the default database or create their own. + +Default database found here: http://lighthouse.ucsf.edu/MIDAS/midas_db_v1.2.tar.gz + +## Example usage +```bash +run_midas.py species /path/to/outdir -1 /path/to/reads_1.fq.gz -2 /path/to/reads_2.fq.gz -d /path/to/midas_database +``` diff --git a/minimap2/2.25/Dockerfile b/minimap2/2.25/Dockerfile new file mode 100644 index 000000000..a5a35aef4 --- /dev/null +++ b/minimap2/2.25/Dockerfile @@ -0,0 +1,57 @@ +FROM ubuntu:jammy as app + +# for easy upgrade later. ARG variables only persist during image build time +ARG MINIMAP2_VER="2.25" + +# metadata +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="Minimap2" +LABEL software.version="${MINIMAP2_VER}" +LABEL description="versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database" +LABEL website="https://github.com/lh3/minimap2" +LABEL license="https://github.com/lh3/minimap2/blob/master/LICENSE.txt" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="Kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# install deps and cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + curl \ + ca-certificates \ + bzip2 \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install minimap2 binary; make /data +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxvf - && \ + mkdir /data + +# set final PATH and LC_ALL for singularity compatibility +ENV PATH="${PATH}:/minimap2-${MINIMAP2_VER}_x64-linux" \ +LC_ALL=C + +# set final working directory as /data +WORKDIR /data + +# test layer +FROM app as test + +# use /test as working directory for tests +WORKDIR /test + +# git clone minimap2 github repo solely for the purpose of getting test data files +# the minimap2 command used below is that installed in the 'app' layer, proven by the output of "command -v minimap2" +# lastly, I'm using the test commands listed here: https://github.com/lh3/minimap2#getting-started +RUN apt-get update && apt-get install -y --no-install-recommends git && \ +git clone https://github.com/lh3/minimap2 && \ +cd minimap2 && \ +echo && \ +echo "double checking that the 'minimap2' command used below is the executable installed in the app stage of the docker image. 
Here is the path:" && \ +command -v minimap2 && \ +echo && \ +minimap2 -a test/MT-human.fa test/MT-orang.fa > test.sam && \ +minimap2 -x map-ont -d MT-human-ont.mmi test/MT-human.fa && \ +minimap2 -a MT-human-ont.mmi test/MT-orang.fa > test.sam && \ +minimap2 --version diff --git a/minimap2/2.25/README.md b/minimap2/2.25/README.md new file mode 100644 index 000000000..f38618d87 --- /dev/null +++ b/minimap2/2.25/README.md @@ -0,0 +1,31 @@ +# minimap2 container + +Main tool : [minimap2](https://github.com/lh3/minimap2) + +Additional tools: + +- none (apart from basic linux utilties included in `ubuntu:jammy` base docker image, like `ls`, `mkdir`, etc.) + +Publication: [https://academic.oup.com/bioinformatics/article/34/18/3094/4994778?login=false](https://academic.oup.com/bioinformatics/article/34/18/3094/4994778?login=false) + +Minimap2 is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. + +## Example Usage + +These example commands were copy/pasted directly from: [https://github.com/lh3/minimap2#getting-started](https://github.com/lh3/minimap2#getting-started) + +```bash +# use presets (no test data) +minimap2 -ax map-pb ref.fa pacbio.fq.gz > aln.sam # PacBio CLR genomic reads +minimap2 -ax map-ont ref.fa ont.fq.gz > aln.sam # Oxford Nanopore genomic reads +minimap2 -ax map-hifi ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.19 or later) +minimap2 -ax asm20 ref.fa pacbio-ccs.fq.gz > aln.sam # PacBio HiFi/CCS genomic reads (v2.18 or earlier) +minimap2 -ax sr ref.fa read1.fa read2.fa > aln.sam # short genomic paired-end reads +minimap2 -ax splice ref.fa rna-reads.fa > aln.sam # spliced long reads (strand unknown) +minimap2 -ax splice -uf -k14 ref.fa reads.fa > aln.sam # noisy Nanopore Direct RNA-seq +minimap2 -ax splice:hq -uf ref.fa query.fa > aln.sam # Final PacBio Iso-seq or traditional cDNA +minimap2 -ax splice --junc-bed anno.bed12 ref.fa query.fa > aln.sam # prioritize on annotated junctions +minimap2 -cx asm5 asm1.fa asm2.fa > aln.paf # intra-species asm-to-asm alignment +minimap2 -x ava-pb reads.fa reads.fa > overlaps.paf # PacBio read overlap +minimap2 -x ava-ont reads.fa reads.fa > overlaps.paf # Nanopore read overlap +``` diff --git a/mlst/2.23.0-2023-07/Dockerfile b/mlst/2.23.0-2023-07/Dockerfile new file mode 100644 index 000000000..180a0ac61 --- /dev/null +++ b/mlst/2.23.0-2023-07/Dockerfile @@ -0,0 +1,78 @@ +FROM ubuntu:jammy as app + +ARG MLST_VER="2.23.0" +ARG ANY2FASTA_VER="0.4.2" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="mlst" +LABEL software.version="${MLST_VER}" +LABEL description="Scan contig files against PubMLST typing schemes" +LABEL website="https://github.com/tseemann/mlst" +LABEL license="https://github.com/tseemann/mlst/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies via apt; cleanup apt garbage +# blast from ubuntu:jammy is v2.12.0 (as of 2023-07-05) +# deps needed specifically for db download scripts: libfile-which-perl, curl, parallel +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + libmoo-perl \ + liblist-moreutils-perl \ + libjson-perl \ + gzip \ + file \ + ncbi-blast+ \ + libfile-which-perl \ + curl \ + parallel \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get any2fasta; move binary to /usr/local/bin which is already in $PATH +RUN wget 
https://github.com/tseemann/any2fasta/archive/refs/tags/v${ANY2FASTA_VER}.tar.gz && \ + tar xzf v${ANY2FASTA_VER}.tar.gz && \ + rm v${ANY2FASTA_VER}.tar.gz && \ + chmod +x any2fasta-${ANY2FASTA_VER}/any2fasta && \ + mv -v any2fasta-${ANY2FASTA_VER}/any2fasta /usr/local/bin + +# get mlst +RUN wget https://github.com/tseemann/mlst/archive/v${MLST_VER}.tar.gz && \ + tar -xzf v${MLST_VER}.tar.gz && \ + rm v${MLST_VER}.tar.gz + +# set PATH and perl local settings +ENV PATH="${PATH}:/mlst-${MLST_VER}/bin:" \ + LC_ALL=C.UTF-8 + +# check dependencies and list available schemes +RUN mlst --check && mlst --list + +# update databases, following steps from here, but modified after much trial and error: https://github.com/tseemann/mlst#updating-the-database +# delete the old databases instead of renaming & saving the dir +RUN cd /mlst-${MLST_VER}/scripts && \ + mkdir db-downloaded-$(date -I) && \ + ./mlst-download_pub_mlst -d db-downloaded-$(date -I) | bash && \ + rm -rv /mlst-${MLST_VER}/db/pubmlst && \ + mv -v db-downloaded-$(date -I) /mlst-${MLST_VER}/db/pubmlst && \ + ./mlst-make_blast_db && \ + mlst --list + +WORKDIR /data + +# default command is to pull up help options; can be overridden of course +CMD ["mlst", "--help"] + +FROM app as test + +# copy in test script; run script +# test commands stolen shamelessly from https://github.com/tseemann/mlst/blob/master/.travis.yml +COPY mlst-tests.sh . +RUN bash mlst-tests.sh + +### Example command to run mlst (broken into two lines for readability) +### If you have an assembly named contigs.fasta in your PWD: +# $ docker run -v ${PWD}:/data staphb/mlst:latest \ +# mlst contigs.fasta >mlst-results.tsv \ No newline at end of file diff --git a/mlst/2.23.0-2023-07/README.md b/mlst/2.23.0-2023-07/README.md new file mode 100644 index 000000000..12e56e3bb --- /dev/null +++ b/mlst/2.23.0-2023-07/README.md @@ -0,0 +1,56 @@ +# mlst container + +Main tool : [mlst](https://github.com/tseemann/mlst) + +Additional tools: + +- ncbi-blast+ 2.9.0 +- perl 5.30.0 +- any2fasta 0.4.2 + +Full documentation: [https://github.com/tseemann/mlst](https://github.com/tseemann/mlst) + +Scan contig files against PubMLST typing schemes + +## pubMLST databases + +Available schemes found here: [https://github.com/tseemann/mlst/tree/master/db/pubmlst](https://github.com/tseemann/mlst/tree/master/db/pubmlst) + +This docker image was built in July of 2023 and databases were updated during the docker image build process. See Dockerfile for update commands. 
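For reference, the block below condenses those update steps into plain shell commands; it assumes the in-image install location `/mlst-2.23.0` and must be run with write access to that directory (e.g. as root during an image build).

```bash
# condensed from this image's Dockerfile; /mlst-2.23.0 is the install path used by the image
cd /mlst-2.23.0/scripts
mkdir db-downloaded-$(date -I)
./mlst-download_pub_mlst -d db-downloaded-$(date -I) | bash
rm -r /mlst-2.23.0/db/pubmlst
mv db-downloaded-$(date -I) /mlst-2.23.0/db/pubmlst
./mlst-make_blast_db
mlst --list
```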
+ +```bash +# sorted list of available mlst schemes +$ mlst --list 2>/dev/null | tr ' ' '\n' | sort | tr '\n' ' ' | fold -s -w80 +aactinomycetemcomitans abaumannii abaumannii_2 achromobacter aeromonas +aphagocytophilum arcobacter bbacilliformis bcc bcereus bfragilis bhenselae +blicheniformis_14 bordetella_3 borrelia bpseudomallei brachyspira brachyspira_2 +brachyspira_3 brachyspira_4 brachyspira_5 brucella bsubtilis bwashoensis +campylobacter campylobacter_nonjejuni campylobacter_nonjejuni_2 +campylobacter_nonjejuni_3 campylobacter_nonjejuni_4 campylobacter_nonjejuni_5 +campylobacter_nonjejuni_6 campylobacter_nonjejuni_7 campylobacter_nonjejuni_8 +campylobacter_nonjejuni_9 cbotulinum cdifficile cfreundii chlamydiales +cmaltaromaticum cperfringens cronobacter csepticum diphtheria_3 dnodosus +ecloacae ecoli ecoli_achtman_4 edwardsiella efaecalis efaecium fpsychrophilum +gallibacterium geotrichum hcinaedi helicobacter hinfluenzae hparasuis hsuis +kaerogenes kingella klebsiella koxytoca leptospira leptospira_2 leptospira_3 +liberibacter listeria_2 llactis_phage lsalivarius mabscessus magalactiae +manserisalpingitidis mbovis_2 mcanis mcaseolyticus mcatarrhalis_achtman_6 +mflocculare mgallisepticum mgallisepticum_2 mhaemolytica mhominis_3 +mhyopneumoniae mhyorhinis miowae mplutonius mpneumoniae msciuri msynoviae +mycobacteria_2 neisseria oralstrep orhinotracheale otsutsugamushi pacnes_3 +paeruginosa pdamselae pfluorescens pgingivalis plarvae pmultocida pmultocida_2 +ppentosaceus pputida psalmonis ranatipestifer rhodococcus sagalactiae saureus +sbsec scanis schromogenes sdysgalactiae senterica_achtman_2 sepidermidis +sgallolyticus shaemolyticus shewanella shominis sinorhizobium smaltophilia +spneumoniae spseudintermedius spyogenes ssuis staphlugdunensis sthermophilus +streptomyces streptothermophilus suberis szooepidemicus taylorella +tenacibaculum tpallidum ureaplasma vcholerae vcholerae_2 vibrio +vparahaemolyticus vtapetis vvulnificus wolbachia xfastidiosa +ypseudotuberculosis_achtman_3 yruckeri +``` + +## Example Usage + +```bash +mlst --nopath contigs.fasta > mlst-results.tsv +``` diff --git a/mlst/2.23.0-2023-07/mlst-tests.sh b/mlst/2.23.0-2023-07/mlst-tests.sh new file mode 100755 index 000000000..65a77824a --- /dev/null +++ b/mlst/2.23.0-2023-07/mlst-tests.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# test commands stolen shamelessly from https://github.com/tseemann/mlst/blob/master/.travis.yml + +# change into mlst root dir where it's installed in the docker image, e.g. /mlst-2.22.0 +cd /mlst-* + +# all below commands should exit code 0 +set -euxo pipefail + +mlst --check +mlst --version +mlst --help +! mlst --doesnotexist +! mlst -q /etc +! mlst -q /etc/shadow +mlst --list | grep saureus +mlst --longlist | grep saureus +mlst -q test/example.fna.gz | grep -w 184 +mlst -q test/example.gbk.gz | grep -w 184 +mlst -q test/novel.fasta.bz2 | grep -w spneumoniae +gzip -d -c test/example.fna.gz | mlst -q /dev/stdin | grep -w 184 +gzip -d -c test/example.gbk.gz | mlst -q /dev/stdin | grep -w 184 +mlst -q --legacy --scheme sepidermidis test/example.fna.gz test/example.gbk.gz +mlst -q --csv test/example.fna.gz | grep ',184,' +mlst -q test/example.gbk.gz --json out.json && grep 'sequence_type' out.json +! 
mlst -q --label double_trouble test/example.gbk.gz test/example.fna.gz +mlst -q --label GDAYMATE test/example.fna.gz | grep GDAYMATE +mlst -q --novel novel.fa test/novel.fasta.bz2 && grep 'recP' novel.fa +scripts/mlst-show_seqs -s efaecium -t 111 diff --git a/mlst/2.23.0-2023-08/Dockerfile b/mlst/2.23.0-2023-08/Dockerfile new file mode 100644 index 000000000..66890c269 --- /dev/null +++ b/mlst/2.23.0-2023-08/Dockerfile @@ -0,0 +1,81 @@ +FROM ubuntu:jammy as app + +ARG MLST_VER="2.23.0" +ARG ANY2FASTA_VER="0.4.2" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="mlst" +LABEL software.version="${MLST_VER}" +LABEL description="Scan contig files against PubMLST typing schemes" +LABEL website="https://github.com/tseemann/mlst" +LABEL license="https://github.com/tseemann/mlst/blob/master/LICENSE" +LABEL maintainer="Inês Mendes" +LABEL maintainer.email="ines.mendes@theiagen.com" + +# copy README for posterity +COPY README.md . + +# install dependencies via apt; cleanup apt garbage +# blast from ubuntu:jammy is v2.12.0 (as of 2023-07-05) +# deps needed specifically for db download scripts: libfile-which-perl, curl, parallel +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + libmoo-perl \ + liblist-moreutils-perl \ + libjson-perl \ + gzip \ + file \ + ncbi-blast+ \ + libfile-which-perl \ + curl \ + parallel \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get any2fasta; move binary to /usr/local/bin which is already in $PATH +RUN wget https://github.com/tseemann/any2fasta/archive/refs/tags/v${ANY2FASTA_VER}.tar.gz && \ + tar xzf v${ANY2FASTA_VER}.tar.gz && \ + rm v${ANY2FASTA_VER}.tar.gz && \ + chmod +x any2fasta-${ANY2FASTA_VER}/any2fasta && \ + mv -v any2fasta-${ANY2FASTA_VER}/any2fasta /usr/local/bin + +# get mlst +RUN wget https://github.com/tseemann/mlst/archive/v${MLST_VER}.tar.gz && \ + tar -xzf v${MLST_VER}.tar.gz && \ + rm v${MLST_VER}.tar.gz + +# set PATH and perl local settings +ENV PATH="${PATH}:/mlst-${MLST_VER}/bin:" \ + LC_ALL=C.UTF-8 + +# check dependencies and list available schemes +RUN mlst --check && mlst --list + +# update databases, following steps from here, but modified after much trial and error: https://github.com/tseemann/mlst#updating-the-database +# delete the old databases instead of renaming & saving the dir +RUN cd /mlst-${MLST_VER}/scripts && \ + mkdir db-downloaded-$(date -I) && \ + ./mlst-download_pub_mlst -d db-downloaded-$(date -I) | bash && \ + rm -rv /mlst-${MLST_VER}/db/pubmlst && \ + mv -v db-downloaded-$(date -I) /mlst-${MLST_VER}/db/pubmlst && \ + ./mlst-make_blast_db && \ + mlst --list + +WORKDIR /data + +# default command is to pull up help options; can be overridden of course +CMD ["mlst", "--help"] + +FROM app as test + +# copy in test script; run script +# test commands stolen shamelessly from https://github.com/tseemann/mlst/blob/master/.travis.yml +COPY mlst-tests.sh . 
+RUN bash mlst-tests.sh + +### Example command to run mlst (broken into two lines for readability) +### If you have an assembly named contigs.fasta in your PWD: +# $ docker run -v ${PWD}:/data staphb/mlst:latest \ +# mlst contigs.fasta >mlst-results.tsv \ No newline at end of file diff --git a/mlst/2.23.0-2023-08/README.md b/mlst/2.23.0-2023-08/README.md new file mode 100644 index 000000000..3d1b36aa5 --- /dev/null +++ b/mlst/2.23.0-2023-08/README.md @@ -0,0 +1,56 @@ +# mlst container + +Main tool : [mlst](https://github.com/tseemann/mlst) + +Additional tools: + +- ncbi-blast+ 2.9.0 +- perl 5.30.0 +- any2fasta 0.4.2 + +Full documentation: [https://github.com/tseemann/mlst](https://github.com/tseemann/mlst) + +Scan contig files against PubMLST typing schemes + +## pubMLST databases + +Available schemes found here: [https://github.com/tseemann/mlst/tree/master/db/pubmlst](https://github.com/tseemann/mlst/tree/master/db/pubmlst) + +This docker image was built in August of 2023 and databases were updated during the docker image build process. See Dockerfile for update commands. + +```bash +# sorted list of available mlst schemes +$ mlst --list 2>/dev/null | tr ' ' '\n' | sort | tr '\n' ' ' | fold -s -w80 +aactinomycetemcomitans abaumannii abaumannii_2 achromobacter aeromonas +aphagocytophilum arcobacter bbacilliformis bcc bcereus bfragilis bhenselae +blicheniformis_14 bordetella_3 borrelia bpseudomallei brachyspira brachyspira_2 +brachyspira_3 brachyspira_4 brachyspira_5 brucella bsubtilis bwashoensis +campylobacter campylobacter_nonjejuni campylobacter_nonjejuni_2 +campylobacter_nonjejuni_3 campylobacter_nonjejuni_4 campylobacter_nonjejuni_5 +campylobacter_nonjejuni_6 campylobacter_nonjejuni_7 campylobacter_nonjejuni_8 +campylobacter_nonjejuni_9 cbotulinum cdifficile cfreundii chlamydiales +cmaltaromaticum cperfringens cronobacter csepticum diphtheria_3 dnodosus +ecloacae ecoli ecoli_achtman_4 edwardsiella efaecalis efaecium fpsychrophilum +gallibacterium geotrichum hcinaedi helicobacter hinfluenzae hparasuis hsuis +kaerogenes kingella klebsiella koxytoca leptospira leptospira_2 leptospira_3 +liberibacter listeria_2 llactis_phage lsalivarius mabscessus magalactiae +manserisalpingitidis mbovis_2 mcanis mcaseolyticus mcatarrhalis_achtman_6 +mflocculare mgallisepticum mgallisepticum_2 mhaemolytica mhominis_3 +mhyopneumoniae mhyorhinis miowae mplutonius mpneumoniae msciuri msynoviae +mycobacteria_2 neisseria oralstrep orhinotracheale otsutsugamushi pacnes_3 +paeruginosa pdamselae pfluorescens pgingivalis plarvae pmultocida pmultocida_2 +ppentosaceus pputida psalmonis ranatipestifer rhodococcus sagalactiae saureus +sbsec scanis schromogenes sdysgalactiae senterica_achtman_2 sepidermidis +sgallolyticus shaemolyticus shewanella shominis sinorhizobium smaltophilia +spneumoniae spseudintermedius spyogenes ssuis staphlugdunensis sthermophilus +streptomyces streptothermophilus suberis szooepidemicus taylorella +tenacibaculum tpallidum ureaplasma vcholerae vcholerae_2 vibrio +vparahaemolyticus vtapetis vvulnificus wolbachia xfastidiosa +ypseudotuberculosis_achtman_3 yruckeri +``` + +## Example Usage + +```bash +mlst --nopath contigs.fasta > mlst-results.tsv +``` diff --git a/mlst/2.23.0-2023-08/mlst-tests.sh b/mlst/2.23.0-2023-08/mlst-tests.sh new file mode 100755 index 000000000..65a77824a --- /dev/null +++ b/mlst/2.23.0-2023-08/mlst-tests.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# test commands stolen shamelessly from https://github.com/tseemann/mlst/blob/master/.travis.yml + +# change into mlst root dir 
where it's installed in the docker image, e.g. /mlst-2.22.0 +cd /mlst-* + +# all below commands should exit code 0 +set -euxo pipefail + +mlst --check +mlst --version +mlst --help +! mlst --doesnotexist +! mlst -q /etc +! mlst -q /etc/shadow +mlst --list | grep saureus +mlst --longlist | grep saureus +mlst -q test/example.fna.gz | grep -w 184 +mlst -q test/example.gbk.gz | grep -w 184 +mlst -q test/novel.fasta.bz2 | grep -w spneumoniae +gzip -d -c test/example.fna.gz | mlst -q /dev/stdin | grep -w 184 +gzip -d -c test/example.gbk.gz | mlst -q /dev/stdin | grep -w 184 +mlst -q --legacy --scheme sepidermidis test/example.fna.gz test/example.gbk.gz +mlst -q --csv test/example.fna.gz | grep ',184,' +mlst -q test/example.gbk.gz --json out.json && grep 'sequence_type' out.json +! mlst -q --label double_trouble test/example.gbk.gz test/example.fna.gz +mlst -q --label GDAYMATE test/example.fna.gz | grep GDAYMATE +mlst -q --novel novel.fa test/novel.fasta.bz2 && grep 'recP' novel.fa +scripts/mlst-show_seqs -s efaecium -t 111 diff --git a/mummer/4.0.0-gnuplot/Dockerfile b/mummer/4.0.0-gnuplot/Dockerfile new file mode 100644 index 000000000..0dbaa21a7 --- /dev/null +++ b/mummer/4.0.0-gnuplot/Dockerfile @@ -0,0 +1,130 @@ +FROM ubuntu:focal as app + +# ARG sets environment variables during the build stage; they do not persist after the image is built +ARG MUMMER_VER="4.0.0" +ARG MASH_VER="2.3" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="mummer" +LABEL software.version=${MUMMER_VER} +LABEL description="MUMmer is a versatile alignment tool for DNA and protein sequences." +LABEL website="https://github.com/mummer4/mummer" +LABEL license="https://github.com/mummer4/mummer/blob/master/LICENSE.md" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" + +# to prevent tzdata from asking for a region during apt updates +ARG DEBIAN_FRONTEND=noninteractive + +# Install dependencies via apt; clean up apt garbage +# gnuplot version 5.2 patchlevel 8 +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + git \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + make \ + perl \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + curl \ + sed \ + gnuplot \ + build-essential \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install mummer; make /data for use as working directory +RUN wget https://github.com/mummer4/mummer/releases/download/v${MUMMER_VER}rc1/mummer-${MUMMER_VER}rc1.tar.gz && \ + tar -xvf mummer-${MUMMER_VER}rc1.tar.gz && \ + rm mummer-${MUMMER_VER}rc1.tar.gz && \ + cd mummer-${MUMMER_VER}rc1 && \ + ./configure && \ + make && \ + make install && \ + ldconfig && \ + cd .. 
&& \ + mkdir /data + +# install ncbi datasets tool (pre-compiled binary) version 14.17.0; place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# copy in list of NCBI accessions and species list +COPY RGDv2-NCBI-assembly-accessions.txt /RGDv2/RGDv2-NCBI-assembly-accessions.txt +COPY RGDv2-NCBI-assembly-accessions-and-species.txt /RGDv2/RGDv2-NCBI-assembly-accessions-and-species.txt + +# download RGD genomes using NCBI datasets tools; cleanup unneccessary files; +# move and re-name assemblies to include Species in the filename +# make fasta files readable to all users; create File Of FileNames for all 43 assemblies (to be used with fastANI) +RUN for ID in $(cat /RGDv2/RGDv2-NCBI-assembly-accessions.txt); do \ + SPECIES=$(grep "${ID}" /RGDv2/RGDv2-NCBI-assembly-accessions-and-species.txt | cut -f 1) && \ + echo "downloading $ID, species "${SPECIES}", from NCBI..."; \ + datasets download genome accession --filename ${ID}.zip ${ID}; \ + unzip -q ${ID}.zip; \ + rm ${ID}.zip; \ + mv -v ncbi_dataset/data/${ID}/${ID}*.fna /RGDv2/${ID}.${SPECIES}.fasta; \ + rm -rf ncbi_dataset/; \ + rm README.md; \ + done && \ + ls /RGDv2/*.fasta >/RGDv2/FOFN-RGDv2.txt && \ + chmod 664 /RGDv2/* + +# downloading mash binary +RUN wget https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar && \ + tar -xvf mash-Linux64-v${MASH_VER}.tar && \ + rm -rf mash-Linux64-v${MASH_VER}.tar + +# download Lee's ani-m script +RUN wget https://github.com/lskatz/ani-m/archive/refs/tags/v0.1.tar.gz && \ + tar xzf v0.1.tar.gz && \ + rm -v v0.1.tar.gz + +# LC_ALL for singularity compatibility; set PATH +ENV LC_ALL=C \ + PATH="${PATH}:/ani-m-0.1:/mash-Linux64-v${MASH_VER}" + +# set working directory +WORKDIR /data + +FROM app as test + +# test that mash is in the PATH +RUN mash --help && mash --version + +# test a few commands part of the mummer package +RUN nucmer -h && nucmer --version && \ + promer -h && promer --version && \ + wget https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pylori26695_Eslice.fasta && \ + wget https://mummer4.github.io/tutorial/exampleFiles/2.1/in/H_pyloriJ99_Eslice.fasta && \ + wget https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_Mslice.fasta && \ + wget https://mummer4.github.io/tutorial/exampleFiles/2.2/in/B_anthracis_contigs.fasta && \ + wget http://mummer.sourceforge.net/examples/data/H_pylori26695_Eslice.fasta && \ + wget http://mummer.sourceforge.net/examples/data/H_pyloriJ99_Eslice.fasta && \ + mummer -mum -b -c H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta > mummer.mums && \ + mummerplot -x "[0,275287]" -y "[0,265111]" --terminal png -postscript -p mummer mummer.mums && \ + nucmer -c 100 -p nucmer B_anthracis_Mslice.fasta B_anthracis_contigs.fasta && \ + show-snps -C nucmer.delta > nucmer.snps && \ + promer -p promer_100 -c 100 H_pylori26695_Eslice.fasta H_pyloriJ99_Eslice.fasta && \ + mummerplot -l nucmer.delta -p test_mummer_plot --png && \ + gnuplot test_mummer_plot.gp + +# testing ani-m.pl (which runs 'dnadiff' and 'mash' under the hood) +# comparing one of the reference genomes to another +RUN ani-m.pl --symmetric --mash-filter 0.9 \ + /RGDv2/GCA_014526935.1.Listeria_monocytogenes.fasta \ + /RGDv2/GCA_001466295.1.Listeria_monocytogenes.fasta \ + | tee test-output-ani-m.tsv diff --git a/mummer/4.0.0-gnuplot/README.md b/mummer/4.0.0-gnuplot/README.md new file mode 100755 index 000000000..5e5cbab34 --- /dev/null +++ 
b/mummer/4.0.0-gnuplot/README.md @@ -0,0 +1,62 @@ +# mummer + RGDv2 container + +Main tool : [mummer](https://github.com/mummer4/mummer) + +Additional tools: + +- `mash` v2.3 +- `ani-m.pl` v0.1 from [https://github.com/lskatz/ani-m](https://github.com/lskatz/ani-m) +- `gnuplot` v5.2 patchlevel 8 +- `datasets` 14.17.0 + +MUMmer is a versatile alignment tool for DNA and protein sequences. + +This docker image contains the **Reference Genome Database version 2 (RGDv2)** from the Enteric Diseases Laboratory Branch at the CDC. It contains the reference genomes of 43 enteric bacterial isolates that are used for species identification of bacterial isolate WGS data. This database is NOT meant to be comprehensive - it contains the genomes of enteric pathogens commonly sequenced by EDLB and some closely related species. + +The FASTA files for RGDv2 can be found within the directory `/RGDv2/` inside the docker image. + +The docker image also includes `ani-m.pl`, written by [Lee Katz](@lskatz) and Lori Gladney. This Perl script runs the `dnadiff` tool from the MUMmer package to perform Average Nucleotide Identity (ANI) analysis between genomes. + +## Example Usage + +Compare one genome against the 43 genomes in RGDv2, writing the output to a TSV file. + +This script automatically uses `mash` as a quick check of relatedness between the two genomes. If the two genomes have a mash distance of less than 0.9, the ANI calculation will be skipped. + +The `--symmetric` flag runs the ANI comparison in both directions: + + 1. the query versus the reference genome, followed by + 2. the reference versus the query genome. + + ANI values will likely be nearly identical between the two comparisons, but differences may occur in the `percentAligned` (percent of bases aligned) value depending on the sequences present in each genome.
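The basic in-container command is shown in the next code block. If you are instead launching the tool from the host with `docker run`, the sketch below shows an equivalent invocation; the `staphb/mummer:4.0.0-gnuplot` image tag is an assumption based on this repository's usual naming, and `bash -c` is used so that the `/RGDv2/*.fasta` glob is expanded inside the container rather than on the host.

```bash
# run ani-m.pl against all 43 RGDv2 genomes via docker run (image tag assumed; adjust as needed)
docker run --rm -v $PWD:/data -u $(id -u):$(id -g) staphb/mummer:4.0.0-gnuplot \
  bash -c "ani-m.pl --symmetric --mash-filter 0.9 my-assembly-of-interest.fasta /RGDv2/*.fasta" \
  > output.tsv
```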
+ +```bash +ani-m.pl --symmetric --mash-filter 0.9 my-assembly-of-interest.fasta /RGDv2/*.fasta | tee output.tsv +``` + +Example output TSV when comparing a Salmonella enterica serotype Kentucky to RGDv2 using the above command: + +```bash +$ column -t output-with-mash-and-symmetric-flag.tsv +reference query ANI stdev percentAligned +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_000026225.1.Escherichia_fergusonii.fasta 84.7423 3.93 33.1834 +GCA_000026225.1.Escherichia_fergusonii.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 84.7485 3.97 35.6278 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_000512125.1.Escherichia_albertii.fasta 84.8404 3.72 32.5365 +GCA_000512125.1.Escherichia_albertii.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 84.8391 3.72 34.8883 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_002741475.1.Escherichia_coli.fasta 84.7711 3.76 35.5718 +GCA_002741475.1.Escherichia_coli.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 84.7762 3.76 35.8077 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_009665195.1.Vibrio_mimicus.fasta 89.4273 6.41 0.7529 +GCA_009665195.1.Vibrio_mimicus.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 89.4215 6.40 1.5694 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_009665515.2.Vibrio_cholerae.fasta 93.7723 7.47 2.4096 +GCA_009665515.2.Vibrio_cholerae.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 93.7724 7.52 3.0205 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_011388235.1.Salmonella_enterica.fasta 98.7341 3.42 88.7485 +GCA_011388235.1.Salmonella_enterica.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 98.7335 2.79 94.5892 +GCA_011245895.1_PDT000672941.1_genomic.fna GCA_013588055.1.Salmonella_bongori.fasta 90.3425 2.99 73.3307 +GCA_013588055.1.Salmonella_bongori.fasta GCA_011245895.1_PDT000672941.1_genomic.fna 90.3405 2.95 81.9293 +``` + +Better documentation for Mummer can be found at [https://github.com/mummer4/mummer](https://github.com/mummer4/mummer) + +A tutorial can be found at [https://mummer4.github.io/tutorial/tutorial.html](https://mummer4.github.io/tutorial/tutorial.html) + +And the manual can be found at [http://mummer.sourceforge.net/manual/](http://mummer.sourceforge.net/manual/) diff --git a/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions-and-species.txt b/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions-and-species.txt new file mode 100644 index 000000000..7d1763100 --- /dev/null +++ b/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions-and-species.txt @@ -0,0 +1,43 @@ +Campylobacter_coli GCA_008011635.1 +Campylobacter_fetus GCA_000015085.1 +Campylobacter_fetus GCA_000495505.1 +Campylobacter_fetus GCA_000759515.1 +Campylobacter_hyointestinalis GCA_001643955.1 +Campylobacter_jejuni GCA_000017485.1 +Campylobacter_jejuni GCA_008011525.1 +Campylobacter_lari GCA_000019205.1 +Campylobacter_lari GCA_000816225.1 +Campylobacter_upsaliensis GCA_008011615.1 +Escherichia_albertii GCA_000512125.1 +Escherichia_coli GCA_002741475.1 +Escherichia_fergusonii GCA_000026225.1 +Grimontia_hollisae GCA_009665295.1 +Listeria_innocua GCA_017363615.1 +Listeria_innocua GCA_017363655.1 +Listeria_ivanovii GCA_000252975.1 +Listeria_marthii GCA_017363645.1 +Listeria_monocytogenes GCA_001466295.1 +Listeria_monocytogenes GCA_013625895.1 +Listeria_monocytogenes GCA_013625995.1 +Listeria_monocytogenes GCA_013626145.1 +Listeria_monocytogenes GCA_014526935.1 +Listeria_seeligeri GCA_017363605.1 +Listeria_welshimeri GCA_002489005.1 +Photobacterium_damselae GCA_009665375.1 +Salmonella_bongori GCA_013588055.1 +Salmonella_enterica GCA_011388235.1 
+Vibrio_alginolyticus GCA_009665435.1 +Vibrio_cholerae GCA_009665515.2 +Vibrio_cidicii GCA_009665415.1 +Vibrio_cincinnatiensis GCA_009665395.1 +Vibrio_fluvialis GCA_009665355.1 +Vibrio_furnissii GCA_009665335.1 +Vibrio_harveyi GCA_009665315.1 +Vibrio_metoecus GCA_009665255.1 +Vibrio_metoecus GCA_009665275.1 +Vibrio_metschnikovii GCA_009665235.1 +Vibrio_mimicus GCA_009665195.1 +Vibrio_navarrensis GCA_009665215.1 +Vibrio_parahaemolyticus GCA_009665495.1 +Vibrio_vulnificus GCA_009665455.1 +Vibrio_vulnificus GCA_009665475.1 \ No newline at end of file diff --git a/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions.txt b/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions.txt new file mode 100644 index 000000000..bfde7610a --- /dev/null +++ b/mummer/4.0.0-gnuplot/RGDv2-NCBI-assembly-accessions.txt @@ -0,0 +1,43 @@ +GCA_008011635.1 +GCA_000015085.1 +GCA_000495505.1 +GCA_000759515.1 +GCA_001643955.1 +GCA_000017485.1 +GCA_008011525.1 +GCA_000816225.1 +GCA_000019205.1 +GCA_008011615.1 +GCA_000512125.1 +GCA_002741475.1 +GCA_000026225.1 +GCA_009665295.1 +GCA_017363655.1 +GCA_017363615.1 +GCA_000252975.1 +GCA_017363645.1 +GCA_001466295.1 +GCA_014526935.1 +GCA_013626145.1 +GCA_013625995.1 +GCA_013625895.1 +GCA_017363605.1 +GCA_002489005.1 +GCA_009665375.1 +GCA_013588055.1 +GCA_011388235.1 +GCA_009665435.1 +GCA_009665515.2 +GCA_009665415.1 +GCA_009665395.1 +GCA_009665355.1 +GCA_009665335.1 +GCA_009665315.1 +GCA_009665275.1 +GCA_009665255.1 +GCA_009665235.1 +GCA_009665195.1 +GCA_009665215.1 +GCA_009665495.1 +GCA_009665475.1 +GCA_009665455.1 \ No newline at end of file diff --git a/mummer/4.0.0-gnuplot/RGDv2-metadata.tsv b/mummer/4.0.0-gnuplot/RGDv2-metadata.tsv new file mode 100644 index 000000000..233188b46 --- /dev/null +++ b/mummer/4.0.0-gnuplot/RGDv2-metadata.tsv @@ -0,0 +1,44 @@ +Species BioSample NCBI Assembly Strain ID +Campylobacter coli SAMN12323645 GCA_008011635.1 2013D-9606 +Campylobacter fetus SAMN02604050 GCA_000015085.1 82-40 +Campylobacter fetus SAMN02604287 GCA_000495505.1 03-427 +Campylobacter fetus SAMN02870596 GCA_000759515.1 97-608 +Campylobacter hyointestinalis SAMN03737973 GCA_001643955.1 LMG 9260 +Campylobacter jejuni SAMN02604056 GCA_000017485.1 NC_009707 +Campylobacter jejuni SAMN12323651 GCA_008011525.1 D0133 +Campylobacter lari SAMN02604025 GCA_000019205.1 RM2100 +Campylobacter lari SAMN03248542 GCA_000816225.1 LMG 11760 +Campylobacter upsaliensis SAMN12323647 GCA_008011615.1 D1914 +Escherichia albertii SAMN02641387 GCA_000512125.1 KF1 +Escherichia coli SAMN07731009 GCA_002741475.1 B4103-1 +Escherichia fergusonii SAMEA3138228 GCA_000026225.1 ATCC_35469 +Grimontia hollisae SAMN10812938 GCA_009665295.1 2013V-1029 +Listeria innocua SAMN10869157 GCA_017363615.1 2010L-2059 +Listeria innocua SAMN10869156 GCA_017363655.1 H0996 L +Listeria ivanovii SAMEA3138408 GCA_000252975.1 PAM55 +Listeria marthii SAMN10869158 GCA_017363645.1 FSL S4-696 +Listeria monocytogenes SAMN02944835 GCA_001466295.1 G4599 +Listeria monocytogenes SAMN02847829 GCA_013625895.1 2014L-6256 +Listeria monocytogenes SAMN03067768 GCA_013625995.1 J0099 +Listeria monocytogenes SAMN02950479 GCA_013626145.1 2014L-6393 +Listeria monocytogenes SAMN03761815 GCA_014526935.1 2011L-2626 +Listeria seeligeri SAMN10869159 GCA_017363605.1 F5761 +Listeria welshimeri SAMN03462185 GCA_002489005.1 SLCC5334 +Photobacterium damselae SAMN10702680 GCA_009665375.1 2012V-1072 +Salmonella bongori SAMN13207407 GCA_013588055.1 04-0440 +Salmonella enterica SAMN08167480 GCA_011388235.1 2010K-2370 +Vibrio alginolyticus SAMN10702675 GCA_009665435.1 
2013V-1302 +Vibrio cholerae SAMN10863496 GCA_009665515.2 2010EL-1786 +Vibrio cidicii SAMN10863497 GCA_009665415.1 2423-01 +Vibrio cincinnatiensis SAMN10812936 GCA_009665395.1 2409-02 +Vibrio fluvialis SAMN10812937 GCA_009665355.1 2013V-1049 +Vibrio furnissii SAMN10702681 GCA_009665335.1 2419-04 +Vibrio harveyi SAMN10702676 GCA_009665315.1 2011V-1164 +Vibrio metoecus SAMN10702677 GCA_009665255.1 2011V-1169 +Vibrio metoecus SAMN10863498 GCA_009665275.1 08-2459 +Vibrio metschnikovii SAMN10702671 GCA_009665235.1 2012V-1020 +Vibrio mimicus SAMN10812939 GCA_009665195.1 2011V-1073 +Vibrio navarrensis SAMN10863499 GCA_009665215.1 08-2462 +Vibrio parahaemolyticus SAMN10702672 GCA_009665495.1 2012AW-0154 +Vibrio vulnificus SAMN10702674 GCA_009665455.1 2009V-1035 +Vibrio vulnificus SAMN10702673 GCA_009665475.1 2142-77 \ No newline at end of file diff --git a/mykrobe/0.12.1-genotyphi-2.0/Dockerfile b/mykrobe/0.12.1-genotyphi-2.0/Dockerfile new file mode 100644 index 000000000..e6cb4254a --- /dev/null +++ b/mykrobe/0.12.1-genotyphi-2.0/Dockerfile @@ -0,0 +1,160 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +ARG MYKROBE_VER="0.12.1" +ARG SONNEITYPING_VER="20210201" +ARG GENOTYPHI_VER="2.0" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="Mykrobe & Genotyphi & Sonneityping" +LABEL software.version="${MYKROBE_VER}" +LABEL description="Conda environment for Mykrobe, particularly for Genotyphi & Sonneityping" +LABEL website="https://github.com/Mykrobe-tools/mykrobe" +LABEL license1="MIT" +LABEL license1.url="https://github.com/Mykrobe-tools/mykrobe/blob/master/LICENSE" +LABEL website2="https://github.com/katholt/genotyphi" +LABEL license2="GNU General Public License v3.0" +LABEL license2.url="https://github.com/katholt/genotyphi/blob/main/LICENSE" +LABEL website3="https://github.com/katholt/sonneityping/" +LABEL license3="GNU General Public License v3.0" +LABEL license3.url="https://github.com/katholt/sonneityping/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + jq && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the genotyphi code; make /data +RUN wget https://github.com/typhoidgenomics/genotyphi/archive/refs/tags/v${GENOTYPHI_VER}.tar.gz && \ + tar -xzf v${GENOTYPHI_VER}.tar.gz && \ + rm -v v${GENOTYPHI_VER}.tar.gz && \ + mv -v genotyphi-${GENOTYPHI_VER} /genotyphi && \ + chmod +x /genotyphi/parse_typhi_mykrobe.py && \ + mkdir -v /data + +# Get the sonneityping code +RUN wget https://github.com/katholt/sonneityping/archive/refs/tags/v${SONNEITYPING_VER}.tar.gz && \ + tar -xzf v${SONNEITYPING_VER}.tar.gz && \ + rm -vf v${SONNEITYPING_VER}.tar.gz && \ + mv -v sonneityping-${SONNEITYPING_VER}/ /sonneityping/ && \ + chmod +x /sonneityping/parse_mykrobe_predict.py + +# set the PATH and LC_ALL for singularity compatibility +ENV PATH="${PATH}:/opt/conda/envs/mykrobe/bin/:/genotyphi:/sonneityping" \ + LC_ALL=C.UTF-8 + +# create the conda environment, install mykrobe via bioconda package; cleanup conda garbage +RUN micromamba create -n mykrobe -y -c conda-forge -c bioconda -c defaults \ + 
mykrobe=${MYKROBE_VER} \ + python \ + pip \ + pandas && \ + micromamba clean -a -y + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="mykrobe" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# get the latest databases (AKA "panels") +RUN mykrobe panels update_metadata && \ + mykrobe panels update_species all && \ + mykrobe panels describe + +# set final working directory as /data +WORKDIR /data + +# new stage for downloading test FASTQs +FROM ncbi/sra-tools:3.0.1 as fastq + +# set working directory to /test +WORKDIR /test + +# SRR3277297 is Salmonella enterica serovar Typhi: https://www.ncbi.nlm.nih.gov/sra/?term=SRR3277297 +# SRR3441855 is Shigella sonnei from CDPH: https://www.ncbi.nlm.nih.gov/sra/?term=SRR3441855 +RUN fasterq-dump --progress --split-files --threads 2 -A SRR3277297 --skip-technical && \ + fasterq-dump --progress --split-files --threads 2 -A SRR3441855 --skip-technical && \ + gzip *.fastq + +# new base for testing +FROM app as test + +# set working directory to /test +WORKDIR /test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="mykrobe" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# so I can use 'column' to make outputs pretty & nice to read +RUN apt-get update && apt-get install -y --no-install-recommends bsdmainutils + +# test with TB FASTQs as mentioned in mykrobe instructions +RUN wget -O test_reads.fq.gz https://ndownloader.figshare.com/files/21059229 && \ + mykrobe predict -t 2 -s SAMPLE -S tb -o out.json --format json -i test_reads.fq.gz && \ + cat out.json + +# Copy the test FASTQs from "fastq" stage to /test +COPY --from=fastq /test/*fastq.gz /test/ + +# so that the below commands are run with /bin/bash shell and not /bin/sh - needed for bash-specific tricks below +SHELL ["/bin/bash", "-c"] + +# test mykrobe and genotyphi with Salmonella enterica serovar Typhi FASTQs +RUN mykrobe predict -t 2 --sample SRR3277297 --species typhi --format json --out SRR3277297.genotyphi.json --seq SRR3277297_1.fastq.gz SRR3277297_2.fastq.gz && \ +python /genotyphi/parse_typhi_mykrobe.py --jsons SRR3277297.genotyphi.json --prefix mykrobe_out && \ +column -t -s $'\t' mykrobe_out_predictResults.tsv + +# test mykrobe and sonneityping with Shigella sonnei FASTQs +RUN mykrobe predict --sample Ssonnei --species sonnei --format json_and_csv --out Ssonnei --seq SRR3441855_1.fastq.gz SRR3441855_2.fastq.gz && \ + python /sonneityping/parse_mykrobe_predict.py --jsons Ssonnei.json --alleles /sonneityping/alleles.txt --prefix sonneityping && \ + column -t -s $'\t' sonneityping_predictResults.tsv + +# final version check +RUN mykrobe --version + +### OUTPUT FROM mykrobe panels describe run on 2023-03-30: ### +# Species summary: + +# Species Update_available Installed_version Installed_url Latest_version Latest_url +# sonnei no 20210201 https://ndownloader.figshare.com/files/26274424 20210201 https://ndownloader.figshare.com/files/26274424 +# staph no 20201001 https://ndownloader.figshare.com/files/24914930 20201001 https://ndownloader.figshare.com/files/24914930 +# tb no 20220705 https://ndownloader.figshare.com/files/36197349 20220705 https://ndownloader.figshare.com/files/36197349 +# typhi no 20221208 https://ndownloader.figshare.com/files/38478086 20221208 https://ndownloader.figshare.com/files/38478086 + +# sonnei default panel: 20210201 +# sonnei panels: +# Panel Reference Description +# 20201012 NC_016822.1 Genotyping panel for Shigella sonnei based on scheme defined in Hawkey 2020, and panel for variants in the quinolone resistance determining 
regions in gyrA and parC +# 20210201 NC_016822.1 Genotyping panel for Shigella sonnei based on scheme defined in Hawkey 2020, and panel for variants in the quinolone resistance determining regions in gyrA and parC (same as 20201012, but with lineage3.7.30 added) + +# staph default panel: 20170217 +# staph panels: +# Panel Reference Description +# 20170217 BX571856.1 AMR panel described in Bradley, P et al. Rapid antibiotic-resistance predictions from genome sequence data for Staphylococcus aureus and Mycobacterium tuberculosis. Nat. Commun. 6:10063 doi: 10.1038/ncomms10063 (2015) + +# tb default panel: 202206 +# tb panels: +# Panel Reference Description +# 201901 NC_000962.3 AMR panel based on first line drugs from NEJM-2018 variants (DOI 10.1056/NEJMoa1800474), and second line drugs from Walker 2015 panel +# 202010 NC_000962.3 AMR panel based on first line drugs from NEJM-2018 variants (DOI 10.1056/NEJMoa1800474), second line drugs from Walker 2015 panel, and lineage scheme from Chiner-Oms 2020 +# 202206 NC_000962.3 AMR panel '202010' combined with the WHO 2021 catalogue (doi:10/h298 and doi:10/h299), and lineage scheme from Chiner-Oms 2020 +# bradley-2015 NC_000962.3 AMR panel described in Bradley, P et al. Rapid antibiotic-resistance predictions from genome sequence data for Staphylococcus aureus and Mycobacterium tuberculosis. Nat. Commun. 6:10063 doi: 10.1038/ncomms10063 (2015) +# walker-2015 NC_000962.3 AMR panel described in Walker, Timothy M et al. Whole-genome sequencing for prediction of Mycobacterium tuberculosis drug susceptibility and resistance: a retrospective cohort study. The Lancet Infectious Diseases , Volume 15 , Issue 10 , 1193 - 1202 + +# typhi default panel: 20221207 +# typhi panels: +# Panel Reference Description +# 20210323 AL513382.1 GenoTyphi genotyping scheme and AMR calling using Wong et al 2016 (https://doi.org/10.1038/ncomms12827) and updates as described in Dyson & Holt 2021 (https://doi.org/10.1101/2021.04.28.441766) +# 20221207 AL513382.1 v20220712 of GenoTyphi genotyping scheme and AMR calling for Salmonella Typhi, using Wong et al 2016 (https://doi.org/10.1038/ncomms12827) and updates as described in Dyson & Holt 2021 (https://doi.org/10.1101/2021.04.28.441766), Sikorski et al 2022 (https://doi.org/10.1128/mbio.01920-22) and the technical report at https://doi.org/10.5281/zenodo.7407985. diff --git a/mykrobe/0.12.1-genotyphi-2.0/README.md b/mykrobe/0.12.1-genotyphi-2.0/README.md new file mode 100644 index 000000000..a8a607869 --- /dev/null +++ b/mykrobe/0.12.1-genotyphi-2.0/README.md @@ -0,0 +1,101 @@ +# Mykrobe (and genotyphi and sonneityping) docker image + +Main tool : [Mykrobe](https://github.com/Mykrobe-tools/mykrobe) + +Additional tools: + +- [genotyphi](https://github.com/typhoidgenomics/genotyphi) 2.0 +- [sonneityping](https://github.com/katholt/sonneityping) v20210201 +- python 3.10.10 +- biopython 1.81 +- pandas 1.5.3 + +Full documentation: [https://github.com/Mykrobe-tools/mykrobe/wiki](https://github.com/Mykrobe-tools/mykrobe/wiki) + +This docker image was created with the intention of running `genotyphi` on FASTQ files from Salmonella Typhi isolates using the Mykrobe implementation of `genotyphi`. 
However, this docker image contains the full (bio)conda environment for `mykrobe`, and thus can be used for other organisms as well (Mycobacterium tuberculosis, Staphylococcus aureus, Shigella sonnei) + +The docker image was also created with the intention of running Mykrobe on Shigella sonnei FASTQ files, and therefore includes the python script from `sonneityping` which can be used to parse the outputs of `mykrobe`. + +## Included Mykrobe databases (AKA panels) + +The docker image for Mykrobe was built on 2023-04-17, and thus contains the most recent database that was available at the time. + +```bash +### OUTPUT FROM mykrobe panels describe run on 2023-03-30: ### +$ mykrobe panels describe +# Species summary: + +# Species Update_available Installed_version Installed_url Latest_version Latest_url +# sonnei no 20210201 https://ndownloader.figshare.com/files/26274424 20210201 https://ndownloader.figshare.com/files/26274424 +# staph no 20201001 https://ndownloader.figshare.com/files/24914930 20201001 https://ndownloader.figshare.com/files/24914930 +# tb no 20220705 https://ndownloader.figshare.com/files/36197349 20220705 https://ndownloader.figshare.com/files/36197349 +# typhi no 20221208 https://ndownloader.figshare.com/files/38478086 20221208 https://ndownloader.figshare.com/files/38478086 + +# sonnei default panel: 20210201 +# sonnei panels: +# Panel Reference Description +# 20201012 NC_016822.1 Genotyping panel for Shigella sonnei based on scheme defined in Hawkey 2020, and panel for variants in the quinolone resistance determining regions in gyrA and parC +# 20210201 NC_016822.1 Genotyping panel for Shigella sonnei based on scheme defined in Hawkey 2020, and panel for variants in the quinolone resistance determining regions in gyrA and parC (same as 20201012, but with lineage3.7.30 added) + +# staph default panel: 20170217 +# staph panels: +# Panel Reference Description +# 20170217 BX571856.1 AMR panel described in Bradley, P et al. Rapid antibiotic-resistance predictions from genome sequence data for Staphylococcus aureus and Mycobacterium tuberculosis. Nat. Commun. 6:10063 doi: 10.1038/ncomms10063 (2015) + +# tb default panel: 202206 +# tb panels: +# Panel Reference Description +# 201901 NC_000962.3 AMR panel based on first line drugs from NEJM-2018 variants (DOI 10.1056/NEJMoa1800474), and second line drugs from Walker 2015 panel +# 202010 NC_000962.3 AMR panel based on first line drugs from NEJM-2018 variants (DOI 10.1056/NEJMoa1800474), second line drugs from Walker 2015 panel, and lineage scheme from Chiner-Oms 2020 +# 202206 NC_000962.3 AMR panel '202010' combined with the WHO 2021 catalogue (doi:10/h298 and doi:10/h299), and lineage scheme from Chiner-Oms 2020 +# bradley-2015 NC_000962.3 AMR panel described in Bradley, P et al. Rapid antibiotic-resistance predictions from genome sequence data for Staphylococcus aureus and Mycobacterium tuberculosis. Nat. Commun. 6:10063 doi: 10.1038/ncomms10063 (2015) +# walker-2015 NC_000962.3 AMR panel described in Walker, Timothy M et al. Whole-genome sequencing for prediction of Mycobacterium tuberculosis drug susceptibility and resistance: a retrospective cohort study. 
The Lancet Infectious Diseases , Volume 15 , Issue 10 , 1193 - 1202 + +# typhi default panel: 20221207 +# typhi panels: +# Panel Reference Description +# 20210323 AL513382.1 GenoTyphi genotyping scheme and AMR calling using Wong et al 2016 (https://doi.org/10.1038/ncomms12827) and updates as described in Dyson & Holt 2021 (https://doi.org/10.1101/2021.04.28.441766) +# 20221207 AL513382.1 v20220712 of GenoTyphi genotyping scheme and AMR calling for Salmonella Typhi, using Wong et al 2016 (https://doi.org/10.1038/ncomms12827) and updates as described in Dyson & Holt 2021 (https://doi.org/10.1101/2021.04.28.441766), Sikorski et al 2022 (https://doi.org/10.1128/mbio.01920-22) and the technical report at https://doi.org/10.5281/zenodo.7407985. +``` + +## Example Usage + +Follow the directions here for running the `mykrobe` implementation of `genotyphi`: https://github.com/katholt/genotyphi#running-mykrobe + +### Salmonella serovar Typhi/genotyphi example + +```bash +# launch the container interactively +$ docker run --rm -v $PWD:/data -u $(id -u):$(id -g) -it staphb/mykrobe:latest + +# run genotyphi/mykrobe on Illumina reads from a Salmonella Typhi isolate +$ mykrobe predict --sample SRR3277297 --species typhi --format json --out SRR3277297.genotyphi.json --seq SRR3277297_1.fastq.gz SRR3277297_2.fastq.gz + +# parse mykrobe output with helper script from genotyphi; generate TSV +$ python /genotyphi/parse_typhi_mykrobe.py --jsons SRR3277297.genotyphi.json --prefix mykrobe_out +SRR3277297 + +# print out results TSV +$ column -t -s $'\t' mykrobe_out_predictResults.tsv +genome species spp_percent final genotype confidence acrB_R717L acrB_R717Q num QRDR lowest support for genotype marker poorly supported markers max support for additional markers additional markers node support parC_S80R parC_S80I parC_E84G parC_E84K gyrA_S83F gyrA_S83Y gyrA_D87G gyrA_D87N gyrA_D87V gyrA_D87Y gyrB_S464F gyrB_S464Y catA1 dfrA7 sul1 sul2 strA strB mphA TEM1 qnrS1 ermB CTXM15 tetB tetA dfrA5 dfrA15 IncFIAHI1 IncHI1A IncHI1BR27 IncHI1_ST6 IncY z66 +SRR3277297 typhi 91.715 2.3.1 strong 0 0 1 1 (1; 0/69); 2 (1; 0/102); 2.2 (1; 134/0); 2.3 (1; 110/0); 2.3.2 (1; 82/0); 2.3.1 (1; 106/0) 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +``` + +### Shigella sonnei/sonneityping example + +```bash +# launch the container interactively +$ docker run --rm -v $PWD:/data -u $(id -u):$(id -g) -it staphb/mykrobe:0.12.1 + +# run mykrobe on Illumina reads from a Shigella sonnei isolate +$ mykrobe predict --sample Ssonnei --species sonnei --format json_and_csv --out Ssonnei --seq shigella_sonnei_R1.fastq.gz shigella_sonnei_R2.fastq.gz + +# parse mykrobe output JSON with helper script from sonneityping; must use alleles.txt file found inside docker container +# generate TSV output +$ python /sonneityping/parse_mykrobe_predict.py --jsons Ssonnei.json --alleles /sonneityping/alleles.txt --prefix sonneityping + +# print out results TSV +$ column -t -s $'\t' sonneityping_predictResults.tsv +genome species final genotype name confidence num QRDR parC_S80I gyrA_S83L gyrA_S83A gyrA_D87G gyrA_D87N gyrA_D87Y lowest support for genotype marker poorly supported markers max support for additional markers additional markers node support +Ssonnei S.
sonnei 3.6.1.1.2 CipR.MSM5 strong 3 1 1 0 1 0 0 lineage3 (1; 65/0); lineage3.6 (1; 94/0); lineage3.6.1 (1; 59/0); lineage3.6.1.1 (1; 74/0); lineage3.6.1.1.2 (1; 65/0) +``` diff --git a/nanoplot/1.41.6/Dockerfile b/nanoplot/1.41.6/Dockerfile new file mode 100644 index 000000000..f4de5929f --- /dev/null +++ b/nanoplot/1.41.6/Dockerfile @@ -0,0 +1,56 @@ +FROM ubuntu:focal as app + +# for easy upgrade later. ARG variables only persist during image build time. +ARG NANOPLOT_VER="1.41.6" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="nanoplot" +LABEL software.version="${NANOPLOT_VER}" +LABEL description="Plotting suite for Oxford Nanopore sequencing data and alignments" +LABEL website="https://github.com/wdecoster/NanoPlot" +LABEL license="https://github.com/wdecoster/NanoPlot/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" +LABEL maintainer2="Kate Prussing" +LABEL maintainer2.email="catharine.prussing@health.ny.gov" + +# install dependencies via apt; cleanup apt garbage; set locale to en_US.UTF-8 +RUN apt-get update && apt-get install -y zlib1g-dev \ + bzip2 \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + libncurses5-dev \ + libssl-dev \ + python3 \ + python3-pip \ + python3-setuptools \ + locales && \ + locale-gen en_US.UTF-8 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# for singularity compatibility +ENV LC_ALL=C + +# install NanoPlot via pypi using pip3; make /data directory +RUN pip3 install matplotlib psutil requests NanoPlot==${NANOPLOT_VER} && \ + mkdir /data + +WORKDIR /data + +# testing layer +FROM app as test + +# print help options and version +RUN NanoPlot --help && NanoPlot --version + +# install wget for downloading test data +RUN apt-get update && apt-get install -y wget + +# download ONT data for a Salmonella isolate, run NanoPlot on the FASTQ file +# Go here for more info: https://www.ebi.ac.uk/ena/browser/view/SRR19787768?show=reads +RUN wget https://ftp.sra.ebi.ac.uk/vol1/fastq/SRR197/068/SRR19787768/SRR19787768_1.fastq.gz && \ + NanoPlot --fastq SRR19787768_1.fastq.gz --log --N50 -o /data/nanoplot-test && \ + ls -lh /data/nanoplot-test && \ + cat /data/nanoplot-test/NanoStats.txt diff --git a/nanoplot/1.41.6/README.md b/nanoplot/1.41.6/README.md new file mode 100644 index 000000000..110b10c2e --- /dev/null +++ b/nanoplot/1.41.6/README.md @@ -0,0 +1,19 @@ +# NanoPlot container + +Main tool : [NanoPlot](https://github.com/wdecoster/NanoPlot) + +Additional tools: + +- nanomath 1.3.0 +- nanoget 1.19.3 + +Full documentation: [https://github.com/wdecoster/NanoPlot](https://github.com/wdecoster/NanoPlot) + +Plotting tool for long read sequencing data and alignments. 
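+In addition to the `sequencing_summary.txt` workflow shown under Example Usage below, NanoPlot can also read FASTQ (or BAM) input directly. A minimal sketch, mirroring the FASTQ test in the Dockerfile above (`reads.fastq.gz` is a placeholder file name):
+
+```bash
+# plot read length and quality distributions from a long-read FASTQ file
+# reads.fastq.gz is a placeholder; the flags match the Dockerfile test stage above
+NanoPlot --fastq reads.fastq.gz --log --N50 -t 4 -o nanoplot-fastq-out
+```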
+ +## Example Usage + +```bash +# pass in the sequencing_summary.txt file produced by Guppy/Dorado basecaller +NanoPlot --summary sequencing_summary.txt --N50 --loglength -t 4 -o nanoplot-out +``` diff --git a/ncbi-amrfinderplus/3.11.11-2023-04-17.1/Dockerfile b/ncbi-amrfinderplus/3.11.11-2023-04-17.1/Dockerfile new file mode 100644 index 000000000..cf4f4c091 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.11-2023-04-17.1/Dockerfile @@ -0,0 +1,93 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.11" +ARG AMRFINDER_DB_VER="2023-04-17.1" +ARG BLAST_VER="2.13.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.13.0 +# see here for reason why I'm manualy installing 2.13.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + make \ + g++ \ + hmmer \ + procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:\ +/ncbi-blast-${BLAST_VER}+/bin:\ +$PATH" \ +LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ +wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ +amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ +ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && 
\ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --threads 1 --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv diff --git a/ncbi-amrfinderplus/3.11.11-2023-04-17.1/README.md b/ncbi-amrfinderplus/3.11.11-2023-04-17.1/README.md new file mode 100644 index 000000000..202e269e4 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.11-2023-04-17.1/README.md @@ -0,0 +1,66 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.13.0 + +## Database information + +The database included at time of docker image build is **`2023-04-17.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-04-17.1/changes.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.11-2023-04-17.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.11 +Database directory: '/amrfinder/data/2023-04-17.1' +Database version: 2023-04-17.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Citrobacter_freundii, Clostridioides_difficile, Enterobacter_cloacae, Enterococcus_faecalis, Enterococcus_faecium, +Escherichia, Klebsiella_aerogenes, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, Neisseria_meningitidis, +Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, Staphylococcus_pseudintermedius, +Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 
100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA diff --git a/ncbi-amrfinderplus/3.11.14-2023-04-17.1/Dockerfile b/ncbi-amrfinderplus/3.11.14-2023-04-17.1/Dockerfile new file mode 100644 index 000000000..c806589bb --- /dev/null +++ b/ncbi-amrfinderplus/3.11.14-2023-04-17.1/Dockerfile @@ -0,0 +1,99 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.14" +ARG AMRFINDER_DB_VER="2023-04-17.1" +ARG BLAST_VER="2.13.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.13.0 +# see here for reason why I'm manualy installing 2.13.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +# removed because likely unnecessary since we are not compiling from source: make g++ +# libgomp1 required for makeblastdb +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + libgomp1 \ + hmmer \ + procps \ + gzip && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ + wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} 
ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ + amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ + ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +# default command is to print help options +CMD [ "amrfinder", "--help" ] + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --threads 1 --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# test that gunzip is installed +RUN gunzip --help \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.14-2023-04-17.1/README.md b/ncbi-amrfinderplus/3.11.14-2023-04-17.1/README.md new file mode 100644 index 000000000..f9868efc7 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.14-2023-04-17.1/README.md @@ -0,0 +1,67 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.13.0 + +## Database information + +The database included at time of docker image build is **`2023-04-17.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-04-17.1/changes.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.14-2023-04-17.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.14 +Database directory: '/amrfinder/data/2023-04-17.1' +Database version: 2023-04-17.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Citrobacter_freundii, Clostridioides_difficile, Enterobacter_cloacae, Enterococcus_faecalis, Enterococcus_faecium, +Escherichia, Klebsiella_aerogenes, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, Neisseria_meningitidis, +Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, Staphylococcus_pseudintermedius, +Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 
100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA +``` diff --git a/ncbi-amrfinderplus/3.11.17-2023-07-13.2/Dockerfile b/ncbi-amrfinderplus/3.11.17-2023-07-13.2/Dockerfile new file mode 100644 index 000000000..99b80d850 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.17-2023-07-13.2/Dockerfile @@ -0,0 +1,99 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.17" +ARG AMRFINDER_DB_VER="2023-07-13.2" +ARG BLAST_VER="2.13.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.13.0 +# see here for reason why I'm manualy installing 2.13.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +# removed because likely unnecessary since we are not compiling from source: make g++ +# libgomp1 required for makeblastdb +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + libgomp1 \ + hmmer \ + procps \ + gzip && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ + wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} 
ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ + amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ + ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +# default command is to print help options +CMD [ "amrfinder", "--help" ] + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --threads 1 --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# test that gunzip is installed +RUN gunzip --help \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.17-2023-07-13.2/README.md b/ncbi-amrfinderplus/3.11.17-2023-07-13.2/README.md new file mode 100644 index 000000000..ec02a6dac --- /dev/null +++ b/ncbi-amrfinderplus/3.11.17-2023-07-13.2/README.md @@ -0,0 +1,67 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.13.0 + +## Database information + +The database included at time of docker image build is **`2023-07-13.2`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-07-13.2/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.14-2023-04-17.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.17 +Database directory: '/amrfinder/data/2023-07-13.2' +Database version: 2023-07-13.2 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Citrobacter_freundii, Clostridioides_difficile, Enterobacter_asburiae, Enterobacter_cloacae, Enterococcus_faecalis, +Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, +Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 
100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA +``` diff --git a/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/Dockerfile b/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/Dockerfile new file mode 100644 index 000000000..fc374e5ec --- /dev/null +++ b/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/Dockerfile @@ -0,0 +1,99 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.18" +ARG AMRFINDER_DB_VER="2023-08-08.2" +ARG BLAST_VER="2.14.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.14.0 +# see here for reason why I'm manualy installing 2.14.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +# removed because likely unnecessary since we are not compiling from source: make g++ +# libgomp1 required for makeblastdb +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + libgomp1 \ + hmmer \ + procps \ + gzip && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ + wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} 
ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ + amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ + ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +# default command is to print help options +CMD [ "amrfinder", "--help" ] + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --threads 1 --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# test that gunzip is installed +RUN gunzip --help \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/README.md b/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/README.md new file mode 100644 index 000000000..8e50bf973 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.18-2023-08-08.2.2/README.md @@ -0,0 +1,67 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.14.0 + +## Database information + +The database included at time of docker image build is **`2023-08-08.2`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-08-08.2/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.14-2023-04-17.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.18 +Database directory: '/amrfinder/data/2023-08-08.2' +Database version: 2023-08-08.2 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Citrobacter_freundii, Clostridioides_difficile, Enterobacter_asburiae, Enterobacter_cloacae, Enterococcus_faecalis, +Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, +Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 
100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA +``` diff --git a/ncbi-amrfinderplus/3.11.2-2023-02-23.1/Dockerfile b/ncbi-amrfinderplus/3.11.2-2023-02-23.1/Dockerfile new file mode 100644 index 000000000..7c926360b --- /dev/null +++ b/ncbi-amrfinderplus/3.11.2-2023-02-23.1/Dockerfile @@ -0,0 +1,95 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.2" +ARG AMRFINDER_DB_VER="2023-02-23.1" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 +# hmmer installed via apt is v3.3.2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + make \ + g++ \ + ncbi-blast+ \ + hmmer \ + procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# set PATH and locale settings for singularity compatibiliity +ENV PATH="${PATH}:\ +/amrfinder"\ + LC_ALL=C + +# download databases and index them +# have to manually index databases since `amrfinder_index` is not available until v3.11.4 +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# see here for more info: https://github.com/ncbi/amr/issues/112 +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ +wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ +cd /amrfinder/data/${AMRFINDER_DB_VER} && \ +hmmpress AMR.LIB && \ +makeblastdb -in AMRProt -dbtype prot && \ +makeblastdb -in AMR_CDS -dbtype nucl && \ +/bin/bash -c '\ +for ORG in AMR_DNA*.tab; do \ + INPUT_FASTA=$(echo $ORG | cut -d "." 
-f 1); \ + echo "makeblastdb -in ${INPUT_FASTA} -dbtype nucl" ;\ + makeblastdb -in ${INPUT_FASTA} -dbtype nucl ; \ + done' && \ +ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +## Test layer +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv diff --git a/ncbi-amrfinderplus/3.11.2-2023-02-23.1/README.md b/ncbi-amrfinderplus/3.11.2-2023-02-23.1/README.md new file mode 100644 index 000000000..8bbefa04c --- /dev/null +++ b/ncbi-amrfinderplus/3.11.2-2023-02-23.1/README.md @@ -0,0 +1,65 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.12.0 + +## Database information + +The database included at time of docker image build is **`2023-02-23.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-02-23.1/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.2-2023-02-23.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.2 +Database directory: '/amrfinder/data/2023-02-23.1' +Database version: 2023-02-23.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Clostridioides_difficile, Enterococcus_faecalis, Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, +Neisseria_gonorrhoeae, Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA 
+GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA diff --git a/ncbi-amrfinderplus/3.11.2/Dockerfile b/ncbi-amrfinderplus/3.11.2/Dockerfile new file mode 100644 index 000000000..ce0c6a7d7 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.2/Dockerfile @@ -0,0 +1,73 @@ +FROM ubuntu:focal as app + +ARG AMRFINDER_VER="3.11.2" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 +# hmmer installed via apt is v3.3.2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + make \ + g++ \ + ncbi-blast+ \ + hmmer && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# set PATH and locale settings for singularity compatibiliity +ENV PATH="${PATH}:\ +/amrfinder"\ + LC_ALL=C + +# download databases +RUN cd /amrfinder && amrfinder -u + +WORKDIR /data + +## Test layer +FROM app as test + +RUN amrfinder -l + +# from amrfinder +RUN amrfinder --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# external file +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz &&\ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O 
Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.2/README.md b/ncbi-amrfinderplus/3.11.2/README.md new file mode 100644 index 000000000..8769aec3f --- /dev/null +++ b/ncbi-amrfinderplus/3.11.2/README.md @@ -0,0 +1,49 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.12.0 + +## Database information + +The database included at time of docker image build is **`2022-12-19.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2022-12-19.1/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.2 +Database directory: '/amrfinder/data/2022-12-19.1' +Database version: 2022-12-19.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, Clostridioides_difficile, Enterococcus_faecalis, Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Staphylococcus_aureus, Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + + +# part of output TSV + +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMRAMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 
3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMRBETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMRAMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINETETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.20-2023-09-26.1/Dockerfile b/ncbi-amrfinderplus/3.11.20-2023-09-26.1/Dockerfile new file mode 100644 index 000000000..64f987b70 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.20-2023-09-26.1/Dockerfile @@ -0,0 +1,104 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.20" +ARG AMRFINDER_DB_VER="2023-09-26.1" +ARG BLAST_VER="2.14.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.14.0 +# see here for reason why I'm manualy installing 2.14.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +# removed because likely unnecessary since we are not compiling from source: make g++ +# libgomp1 required for makeblastdb +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + libgomp1 \ + hmmer \ + procps \ + gzip && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:/ncbi-blast-${BLAST_VER}+/bin:$PATH" \ + LC_ALL=C + +# download databases and 
index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ + wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ + amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ + ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working directory +WORKDIR /data + +# default command is to print help options +CMD [ "amrfinder", "--help" ] + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +# NOTICE 2023-10-02: The expected test results that were updated for db 2023-09-26.1 did not make it into the 3.11.20 release of amrfinder. +# So due to this, I'm pulling these files manually, and using them for the tests below. +# for the next dockerfile release, I will update the lines below to use the test files included in the version release. +RUN wget -O /amrfinder/test_both.expected https://raw.githubusercontent.com/ncbi/amr/185a69f541016cf05df8c88f0e1d2ed84db81927/test_both.expected && \ + wget -O /amrfinder/test_dna.expected https://raw.githubusercontent.com/ncbi/amr/185a69f541016cf05df8c88f0e1d2ed84db81927/test_dna.expected && \ + amrfinder --threads 1 --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --threads 1 --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --threads 1 --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --threads 1 --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# test that gunzip is installed +RUN gunzip --help \ No newline at end of file diff --git a/ncbi-amrfinderplus/3.11.20-2023-09-26.1/README.md b/ncbi-amrfinderplus/3.11.20-2023-09-26.1/README.md new file mode 100644 index 000000000..d90c5d08d --- /dev/null +++ b/ncbi-amrfinderplus/3.11.20-2023-09-26.1/README.md @@ -0,0 +1,67 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.14.0 + +## Database information + +The database included at time of docker image 
build is **`2023-09-26.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-09-26.1/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.14-2023-04-17.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.20 +Database directory: '/amrfinder/data/2023-09-26.1' +Database version: 2023-09-26.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Citrobacter_freundii, Clostridioides_difficile, Enterobacter_asburiae, Enterobacter_cloacae, Enterococcus_faecalis, +Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, Neisseria_gonorrhoeae, +Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Serratia_marcescens, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP 
plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA +GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA +``` diff --git a/ncbi-amrfinderplus/3.11.4-2023-02-23.1/Dockerfile b/ncbi-amrfinderplus/3.11.4-2023-02-23.1/Dockerfile new file mode 100644 index 000000000..3652f9cb9 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.4-2023-02-23.1/Dockerfile @@ -0,0 +1,85 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.4" +ARG AMRFINDER_DB_VER="2023-02-23.1" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 +# hmmer installed via apt is v3.3.2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + make \ + g++ \ + ncbi-blast+ \ + hmmer \ + procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# set PATH and locale settings for singularity compatibiliity +ENV PATH="${PATH}:\ +/amrfinder"\ + LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ +wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ +amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ +ln -s /amrfinder/data/${AMRFINDER_DB_VER} 
/amrfinder/data/latest + +# set final working directory +WORKDIR /data + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv diff --git a/ncbi-amrfinderplus/3.11.4-2023-02-23.1/README.md b/ncbi-amrfinderplus/3.11.4-2023-02-23.1/README.md new file mode 100644 index 000000000..43077b2f9 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.4-2023-02-23.1/README.md @@ -0,0 +1,65 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.12.0 + +## Database information + +The database included at time of docker image build is **`2023-02-23.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-02-23.1/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
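A minimal sketch of how one of these version-pinned tags can be used, assuming Docker is installed locally and using the tag documented in this README (`assembly.fna` below is a hypothetical input file, not something shipped with the image):

```bash
# pull the image pinned to AMRFinderPlus 3.11.4 and database 2023-02-23.1
docker pull staphb/ncbi-amrfinderplus:3.11.4-2023-02-23.1

# run amrfinder from the container, mounting the current directory as /data (the image's working directory)
docker run --rm -v "$(pwd)":/data staphb/ncbi-amrfinderplus:3.11.4-2023-02-23.1 \
  amrfinder --plus -n assembly.fna -o assembly-amrfinder.tsv
```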
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:- + +# example +staphb/ncbi-amrfinderplus:3.11.4-2023-02-23.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.4 +Database directory: '/amrfinder/data/2023-02-23.1' +Database version: 2023-02-23.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Clostridioides_difficile, Enterococcus_faecalis, Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, +Neisseria_gonorrhoeae, Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA 
+GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA diff --git a/ncbi-amrfinderplus/3.11.8-2023-02-23.1/Dockerfile b/ncbi-amrfinderplus/3.11.8-2023-02-23.1/Dockerfile new file mode 100644 index 000000000..18afdf064 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.8-2023-02-23.1/Dockerfile @@ -0,0 +1,93 @@ +FROM ubuntu:jammy as app + +ARG AMRFINDER_VER="3.11.8" +ARG AMRFINDER_DB_VER="2023-02-23.1" +ARG BLAST_VER="2.13.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI AMRFinderPlus" +LABEL software.version="${AMRFINDER_VER}" +LABEL description="NCBI resistance gene detection tool" +LABEL website="https://github.com/ncbi/amr" +LABEL license="https://github.com/ncbi/amr/blob/master/LICENSE" +LABEL maintainer="Kelsey Florek" +LABEL maintainer.email="kelsey.florek@slh.wisc.edu" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Anders Goncalves da Silva" +LABEL maintainer3.email="andersgs@gmail.com" +LABEL maintainer4="Erin Young" +LABEL maintainer4.email="eriny@utah.gov" +LABEL maintainer5="Holly McQueary" +LABEL maintainer5.email="holly.c.mcqueary@mass.gov" + +# ncbi-blast+ installed via apt is v2.12.0 - DISABLING so that we can manually install v2.13.0 +# see here for reason why I'm manualy installing 2.13.0 instead of using apt-get: https://github.com/ncbi/amr/releases/tag/amrfinder_v3.11.8 + +# hmmer installed via apt is v3.3.2 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + wget \ + curl \ + make \ + g++ \ + hmmer \ + procps && \ + apt-get autoclean && \ + rm -rf /var/lib/apt/lists/* + +# download and install amrfinderplus pre-compiled binaries; make /data +RUN mkdir amrfinder && cd /amrfinder && \ + wget https://github.com/ncbi/amr/releases/download/amrfinder_v${AMRFINDER_VER}/amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + tar zxf amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + rm amrfinder_binaries_v${AMRFINDER_VER}.tar.gz && \ + mkdir /data + +# install ncbi-blast linux binaries +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm -v ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH and locale settings for singularity compatibiliity, set amrfinder and manually-installed blast as higher priority in PATH +ENV PATH="/amrfinder:\ +/ncbi-blast-${BLAST_VER}+/bin:\ +$PATH" \ +LC_ALL=C + +# download databases and index them +# done in this manner to pin the database version instead of pulling the latest version with `amrfinder -u` +# softlink is required for `amrfinder -l` and typical `amrfinder` use cases to work properly +RUN mkdir -p /amrfinder/data/${AMRFINDER_DB_VER} && \ +wget -q -P /amrfinder/data/${AMRFINDER_DB_VER} ftp://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/${AMRFINDER_DB_VER}/* && \ +amrfinder_index /amrfinder/data/${AMRFINDER_DB_VER} && \ +ln -s /amrfinder/data/${AMRFINDER_DB_VER} /amrfinder/data/latest + +# set final working 
directory +WORKDIR /data + +## Test stage +FROM app as test + +# list database version and available --organism options +RUN amrfinder -l + +# run recommended tests from amrfinder +RUN amrfinder --plus -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_prot.got && \ + diff /amrfinder/test_prot.expected test_prot.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -O Escherichia > test_dna.got && \ + diff /amrfinder/test_dna.expected test_dna.got && \ + amrfinder --plus -n /amrfinder/test_dna.fa -p /amrfinder/test_prot.fa -g /amrfinder/test_prot.gff -O Escherichia > test_both.got && \ + diff /amrfinder/test_both.expected test_both.got + +# run amrfinder on Salmonella, without and with --organism option +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/010/941/835/GCA_010941835.1_PDT000052640.3/GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + gzip -d GCA_010941835.1_PDT000052640.3_genomic.fna.gz && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --output test1.txt && \ + amrfinder --plus --nucleotide GCA_010941835.1_PDT000052640.3_genomic.fna --organism Salmonella --output test2.txt && \ + cat test1.txt test2.txt + +# run amrfinder on Klebesiella oxytoca using --organism/-O flag +RUN wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz && \ + amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv diff --git a/ncbi-amrfinderplus/3.11.8-2023-02-23.1/README.md b/ncbi-amrfinderplus/3.11.8-2023-02-23.1/README.md new file mode 100644 index 000000000..08fc2fa34 --- /dev/null +++ b/ncbi-amrfinderplus/3.11.8-2023-02-23.1/README.md @@ -0,0 +1,65 @@ +# NCBI AMRFinderPlus docker image + +Main tool : [NCBI AMRFinderPlus](https://github.com/ncbi/amr) + +Additional tools: + +- hmmer v3.3.2 +- ncbi-blast+ v2.13.0 + +## Database information + +The database included at time of docker image build is **`2023-02-23.1`**. More information can be found in the changes.txt on [NCBI's FTP](https://ftp.ncbi.nlm.nih.gov/pathogen/Antimicrobial_resistance/AMRFinderPlus/database/3.11/2023-02-23.1/changelog.txt). + +Full documentation: [https://github.com/ncbi/amr/wiki](https://github.com/ncbi/amr/wiki) + +## Docker Image Tags + +Beginning with AMRFinderPlus v3.11.2, we will include the version of AMRFinderPlus followed by the database version in the docker image tag so that it is more informative to users. 
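A short sketch of the same pattern for this image, pulling the pinned tag from Quay instead of Docker Hub (assuming Docker can reach quay.io; the tag is the one documented in this README):

```bash
# pull the pinned AMRFinderPlus 3.11.8 + 2023-02-23.1 database image from quay.io
docker pull quay.io/staphb/ncbi-amrfinderplus:3.11.8-2023-02-23.1

# list the database version and available --organism options from inside the container
docker run --rm quay.io/staphb/ncbi-amrfinderplus:3.11.8-2023-02-23.1 amrfinder -l
```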
The format is as follows: + +```bash +# general format +staphb/ncbi-amrfinderplus:<AMRFinderPlus version>-<database version> + +# example +staphb/ncbi-amrfinderplus:3.11.8-2023-02-23.1 +``` + +You can view all available docker images on [dockerhub](https://hub.docker.com/r/staphb/ncbi-amrfinderplus/tags) and [quay.io](https://quay.io/repository/staphb/ncbi-amrfinderplus?tab=tags) + +## Example Usage + +```bash +# list out the available organisms for the -O/--organism flag +$ amrfinder -l +Running: amrfinder -l +Software directory: '/amrfinder/' +Software version: 3.11.8 +Database directory: '/amrfinder/data/2023-02-23.1' +Database version: 2023-02-23.1 + +Available --organism options: Acinetobacter_baumannii, Burkholderia_cepacia, Burkholderia_pseudomallei, Campylobacter, +Clostridioides_difficile, Enterococcus_faecalis, Enterococcus_faecium, Escherichia, Klebsiella_oxytoca, Klebsiella_pneumoniae, +Neisseria_gonorrhoeae, Neisseria_meningitidis, Pseudomonas_aeruginosa, Salmonella, Staphylococcus_aureus, +Staphylococcus_pseudintermedius, Streptococcus_agalactiae, Streptococcus_pneumoniae, Streptococcus_pyogenes, Vibrio_cholerae + +# download Klebsiella oxytoca genome FASTA/FNA to use as a test +$ wget "https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/812/925/GCA_003812925.1_ASM381292v1/GCA_003812925.1_ASM381292v1_genomic.fna.gz" + +# uncompress the FNA file +$ gzip -d GCA_003812925.1_ASM381292v1_genomic.fna.gz + +# run amrfinder (nucleotide mode) on the uncompressed FNA file +$ amrfinder --plus --name GCA_003812925.1 -n GCA_003812925.1_ASM381292v1_genomic.fna -O Klebsiella_oxytoca -o GCA_003812925.1-amrfinder.tsv + +# view output TSV +$ column -t -s $'\t' -n GCA_003812925.1-amrfinder.tsv +Name Protein identifier Contig id Start Stop Strand Gene symbol Sequence name Scope Element type Element subtype Class Subclass Method Target length Reference sequence length % Coverage of reference sequence % Identity to reference sequence Alignment length Accession of closest sequence Name of closest sequence HMM id HMM description +GCA_003812925.1 NA CP033844.1 369234 370406 + oqxA multidrug efflux RND transporter periplasmic adaptor subunit OqxA core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 391 391 100.00 90.79 391 WP_002914189.1 multidrug efflux RND transporter periplasmic adaptor subunit OqxA NA NA +GCA_003812925.1 NA CP033844.1 370433 373582 + oqxB multidrug efflux RND transporter permease subunit OqxB core AMR AMR PHENICOL/QUINOLONE PHENICOL/QUINOLONE BLASTX 1050 1050 100.00 96.86 1050 WP_023323140.1 multidrug efflux RND transporter permease subunit OqxB15 NA NA +GCA_003812925.1 NA CP033844.1 636118 637917 - ybtQ yersiniabactin ABC transporter ATP-binding/permease protein YbtQ plus VIRULENCE VIRULENCE NA NA BLASTX 600 600 100.00 89.17 600 AAC69584.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtQ NA NA +GCA_003812925.1 NA CP033844.1 637913 639706 - ybtP yersiniabactin ABC transporter ATP-binding/permease protein YbtP plus VIRULENCE VIRULENCE NA NA BLASTX 598 600 99.67 89.30 598 CAA21388.1 yersiniabactin ABC transporter ATP-binding/permease protein YbtP NA NA +GCA_003812925.1 NA CP033844.1 3473617 3474798 + emrD multidrug efflux MFS transporter EmrD plus AMR AMR EFFLUX EFFLUX BLASTX 394 394 100.00 94.16 394 ACN65732.1 multidrug efflux MFS transporter EmrD NA NA +GCA_003812925.1 NA CP033844.1 5085488 5086357 - blaOXY-2-1 extended-spectrum class A beta-lactamase OXY-2-1 core AMR AMR BETA-LACTAM CEPHALOSPORIN ALLELEX 290 290 100.00 100.00 290 WP_032727905.1 extended-spectrum class A beta-lactamase OXY-2-1 NA NA 
+GCA_003812925.1 NA CP033845.1 5102 5632 - ant(2'')-Ia aminoglycoside nucleotidyltransferase ANT(2'')-Ia core AMR AMR AMINOGLYCOSIDE GENTAMICIN/KANAMYCIN/TOBRAMYCIN BLASTX 177 177 100.00 98.31 177 WP_000381803.1 aminoglycoside nucleotidyltransferase ANT(2'')-Ia NA NA +GCA_003812925.1 NA CP033846.1 748 1932 - tet(39) tetracycline efflux MFS transporter Tet(39) core AMR AMR TETRACYCLINE TETRACYCLINE EXACTX 395 395 100.00 100.00 395 WP_004856455.1 tetracycline efflux MFS transporter Tet(39) NA NA diff --git a/ncbi-datasets/14.13.2/Dockerfile b/ncbi-datasets/14.13.2/Dockerfile new file mode 100644 index 000000000..1d0f04051 --- /dev/null +++ b/ncbi-datasets/14.13.2/Dockerfile @@ -0,0 +1,43 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="14.13.2" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna \ No newline at end of file diff --git a/ncbi-datasets/14.13.2/README.md b/ncbi-datasets/14.13.2/README.md new file mode 100644 index 000000000..2af950efe --- /dev/null +++ b/ncbi-datasets/14.13.2/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` \ No newline at end of file diff --git a/ncbi-datasets/14.20.0/Dockerfile b/ncbi-datasets/14.20.0/Dockerfile new file mode 100644 index 
000000000..280a45d26 --- /dev/null +++ b/ncbi-datasets/14.20.0/Dockerfile @@ -0,0 +1,45 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="14.20.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna \ No newline at end of file diff --git a/ncbi-datasets/14.20.0/README.md b/ncbi-datasets/14.20.0/README.md new file mode 100644 index 000000000..6474b9282 --- /dev/null +++ b/ncbi-datasets/14.20.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/14.27.0/Dockerfile b/ncbi-datasets/14.27.0/Dockerfile new file mode 100644 index 000000000..161a52166 --- /dev/null +++ b/ncbi-datasets/14.27.0/Dockerfile @@ -0,0 +1,43 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="14.27.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin 
Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/14.27.0/README.md b/ncbi-datasets/14.27.0/README.md new file mode 100644 index 000000000..6474b9282 --- /dev/null +++ b/ncbi-datasets/14.27.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/14.3.0/Dockerfile b/ncbi-datasets/14.3.0/Dockerfile new file mode 100644 index 000000000..820f22666 --- /dev/null +++ b/ncbi-datasets/14.3.0/Dockerfile @@ -0,0 +1,43 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="14.3.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as 
test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/14.3.0/README.md b/ncbi-datasets/14.3.0/README.md new file mode 100644 index 000000000..6474b9282 --- /dev/null +++ b/ncbi-datasets/14.3.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/14.7.0/Dockerfile b/ncbi-datasets/14.7.0/Dockerfile new file mode 100644 index 000000000..6981dd258 --- /dev/null +++ b/ncbi-datasets/14.7.0/Dockerfile @@ -0,0 +1,43 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="14.7.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/14.7.0/README.md b/ncbi-datasets/14.7.0/README.md new file mode 100644 index 000000000..efd3c7b88 --- /dev/null +++ b/ncbi-datasets/14.7.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : 
[datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more (version 14.7.0) + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/15.1.0/Dockerfile b/ncbi-datasets/15.1.0/Dockerfile new file mode 100644 index 000000000..4467e1ae0 --- /dev/null +++ b/ncbi-datasets/15.1.0/Dockerfile @@ -0,0 +1,46 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="15.1.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +# datasets is generally datasets --help, but just typing in 'datasets' should bring up a help menu +CMD [ "datasets"] + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/15.1.0/README.md b/ncbi-datasets/15.1.0/README.md new file mode 100644 index 000000000..efd3c7b88 --- /dev/null +++ b/ncbi-datasets/15.1.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more (version 14.7.0) + +## 
Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/15.11.0/Dockerfile b/ncbi-datasets/15.11.0/Dockerfile new file mode 100644 index 000000000..6855552fc --- /dev/null +++ b/ncbi-datasets/15.11.0/Dockerfile @@ -0,0 +1,46 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="15.11.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +# datasets is generally datasets --help, but just typing in 'datasets' should bring up a help menu +CMD [ "datasets" ] + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/15.11.0/README.md b/ncbi-datasets/15.11.0/README.md new file mode 100644 index 000000000..6474b9282 --- /dev/null +++ b/ncbi-datasets/15.11.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-datasets/15.2.0/Dockerfile b/ncbi-datasets/15.2.0/Dockerfile new file mode 100644 index 
000000000..588536465 --- /dev/null +++ b/ncbi-datasets/15.2.0/Dockerfile @@ -0,0 +1,46 @@ +FROM ubuntu:jammy as app + +ARG DATASETS_VER="15.2.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's datasets and dataformat" +LABEL software.version="${DATASETS_VER}" +LABEL description="Downloads biological sequence data from NCBI" +LABEL website="https://www.ncbi.nlm.nih.gov/datasets/docs/v1/" +LABEL license="https://github.com/ncbi/datasets/blob/master/pkgs/ncbi-datasets-cli/LICENSE.md" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# unzip isn't needed for datasets/dataformat, but it is often used after downloading files with datasets +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi datasets tool (pre-compiled binary) +RUN wget https://github.com/ncbi/datasets/releases/download/v${DATASETS_VER}/linux-amd64.cli.package.zip && \ + unzip linux-amd64.cli.package.zip && \ + rm linux-amd64.cli.package.zip && \ + chmod +x dataformat datasets + +ENV LC_ALL=C + +WORKDIR /data + +# datasets is generally datasets --help, but just typing in 'datasets' should bring up a help menu +CMD [ "datasets" ] + +FROM app as test + +RUN dataformat --help && datasets --help + +# stolen from Curtis at https://github.com/StaPH-B/docker-builds/blob/master/pangolin/4.1.2/Dockerfile +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && \ + rm ON924087.1.zip && \ + cp ncbi_dataset/data/genomic.fna ON924087.1.fna && \ + wc -c ON924087.1.fna diff --git a/ncbi-datasets/15.2.0/README.md b/ncbi-datasets/15.2.0/README.md new file mode 100644 index 000000000..6474b9282 --- /dev/null +++ b/ncbi-datasets/15.2.0/README.md @@ -0,0 +1,20 @@ +# NCBI datasets and dataformat container + +Main tool : [datasets](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-datasets-tool-to-download-biological-data) and [dataformat](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/download-and-install/#use-the-dataformat-tool-to-convert-data-reports-to-other-formats) + +Full documentation: [https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/](https://www.ncbi.nlm.nih.gov/datasets/docs/v1/how-tos/) + +> Use NCBI Datasets to gather metadata, download data packages, view reports and more + +## Example Usage + +```bash +# will download the fasta for ON924087.1 in a zipped directory +datasets download virus genome accession ON924087.1 --filename ON924087.1.zip + +# unzipping the directory and the fasta file will be located at ncbi_dataset/data/genomic.fna +unzip ON924087.1.zip + +# copying the file into something with a better name +cp ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna +``` diff --git a/ncbi-table2asn/1.26.678/Dockerfile b/ncbi-table2asn/1.26.678/Dockerfile new file mode 100644 index 000000000..fbff19752 --- /dev/null +++ b/ncbi-table2asn/1.26.678/Dockerfile @@ -0,0 +1,38 @@ +FROM ubuntu:jammy as app + +# version doesn't show appear anywhere on ftp; only can be determined via command-line +# this docker image was built 2022-12-13 and this was the current version +ARG TABLE2ASN_VER="1.26.678" +ARG TABLE2ASN_RELEASE_DATE="2022-06-14" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="NCBI's table2asn" +LABEL software.version="${TABLE2ASN_VER}" +LABEL description="Converts files of various 
formats to ASN.1" +LABEL website="https://www.ncbi.nlm.nih.gov/genbank/table2asn/" +LABEL license="unknown" +LABEL maintainer="Sage Wright" +LABEL maintainer.email="sage.wright@theiagen.com" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + gzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install ncbi table2asn tool (pre-compiled binary) +RUN wget https://ftp.ncbi.nlm.nih.gov/asn1-converters/versions/${TABLE2ASN_RELEASE_DATE}/by_program/table2asn/linux64.table2asn.gz && \ + gunzip linux64.table2asn.gz && \ + mv linux64.table2asn table2asn && \ + chmod +x table2asn + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN table2asn -help && table2asn -version diff --git a/ncbi-table2asn/1.26.678/README.md b/ncbi-table2asn/1.26.678/README.md new file mode 100644 index 000000000..25c552f10 --- /dev/null +++ b/ncbi-table2asn/1.26.678/README.md @@ -0,0 +1,23 @@ +# NCBI table2asn + +Main tool : [table2asn](https://www.ncbi.nlm.nih.gov/genbank/table2asn/) + +Full documentation: [README](https://ftp.ncbi.nlm.nih.gov/asn1-converters/by_program/table2asn/DOCUMENTATION/table2asn_readme.txt) + +> table2asn is a command-line program that creates sequence records for submission to GenBank + +## Example Usage + +```bash +# Single non-genome submission: a particular .fsa file, and only 1 sequence in the .fsa file and the source information is in the definition line of the .fsa file: +table2asn -t template.sbt -i x.fsa -V v + +# Batch non-genome submission: a directory that contains .fsa files, and multiple sequences per file, and the source information is in the definition line of the .fsa files: +table2asn -t template.sbt -indir path_to_files -a s -V v + +# Genome submission: a directory that contains multiple .fsa files of a single genome, and one or more sequences per file and the source information is in the definition line of the .fsa files: +table2asn -t template.sbt -indir path_to_files -M n -Z + +# Genome submission for the most common gapped situation (= runs of 10 or more Ns represent a gap, and there are no gaps of completely unknown size, and the evidence for linkage across the gaps is "paired-ends"), and the source information is in the definition line of the .fsa files: +table2asn -t template -indir path_to_files -M n -Z -gaps-min 10 -l paired-ends +``` diff --git a/ngmaster/1.0.0/Dockerfile b/ngmaster/1.0.0/Dockerfile index 79ef87691..55f975eb0 100644 --- a/ngmaster/1.0.0/Dockerfile +++ b/ngmaster/1.0.0/Dockerfile @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:0.27.0 as app +FROM mambaorg/micromamba:1.3.0 as app # build and run as root users since micromamba image has 'mambauser' set as the $USER USER root @@ -10,8 +10,8 @@ ARG NGMASTER_VER="1.0.0" ARG MLST_VER="2.23.0" ARG ANY2FASTA_VER="0.4.2" -LABEL base.image="mambaorg/micromamba:0.27.0" -LABEL dockerfile.version="1" +LABEL base.image="mambaorg/micromamba:1.3.0" +LABEL dockerfile.version="2" LABEL software="ngmaster" LABEL software.version=${NGMASTER_VER} LABEL description="In silico multi-antigen sequence typing for Neisseria gonorrhoeae (NG-MAST)" @@ -26,10 +26,10 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ procps && \ apt-get autoclean && rm -rf /var/lib/apt/lists/* -# create conda env 'ngmaster-env' and install python, pip, biopython, isPcr version 33; +# install python, pip, biopython, isPcr version 33 into base micromamba env # cleanup conda garbage -RUN micromamba create -n ngmaster-env -c bioconda -c conda-forge 
-c defaults \ - python>=3.7 \ +RUN micromamba install -n base -c conda-forge -c bioconda -c defaults \ + 'python>=3.7' \ pip \ biopython \ any2fasta=${ANY2FASTA_VER} \ @@ -38,14 +38,11 @@ RUN micromamba create -n ngmaster-env -c bioconda -c conda-forge -c defaults \ micromamba clean -a -y # so that mamba/conda env is active when running below commands -ENV ENV_NAME="ngmaster-env" +ENV ENV_NAME="base" ARG MAMBA_DOCKERFILE_ACTIVATE=1 -# install ngmaster via pypi; using pip installed via micromamba -#RUN pip install ngmaster==${NGMASTER_VER} - # hardcode conda env into the PATH; set locale settings -ENV PATH="${PATH}:/opt/conda/envs/ngmaster-env/bin/" \ +ENV PATH="/opt/conda/bin:${PATH}" \ LC_ALL=C.UTF-8 # set final & default working dir to /data @@ -55,10 +52,24 @@ WORKDIR /data FROM app as test # so that mamba/conda env is active when running below commands -ENV ENV_NAME="ngmaster-env" +ENV ENV_NAME="base" ARG MAMBA_DOCKERFILE_ACTIVATE=1 # show help and version outputs; run the program's internal tests RUN ngmaster --help && echo && \ ngmaster --version && mlst --version && echo && \ ngmaster --test + +# getting unzip for unziping archive downloaded from NCBI +RUN apt-get update && apt-get install unzip curl ca-certificates -y --no-install-recommends + +# so that testing outputs are in /test +WORKDIR /test + +# test with an actual assembly downloaded from RefSeq +# more info on this genome here: https://www.ncbi.nlm.nih.gov/labs/data-hub/genome/GCF_013030075.1/ +RUN curl -OJX GET "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCF_013030075.1/download?include_annotation_type=GENOME_FASTA,GENOME_GFF,RNA_FASTA,CDS_FASTA,PROT_FASTA,SEQUENCE_REPORT&filename=GCF_013030075.1.zip" \ + -H "Accept: application/zip" && \ + unzip GCF_013030075.1.zip && \ + ngmaster /test/ncbi_dataset/data/GCF_013030075.1/GCF_013030075.1_ASM1303007v1_genomic.fna > ngmaster.out.tsv && \ + cat /test/ngmaster.out.tsv \ No newline at end of file diff --git a/ngmaster/1.0.0/README.md b/ngmaster/1.0.0/README.md index 16af05e3e..6e8eb94ba 100644 --- a/ngmaster/1.0.0/README.md +++ b/ngmaster/1.0.0/README.md @@ -8,15 +8,16 @@ Additional tools: - any2fasta 0.4.2 - mlst 2.23.0 -- python 3.7.12 -- biopython 1.79 +- python 3.9.0 +- biopython 1.80 - perl 5.32.1 -- bioperl 1.7.8 +- bioperl 1.7.9 ## Example Usage ```bash -$ ngmaster /opt/conda/envs/ngmaster-env/lib/python3.7/site-packages/ngmaster/test/test.fa +# test ngmaster with the test FASTA file included with ngmaster code +$ ngmaster /opt/conda/lib/python3.9/site-packages/ngmaster/test/test.fa FILE SCHEME NG-MAST/NG-STAR porB_NG-MAST tbpB penA mtrR porB_NG-STAR ponA gyrA parC 23S -/opt/conda/envs/ngmaster-env/lib/python3.7/site-packages/ngmaster/test/test.fa ngmaSTar 4186/231 2569 241 23 42 100 100 10 2 100 +/opt/conda/lib/python3.9/site-packages/ngmaster/test/test.fa ngmaSTar 4186/231 2569 241 23 42 100 100 10 2100 ``` diff --git a/pangolin/4.1.3/Dockerfile b/pangolin/4.1.3/Dockerfile index c2217b795..afb478e88 100644 --- a/pangolin/4.1.3/Dockerfile +++ b/pangolin/4.1.3/Dockerfile @@ -1,4 +1,4 @@ -FROM mambaorg/micromamba:0.27.0 as app +FROM mambaorg/micromamba:1.1.0 as app # build and run as root users since micromamba image has 'mambauser' set as the $USER USER root @@ -9,14 +9,14 @@ WORKDIR / # had to include the v for some of these due to GitHub tags. 
# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" ARG PANGOLIN_VER="v4.1.3" -ARG PANGOLIN_DATA_VER="v1.16" +ARG PANGOLIN_DATA_VER="v1.17" ARG SCORPIO_VER="v0.3.17" ARG CONSTELLATIONS_VER="v0.1.10" -ARG USHER_VER="0.6.0" +ARG USHER_VER="0.6.1" # metadata labels -LABEL base.image="mambaorg/micromamba:0.27.0" -LABEL dockerfile.version="2" +LABEL base.image="mambaorg/micromamba:1.1.0" +LABEL dockerfile.version="3" LABEL software="pangolin" LABEL software.version=${PANGOLIN_VER} LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." diff --git a/pangolin/4.1.3/README.md b/pangolin/4.1.3/README.md index 4659b2c0b..12ebdda3c 100644 --- a/pangolin/4.1.3/README.md +++ b/pangolin/4.1.3/README.md @@ -8,16 +8,16 @@ Phylogenetic Assignment of Named Global Outbreak LINeages Additional tools: -- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.16 -- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.16 +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.17 +- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.17 - [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 -- [usher](https://github.com/yatisht/usher) 0.6.0 +- [usher](https://github.com/yatisht/usher) 0.6.1 - [faToVcf](https://github.com/yatisht/usher) 426 - [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 - [constellations](https://github.com/cov-lineages/constellations) 0.1.10 - [gofasta](https://github.com/virus-evolution/gofasta) 1.1.0 - [mafft](https://mafft.cbrc.jp/alignment/software/) 7.508 -- python 3.8.13 +- python 3.8.15 ## Example Usage diff --git a/pangolin/4.2-pdata-1.19/Dockerfile b/pangolin/4.2-pdata-1.19/Dockerfile new file mode 100644 index 000000000..ae71e42ab --- /dev/null +++ b/pangolin/4.2-pdata-1.19/Dockerfile @@ -0,0 +1,124 @@ +FROM mambaorg/micromamba:1.4.1 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +# had to include the v for some of these due to GitHub tags. +# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" +ARG PANGOLIN_VER="v4.2" +ARG PANGOLIN_DATA_VER="v1.19" +ARG SCORPIO_VER="v0.3.17" +ARG CONSTELLATIONS_VER="v0.1.10" +ARG USHER_VER="0.6.2" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.1" +LABEL dockerfile.version="1" +LABEL software="pangolin" +LABEL software.version=${PANGOLIN_VER} +LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." 
+LABEL website="https://github.com/cov-lineages/pangolin" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the pangolin repo +RUN wget "https://github.com/cov-lineages/pangolin/archive/${PANGOLIN_VER}.tar.gz" && \ + tar -xf ${PANGOLIN_VER}.tar.gz && \ + rm -v ${PANGOLIN_VER}.tar.gz && \ + mv -v pangolin-* pangolin + +# set the environment; PATH is unnecessary here, but leaving anyways. It's reset later in dockerfile +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# modify environment.yml to pin specific versions during install +# create the conda environment using modified environment.yml +RUN sed -i "s|usher.*|usher=${USHER_VER}|" /pangolin/environment.yml && \ + sed -i "s|scorpio.git|scorpio.git@${SCORPIO_VER}|" /pangolin/environment.yml && \ + sed -i "s|pangolin-data.git|pangolin-data.git@${PANGOLIN_DATA_VER}|" /pangolin/environment.yml && \ + sed -i "s|constellations.git|constellations.git@${CONSTELLATIONS_VER}|" /pangolin/environment.yml && \ + micromamba create -n pangolin -y -f /pangolin/environment.yml + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +WORKDIR /pangolin + +# run pip install step; download optional pre-computed assignment hashes for UShER (useful for running on large batches of samples) +# best to skip using the assigment-cache if running on one sample for speed +# print versions +RUN pip install . 
&& \ + pangolin --add-assignment-cache && \ + micromamba clean -a -y && \ + mkdir /data && \ + pangolin --all-versions && \ + usher --version + +WORKDIR /data + +# hardcode pangolin executable into the PATH variable +ENV PATH="$PATH:/opt/conda/envs/pangolin/bin/" + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# test on test sequences supplied with Pangolin code +RUN pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode usher -o /data/test_seqs-output-pusher && \ + column -t -s, /data/test_seqs-output-pusher/lineage_report.csv + +# test functionality of assignment-cache option +RUN pangolin --use-assignment-cache /pangolin/pangolin/test/test_seqs.fasta + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# test on a B.1.1.7 genome +RUN pangolin /test-data/SRR13957123.consensus.fa --analysis-mode usher -o /test-data/SRR13957123-pusher && \ + column -t -s, /test-data/SRR13957123-pusher/lineage_report.csv + + # install unzip for unzipping zip archive from NCBI +RUN apt-get update && apt-get install -y --no-install-recommends unzip + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a BA.1 from Florida (https://www.ncbi.nlm.nih.gov/biosample?term=SAMN29506515 and https://www.ncbi.nlm.nih.gov/nuccore/ON924087) +# run pangolin in usher analysis mode +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && rm ON924087.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna ON924087.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin ON924087.1.genomic.fna --analysis-mode usher -o ON924087.1-usher && \ + column -t -s, ON924087.1-usher/lineage_report.csv + +# test specific for new lineage, XBB.1.16, introduced in pangolin-data v1.19 +# using this assembly: https://www.ncbi.nlm.nih.gov/nuccore/2440446687 +# biosample here: https://www.ncbi.nlm.nih.gov/biosample?term=SAMN33060589 +# one of the sample included in initial pango-designation here: https://github.com/cov-lineages/pango-designation/issues/1723 +RUN datasets download virus genome accession OQ381818.1 --filename OQ381818.1.zip && \ + unzip OQ381818.1.zip && rm OQ381818.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OQ381818.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OQ381818.1.genomic.fna --analysis-mode usher -o OQ381818.1-usher && \ + column -t -s, OQ381818.1-usher/lineage_report.csv diff --git a/pangolin/4.2-pdata-1.19/README.md b/pangolin/4.2-pdata-1.19/README.md new file mode 100644 index 000000000..316750354 --- /dev/null +++ b/pangolin/4.2-pdata-1.19/README.md @@ -0,0 +1,50 @@ +# pangolin container + +Main tool : [pangolin](https://github.com/cov-lineages/pangolin) + +Full documentation: [https://cov-lineages.org/resources/pangolin.html](https://cov-lineages.org/resources/pangolin.html) + +Phylogenetic Assignment of Named Global Outbreak LINeages + +Additional tools: + +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.19 +- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.19 +- [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 +- 
[usher](https://github.com/yatisht/usher) 0.6.2 +- [faToVcf](https://github.com/yatisht/usher) 426 +- [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 +- [constellations](https://github.com/cov-lineages/constellations) 0.1.10 +- [gofasta](https://github.com/virus-evolution/gofasta) 1.2.0 +- [mafft](https://mafft.cbrc.jp/alignment/software/) 7.520 +- python 3.8.15 + +## Warning + +This docker image contains `pangolin-data` v1.19. The upgrade from 1.18 to 1.18.1 and from 1.18.1 to 1.19 ONLY updated the underlying UShER tree/protobuf file. It did NOT update the pangoLEARN model, so please use the UShER mode of pangolin if you want to stay up-to-date with the most recent lineages. [See pangolin-data release notes here for more details](https://github.com/cov-lineages/pangolin-data/releases/tag/v1.19) + +## Example Usage + +```bash +# run Pangolin in the default mode (usher). Can optionally supply --analysis-mode usher +$ pangolin /pangolin/pangolin/test/test_seqs.fasta -o /data/test_seqs-output-pusher + +# run Pangolin in the fast/pangolearn mode. Can use either --analysis-mode fast or --analysis-mode pangolearn +$ pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode pangolearn -o /data/test_seqs-output-plearn + +# view the output CSV +$ column -t -s, /data/test_seqs-output-pusher/lineage_report.csv +taxon lineage conflict ambiguity_score scorpio_call scorpio_support scorpio_conflict scorpio_notes version pangolin_version scorpio_version constellation_version is_designated qc_status qc_notes note +India seq B.1.617.1 0.0 B.1.617.1-like 1.0 0.0 scorpio call: Alt alleles 11; Ref alleles 0; Amb alleles 0; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.617.1(1/1) +b117 B.1.1.7 0.0 Alpha (B.1.1.7-like) 0.91 0.04 scorpio call: Alt alleles 21; Ref alleles 1; Amb alleles 1; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.1.7(2/2) +outgroup_A A 0.0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: A(1/1) +issue_57_torsten_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_6000_Ns_in_18000_bases Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_no_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_is_too_short Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.9 +This_seq_has_lots_of_Ns Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.98 +This_seq_is_literally_just_N Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +Japan_seq B 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +USA_seq B.1.314 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. 
+Unassigned_omicron_seq BA.1 0.0 Probable Omicron (BA.1-like) 0.71 0.08 scorpio call: Alt alleles 42; Ref alleles 5; Amb alleles 9; Oth alleles 3 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.03 Usher placements: BA.1(1/1) +``` diff --git a/pangolin/4.2/Dockerfile b/pangolin/4.2/Dockerfile new file mode 100644 index 000000000..06fc22a99 --- /dev/null +++ b/pangolin/4.2/Dockerfile @@ -0,0 +1,112 @@ +FROM mambaorg/micromamba:1.3.1 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +# had to include the v for some of these due to GitHub tags. +# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" +ARG PANGOLIN_VER="v4.2" +ARG PANGOLIN_DATA_VER="v1.18.1.1" +ARG SCORPIO_VER="v0.3.17" +ARG CONSTELLATIONS_VER="v0.1.10" +ARG USHER_VER="0.6.2" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.3.1" +LABEL dockerfile.version="3" +LABEL software="pangolin" +LABEL software.version=${PANGOLIN_VER} +LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." +LABEL website="https://github.com/cov-lineages/pangolin" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the pangolin repo +RUN wget "https://github.com/cov-lineages/pangolin/archive/${PANGOLIN_VER}.tar.gz" && \ + tar -xf ${PANGOLIN_VER}.tar.gz && \ + rm ${PANGOLIN_VER}.tar.gz && \ + mv -v pangolin-* pangolin + +# set the environment +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# modify environment.yml to pin specific versions during install +# create the conda environment using modified environment.yml +RUN sed -i "s|usher.*|usher=${USHER_VER}|" /pangolin/environment.yml && \ + sed -i "s|scorpio.git|scorpio.git@${SCORPIO_VER}|" /pangolin/environment.yml && \ + sed -i "s|pangolin-data.git|pangolin-data.git@${PANGOLIN_DATA_VER}|" /pangolin/environment.yml && \ + sed -i "s|constellations.git|constellations.git@${CONSTELLATIONS_VER}|" /pangolin/environment.yml && \ + micromamba create -n pangolin -y -f /pangolin/environment.yml + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +WORKDIR /pangolin + +# run pip install step; download optional pre-computed assignment hashes for UShER (useful for running on large batches of samples) +# best to skip using the assigment-cache if running on one sample for speed +# print versions +RUN pip install . 
&& \ + pangolin --add-assignment-cache && \ + micromamba clean -a -y && \ + mkdir /data && \ + pangolin --all-versions && \ + usher --version + +WORKDIR /data + +# hardcode pangolin executable into the PATH variable +ENV PATH="$PATH:/opt/conda/envs/pangolin/bin/" + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# test on test sequences supplied with Pangolin code +RUN pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode usher -o /data/test_seqs-output-pusher && \ + column -t -s, /data/test_seqs-output-pusher/lineage_report.csv + +# test functionality of assignment-cache option +RUN pangolin --use-assignment-cache /pangolin/pangolin/test/test_seqs.fasta + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# test on a B.1.1.7 genome +RUN pangolin /test-data/SRR13957123.consensus.fa --analysis-mode usher -o /test-data/SRR13957123-pusher && \ + column -t -s, /test-data/SRR13957123-pusher/lineage_report.csv + + # install unzip for unzipping zip archive from NCBI +RUN apt-get update && apt-get install -y --no-install-recommends unzip + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a BA.1 from Florida (https://www.ncbi.nlm.nih.gov/biosample?term=SAMN29506515 and https://www.ncbi.nlm.nih.gov/nuccore/ON924087) +# run pangolin in usher analysis mode +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && rm ON924087.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna ncbi_dataset/data/ON924087.1.genomic.fna && \ + pangolin ncbi_dataset/data/ON924087.1.genomic.fna --analysis-mode usher -o ON924087.1-usher && \ + column -t -s, ON924087.1-usher/lineage_report.csv diff --git a/pangolin/4.2/README.md b/pangolin/4.2/README.md new file mode 100644 index 000000000..f1cd55b64 --- /dev/null +++ b/pangolin/4.2/README.md @@ -0,0 +1,50 @@ +# pangolin container + +Main tool : [pangolin](https://github.com/cov-lineages/pangolin) + +Full documentation: [https://cov-lineages.org/resources/pangolin.html](https://cov-lineages.org/resources/pangolin.html) + +Phylogenetic Assignment of Named Global Outbreak LINeages + +Additional tools: + +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.18.1 +- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.18.1 +- [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 +- [usher](https://github.com/yatisht/usher) 0.6.2 +- [faToVcf](https://github.com/yatisht/usher) 426 +- [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 +- [constellations](https://github.com/cov-lineages/constellations) 0.1.10 +- [gofasta](https://github.com/virus-evolution/gofasta) 1.2.0 +- [mafft](https://mafft.cbrc.jp/alignment/software/) 7.508 +- python 3.8.15 + +## Warning + +This docker image contains `pangolin-data` v1.18.1. The upgrade from 1.18 to 1.18.1 ONLY updated the underlying UShER tree/protobuf file. It did NOT update the pangoLEARN model, so please use the UShER mode of pangolin if you want to stay up-to-date with the most recent lineages. 
[See pangolin-data release notes here for more details](https://github.com/cov-lineages/pangolin-data/releases/tag/v1.18.1) + +## Example Usage + +```bash +# run Pangolin in the default mode (usher). Can optionally supply --analysis-mode usher +$ pangolin /pangolin/pangolin/test/test_seqs.fasta -o /data/test_seqs-output-pusher + +# run Pangolin in the fast/pangolearn mode. Can use either --analysis-mode fast or --analysis-mode pangolearn +$ pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode pangolearn -o /data/test_seqs-output-plearn + +# view the output CSV +$ column -t -s, /data/test_seqs-output-pusher/lineage_report.csv +taxon lineage conflict ambiguity_score scorpio_call scorpio_support scorpio_conflict scorpio_notes version pangolin_version scorpio_version constellation_version is_designated qc_status qc_notes note +India seq B.1.617.1 0.0 B.1.617.1-like 1.0 0.0 scorpio call: Alt alleles 11; Ref alleles 0; Amb alleles 0; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.617.1(1/1) +b117 B.1.1.7 0.0 Alpha (B.1.1.7-like) 0.91 0.04 scorpio call: Alt alleles 21; Ref alleles 1; Amb alleles 1; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.1.7(2/2) +outgroup_A A 0.0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: A(1/1) +issue_57_torsten_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_6000_Ns_in_18000_bases Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_no_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_is_too_short Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.9 +This_seq_has_lots_of_Ns Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.98 +This_seq_is_literally_just_N Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +Japan_seq B 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +USA_seq B.1.314 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +Unassigned_omicron_seq BA.1 0.0 Probable Omicron (BA.1-like) 0.71 0.08 scorpio call: Alt alleles 42; Ref alleles 5; Amb alleles 9; Oth alleles 3 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.03 Usher placements: BA.1(1/1) +``` diff --git a/pangolin/4.3-pdata-1.20/Dockerfile b/pangolin/4.3-pdata-1.20/Dockerfile new file mode 100644 index 000000000..b1c19159d --- /dev/null +++ b/pangolin/4.3-pdata-1.20/Dockerfile @@ -0,0 +1,127 @@ +FROM mambaorg/micromamba:1.4.3 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +# had to include the v for some of these due to GitHub tags. +# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" +ARG PANGOLIN_VER="v4.3" +ARG PANGOLIN_DATA_VER="v1.20" +ARG SCORPIO_VER="v0.3.17" +ARG CONSTELLATIONS_VER="v0.1.10" +ARG USHER_VER="0.6.2" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.3" +LABEL dockerfile.version="1" +LABEL software="pangolin" +LABEL software.version=${PANGOLIN_VER} +LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." 
+LABEL website="https://github.com/cov-lineages/pangolin" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the pangolin repo +RUN wget "https://github.com/cov-lineages/pangolin/archive/${PANGOLIN_VER}.tar.gz" && \ + tar -xf ${PANGOLIN_VER}.tar.gz && \ + rm -v ${PANGOLIN_VER}.tar.gz && \ + mv -v pangolin-* pangolin + +# set the environment; PATH is unnecessary here, but leaving anyways. It's reset later in dockerfile +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# modify environment.yml to pin specific versions during install +# create the conda environment using modified environment.yml +RUN sed -i "s|usher.*|usher=${USHER_VER}|" /pangolin/environment.yml && \ + sed -i "s|scorpio.git|scorpio.git@${SCORPIO_VER}|" /pangolin/environment.yml && \ + sed -i "s|pangolin-data.git|pangolin-data.git@${PANGOLIN_DATA_VER}|" /pangolin/environment.yml && \ + sed -i "s|constellations.git|constellations.git@${CONSTELLATIONS_VER}|" /pangolin/environment.yml && \ + micromamba create -n pangolin -y -f /pangolin/environment.yml + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +WORKDIR /pangolin + +# run pip install step; download optional pre-computed assignment hashes for UShER (useful for running on large batches of samples) +# best to skip using the assigment-cache if running on one sample for speed +# print versions +RUN pip install . 
&& \ + pangolin --add-assignment-cache && \ + micromamba clean -a -y && \ + mkdir /data && \ + pangolin --all-versions && \ + usher --version + +WORKDIR /data + +# hardcode pangolin executable into the PATH variable +ENV PATH="${PATH}:/opt/conda/envs/pangolin/bin/" + +# default command is to pull up help options for pangolin; can be overridden of course +CMD ["pangolin", "-h"] + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# test on test sequences supplied with Pangolin code +RUN pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode usher -o /data/test_seqs-output-pusher && \ + column -t -s, /data/test_seqs-output-pusher/lineage_report.csv + +# test functionality of assignment-cache option +RUN pangolin --use-assignment-cache /pangolin/pangolin/test/test_seqs.fasta + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# test on a B.1.1.7 genome +RUN pangolin /test-data/SRR13957123.consensus.fa --analysis-mode usher -o /test-data/SRR13957123-pusher && \ + column -t -s, /test-data/SRR13957123-pusher/lineage_report.csv + + # install unzip for unzipping zip archive from NCBI +RUN apt-get update && apt-get install -y --no-install-recommends unzip + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a BA.1 from Florida (https://www.ncbi.nlm.nih.gov/biosample?term=SAMN29506515 and https://www.ncbi.nlm.nih.gov/nuccore/ON924087) +# run pangolin in usher analysis mode +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && rm ON924087.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna ON924087.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin ON924087.1.genomic.fna --analysis-mode usher -o ON924087.1-usher && \ + column -t -s, ON924087.1-usher/lineage_report.csv + +# test specific for new lineage, XBB.1.16, introduced in pangolin-data v1.19 +# using this assembly: https://www.ncbi.nlm.nih.gov/nuccore/2440446687 +# biosample here: https://www.ncbi.nlm.nih.gov/biosample?term=SAMN33060589 +# one of the samples included in the initial pango-designation here: https://github.com/cov-lineages/pango-designation/issues/1723 +RUN datasets download virus genome accession OQ381818.1 --filename OQ381818.1.zip && \ + unzip OQ381818.1.zip && rm OQ381818.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OQ381818.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OQ381818.1.genomic.fna --analysis-mode usher -o OQ381818.1-usher && \ + column -t -s, OQ381818.1-usher/lineage_report.csv diff --git a/pangolin/4.3-pdata-1.20/README.md b/pangolin/4.3-pdata-1.20/README.md new file mode 100644 index 000000000..4eb3d779e --- /dev/null +++ b/pangolin/4.3-pdata-1.20/README.md @@ -0,0 +1,53 @@ +# pangolin docker image + +Main tool : [pangolin](https://github.com/cov-lineages/pangolin) + +Full documentation: [https://cov-lineages.org/resources/pangolin.html](https://cov-lineages.org/resources/pangolin.html) + +Phylogenetic Assignment of Named Global Outbreak LINeages + +Additional tools: + +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.20 +- 
[pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.20 +- [minimap2](https://github.com/lh3/minimap2) 2.26-r1175 +- [usher](https://github.com/yatisht/usher) 0.6.2 +- [faToVcf](https://github.com/yatisht/usher) 426 +- [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 +- [constellations](https://github.com/cov-lineages/constellations) 0.1.10 +- [gofasta](https://github.com/virus-evolution/gofasta) 1.2.0 +- [mafft](https://mafft.cbrc.jp/alignment/software/) 7.520 +- python 3.8.15 + +## pangoLEARN deprecation + +As of pangolin version 4.3, pangoLEARN mode has been deprecated. [More info can be found here on the v4.3 release page.](https://github.com/cov-lineages/pangolin/releases/tag/v4.3) + +> If `--analysis-mode fast` or `--analysis-mode pangolearn` is given, pangolin v4.3 will print out a warning and use UShER mode instead, unless `--datadir` is also given specifying a directory with pangoLEARN model files. The next release of pangolin-data (v1.20) will no longer include the model files which have not been updated since v1.18. + +This docker image contains `pangolin-data` v1.20. The pangoLEARN model has not been updated since pangolin-data version 1.18. Only the the underlying UShER tree/protobuf file will be maintained for the forseeable future. + +**Please use the UShER mode of pangolin if you want to stay up-to-date with the most recent lineages.** [See pangolin-data release notes here for more details](https://github.com/cov-lineages/pangolin-data/releases/tag/v1.20) + +## Example Usage + +```bash +# run Pangolin in the default mode (usher). Can optionally supply --analysis-mode usher +$ pangolin /pangolin/pangolin/test/test_seqs.fasta -o /data/test_seqs-output-pusher + +# view the output CSV +$ column -t -s, /data/test_seqs-output-pusher/lineage_report.csv +taxon lineage conflict ambiguity_score scorpio_call scorpio_support scorpio_conflict scorpio_notes version pangolin_version scorpio_version constellation_version is_designated qc_status qc_notes note +India seq B.1.617.1 0.0 B.1.617.1-like 1.0 0.0 scorpio call: Alt alleles 11; Ref alleles 0; Amb alleles 0; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.617.1(1/1) +b117 B.1.1.7 0.0 Alpha (B.1.1.7-like) 0.91 0.04 scorpio call: Alt alleles 21; Ref alleles 1; Amb alleles 1; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.1.7(2/2) +outgroup_A A 0.0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: A(1/1) +issue_57_torsten_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_6000_Ns_in_18000_bases Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_no_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_is_too_short Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.9 +This_seq_has_lots_of_Ns Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.98 +This_seq_is_literally_just_N Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +Japan_seq B 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +USA_seq B.1.314 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. 
+Unassigned_omicron_seq BA.1 0.0 Probable Omicron (BA.1-like) 0.71 0.08 scorpio call: Alt alleles 42; Ref alleles 5; Amb alleles 9; Oth alleles 3 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.03 Usher placements: BA.1(1/1) +``` diff --git a/pangolin/4.3-pdata-1.21/Dockerfile b/pangolin/4.3-pdata-1.21/Dockerfile new file mode 100644 index 000000000..c7bba394b --- /dev/null +++ b/pangolin/4.3-pdata-1.21/Dockerfile @@ -0,0 +1,136 @@ +FROM mambaorg/micromamba:1.4.4 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +# had to include the v for some of these due to GitHub tags. +# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" +ARG PANGOLIN_VER="v4.3" +ARG PANGOLIN_DATA_VER="v1.21" +ARG SCORPIO_VER="v0.3.17" +ARG CONSTELLATIONS_VER="v0.1.12" +ARG USHER_VER="0.6.2" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.4" +LABEL dockerfile.version="1" +LABEL software="pangolin" +LABEL software.version=${PANGOLIN_VER} +LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." +LABEL website="https://github.com/cov-lineages/pangolin" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the pangolin repo +RUN wget "https://github.com/cov-lineages/pangolin/archive/${PANGOLIN_VER}.tar.gz" && \ + tar -xf ${PANGOLIN_VER}.tar.gz && \ + rm -v ${PANGOLIN_VER}.tar.gz && \ + mv -v pangolin-* pangolin + +# set the environment; PATH is unnecessary here, but leaving anyways. It's reset later in dockerfile +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# modify environment.yml to pin specific versions during install +# create the conda environment using modified environment.yml +RUN sed -i "s|usher.*|usher=${USHER_VER}|" /pangolin/environment.yml && \ + sed -i "s|scorpio.git|scorpio.git@${SCORPIO_VER}|" /pangolin/environment.yml && \ + sed -i "s|pangolin-data.git|pangolin-data.git@${PANGOLIN_DATA_VER}|" /pangolin/environment.yml && \ + sed -i "s|constellations.git|constellations.git@${CONSTELLATIONS_VER}|" /pangolin/environment.yml && \ + micromamba create -n pangolin -y -f /pangolin/environment.yml + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +WORKDIR /pangolin + +# run pip install step; download optional pre-computed assignment hashes for UShER (useful for running on large batches of samples) +# best to skip using the assigment-cache if running on one sample for speed +# print versions +RUN pip install . 
&& \ + pangolin --add-assignment-cache && \ + micromamba clean -a -y && \ + mkdir /data && \ + pangolin --all-versions && \ + usher --version + +WORKDIR /data + +# hardcode pangolin executable into the PATH variable +ENV PATH="${PATH}:/opt/conda/envs/pangolin/bin/" + +# default command is to pull up help options for pangolin; can be overridden of course +CMD ["pangolin", "-h"] + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# test on test sequences supplied with Pangolin code +RUN pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode usher -o /data/test_seqs-output-pusher && \ + column -t -s, /data/test_seqs-output-pusher/lineage_report.csv + +# test functionality of assignment-cache option +RUN pangolin --use-assignment-cache /pangolin/pangolin/test/test_seqs.fasta + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# test on a B.1.1.7 genome +RUN pangolin /test-data/SRR13957123.consensus.fa --analysis-mode usher -o /test-data/SRR13957123-pusher && \ + column -t -s, /test-data/SRR13957123-pusher/lineage_report.csv + + # install unzip for unzipping zip archive from NCBI +RUN apt-get update && apt-get install -y --no-install-recommends unzip + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a BA.1 from Florida (https://www.ncbi.nlm.nih.gov/biosample?term=SAMN29506515 and https://www.ncbi.nlm.nih.gov/nuccore/ON924087) +# run pangolin in usher analysis mode +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && rm ON924087.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna ON924087.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin ON924087.1.genomic.fna --analysis-mode usher -o ON924087.1-usher && \ + column -t -s, ON924087.1-usher/lineage_report.csv + +# test specific for new lineage, XBB.1.16, introduced in pangolin-data v1.19 +# using this assembly: https://www.ncbi.nlm.nih.gov/nuccore/2440446687 +# biosample here: https://www.ncbi.nlm.nih.gov/biosample?term=SAMN33060589 +# one of the samples included in the initial pango-designation here: https://github.com/cov-lineages/pango-designation/issues/1723 +RUN datasets download virus genome accession OQ381818.1 --filename OQ381818.1.zip && \ + unzip OQ381818.1.zip && rm OQ381818.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OQ381818.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OQ381818.1.genomic.fna --analysis-mode usher -o OQ381818.1-usher && \ + column -t -s, OQ381818.1-usher/lineage_report.csv + +# testing another XBB.1.16, trying to test scorpio functionality. Want pangolin to NOT assign lineage based on pango hash match. 
+# this test runs as expected, uses scorpio to check for constellation of mutations, then assigns using PUSHER placement +RUN datasets download virus genome accession OR177999.1 --filename OR177999.1.zip && \ + unzip OR177999.1.zip && rm OR177999.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OR177999.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OR177999.1.genomic.fna --analysis-mode usher -o OR177999.1-usher && \ + column -t -s, OR177999.1-usher/lineage_report.csv \ No newline at end of file diff --git a/pangolin/4.3-pdata-1.21/README.md b/pangolin/4.3-pdata-1.21/README.md new file mode 100644 index 000000000..1a6589c7b --- /dev/null +++ b/pangolin/4.3-pdata-1.21/README.md @@ -0,0 +1,53 @@ +# pangolin docker image + +Main tool : [pangolin](https://github.com/cov-lineages/pangolin) + +Full documentation: [https://cov-lineages.org/resources/pangolin.html](https://cov-lineages.org/resources/pangolin.html) + +Phylogenetic Assignment of Named Global Outbreak LINeages + +Additional tools: + +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.21 +- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.21 +- [minimap2](https://github.com/lh3/minimap2) 2.26-r1175 +- [usher](https://github.com/yatisht/usher) 0.6.2 +- [faToVcf](https://github.com/yatisht/usher) 426 +- [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 +- [constellations](https://github.com/cov-lineages/constellations) 0.1.11 +- [gofasta](https://github.com/virus-evolution/gofasta) 1.2.0 +- [mafft](https://mafft.cbrc.jp/alignment/software/) 7.520 +- python 3.8.15 + +## pangoLEARN deprecation + +As of pangolin version 4.3, pangoLEARN mode has been deprecated. [More info can be found here on the v4.3 release page.](https://github.com/cov-lineages/pangolin/releases/tag/v4.3) + +> If `--analysis-mode fast` or `--analysis-mode pangolearn` is given, pangolin v4.3 will print out a warning and use UShER mode instead, unless `--datadir` is also given specifying a directory with pangoLEARN model files. The next release of pangolin-data (v1.20) will no longer include the model files which have not been updated since v1.18. + +This docker image contains `pangolin-data` v1.21. The pangoLEARN model has not been updated since pangolin-data version 1.18. Only the underlying UShER tree/protobuf file will be maintained for the foreseeable future. + +**Please use the UShER mode of pangolin if you want to stay up-to-date with the most recent lineages.** [See pangolin-data release notes here for more details](https://github.com/cov-lineages/pangolin-data/releases) + +## Example Usage + +```bash +# run Pangolin in the default mode (usher). 
Can optionally supply --analysis-mode usher +$ pangolin /pangolin/pangolin/test/test_seqs.fasta -o /data/test_seqs-output-pusher + +# view the output CSV +$ column -t -s, /data/test_seqs-output-pusher/lineage_report.csv +taxon lineage conflict ambiguity_score scorpio_call scorpio_support scorpio_conflict scorpio_notes version pangolin_version scorpio_version constellation_version is_designated qc_status qc_notes note +India seq B.1.617.1 0.0 B.1.617.1-like 1.0 0.0 scorpio call: Alt alleles 11; Ref alleles 0; Amb alleles 0; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.617.1(1/1) +b117 B.1.1.7 0.0 Alpha (B.1.1.7-like) 0.91 0.04 scorpio call: Alt alleles 21; Ref alleles 1; Amb alleles 1; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.1.7(2/2) +outgroup_A A 0.0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: A(1/1) +issue_57_torsten_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_6000_Ns_in_18000_bases Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_no_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_is_too_short Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.9 +This_seq_has_lots_of_Ns Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.98 +This_seq_is_literally_just_N Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +Japan_seq B 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +USA_seq B.1.314 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +Unassigned_omicron_seq BA.1 0.0 Probable Omicron (BA.1-like) 0.71 0.08 scorpio call: Alt alleles 42; Ref alleles 5; Amb alleles 9; Oth alleles 3 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.03 Usher placements: BA.1(1/1) +``` diff --git a/pangolin/4.3.1-pdata-1.22/Dockerfile b/pangolin/4.3.1-pdata-1.22/Dockerfile new file mode 100644 index 000000000..0166bbbb3 --- /dev/null +++ b/pangolin/4.3.1-pdata-1.22/Dockerfile @@ -0,0 +1,145 @@ +FROM mambaorg/micromamba:1.4.9 as app + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +# ARG variables only persist during build time +# had to include the v for some of these due to GitHub tags. +# using pangolin-data github tag, NOT what is in the GH release title "v1.2.133" +ARG PANGOLIN_VER="v4.3.1" +ARG PANGOLIN_DATA_VER="v1.22" +ARG SCORPIO_VER="v0.3.17" +ARG CONSTELLATIONS_VER="v0.1.12" +ARG USHER_VER="0.6.2" + +# metadata labels +LABEL base.image="mambaorg/micromamba:1.4.9" +LABEL dockerfile.version="1" +LABEL software="pangolin" +LABEL software.version=${PANGOLIN_VER} +LABEL description="Conda environment for Pangolin. Pangolin: Software package for assigning SARS-CoV-2 genome sequences to global lineages." 
+LABEL website="https://github.com/cov-lineages/pangolin" +LABEL license="GNU General Public License v3.0" +LABEL license.url="https://github.com/cov-lineages/pangolin/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + git \ + procps \ + bsdmainutils && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get the pangolin repo +RUN wget "https://github.com/cov-lineages/pangolin/archive/${PANGOLIN_VER}.tar.gz" && \ + tar -xf ${PANGOLIN_VER}.tar.gz && \ + rm -v ${PANGOLIN_VER}.tar.gz && \ + mv -v pangolin-* pangolin + +# set the environment; PATH is unnecessary here, but leaving anyways. It's reset later in dockerfile +ENV PATH="$PATH" \ + LC_ALL=C.UTF-8 + +# modify environment.yml to pin specific versions during install +# create the conda environment using modified environment.yml +RUN sed -i "s|usher.*|usher=${USHER_VER}|" /pangolin/environment.yml && \ + sed -i "s|scorpio.git|scorpio.git@${SCORPIO_VER}|" /pangolin/environment.yml && \ + sed -i "s|pangolin-data.git|pangolin-data.git@${PANGOLIN_DATA_VER}|" /pangolin/environment.yml && \ + sed -i "s|constellations.git|constellations.git@${CONSTELLATIONS_VER}|" /pangolin/environment.yml && \ + micromamba create -n pangolin -y -f /pangolin/environment.yml + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +WORKDIR /pangolin + +# run pip install step; download optional pre-computed assignment hashes for UShER (useful for running on large batches of samples) +# best to skip using the assigment-cache if running on one sample for speed +# print versions +RUN pip install . 
&& \ + pangolin --add-assignment-cache && \ + micromamba clean -a -y && \ + mkdir /data && \ + pangolin --all-versions && \ + usher --version + +WORKDIR /data + +# hardcode pangolin executable into the PATH variable +ENV PATH="${PATH}:/opt/conda/envs/pangolin/bin/" + +# default command is to pull up help options for pangolin; can be overridden of course +CMD ["pangolin", "-h"] + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="pangolin" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# test on test sequences supplied with Pangolin code +RUN pangolin /pangolin/pangolin/test/test_seqs.fasta --analysis-mode usher -o /data/test_seqs-output-pusher && \ + column -t -s, /data/test_seqs-output-pusher/lineage_report.csv + +# test functionality of assignment-cache option +RUN pangolin --use-assignment-cache /pangolin/pangolin/test/test_seqs.fasta + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# test on a B.1.1.7 genome +RUN pangolin /test-data/SRR13957123.consensus.fa --analysis-mode usher -o /test-data/SRR13957123-pusher && \ + column -t -s, /test-data/SRR13957123-pusher/lineage_report.csv + + # install unzip for unzipping zip archive from NCBI +RUN apt-get update && apt-get install -y --no-install-recommends unzip + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a BA.1 from Florida (https://www.ncbi.nlm.nih.gov/biosample?term=SAMN29506515 and https://www.ncbi.nlm.nih.gov/nuccore/ON924087) +# run pangolin in usher analysis mode +RUN datasets download virus genome accession ON924087.1 --filename ON924087.1.zip && \ + unzip ON924087.1.zip && rm ON924087.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna ON924087.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin ON924087.1.genomic.fna --analysis-mode usher -o ON924087.1-usher && \ + column -t -s, ON924087.1-usher/lineage_report.csv + +# test specific for new lineage, XBB.1.16, introduced in pangolin-data v1.19 +# using this assembly: https://www.ncbi.nlm.nih.gov/nuccore/2440446687 +# biosample here: https://www.ncbi.nlm.nih.gov/biosample?term=SAMN33060589 +# one of the samples included in the initial pango-designation here: https://github.com/cov-lineages/pango-designation/issues/1723 +RUN datasets download virus genome accession OQ381818.1 --filename OQ381818.1.zip && \ + unzip OQ381818.1.zip && rm OQ381818.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OQ381818.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OQ381818.1.genomic.fna --analysis-mode usher -o OQ381818.1-usher && \ + column -t -s, OQ381818.1-usher/lineage_report.csv + +# testing another XBB.1.16, trying to test scorpio functionality. Want pangolin to NOT assign lineage based on pango hash match. 
+# this test runs as expected, uses scorpio to check for constellation of mutations, then assigns using PUSHER placement +RUN datasets download virus genome accession OR177999.1 --filename OR177999.1.zip && \ + unzip OR177999.1.zip && rm OR177999.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OR177999.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OR177999.1.genomic.fna --analysis-mode usher -o OR177999.1-usher && \ + column -t -s, OR177999.1-usher/lineage_report.csv + + ## test for BA.2.86 + # virus identified in MI: https://www.ncbi.nlm.nih.gov/nuccore/OR461132.1 + RUN datasets download virus genome accession OR461132.1 --filename OR461132.1.zip && \ + unzip OR461132.1.zip && rm OR461132.1.zip && \ + mv -v ncbi_dataset/data/genomic.fna OR461132.1.genomic.fna && \ + rm -vr ncbi_dataset/ README.md && \ + pangolin OR461132.1.genomic.fna --analysis-mode usher -o OR461132.1-usher && \ + column -t -s, OR461132.1-usher/lineage_report.csv \ No newline at end of file diff --git a/pangolin/4.3.1-pdata-1.22/README.md b/pangolin/4.3.1-pdata-1.22/README.md new file mode 100644 index 000000000..4a1065ae5 --- /dev/null +++ b/pangolin/4.3.1-pdata-1.22/README.md @@ -0,0 +1,53 @@ +# pangolin docker image + +Main tool : [pangolin](https://github.com/cov-lineages/pangolin) + +Full documentation: [https://cov-lineages.org/resources/pangolin.html](https://cov-lineages.org/resources/pangolin.html) + +Phylogenetic Assignment of Named Global Outbreak LINeages + +Additional tools: + +- [pangolin-data](https://github.com/cov-lineages/pangolin-data) 1.22 +- [pangolin-assignment](https://github.com/cov-lineages/pangolin-assignment) 1.22 +- [minimap2](https://github.com/lh3/minimap2) 2.26-r1175 +- [usher](https://github.com/yatisht/usher) 0.6.2 +- [faToVcf](https://github.com/yatisht/usher) 448 +- [scorpio](https://github.com/cov-lineages/scorpio) 0.3.17 +- [constellations](https://github.com/cov-lineages/constellations) 0.1.12 +- [gofasta](https://github.com/virus-evolution/gofasta) 1.2.1 +- [mafft](https://mafft.cbrc.jp/alignment/software/) 7.520 +- python 3.8.17 + +## pangoLEARN deprecation + +As of pangolin version 4.3, pangoLEARN mode has been deprecated. [More info can be found here on the v4.3 release page.](https://github.com/cov-lineages/pangolin/releases/tag/v4.3) + +> If `--analysis-mode fast` or `--analysis-mode pangolearn` is given, pangolin v4.3 will print out a warning and use UShER mode instead, unless `--datadir` is also given specifying a directory with pangoLEARN model files. The next release of pangolin-data (v1.20) will no longer include the model files which have not been updated since v1.18. + +This docker image contains `pangolin-data` v1.22. The pangoLEARN model has not been updated since pangolin-data version 1.18. Only the underlying UShER tree/protobuf file will be maintained for the foreseeable future. + +**Please use the UShER mode of pangolin if you want to stay up-to-date with the most recent lineages.** [See pangolin-data release notes here for more details](https://github.com/cov-lineages/pangolin-data/releases) + +## Example Usage + +```bash +# run Pangolin in the default mode (usher). 
Can optionally supply --analysis-mode usher +$ pangolin /pangolin/pangolin/test/test_seqs.fasta -o /data/test_seqs-output-pusher + +# view the output CSV +$ column -t -s, /data/test_seqs-output-pusher/lineage_report.csv +taxon lineage conflict ambiguity_score scorpio_call scorpio_support scorpio_conflict scorpio_notes version pangolin_version scorpio_version constellation_version is_designated qc_status qc_notes note +India seq B.1.617.1 0.0 B.1.617.1-like 1.0 0.0 scorpio call: Alt alleles 11; Ref alleles 0; Amb alleles 0; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.617.1(1/1) +b117 B.1.1.7 0.0 Alpha (B.1.1.7-like) 0.91 0.04 scorpio call: Alt alleles 21; Ref alleles 1; Amb alleles 1; Oth alleles 0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: B.1.1.7(2/2) +outgroup_A A 0.0 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.02 Usher placements: A(1/1) +issue_57_torsten_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_6000_Ns_in_18000_bases Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_has_no_seq Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +This_seq_is_too_short Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.9 +This_seq_has_lots_of_Ns Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail Ambiguous_content:0.98 +This_seq_is_literally_just_N Unassigned PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False fail failed to map +Japan_seq B 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +USA_seq B.1.314 0.0 PANGO-v1.16 4.1.3 0.3.17 v0.1.10 True pass Ambiguous_content:0.02 Assigned from designation hash. +Unassigned_omicron_seq BA.1 0.0 Probable Omicron (BA.1-like) 0.71 0.08 scorpio call: Alt alleles 42; Ref alleles 5; Amb alleles 9; Oth alleles 3 PUSHER-v1.16 4.1.3 0.3.17 v0.1.10 False pass Ambiguous_content:0.03 Usher placements: BA.1(1/1) +``` diff --git a/pasty/1.0.2/Dockerfile b/pasty/1.0.2/Dockerfile new file mode 100644 index 000000000..f1757c390 --- /dev/null +++ b/pasty/1.0.2/Dockerfile @@ -0,0 +1,120 @@ +ARG PASTY_VERSION="1.0.2" +# formatted for apt version syntax +ARG BLAST_VERSION="2.12.0+ds-3build1" + +FROM ubuntu:jammy as app + +# have to re-instatiate these variables +ARG PASTY_VERSION +ARG BLAST_VERSION + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="pasty" +LABEL software.version="${PASTY_VERSION}" +LABEL description="In silico serogrouping of Pseudomonas aeruginosa isolates from genome assemblies" +LABEL website="https://github.com/rpetit3/pasty" +LABEL license="https://github.com/rpetit3/pasty/blob/main/LICENSE" +LABEL maintainer1="Curtis Kapsak" +LABEL maintainer1.email="curtis.kapsak@theiagen.com" + +# install dependencies +# ncbi-blast+ version in apt for ubuntu:jammy = v2.12.0 +# python v3.10.6 +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + ncbi-blast+=${BLAST_VERSION} \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install python deps +RUN pip3 install executor rich-click + +# download pasty; make /data +RUN echo "downloading pasty source code..." 
&& \ + wget -q https://github.com/rpetit3/pasty/archive/refs/tags/v${PASTY_VERSION}.tar.gz && \ + tar -zxf v${PASTY_VERSION}.tar.gz && \ + rm -rf v${PASTY_VERSION}.tar.gz && \ + mkdir /data + +# set $PATH +# set perl locale settings for singularity compatibility +ENV PATH="${PATH}:/pasty-${PASTY_VERSION}/bin"\ + LC_ALL=C.UTF-8 + +# final working directory is /data +WORKDIR /data + +# testing layer starts here +FROM app as test + +# re-instatiate for use below in test layer +ARG PASTY_VERSION + +# running the below test commands from the root pasty directory +WORKDIR /pasty-${PASTY_VERSION} + +# shamelessly stolen and modified from https://github.com/rpetit3/pasty/blob/main/.github/workflows/test-pasty.yml +RUN echo "O1-GCF_000504045" && \ + pasty --assembly test/O1-GCF_000504045.fna.gz --prefix O1-GCF_000504045 && \ + cat O1-GCF_000504045.tsv && \ + cat O1-GCF_000504045.details.tsv && \ + echo "O2-GCF_000006765" && \ + pasty --assembly test/O2-GCF_000006765.fna.gz --prefix O2-GCF_000006765 --outdir O2 && \ + cat O2/O2-GCF_000006765.tsv && \ + cat O2/O2-GCF_000006765.details.tsv && \ + echo "O3-GCF_000271365" && \ + pasty --assembly test/O3-GCF_000271365.fna.gz --prefix O3-GCF_000271365 --outdir O3 && \ + cat O3/O3-GCF_000271365.tsv && \ + cat O3/O3-GCF_000271365.details.tsv && \ + echo "O4-GCF_024652945" && \ + pasty --assembly test/O4-GCF_024652945.fna.gz --prefix O4-GCF_024652945 --outdir O4 && \ + cat O4/O4-GCF_024652945.tsv && \ + cat O4/O4-GCF_024652945.details.tsv && \ + echo "O6-GCF_001457615" && \ + pasty --assembly test/O6-GCF_001457615.fna.gz --prefix O6-GCF_001457615 && \ + cat O6-GCF_001457615.tsv && \ + cat O6-GCF_001457615.details.tsv && \ + echo "O7-GCF_001482325" && \ + pasty --assembly test/O7-GCF_001482325.fna.gz --prefix O7-GCF_001482325 && \ + cat O7-GCF_001482325.tsv && \ + cat O7-GCF_001482325.details.tsv && \ + echo "O9-GCF_002075065" && \ + pasty --assembly test/O9-GCF_002075065.fna.gz --prefix O9-GCF_002075065 && \ + cat O9-GCF_002075065.tsv && \ + cat O9-GCF_002075065.details.tsv && \ + echo "O10-GCF_009676765" && \ + pasty --assembly test/O10-GCF_009676765.fna.gz --prefix O10-GCF_009676765 && \ + cat O10-GCF_009676765.tsv && \ + cat O10-GCF_009676765.details.tsv && \ + echo "O11-GCF_002192495" && \ + pasty --assembly test/O11-GCF_002192495.fna.gz --prefix O11-GCF_002192495 && \ + cat O11-GCF_002192495.tsv && \ + cat O11-GCF_002192495.details.tsv && \ + echo "O12-GCF_000981825" && \ + pasty --assembly test/O12-GCF_000981825.fna.gz --prefix O12-GCF_000981825 && \ + cat O12-GCF_000981825.tsv && \ + cat O12-GCF_000981825.details.tsv && \ + echo "NT-GCF_000292685" && \ + pasty --assembly test/NT-GCF_000292685.fna.gz --prefix NT-GCF_000292685 && \ + cat NT-GCF_000292685.tsv && \ + cat NT-GCF_000292685.details.tsv && \ + echo "empty" && \ + pasty --assembly test/empty.fasta --prefix empty && \ + cat empty.tsv && \ + cat empty.details.tsv && \ + echo "not-a-fasta" && \ + pasty --assembly test/not-a-fasta.fasta --prefix not-a-fasta && \ + cat not-a-fasta.tsv && \ + cat not-a-fasta.details.tsv && \ + echo "poor" && \ + pasty --assembly test/poor.fasta --prefix poor --outdir poor && \ + cat poor/poor.tsv && \ + cat poor/poor.details.tsv + +# print help and version info +RUN pasty --help && \ + pasty --version \ No newline at end of file diff --git a/pasty/1.0.2/README.md b/pasty/1.0.2/README.md new file mode 100644 index 000000000..2981bd958 --- /dev/null +++ b/pasty/1.0.2/README.md @@ -0,0 +1,62 @@ +# pasty container + +Main tool : [pasty](https://github.com/rpetit3/pasty) 
+ +Additional tools: + +- ncbi-blast+ 2.12.0 +- python 3.10.6 + +Full documentation: [https://github.com/rpetit3/pasty](https://github.com/rpetit3/pasty) + +A tool easily taken advantage of for in silico serogrouping of Pseudomonas aeruginosa isolates from genome assemblies + +## Example Usage + +```bash +# test genome assemblies are included in the docker image at /pasty-1.0.2/test/ +# visit here to see more information: https://github.com/rpetit3/pasty/tree/main/test + +# run pasty via docker container (command broken into 2 lines for readability) +$ docker run --rm -v $PWD:/data -u $(id -u):$(id -g) staphb/pasty:1.0.2 \ + pasty --assembly /pasty-1.0.2/test/O1-GCF_000504045.fna.gz --prefix O1-GCF_000504045 +Running pasty with following parameters: + --assembly /pasty-1.0.2/test/O1-GCF_000504045.fna.gz + --db /pasty-1.0.2/db/OSAdb.fasta + --prefix O1-GCF_000504045 + --outdir ./ + --min_pident 95 + --min_coverage 95 + +Running BLASTN... +Writing outputs... +BLASTN results written to ./O1-GCF_000504045.blastn.tsv + + Serogroup Results +┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓ +┃ sample ┃ serogroup ┃ coverage ┃ fragments ┃ +┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩ +│ O1-GCF_000504045 │ O1 │ 99.97 │ 1 │ +│ O1-GCF_000504045 │ O2 │ 9.83 │ 2 │ +│ O1-GCF_000504045 │ O3 │ 11.18 │ 2 │ +│ O1-GCF_000504045 │ O4 │ 14.36 │ 2 │ +│ O1-GCF_000504045 │ O5 │ 0 │ 0 │ +│ O1-GCF_000504045 │ O6 │ 14.07 │ 2 │ +│ O1-GCF_000504045 │ O7 │ 11.54 │ 2 │ +│ O1-GCF_000504045 │ O9 │ 36.62 │ 1 │ +│ O1-GCF_000504045 │ O10 │ 12.52 │ 2 │ +│ O1-GCF_000504045 │ O11 │ 15.85 │ 2 │ +│ O1-GCF_000504045 │ O12 │ 1.24 │ 1 │ +│ O1-GCF_000504045 │ O13 │ 15.39 │ 2 │ +│ O1-GCF_000504045 │ WyzB │ 0 │ 0 │ +└──────────────────┴───────────┴──────────┴───────────┘ +Serogroup Results written to ./O1-GCF_000504045.details.tsv + + Predicted Serogroup +┏━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━┓ +┃ sample ┃ serogroup ┃ coverage ┃ fragments ┃ comment ┃ +┡━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━┩ +│ O1-GCF_000504045 │ O1 │ 99.97 │ 1 │ │ +└──────────────────┴───────────┴──────────┴───────────┴─────────┘ +Predicted serogroup result written to ./O1-GCF_000504045.tsv +``` diff --git a/phyml/3.3.20220408/Dockerfile b/phyml/3.3.20220408/Dockerfile index 83f56d7c6..ef5860110 100644 --- a/phyml/3.3.20220408/Dockerfile +++ b/phyml/3.3.20220408/Dockerfile @@ -5,7 +5,7 @@ ARG PHYML_VER="3.3.20220408" # metadata LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="1" +LABEL dockerfile.version="2" LABEL software="Phyml" LABEL software.version="3.3.20220408" LABEL description="PhyML estimates maximum likelihood phylogenies from alignments of nucleotide or amino acid sequences." 
@@ -18,7 +18,7 @@ LABEL maintainer.email="jvhagey@gmail.com" ARG DEBIAN_FRONTEND=noninteractive RUN apt-get update &&\ apt-get install -y --no-install-recommends \ - ca-certificates=20210119~20.04.2 \ + ca-certificates \ pkg-config=0.29.1-0ubuntu4 \ automake=1:1.16.1-4ubuntu6 \ autoconf=2.69-11.1 \ diff --git a/pilon/1.24/Dockerfile b/pilon/1.24/Dockerfile new file mode 100644 index 000000000..250aab390 --- /dev/null +++ b/pilon/1.24/Dockerfile @@ -0,0 +1,54 @@ +FROM ubuntu:jammy as app + +# ARG sets environment variables during the build stage +ARG PILONVER="1.24" +ARG JAVAVER="11" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="pilon" +LABEL software.version="${PILONVER}" +LABEL description="Automatically improve draft assemblies and find variation among strains" +LABEL website="https://github.com/broadinstitute/pilon" +LABEL license="https://github.com/broadinstitute/pilon/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + procps \ + wget \ + openjdk-${JAVAVER}-jre && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install pilon +# creating a versionless jarfile so that scripts don't have to be updated +RUN mkdir pilon && \ + cd pilon && \ + wget -q https://github.com/broadinstitute/pilon/releases/download/v${PILONVER}/pilon-${PILONVER}.jar && \ + mv -v pilon-${PILONVER}.jar pilon.jar && ls && pwd + +WORKDIR /data + +ENV PATH="/pilon:${PATH}" LC_ALL=C + +FROM staphb/samtools:1.16 as samtools + +WORKDIR /test + +RUN wget -q https://github.com/StaPH-B/docker-builds/blob/1670ca89936c634d62814ae7958062e1d80a5989/tests/SARS-CoV-2/SRR13957123.sorted.bam?raw=true && \ + mv SRR13957123.sorted.bam?raw=true SRR13957123.sorted.bam && \ + samtools index SRR13957123.sorted.bam + +FROM app as test + +WORKDIR /test + +COPY --from=samtools /test/SRR13957123* /test/ + +RUN java -jar /pilon/pilon.jar --version && java -jar /pilon/pilon.jar --help + +RUN wget -q https://github.com/UPHL-BioNGS/Cecret/blob/1928809ea07efb947039293fdf5609e8e577e61d/configs/MN908947.3.fasta?raw=true && \ + mv MN908947.3.fasta?raw=true MN908947.3.fasta && \ + java -Xmx6G -jar /pilon/pilon.jar --genome MN908947.3.fasta --frags SRR13957123.sorted.bam --changes && \ + echo "pilon changes:" && cat pilon.changes && ls pilon.fasta \ No newline at end of file diff --git a/pilon/1.24/README.md b/pilon/1.24/README.md new file mode 100644 index 000000000..0d728282e --- /dev/null +++ b/pilon/1.24/README.md @@ -0,0 +1,25 @@ +# pilon container + +Main tool : [pilon](https://github.com/broadinstitute/pilon) + +Additional tools (required): + +* java (specifically `openjdk-11-jre`) + +## Example Usage + +Note: both BAM files must be indexed with `samtools index` prior to usage + +```bash +java -jar -Xmx16G /pilon/pilon.jar \ + --genome fasta --frags paired_bam \ + --unpaired unpaired_bam \ + --output outdir \ + --changes +``` + +WARNING : pilon is included as a jarfile at `/pilon/pilon-${PILONVER}.jar` and copied to `/pilon/pilon.jar` + +Also, memory issues are common with pilon. From their [documentation](https://github.com/broadinstitute/pilon/wiki/Requirements-&-Usage): +> Generally, bacterial genomes with ~200x of Illumina coverage will require at least 8GB, though 16GB is recommended. 
+> Larger genomes will require more memory to process; exactly how much is very data-dependent, but as a rule of thumb, try to allocate 1GB per megabase of input genome to be processed. diff --git a/polypolish/0.5.0/Dockerfile b/polypolish/0.5.0/Dockerfile new file mode 100644 index 000000000..5eb5e4b40 --- /dev/null +++ b/polypolish/0.5.0/Dockerfile @@ -0,0 +1,48 @@ +FROM ubuntu:jammy as app + +ARG POLYPOLISH_VER="0.5.0" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="polypolish" +LABEL software.version="${POLYPOLISH_VER}" +LABEL description="Polypolish is a tool for polishing genome assemblies with short reads." +LABEL website="https://github.com/rrwick/Polypolish" +LABEL license="https://github.com/rrwick/Polypolish/blob/main/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + unzip \ + python3 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/local/bin + +# install polypolish without rust et al. +RUN wget -q https://github.com/rrwick/Polypolish/releases/download/v${POLYPOLISH_VER}/polypolish-linux-x86_64-musl-v${POLYPOLISH_VER}.tar.gz && \ + tar -xf polypolish-linux-x86_64-musl-v${POLYPOLISH_VER}.tar.gz && \ + rm polypolish-linux-x86_64-musl-v${POLYPOLISH_VER}.tar.gz + +# getting polypolish_insert_filter.py +RUN wget -q https://github.com/rrwick/Polypolish/archive/refs/tags/v${POLYPOLISH_VER}.tar.gz && \ + tar -xf v${POLYPOLISH_VER}.tar.gz && \ + cp Polypolish-0.5.0/scripts/polypolish_insert_filter.py . && \ + rm v${POLYPOLISH_VER}.tar.gz && \ + rm -rf Polypolish-0.5.0 + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN polypolish --help && polypolish --version && polypolish_insert_filter.py --help && polypolish_insert_filter.py --version + +# using "toy" data +RUN wget -q https://raw.githubusercontent.com/wiki/rrwick/Polypolish/files/toy_example/assembly.fasta && \ + wget -q https://raw.githubusercontent.com/wiki/rrwick/Polypolish/files/toy_example/alignments.sam && \ + polypolish assembly.fasta alignments.sam > polished.fasta \ No newline at end of file diff --git a/polypolish/0.5.0/README.md b/polypolish/0.5.0/README.md new file mode 100644 index 000000000..f1da2ee59 --- /dev/null +++ b/polypolish/0.5.0/README.md @@ -0,0 +1,33 @@ +# polypolish container + +Main tool : [polypolish](https://github.com/rrwick/Polypolish/wiki/How-to-run-Polypolish) + +Additional tools: + +- python3 3.10.6 +- polypolish_insert_filter.py + +Full documentation: [https://github.com/rrwick/Polypolish/wiki](https://github.com/rrwick/Polypolish/wiki) + +Polypolish "polishes" consensus files created during assembly of long reads with Illumina short reads. Polypolish is a little different from other polishing tools in that paired-end reads need to be aligned separately to generate two SAM files. + +## Example Usage + +Align reads to the draft sequence in a different container. The example shows bwa, as this is what the Polypolish wiki uses, but bbmap, minimap2, or any other similar aligner can perform this step and may be better suited to your use case. + +```bash +bwa index draft.fasta +bwa mem -t 16 -a draft.fasta reads_1.fastq.gz > alignments_1.sam +bwa mem -t 16 -a draft.fasta reads_2.fastq.gz > alignments_2.sam +``` + +Once the SAM files are generated, they can be used with Polypolish in this container.
+ +```bash +# paired end +polypolish_insert_filter.py --in1 alignments_1.sam --in2 alignments_2.sam --out1 filtered_1.sam --out2 filtered_2.sam +polypolish draft.fasta filtered_1.sam filtered_2.sam > polished.fasta + +# single end +polypolish draft.fasta input.sam > polished.fasta +``` diff --git a/poppunk/2.6.0/Dockerfile b/poppunk/2.6.0/Dockerfile new file mode 100644 index 000000000..396131b81 --- /dev/null +++ b/poppunk/2.6.0/Dockerfile @@ -0,0 +1,65 @@ +FROM mambaorg/micromamba:1.3.1 as app + +# Version arguments +# ARG variables only persist during build time +ARG POPPUNK_VERSION="2.6.0" + +# build and run as root users since micromamba image has 'mambauser' set as the $USER +USER root +# set workdir to default for building; set to /data at the end +WORKDIR / + +LABEL base.image="mambaorg/micromamba:1.3.1" +LABEL dockerfile.version="2" +LABEL software="PopPUNK" +LABEL software.version=${POPPUNK_VERSION} +LABEL description="POPulation Partitioning Using Nucleotide Kmers" +LABEL website="https://github.com/bacpop/PopPUNK" +LABEL license="https://github.com/bacpop/PopPUNK/blob/master/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="curtis.kapsak@theiagen.com" +LABEL maintainer2="Harry Hung" +LABEL maintainer2.email="ch31@sanger.ac.uk" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# Create PopPUNK conda environment called poppunk-env from bioconda recipe +# clean up conda garbage +RUN micromamba create -n poppunk-env -c conda-forge -c bioconda -c defaults poppunk=${POPPUNK_VERSION} && \ + micromamba clean -a -y + +# set the environment, put new conda env in PATH by default; set locales to UTF-8 +ENV PATH="/opt/conda/envs/poppunk-env/bin:${PATH}" \ + LC_ALL=C.UTF-8 + +# set working directory to /data +WORKDIR /data + +# new base for testing +FROM app as test + +# so that mamba/conda env is active when running below commands +ENV ENV_NAME="poppunk-env" +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# print out various help options and version +RUN poppunk --help && \ + poppunk_assign --help && \ + poppunk_visualise --help && \ + poppunk_mst --help && \ + poppunk_references --help && \ + poppunk_info --help && \ + poppunk_mandrake --help && \ + poppunk --version + +# Download 100 S. Pneumo assemblies from GPS Public Data on ENA +# Build PopPUNK database from the assemblies +# Assign clusters on the same assemblies using the built database +# Compare the database clusters and assigned clusters of the assemblies +COPY test.sh ftps.txt /data/ +RUN bash test.sh \ No newline at end of file diff --git a/poppunk/2.6.0/README.md b/poppunk/2.6.0/README.md new file mode 100644 index 000000000..69d4bacfa --- /dev/null +++ b/poppunk/2.6.0/README.md @@ -0,0 +1,56 @@ +# PopPUNK container + +Main tool : +- [PopPUNK](https://github.com/bacpop/PopPUNK) + +Additional tools: +- biopython 1.81 +- pp-sketchlib 2.1.1 +- python 3.10.9 +- rapidnj 2.3.2 +- treeswift 1.1.33 + +Full documentation: [https://poppunk.readthedocs.io/en/latest/](https://poppunk.readthedocs.io/en/latest/) + +PopPUNK is also available as a webtool: [https://www.poppunk.net/](https://www.poppunk.net/) + +PopPUNK is a tool for clustering genomes. + +*NOTE: This docker image is intended for the CLI usage of the PopPUNK tool. 
It has not been built with the full web-interface functionality in mind.* + +## Example Usage + +This example is for usage of PopPUNK for Streptococcus pneumoniae clustering using a database & reference files provided by the [Global Pneumococcal Sequencing Project](https://www.pneumogen.net/gps/training_command_line.html). An example S. pneumoniae genome can be obtained from [here](https://github.com/rpetit3/pbptyper/blob/main/test/SRR2912551.fna.gz) + +```bash +# poppunk requires an input File Of File Names (FOFN). headerless TSV with a sample name (first column), followed by path to input FASTA +$ echo -e "SRR2912551\t/data/SRR2912551.fna.gz" > poppunk_input.tsv + +# showing reference files, FASTA input, and poppunk_input.tsv +$ ls +GPS_v6/ GPS_v6_external_clusters.csv SRR2912551.fna.gz poppunk_input.tsv + +# run the docker container interactively +# followed by poppunk command run inside the container +$ docker run --rm -ti -v ${PWD}:/data -u $(id -u):$(id -g) staphb/poppunk:2.6.0 +$ poppunk_assign --db GPS_v6 --distances GPS_v6/GPS_v6.dists --query /data/poppunk_input.tsv --output docker_test --external-clustering GPS_v6_external_clusters.csv +PopPUNK: assign + (with backend: sketchlib v2.0.0 + sketchlib: /opt/conda/envs/poppunk-env/lib/python3.10/site-packages/pp_sketchlib.cpython-310-x86_64-linux-gnu.so) + +Graph-tools OpenMP parallelisation enabled: with 1 threads +Mode: Assigning clusters of query sequences + +Loading previously refined model +Completed model loading +Sketching 1 genomes using 1 thread(s) +Progress (CPU): 1 / 1 +Writing sketches to file +WARNING: versions of input databases sketches are different, results may not be compatible +Calculating distances using 1 thread(s) +Progress (CPU): 100.0% +Selected type isolate for distance QC is 10050_2#1 +Network loaded: 42163 samples + +Done +``` diff --git a/poppunk/2.6.0/ftps.txt b/poppunk/2.6.0/ftps.txt new file mode 100644 index 000000000..eee69d8ff --- /dev/null +++ b/poppunk/2.6.0/ftps.txt @@ -0,0 +1,100 @@ +ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3224520/SAMEA3171250.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198719/SAMEA2554210.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225470/SAMEA3175912.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213352/SAMEA2696388.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198630/SAMEA2554162.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217126/SAMEA2783707.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3423083/SAMEA3447953.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218603/SAMEA2797493.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040777/SAMEA104035490.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041170/SAMEA104035895.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206617/SAMEA2658361.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218818/SAMEA2814082.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9377441/SAMEA4763391.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206566/SAMEA2658309.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3195033/SAMEA2467770.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079299/SAMEA104154757.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3193270/SAMEA2434815.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079844/SAMEA104155118.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ289/ERZ2890096/SAMEA102263668.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079332/SAMEA104154777.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218215/SAMEA2797058.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225910/SAMEA3176187.contigs.fa.gz 
+ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3197406/SAMEA2521772.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ316/ERZ3164266/SAMEA2204200.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192969/SAMEA2434607.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9307577/SAMEA3232684.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ313/ERZ3136035/SAMEA2051001.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218069/SAMEA2796905.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041120/SAMEA104035851.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078938/SAMEA104154484.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3207298/SAMEA2659051.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3197214/SAMEA2521572.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078796/SAMEA104154345.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217801/SAMEA2796638.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3251997/SAMEA3309548.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ314/ERZ3148576/SAMEA2066281.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192932/SAMEA2434566.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206876/SAMEA2658626.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3425910/SAMEA3486806.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3219036/SAMEA2814305.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3423073/SAMEA3447941.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ315/ERZ3157278/SAMEA2160059.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ347/ERZ3470667/SAMEA3504771.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206664/SAMEA2658409.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3219281/SAMEA2814555.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217894/SAMEA2796732.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198788/SAMEA2554243.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3078812/SAMEA104154360.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9377474/SAMEA4763408.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079689/SAMEA104155019.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3217835/SAMEA2796676.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198813/SAMEA2554255.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ355/ERZ3557099/SAMEA4732546.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041014/SAMEA104035733.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3193000/SAMEA2434629.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079342/SAMEA104154791.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180205/SAMEA2298232.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204423/SAMEA2627391.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ347/ERZ3470928/SAMEA3504807.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3255898/SAMEA3354185.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9378367/SAMEA4763819.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ313/ERZ3137747/SAMEA2057315.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180296/SAMEA2298295.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206874/SAMEA2658623.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ349/ERZ3499256/SAMEA3714360.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040602/SAMEA104035309.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218412/SAMEA2797262.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3181632/SAMEA2335756.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3255160/SAMEA3353584.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204680/SAMEA2627527.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ322/ERZ3225180/SAMEA3175678.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9308145/SAMEA3233336.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ342/ERZ3421572/SAMEA3431627.contigs.fa.gz 
+ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3256127/SAMEA3354364.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204642/SAMEA2627509.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3218686/SAMEA2813951.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198971/SAMEA2554336.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ355/ERZ3557464/SAMEA4732913.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ326/ERZ3260947/SAMEA3389675.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213543/SAMEA2696586.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3204451/SAMEA2627406.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3258030/SAMEA3373712.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079114/SAMEA104154622.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ318/ERZ3180293/SAMEA2298296.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9303942/SAMEA3209083.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9308103/SAMEA3233306.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ288/ERZ2889920/SAMEA102184918.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9302272/SAMEA3206695.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ930/ERZ9303776/SAMEA3208988.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3192979/SAMEA2434614.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ320/ERZ3206542/SAMEA2658288.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3198331/SAMEA2553822.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ316/ERZ3164201/SAMEA2204129.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ325/ERZ3254708/SAMEA3353251.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3041104/SAMEA104035825.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ307/ERZ3079033/SAMEA104154554.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ321/ERZ3213369/SAMEA2696405.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ319/ERZ3194930/SAMEA2467335.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ937/ERZ9379100/SAMEA4764119.contigs.fa.gz +ftp.sra.ebi.ac.uk/vol1/ERZ304/ERZ3040936/SAMEA104035654.contigs.fa.gz diff --git a/poppunk/2.6.0/test.sh b/poppunk/2.6.0/test.sh new file mode 100644 index 000000000..989766a86 --- /dev/null +++ b/poppunk/2.6.0/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash +set -euxo pipefail + +# Download 100 S. 
Pneumo assemblies from GPS Public Data on ENA +mkdir assemblies +while read link; do + wget -q -P assemblies $link; +done < ftps.txt + +# Generate r-file for creating PopPUNK database +for FILE in assemblies/*; do + printf $(basename -s .contigs.fa.gz $FILE)'\t'$FILE'\n' >> rfile.txt; +done + +# Build PopPUNK database from the assemblies +poppunk --create-db --output database --r-files rfile.txt --threads $(nproc) +poppunk --fit-model bgmm --ref-db database + +# Assign clusters on the same assemblies using the built database +# the sample names are modified as PopPUNK reject samples with names that are already in the database +sed 's/^/prefix_/' rfile.txt > qfile.txt +poppunk_assign --db database --query qfile.txt --output output --threads $(nproc) + +# Compare the database clusters and assigned clusters of the assemblies +sed 's/^prefix_//' output/output_clusters.csv | awk 'NR == 1; NR > 1 { print $0 | "sort" }' > assigned.csv +awk 'NR == 1; NR > 1 { print $0 | "sort" }' database/database_clusters.csv > database.csv +cmp assigned.csv database.csv \ No newline at end of file diff --git a/porechop/0.2.4/Dockerfile b/porechop/0.2.4/Dockerfile new file mode 100644 index 000000000..5105ca7d5 --- /dev/null +++ b/porechop/0.2.4/Dockerfile @@ -0,0 +1,48 @@ +FROM ubuntu:jammy as app + +ARG PORECHOP_VER=0.2.4 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="porechop" +LABEL software.version=${PORECHOP_VER} +LABEL description="The swiss army knife for genome assembly." +LABEL website="https://github.com/rrwick/Porechop" +LABEL license="https://github.com/rrwick/Porechop/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + procps \ + wget \ + python3-pip \ + python3-distutils \ + python3 \ + make \ + g++ && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN pip3 install setuptools + +RUN wget -q https://github.com/rrwick/Porechop/archive/refs/tags/v${PORECHOP_VER}.tar.gz && \ + tar -xf v${PORECHOP_VER}.tar.gz && ls && \ + rm v${PORECHOP_VER}.tar.gz && \ + cd /Porechop-${PORECHOP_VER} && \ + python3 setup.py install + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN porechop --help && porechop --version + +# DL small ONT FASTQ files, run through porechop as a test, print number of lines in each (expect less lines in the .chopped.fastq.gz file) +RUN echo "downloading ONT test data from bactopia/bactopia-tests on GitHub and running porechop on them..." && \ + wget -q https://raw.githubusercontent.com/bactopia/bactopia-tests/main/data/species/portiera/nanopore/ERR3772599.fastq.gz && \ + porechop -i ERR3772599.fastq.gz -o ERR3772599.chopped.fastq.gz && \ + wc -l *fastq.gz diff --git a/porechop/0.2.4/README.md b/porechop/0.2.4/README.md new file mode 100644 index 000000000..29e14df8a --- /dev/null +++ b/porechop/0.2.4/README.md @@ -0,0 +1,18 @@ +# porechop container + +Main tool : [porechop](https://github.com/rrwick/Porechop) + +Additional tools: +- python3 3.10.6 + +Full documentation: https://github.com/rrwick/Porechop + +> Porechop is a tool for finding and removing adapters from Oxford Nanopore reads. Adapters on the ends of reads are trimmed off, and when a read has an adapter in its middle, it is treated as chimeric and chopped into separate reads. Porechop performs thorough alignments to effectively find adapters, even at low sequence identity. 
+ +As of October 2019, porechop is unsupported, but is still useful in many settings, including removing adapters. + +## Example Usage + +```bash +porechop -i input_reads.fastq.gz -o output_reads.fastq.gz +``` diff --git a/prokka/1.14.6/Dockerfile b/prokka/1.14.6/Dockerfile new file mode 100644 index 000000000..9236757cb --- /dev/null +++ b/prokka/1.14.6/Dockerfile @@ -0,0 +1,125 @@ +# setting these ARGs as global variables, so they persist across all build layers (but not in final production image) +ARG PROKKA_VER="1.14.6" +ARG BEDTOOLS_VER="2.29.0" +ARG BARRNAP_VER="0.9" +ARG BLAST_VER="2.9.0" + +FROM ubuntu:bionic as app + +# re-instantiating for the app build layer +ARG PROKKA_VER +ARG BEDTOOLS_VER +ARG BARRNAP_VER +ARG BLAST_VER + +LABEL base.image="ubuntu:bionic" +LABEL dockerfile.version="1" +LABEL software="Prokka" +LABEL software.version="${PROKKA_VER}" +LABEL description="Automated prokaryotic genome annotation tool" +LABEL website="https://github.com/tseemann/prokka" +LABEL license="https://github.com/tseemann/prokka#licence" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# so that apt/tzdata doesn't ask for user input for a timezone +ARG DEBIAN_FRONTEND="noninteractive" + +# install dependencies +RUN apt-get update && apt-get -y --no-install-recommends install \ + bzip2 \ + gzip \ + wget \ + perl \ + less \ + libdatetime-perl \ + libxml-simple-perl \ + libdigest-md5-perl \ + default-jre \ + bioperl \ + hmmer \ + zlib1g-dev \ + python \ + liblzma-dev \ + libbz2-dev \ + xz-utils \ + curl \ + g++ \ + cpanminus \ + make \ + libidn11 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install bedtools 2.29.0 since >=2.27.0 is required for barrnap and the apt-get package is 2.25.0 +# dependencies required for bedtools: zlib1g-dev python liblzma-dev libbz2-dev xz-utils curl g++ +RUN wget https://github.com/arq5x/bedtools2/releases/download/v${BEDTOOLS_VER}/bedtools-${BEDTOOLS_VER}.tar.gz && \ + tar -zxf bedtools-${BEDTOOLS_VER}.tar.gz && \ + rm bedtools-${BEDTOOLS_VER}.tar.gz && \ + cd bedtools2 && \ + make + +# add bedtools to PATH for barrnap test +ENV PATH="$PATH:/bedtools2/bin" + +# install barrnap +RUN wget https://github.com/tseemann/barrnap/archive/${BARRNAP_VER}.tar.gz && \ + tar -zxf ${BARRNAP_VER}.tar.gz && \ + rm ${BARRNAP_VER}.tar.gz && \ + cd barrnap-${BARRNAP_VER} && \ + make test + +# to fix error when running Prokka 1.14.5: '"uniq" is not exported by the List::Util module' +# error introduced while updating dockerfile from prokka 1.14.0 to 1.14.5 +RUN cpanm List::Util + +# download prokka and make /data +RUN wget https://github.com/tseemann/prokka/archive/v${PROKKA_VER}.tar.gz && \ + tar -xzf v${PROKKA_VER}.tar.gz && \ + rm -rf v${PROKKA_VER}.tar.gz && \ + mkdir /data + +# install tbl2asn manually since the one included with prokka is expired. 
+# Probably will have to do again in Dec 2021 unless Torsten removes it from prokka +RUN wget ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools/converters/by_program/tbl2asn/linux64.tbl2asn.gz -O linux64.tbl2asn.gz && \ + gunzip linux64.tbl2asn.gz && \ + mv linux64.tbl2asn /usr/bin/tbl2asn && \ + chmod 755 /usr/bin/tbl2asn + +# install blast binaries directly from NCBI FTP; prioritize this over the blast binaries included with prokka +RUN wget https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/${BLAST_VER}/ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + tar -xzf ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz && \ + rm ncbi-blast-${BLAST_VER}+-x64-linux.tar.gz + +# set PATH to prioritize /usr/bin/tbl2asn (and other paths like ncbi-blast+) before /prokka-[VERSION]/binaries*/tbl2asn +# second line added so that parallel is forced into PATH (wasn't working in Singularity without explicitly setting this) +# set perl locale settings so barrnap works in singularity image +ENV PATH="${PATH}:/ncbi-blast-${BLAST_VER}+/bin/:\ +/prokka-${PROKKA_VER}/bin:\ +/prokka-${PROKKA_VER}/binaries/common:\ +/prokka-${PROKKA_VER}/binaries/linux:\ +/barrnap-${BARRNAP_VER}/bin"\ + LC_ALL=C + +# index dbs and list what dbs were setup; set working dir to /data +RUN prokka --setupdb && prokka --listdb + +WORKDIR /data + +FROM app as test + +# re-instantiating for the test layer +ARG PROKKA_VER + +WORKDIR /prokka-${PROKKA_VER} + +# run tests from https://github.com/tseemann/prokka/blob/master/.travis.yml +RUN realpath . && prokka --version && \ + prokka --help && \ + ! prokka --doesnotexist && \ + prokka --depends && \ + prokka --setupdb && \ + prokka --listdb && \ + prokka --cpus 2 --outdir asm --prefix asm test/plasmid.fna && \ + grep '>' asm/asm.fna && \ + prokka --cleandb + \ No newline at end of file diff --git a/prokka/1.14.6/README.md b/prokka/1.14.6/README.md new file mode 100644 index 000000000..c5d46a0d1 --- /dev/null +++ b/prokka/1.14.6/README.md @@ -0,0 +1,25 @@ +# Prokka container + +Main tool : [Prokka](https://github.com/tseemann/prokka) + +Additional tools: + +- prodigal 2.6.3 +- bedtools 2.29.0 +- barrnap 0.9 +- ncbi-blast+ 2.9.0 +- tbl2asn 25.8 +- hmmer 3.1b2 (February 2015) +- perl 5.26.1 +- bioperl 1.7.2 + +Full documentation: [link to documentation](https://github.com/tseemann/prokka) + +Automated prokaryotic genome annotation tool + +## Example Usage + +```bash +# run prokka on the provided test FASTA file (plasmid sequence) +prokka --cpus 2 --outdir asm --prefix asm test/plasmid.fna +``` diff --git a/pygenomeviz/0.3.2/Dockerfile b/pygenomeviz/0.3.2/Dockerfile new file mode 100644 index 000000000..f4897bf00 --- /dev/null +++ b/pygenomeviz/0.3.2/Dockerfile @@ -0,0 +1,45 @@ +FROM ubuntu:jammy as app + +ARG PYGENOMEVIZ_VER="0.3.2" + +LABEL base.image="mambaorg/jammy" +LABEL dockerfile.version="1" +LABEL software="pyGenomeViz" +LABEL software.version=$PYGENOMEVIZ_VER +LABEL description="genome visualization python package for comparative genomics" +LABEL website="https://moshi4.github.io/pyGenomeViz/" +LABEL license="MIT License" +LABEL license.url="https://github.com/moshi4/pyGenomeViz/blob/main/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +#mmseqs2=13.45111 +#mummer=3.23 +#progressivemauve=1.2.0 +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + python3 \ + python3-pip \ + mmseqs2 \ + mummer \ + progressivemauve && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN pip install 
pygenomeviz==$PYGENOMEVIZ_VER + +ENV LC_ALL=C.UTF-8 + +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN pgv-download-dataset -n erwinia_phage && \ + pgv-mummer --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mummer_test --tick_style axis --align_type left --feature_plotstyle arrow && \ + pgv-mmseqs --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mmseqs_test --tick_style axis --align_type left --feature_plotstyle arrow && \ + pgv-download-dataset -n escherichia_coli && \ + pgv-pmauve --seq_files NC_000913.gbk NC_002695.gbk NC_011751.gbk NC_011750.gbk -o pmauve_test --tick_style bar && \ + ls mummer_test/result.png mmseqs_test/result.png pmauve_test/result.png diff --git a/pygenomeviz/0.3.2/README.md b/pygenomeviz/0.3.2/README.md new file mode 100644 index 000000000..cdeac3c48 --- /dev/null +++ b/pygenomeviz/0.3.2/README.md @@ -0,0 +1,50 @@ +# pyGenomeViz container + +Main tool : [pyGenomeViz](https://moshi4.github.io/pyGenomeViz/) + +Additional tools: +- MMseqs2 v13.45111 +- MUMmer v3.23 +- progressiveMauve vsnapshot_2015_02_13 + +Full documentation: https://moshi4.github.io/pyGenomeViz/ + +> pyGenomeViz is a genome visualization python package for comparative genomics implemented based on matplotlib. This package is developed for the purpose of easily and beautifully plotting genomic features and sequence similarity comparison links between multiple genomes. + +## Example Usage + +Using the CLI + +```bash +# Download four Erwinia phage genbank files +pgv-download-dataset -n erwinia_phage + +# run pyGenomeViz to visualize MUMmer alignment +pgv-mummer --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mummer_example1 --tick_style axis --align_type left --feature_plotstyle arrow + +# run pyGenomeViz to visualize MMseqs2 alignment +pgv-mmseqs --gbk_resources MT939486.gbk:250000-358115 MT939487.gbk:250000-355376 MT939488.gbk:250000-356948 LT960552.gbk:270000-340000 -o mmseqs_example2 --tick_style bar --feature_plotstyle arrow + +# Download four E.coli genbank files +pgv-download-dataset -n escherichia_coli + +# run pyGenomeViz to visualize progressiveMauve alignment +pgv-pmauve --seq_files NC_000913.gbk NC_002695.gbk NC_011751.gbk NC_011750.gbk -o pmauve_example1 --tick_style bar +``` + +This container contains the pygenomeviz python package, so custom scripts can import pygenomeviz + +```python +from pygenomeviz import GenomeViz + +name, genome_size = "Tutorial 01", 5000 +cds_list = ((100, 900, -1), (1100, 1300, 1), (1350, 1500, 1), (1520, 1700, 1), (1900, 2200, -1), (2500, 2700, 1), (2700, 2800, -1), (2850, 3000, -1), (3100, 3500, 1), (3600, 3800, -1), (3900, 4200, -1), (4300, 4700, -1), (4800, 4850, 1)) + +gv = GenomeViz() +track = gv.add_feature_track(name, genome_size) +for idx, cds in enumerate(cds_list, 1): + start, end, strand = cds + track.add_feature(start, end, strand, label=f"CDS{idx:02d}") + +fig = gv.plotfig() +``` \ No newline at end of file diff --git a/pygenomeviz/0.4.2/Dockerfile b/pygenomeviz/0.4.2/Dockerfile new file mode 100644 index 000000000..bae772c7a --- /dev/null +++ b/pygenomeviz/0.4.2/Dockerfile @@ -0,0 +1,46 @@ +FROM python:3.9.17-slim as app + +ARG PYGENOMEVIZ_VER="0.4.2" + +LABEL base.image="python:3.9.17-slim" +LABEL dockerfile.version="1" +LABEL software="pyGenomeViz" +LABEL software.version=$PYGENOMEVIZ_VER +LABEL description="genome visualization python package for comparative genomics" +LABEL website="https://moshi4.github.io/pyGenomeViz/" +LABEL license="MIT License" +LABEL 
license.url="https://github.com/moshi4/pyGenomeViz/blob/main/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +#mmseqs2=13.45111 +#mummer=3.23 +#progressivemauve=1.2.0 +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + procps \ + mmseqs2 \ + mummer \ + progressivemauve && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir pygenomeviz==$PYGENOMEVIZ_VER + +ENV LC_ALL=C.UTF-8 + +CMD pgv-simpleplot --help && pgv-mmseqs --help && pgv-mummer --help && pgv-pmauve --help + +WORKDIR /data + +FROM app as test + +WORKDIR /test + +RUN pgv-simpleplot --help && pgv-mmseqs --help && pgv-mummer --help && pgv-pmauve --help + +RUN pgv-download-dataset -n erwinia_phage && \ + pgv-mummer --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mummer_test --tick_style axis --align_type left --feature_plotstyle arrow && \ + pgv-mmseqs --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mmseqs_test --tick_style axis --align_type left --feature_plotstyle arrow && \ + pgv-download-dataset -n escherichia_coli && \ + pgv-pmauve --seq_files NC_000913.gbk NC_002695.gbk NC_011751.gbk NC_011750.gbk -o pmauve_test --tick_style bar && \ + ls mummer_test/result.png mmseqs_test/result.png pmauve_test/result.png diff --git a/pygenomeviz/0.4.2/README.md b/pygenomeviz/0.4.2/README.md new file mode 100644 index 000000000..cdeac3c48 --- /dev/null +++ b/pygenomeviz/0.4.2/README.md @@ -0,0 +1,50 @@ +# pyGenomeViz container + +Main tool : [pyGenomeViz](https://moshi4.github.io/pyGenomeViz/) + +Additional tools: +- MMseqs2 v13.45111 +- MUMmer v3.23 +- progressiveMauve vsnapshot_2015_02_13 + +Full documentation: https://moshi4.github.io/pyGenomeViz/ + +> pyGenomeViz is a genome visualization python package for comparative genomics implemented based on matplotlib. This package is developed for the purpose of easily and beautifully plotting genomic features and sequence similarity comparison links between multiple genomes. 
+ +## Example Usage + +Using the CLI + +```bash +# Download four Erwinia phage genbank files +pgv-download-dataset -n erwinia_phage + +# run pyGenomeViz to visualize MUMmer alignment +pgv-mummer --gbk_resources MT939486.gbk MT939487.gbk MT939488.gbk LT960552.gbk -o mummer_example1 --tick_style axis --align_type left --feature_plotstyle arrow + +# run pyGenomeViz to visualize MMseqs2 alignment +pgv-mmseqs --gbk_resources MT939486.gbk:250000-358115 MT939487.gbk:250000-355376 MT939488.gbk:250000-356948 LT960552.gbk:270000-340000 -o mmseqs_example2 --tick_style bar --feature_plotstyle arrow + +# Download four E.coli genbank files +pgv-download-dataset -n escherichia_coli + +# run pyGenomeViz to visualize progressiveMauve alignment +pgv-pmauve --seq_files NC_000913.gbk NC_002695.gbk NC_011751.gbk NC_011750.gbk -o pmauve_example1 --tick_style bar +``` + +This container contains the pygenomeviz python package, so custom scripts can import pygenomeviz + +```python +from pygenomeviz import GenomeViz + +name, genome_size = "Tutorial 01", 5000 +cds_list = ((100, 900, -1), (1100, 1300, 1), (1350, 1500, 1), (1520, 1700, 1), (1900, 2200, -1), (2500, 2700, 1), (2700, 2800, -1), (2850, 3000, -1), (3100, 3500, 1), (3600, 3800, -1), (3900, 4200, -1), (4300, 4700, -1), (4800, 4850, 1)) + +gv = GenomeViz() +track = gv.add_feature_track(name, genome_size) +for idx, cds in enumerate(cds_list, 1): + start, end, strand = cds + track.add_feature(start, end, strand, label=f"CDS{idx:02d}") + +fig = gv.plotfig() +``` \ No newline at end of file diff --git a/quast/5.2.0/Dockerfile b/quast/5.2.0/Dockerfile new file mode 100644 index 000000000..9147b5ed4 --- /dev/null +++ b/quast/5.2.0/Dockerfile @@ -0,0 +1,110 @@ +ARG QUAST_VER="5.2.0" + +## Builder ## +FROM ubuntu:focal as builder + +ARG QUAST_VER + +# define timezone to avoid build stalls +ENV TZ=America/New_York +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + python2 \ + perl \ + cpanminus \ + g++ \ + make \ + openjdk-8-jre-headless \ + r-base \ + pkg-config \ + libfreetype6-dev \ + libpng-dev \ + libboost-all-dev \ + locales &&\ + locale-gen en_US.UTF-8 &&\ + cpanm Time::HiRes &&\ + apt-get autoclean &&\ + rm -rf /var/lib/apt/lists/* + +# python dependencies +RUN update-alternatives --install /usr/bin/python python /usr/bin/python2 1 &&\ + wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py &&\ + pip install --no-cache matplotlib simplejson joblib + +# install quast +RUN wget https://github.com/ablab/quast/releases/download/quast_${QUAST_VER}/quast-${QUAST_VER}.tar.gz && \ + tar -xzf quast-${QUAST_VER}.tar.gz && \ + rm -rf quast-${QUAST_VER}.tar.gz && \ + cd /quast-${QUAST_VER} && \ + /quast-${QUAST_VER}/setup.py install + +# add GRIDSS for SV detection +ADD https://github.com/ablab/quast/raw/master/external_tools/gridss/gridss-1.4.1.jar /quast-${QUAST_VER}/quast_libs/gridss/ + +## App ## +FROM ubuntu:focal as app + +ARG QUAST_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="QUAST" +LABEL software.version=${QUAST_VER} +LABEL description="Genome assembly evaluation tool" +LABEL website="https://github.com/ablab/quast" +LABEL license="https://github.com/ablab/quast/blob/master/LICENSE.txt" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="pjx8@cdc.gov" +LABEL maintainer2="Kutluhan Incekara" +LABEL maintainer2.email="kutluhan.incekara@ct.gov" + +# define timezone to avoid build stalls +ENV 
TZ=America/New_York +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +# install only necessary programs and libraries to run quast +RUN apt-get update && apt-get install --no-install-recommends -y \ + wget \ + python2 \ + perl \ + openjdk-8-jre-headless \ + r-base \ + libidn11 \ + locales &&\ + locale-gen en_US.UTF-8 &&\ + apt-get autoclean &&\ + rm -rf /var/lib/apt/lists/* + +# python dependencies +RUN update-alternatives --install /usr/bin/python python /usr/bin/python2 1 &&\ + wget https://bootstrap.pypa.io/pip/2.7/get-pip.py && python get-pip.py &&\ + pip install --no-cache matplotlib simplejson joblib +# copy quast and compiled tools +COPY --from=builder /quast-${QUAST_VER} /quast-${QUAST_VER} +# copy compiled Time::HiRes perl module +COPY --from=builder /usr/local/lib/x86_64-linux-gnu/perl/5.30.0/Time/ /usr/local/lib/x86_64-linux-gnu/perl/5.30.0/Time/ + +ENV LC_ALL=C +ENV PATH=$PATH:/quast-${QUAST_VER} + +CMD quast.py --help && quast-lg.py --help && metaquast.py --help + +WORKDIR /data + +## Test ## +FROM app as test +# test quast +RUN quast.py --test-sv && mv ./quast_test_output/ ./quast_test_sv_output/ +# test quast-lg +RUN quast-lg.py --test && mv ./quast_test_output/ ./quast_test_lg_output/ +# test metaquast +RUN metaquast.py --test +# check logs +RUN tail -n5 ./quast_test_sv_output/quast.log &&\ + tail -n5 ./quast_test_lg_output/quast.log &&\ + tail -n5 ./quast_test_output/metaquast.log + +# Note 1: "Warnings are GenMark license related. They disappear when a valid licence is provided." +# Note 2: "metaquast.py --test-no-ref" throws error related Krona plot (known bug of Quast 5.2.0) diff --git a/quast/5.2.0/README.md b/quast/5.2.0/README.md new file mode 100644 index 000000000..160c6f3e4 --- /dev/null +++ b/quast/5.2.0/README.md @@ -0,0 +1,36 @@ +# QUAST container + +Main tool: [QUAST](https://github.com/ablab/quast) + +Code repository: https://github.com/ablab/quast + +Additional tools: +- gridss: 1.4.1 + + +Basic information on how to use this tool: +- executable: quast.py +- help: --help +- version: --version +- description: Genome assembly evaluation tool + +Additional information: + +You need a license key if you want to use GeneMark with Quast!
      +The key can be downloaded from http://exon.gatech.edu/GeneMark/license_download.cgi page (GeneMark is free for non-commercial use). +You should choose GeneMarkS-T LINUX 64. Download your license key and add it to your data folder or another folder you will bind to the container. Inside the container, copy the key to your home folder. +``` +cp /data/gm_key_64 ~/.gm_key +``` +Full documentation: https://quast.sourceforge.net/docs/manual.html + +## Example Usage + +```bash +# basic statistics without reference +quast.py contigs.fasta +# genome evaluation with reference +quast.py contigs.fasta -r reference.fasta.gz -g genes.gff +# metagenomic assemblies +metaquast.py contigs_1 contigs_2 ... -r reference_1,reference_2,reference_3,... +``` \ No newline at end of file diff --git a/samtools/1.16.1/Dockerfile b/samtools/1.16.1/Dockerfile new file mode 100644 index 000000000..b02c5516b --- /dev/null +++ b/samtools/1.16.1/Dockerfile @@ -0,0 +1,54 @@ +FROM ubuntu:focal as app + +ARG SAMTOOLSVER="1.16.1" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Samtools" +LABEL software.version="${SAMTOOLSVER}" +LABEL description="Tools (written in C using htslib) for manipulating next-generation sequencing data" +LABEL website="https://github.com/samtools/samtools" +LABEL license="https://github.com/samtools/samtools/blob/develop/LICENSE" +LABEL maintainer="Shelby Bennett" +LABEL maintainer.email="shelby.bennett@dgs.virginia.gov" + + +ARG DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install --no-install-recommends -y \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + wget \ + make \ + perl \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + python3 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + samtools stats SRR13957123.primertrim.sorted.bam && \ + samtools faidx SRR13957123.consensus.fa diff --git a/samtools/1.16.1/README.md b/samtools/1.16.1/README.md new file mode 100644 index 000000000..d89cce941 --- /dev/null +++ b/samtools/1.16.1/README.md @@ -0,0 +1,21 @@ +# samtools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/samtools) + +Additional tools: + +* python 3.8.10 +* perl 5.30.0 + +## Example Usage + +```bash +samtools ampliconclip -b bed.file input.bam + +samtools sort -T /tmp/aln.sorted -o aln.sorted.bam aln.bam +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) \ No newline at end of file diff --git a/samtools/1.17-2023-06/Dockerfile b/samtools/1.17-2023-06/Dockerfile new file mode 100644 index 000000000..91cc73868 --- /dev/null +++ b/samtools/1.17-2023-06/Dockerfile @@ -0,0 +1,68 @@ +# builder +FROM ubuntu:jammy as builder + +ARG SAMTOOLS_VER="1.17" + +RUN apt-get update && apt-get install --no-install-recommends -y \ 
+ libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + wget \ + make \ + perl \ + bzip2 \ + gnuplot \ + ca-certificates + +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ + ./configure && \ + make && \ + make install + +# app +FROM ubuntu:jammy as app + +ARG SAMTOOLS_VER="1.17" + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="Samtools" +LABEL software.version="${SAMTOOLS_VER}" +LABEL description="Tools (written in C using htslib) for manipulating next-generation sequencing data" +LABEL website="https://github.com/samtools/samtools" +LABEL license="https://github.com/samtools/samtools/blob/develop/LICENSE" +LABEL maintainer="Shelby Bennett" +LABEL maintainer.email="shelby.bennett@dgs.virginia.gov" +LABEL maintainer2="Kutluhan Incekara" +LABEL maintainer2.email="kutluhan.incekara@ct.gov" + +RUN apt-get update && apt-get install --no-install-recommends -y \ + perl \ + zlib1g \ + libncurses5 \ + bzip2 \ + liblzma-dev \ + libcurl4-gnutls-dev \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +COPY --from=builder /usr/local/bin/* /usr/local/bin/ + +ENV LC_ALL=C + +WORKDIR /data + +# test +FROM app as test + +RUN apt-get update && apt-get install -y wget &&\ + wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + samtools stats SRR13957123.primertrim.sorted.bam && \ + samtools faidx SRR13957123.consensus.fa + diff --git a/samtools/1.17-2023-06/README.md b/samtools/1.17-2023-06/README.md new file mode 100644 index 000000000..012969a01 --- /dev/null +++ b/samtools/1.17-2023-06/README.md @@ -0,0 +1,26 @@ +# samtools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [https://github.com/samtools/samtools](https://github.com/samtools/samtools) + +Additional tools: + +* perl 5.34.0 + +## Note about `staphb/samtools:1.17-2023-06` + +This docker image is considerably smaller than the original `staphb/samtools:1.17` docker image that was built using `samtools/1.17/Dockerfile`. Many unnecessary programs were removed and a `builder` stage was added for compiling & building the `samtools` executables. Programs such as `python3` and `gawk` are no longer included in the final docker image. + +If your workflows are failing, it may be because these programs were removed from the docker image to reduce the image size. We recommend using a different docker image (such as the docker image `staphb/samtools:1.17`!) if your pipeline requires the use of `python3` or another program that was removed. 
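+
+A quick way to check whether a program your workflow needs is still present in this image (a minimal sketch; adjust the program name as needed):
+
+```bash
+# prints the path if the program exists inside the image, otherwise prints a note
+docker run --rm staphb/samtools:1.17-2023-06 bash -c 'command -v python3 || echo "python3 is not included in this image"'
+```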
+ +## Example Usage + +```bash +samtools ampliconclip -b bed.file input.bam + +samtools sort -T /tmp/aln.sorted -o aln.sorted.bam aln.bam +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) \ No newline at end of file diff --git a/samtools/1.17/Dockerfile b/samtools/1.17/Dockerfile new file mode 100644 index 000000000..9d0b90587 --- /dev/null +++ b/samtools/1.17/Dockerfile @@ -0,0 +1,55 @@ +FROM ubuntu:focal as app + +ARG SAMTOOLSVER="1.17" + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="Samtools" +LABEL software.version="${SAMTOOLSVER}" +LABEL description="Tools (written in C using htslib) for manipulating next-generation sequencing data" +LABEL website="https://github.com/samtools/samtools" +LABEL license="https://github.com/samtools/samtools/blob/develop/LICENSE" +LABEL maintainer="Shelby Bennett" +LABEL maintainer.email="shelby.bennett@dgs.virginia.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +ARG DEBIAN_FRONTEND=noninteractive + +RUN apt-get update && apt-get install --no-install-recommends -y \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + wget \ + make \ + perl \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + python3 && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install && \ + mkdir /data + +ENV LC_ALL=C + +WORKDIR /data + +FROM app as test + +RUN wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + wget https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + samtools stats SRR13957123.primertrim.sorted.bam && \ + samtools faidx SRR13957123.consensus.fa diff --git a/samtools/1.17/README.md b/samtools/1.17/README.md new file mode 100644 index 000000000..d89cce941 --- /dev/null +++ b/samtools/1.17/README.md @@ -0,0 +1,21 @@ +# samtools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/samtools) + +Additional tools: + +* python 3.8.10 +* perl 5.30.0 + +## Example Usage + +```bash +samtools ampliconclip -b bed.file input.bam + +samtools sort -T /tmp/aln.sorted -o aln.sorted.bam aln.bam +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) \ No newline at end of file diff --git a/samtools/1.18/Dockerfile b/samtools/1.18/Dockerfile new file mode 100644 index 000000000..e9a9e25ef --- /dev/null +++ b/samtools/1.18/Dockerfile @@ -0,0 +1,90 @@ +ARG SAMTOOLS_VER="1.18" + +FROM ubuntu:jammy as builder + +ARG SAMTOOLS_VER + +# install dependencies required for compiling samtools +RUN apt-get update && apt-get install --no-install-recommends -y \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + wget \ + make \ + perl \ + bzip2 \ + gnuplot \ + ca-certificates + +# download, compile, and install samtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf 
samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ + ./configure && \ + make && \ + make install + +### start of app stage ### +FROM ubuntu:jammy as app + +ARG SAMTOOLS_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="samtools" +LABEL software.version="${SAMTOOLS_VER}" +LABEL description="Tools (written in C using htslib) for manipulating next-generation sequencing data" +LABEL website="https://github.com/samtools/samtools" +LABEL license="https://github.com/samtools/samtools/blob/develop/LICENSE" +LABEL maintainer="Shelby Bennett" +LABEL maintainer.email="shelby.bennett@dgs.virginia.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" +LABEL maintainer3="Erin Young" +LABEL maintainer3.email="eriny@utah.gov" +LABEL maintainer4="Kutluhan Incekara" +LABEL maintainer4.email="kutluhan.incekara@ct.gov" + +ARG DEBIAN_FRONTEND=noninteractive + +# install dependencies required for running samtools +# 'gnuplot' required for plot-ampliconstats +RUN apt-get update && apt-get install --no-install-recommends -y \ + perl \ + zlib1g \ + libncurses5 \ + bzip2 \ + liblzma-dev \ + libcurl4-gnutls-dev \ + gnuplot \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# copy in samtools executables from builder stage +COPY --from=builder /usr/local/bin/* /usr/local/bin/ + +ENV LC_ALL=C + +# final working directory is /data +WORKDIR /data + +# default command is to pull up help options +CMD ["samtools", "--help"] + +### start of test stage ### +FROM app as test + +# install wget for downloading test files +RUN apt-get update && apt-get install --no-install-recommends -y wget ca-certificates + +RUN wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + wget -q https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.primertrim.sorted.bam && \ + wget -q https://raw.githubusercontent.com/artic-network/artic-ncov2019/master/primer_schemes/nCoV-2019/V3/nCoV-2019.primer.bed && \ + samtools stats SRR13957123.primertrim.sorted.bam && \ + samtools faidx SRR13957123.consensus.fa && \ + samtools ampliconstats nCoV-2019.primer.bed SRR13957123.primertrim.sorted.bam > SRR13957123_ampliconstats.txt && \ + plot-ampliconstats plot SRR13957123_ampliconstats.txt && \ + ls diff --git a/samtools/1.18/README.md b/samtools/1.18/README.md new file mode 100644 index 000000000..9512f1bbe --- /dev/null +++ b/samtools/1.18/README.md @@ -0,0 +1,20 @@ +# samtools container + +Main tool: + +* [https://www.htslib.org/](https://www.htslib.org/) +* [GitHub](https://github.com/samtools/samtools) + +Additional tools: + +* perl 5.34.0 + +## Example Usage + +```bash +samtools ampliconclip -b bed.file input.bam + +samtools sort -T /tmp/aln.sorted -o aln.sorted.bam aln.bam +``` + +Better documentation can be found at [https://www.htslib.org/doc/samtools.html](https://www.htslib.org/doc/samtools.html) \ No newline at end of file diff --git a/seqkit/2.3.1/Dockerfile b/seqkit/2.3.1/Dockerfile new file mode 100644 index 000000000..acdeb5a05 --- /dev/null +++ b/seqkit/2.3.1/Dockerfile @@ -0,0 +1,49 @@ +# FROM defines the base docker image. This command has to come first in the file +# The 'as' keyword lets you name the following stage.
We use `app` for the production image +FROM ubuntu:xenial as app + +# ARG sets environment variables during the build stage +ARG SEQKIT_VER="2.3.1" + +# LABEL instructions tag the image with metadata that might be important to the user +# Optional, but highly recommended +LABEL base.image="ubuntu:xenial" +LABEL dockerfile.version="1" +LABEL software="SeqKit" +LABEL software.version=$SEQKIT_VER +LABEL description="SeqKit - a cross-platform and ultrafast toolkit for FASTA/Q file manipulation" +LABEL website="https://github.com/shenwei356/seqkit" +LABEL license="https://github.com/shenwei356/seqkit/blob/master/LICENSE" +LABEL maintainer="Henry Kunerth" +LABEL maintainer.email="henrykunerth@gmail.com" + +# Install dependencies (update as needed) +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# download SEQKIT and organize directories +RUN wget https://github.com/shenwei356/seqkit/releases/download/v${SEQKIT_VER}/seqkit_linux_amd64.tar.gz && \ + tar -xzf seqkit_linux_amd64.tar.gz && \ + mkdir /seqkit_v${SEQKIT_VER} && \ + mv seqkit /seqkit_v${SEQKIT_VER} && \ + rm seqkit_linux_amd64.tar.gz && \ + mkdir /data + +# put executable in PATH +ENV PATH="$PATH:/seqkit_v${SEQKIT_VER}" \ + LC_ALL=C +RUN echo $PATH + +# WORKDIR sets working directory +WORKDIR /data + +# A second FROM instruction creates a new stage +# We use `test` for the test image +FROM app as test + +# download test .fasta and check that SEQKIT can run to generate stats +RUN wget -P /data https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa && \ + chmod 755 SRR13957123.consensus.fa && \ + seqkit stat * diff --git a/seqkit/2.3.1/README.md b/seqkit/2.3.1/README.md new file mode 100644 index 000000000..de048f057 --- /dev/null +++ b/seqkit/2.3.1/README.md @@ -0,0 +1,36 @@ +# SeqKit container + +Main tool : [SeqKit](https://github.com/shenwei356/seqkit) + +SeqKit is a cross-platform and ultrafast toolkit for FASTA/Q file manipulation. + +Citation: + +W Shen, S Le, Y Li*, F Hu*. SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation. PLOS ONE. doi:10.1371/journal.pone.0163962.
+ + +- **Documents:** [http://bioinf.shenwei.me/seqkit](http://bioinf.shenwei.me/seqkit) +([**Usage**](http://bioinf.shenwei.me/seqkit/usage/), +[**FAQ**](http://bioinf.shenwei.me/seqkit/faq/), +[**Tutorial**](http://bioinf.shenwei.me/seqkit/tutorial/), +and +[**Benchmark**](http://bioinf.shenwei.me/seqkit/benchmark/)) + +## Example Usage + +```bash +# get simple statistics from FASTA/Q files + +seqkit stats + +# or with flags + +seqkit stats --all --tabular + +# conversion from FASTA to FASTQ + +seqkit fa2fq + + + +``` diff --git a/serotypefinder/2.0.1/Dockerfile b/serotypefinder/2.0.1/Dockerfile index 85eecf310..33fe03ea8 100644 --- a/serotypefinder/2.0.1/Dockerfile +++ b/serotypefinder/2.0.1/Dockerfile @@ -1,33 +1,47 @@ -FROM ubuntu:focal - ARG SEROTYPEFINDER_VER="2.0.1" -ARG SEROTYPEFINDER_DB_COMMIT_HASH="39c68c6e1a3d94f823143a2e333019bb3f8dddba" +# important to get this commit due to line ending fix +# see here: https://bitbucket.org/genomicepidemiology/serotypefinder_db/commits/ada62c62a7fa74032448bb2273d1f7045c59fdda +ARG SEROTYPEFINDER_DB_COMMIT_HASH="ada62c62a7fa74032448bb2273d1f7045c59fdda" + +FROM ubuntu:focal as app + +# re-instantiating for use in the app layer +ARG SEROTYPEFINDER_VER +ARG SEROTYPEFINDER_DB_COMMIT_HASH # metadata LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="1" +LABEL dockerfile.version="2" LABEL software="SerotypeFinder" LABEL software.version="2.0.1" LABEL description="Tool for identifying the serotype of E. coli from reads or assemblies" LABEL website="https://bitbucket.org/genomicepidemiology/serotypefinder/src/master/" LABEL license="https://bitbucket.org/genomicepidemiology/serotypefinder/src/master/" LABEL maintainer="Curtis Kapsak" -LABEL maintainer.email="pjx8@cdc.gov" +LABEL maintainer.email="kapsakcj@gmail.com" # install dependencies; cleanup apt garbage # ncbi-blast+ v2.9.0 (ubuntu:focal), min required version is 2.8.1 -# python3 v3.8.5, min required version is 3.5 -RUN apt-get update && apt-get install -y \ +# python3 v3.8.10, min required version is 3.5 +RUN apt-get update && apt-get install -y --no-install-recommends \ wget \ + ca-certificates \ + procps \ git \ ncbi-blast+ \ python3 \ python3-pip \ - libz-dev && \ + python3-setuptools \ + python3-dev \ + gcc \ + make \ + libz-dev \ + dos2unix \ + unzip && \ apt-get autoclean && rm -rf /var/lib/apt/lists/* # install python dependencies -RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.3.2 +RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 # Install kma # apt deps: libz-dev (for compiling) @@ -39,10 +53,12 @@ RUN git clone --branch 1.0.1 --depth 1 https://bitbucket.org/genomicepidemiology # download serotypefinder database using a specific commit hash to aid in reproducibility # index database w/ kma # NOTE: files HAVE to go into '/database' since that is the default location expected by serotyperfinder.py +# dos2unix on the FASTA files to ensure they have LF line endings (there's CRLF somewhere in those files despite the last commit to the db) RUN mkdir /database && \ git clone https://bitbucket.org/genomicepidemiology/serotypefinder_db.git /database && \ cd /database && \ git checkout ${SEROTYPEFINDER_DB_COMMIT_HASH} && \ + dos2unix *.fsa && \ python3 INSTALL.py kma_index # install serotypefinder; make /data @@ -51,7 +67,38 @@ RUN git clone --branch ${SEROTYPEFINDER_VER} https://bitbucket.org/genomicepidem # set $PATH and locale settings for singularity compatibility ENV PATH="/serotypefinder:$PATH" \ - LC_ALL=C + LC_ALL=C.UTF-8 -# set working directory +# 
set final working directory for production docker image (app layer only) WORKDIR /data + +FROM app as test + +# set working directory for test layer +WORKDIR /test + +# download an example assembly; test with SerotypeFinder +# Escherichia coli complete genome (Unicycler assembly) +# GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/ +# BioSample:SAMN08799860 +# expect O1:H7 +RUN mkdir -v /test/asm-input-O1-H7 && \ + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + serotypefinder.py -i /test/GCA_012224845.2_ASM1222484v2_genomic.fna -x -o /test/asm-input-O1-H7 && \ + cat /test/asm-input-O1-H7/results_tab.tsv + +# download Illumina reads for the same sample ^ and test reads as input into SerotypeFinder +RUN mkdir /test/reads-input-O1-H7 && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_2.fastq.gz && \ + serotypefinder.py -i SRR6903006_1.fastq.gz SRR6903006_2.fastq.gz -x -o /test/reads-input-O1-H7 && \ + cat /test/reads-input-O1-H7/results_tab.tsv + +# test using FASTA supplied with serotypefinder code +# expect O1:H9 +RUN mkdir -p /test/serotypefinder-test-fsa && \ + serotypefinder.py -i /serotypefinder/test.fsa -x -o /test/serotypefinder-test-fsa && \ + cat /test/serotypefinder-test-fsa/results_tab.tsv + +RUN serotypefinder.py --help diff --git a/serotypefinder/2.0.1/README.md b/serotypefinder/2.0.1/README.md index 9bbf3a9b2..8b3623ae0 100644 --- a/serotypefinder/2.0.1/README.md +++ b/serotypefinder/2.0.1/README.md @@ -1,24 +1,36 @@ -# SerotypeFinder Container How-To +# SerotypeFinder Container -### Description A docker container that contains SerotypeFinder, a tool for serotyping E. coli isolates from reads or assemblies -[Link to DockerHub repository](https://hub.docker.com/r/staphb/serotypefinder) +[Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/serotypefinder) -### Version information -SerotypeFinder version: 2.0.1 https://bitbucket.org/genomicepidemiology/serotypefinder/src/2.0.1/ made on 2019‑01‑28 +Main tool: -SerotypeFinder database version: Git commit `39c68c6e1a3d94f823143a2e333019bb3f8dddba` made on 2020‑09‑24. [Link to commit history](https://bitbucket.org/genomicepidemiology/serotypefinder_db/commits/) +- Main Code Repo: [https://bitbucket.org/genomicepidemiology/serotypefinder/src/master/](https://bitbucket.org/genomicepidemiology/serotypefinder/src/master/) +- You may be familiar with the web version of SerotypeFinder: [https://cge.cbs.dtu.dk/services/SerotypeFinder/](https://cge.cbs.dtu.dk/services/SerotypeFinder/) -You may be familiar with the web version of SerotypeFinder: https://cge.cbs.dtu.dk/services/SerotypeFinder/ +Additional tools: + +- python 3.8.10 +- biopython 1.73 +- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.0.0 +- ncbi-blast+ 2.9.0 + +## Version information + +SerotypeFinder version: 2.0.1 [https://bitbucket.org/genomicepidemiology/serotypefinder/src/2.0.1/](https://bitbucket.org/genomicepidemiology/serotypefinder/src/2.0.1/) made on 2019‑01‑28 + +SerotypeFinder database version: Git commit `ada62c62a7fa74032448bb2273d1f7045c59fdda` made on 2022‑05‑16. [Link to commit history](https://bitbucket.org/genomicepidemiology/serotypefinder_db/commits/) ## Requirements - * Docker or Singularity - * E. 
coli raw reads (fastq.gz) or assembly (fasta) - * Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. (I've only tested Illumina reads) + +- Docker or Singularity +- E. coli raw reads (fastq.gz) or assembly (fasta) + - Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. (I've only tested Illumina reads) ## Usage -``` + +```bash usage: serotypefinder.py [-h] -i INFILE [INFILE ...] [-o OUTDIR] [-tmp TMP_DIR] [-mp METHOD_PATH] [-p DB_PATH] [-d DATABASES] [-l MIN_COV] [-t THRESHOLD] [-x] [-q] optional arguments: @@ -45,22 +57,24 @@ optional arguments: ``` ## Notes and Recommendations - * You do not need to supply a database or use the `-p` or `-d` flags - * Database is included in the container and is in the default/expected location within the container: `/database` - * (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `serotypefinder.py -p` flag. You can find instructions for this on the SerotypeFinder Bitbucket README. `kma` is included in this docker container for database indexing. - * SerotypeFinder does **NOT** create an output directory when you use the `-o` flag. You MUST create it beforehand or it will throw an error. - * **Default % Identity threshold: 90%**. Adjust with `-t 0.95` - * **Default % coverage threshold: 60%**. Adjust with `-l 0.70` - * Use the `-x` flag (extended output) if you want the traditional/legacy SerotypeFinder output files `results_tab.tsv results.txt Serotype_allele_seq.fsa Hit_in_genome_seq.fsa`. Otherwise you will need to parse the default output file `data.json` for results - * (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save time from having to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] - * Querying reads: - * This will run SerotypeFinder with `kma` (instead of ncbi-blast+) - * Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take slightly longer to run (a few extra seconds) if you use both R1 and R2 files. - * Querying assemblies: - * This will run SerotypeFinder with `ncbi-blast+` - * SerotypeFinder does not clean up after itself. `tmp/out_H_type.xml` and `tmp/out_O_type.xml` will exist in the specified output directory + +- You do not need to supply a database or use the `-p` or `-d` flags + - Database is included in the container and is in the default/expected location within the container: `/database` + - (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `serotypefinder.py -p` flag. You can find instructions for this on the SerotypeFinder Bitbucket README. `kma` is included in this docker container for database indexing. + - SerotypeFinder does **NOT** create an output directory when you use the `-o` flag. You MUST create it beforehand or it will throw an error. + - **Default % Identity threshold: 90%**. Adjust with `-t 0.95` + - **Default % coverage threshold: 60%**. Adjust with `-l 0.70` + - Use the `-x` flag (extended output) if you want the traditional/legacy SerotypeFinder output files `results_tab.tsv results.txt Serotype_allele_seq.fsa Hit_in_genome_seq.fsa`. 
Otherwise you will need to parse the default output file `data.json` for results + - (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save time from having to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] +- Querying reads: + - This will run SerotypeFinder with `kma` (instead of ncbi-blast+) + - Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take slightly longer to run (a few extra seconds) if you use both R1 and R2 files. +- Querying assemblies: + - This will run SerotypeFinder with `ncbi-blast+` + - SerotypeFinder does not clean up after itself. `tmp/out_H_type.xml` and `tmp/out_O_type.xml` will exist in the specified output directory ## Example Usage: Docker + ```bash # download the image $ docker pull staphb/serotypefinder:2.0.1 @@ -82,6 +96,7 @@ $ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/serotypefinder:2.0.1 ``` ## Example Usage: Singularity + ```bash # download the image $ singularity build serotypefinder.2.0.1.sif docker://staphb/serotypefinder:2.0.1 diff --git a/shigatyper/2.0.3/Dockerfile b/shigatyper/2.0.3/Dockerfile new file mode 100644 index 000000000..4860c7cdd --- /dev/null +++ b/shigatyper/2.0.3/Dockerfile @@ -0,0 +1,99 @@ +FROM ubuntu:focal as app + +ARG SHIGATYPER_VER="2.0.3" +ARG SAMTOOLSVER="1.9" +ARG BCFTOOLSVER="1.9" +ARG MINIMAP2_VER="2.24" + +ARG DEBIAN_FRONTEND=noninteractive + +# LABEL instructions tag the image with metadata that might be important to the user +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="shigatyper" +LABEL software.version=${SHIGATYPER_VER} +LABEL description="Determine Shigella serotype using Illumina (single or paired-end) or Oxford Nanopore reads!" +LABEL website="https://github.com/CFSAN-Biostatistics/shigatyper" +LABEL license="https://github.com/CFSAN-Biostatistics/shigatyper/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + git \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + make \ + python3 \ + python3-setuptools \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + curl \ + build-essential && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# shigatyper depends on samtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install && \ + cd .. + +# shigatyper depends on bcftools +RUN wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLSVER}/bcftools-${BCFTOOLSVER}.tar.bz2 && \ + tar -xjf bcftools-${BCFTOOLSVER}.tar.bz2 && \ + rm bcftools-${BCFTOOLSVER}.tar.bz2 && \ + cd bcftools-${BCFTOOLSVER} && \ + make && \ + make install && \ + cd .. 
+ +# shigatyper depends on MINIMAP2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxf - + +# get shigatyper repo +# RUN git clone -b conda-package-${SHIGATYPER_VER} https://github.com/CFSAN-Biostatistics/shigatyper && \ +RUN wget https://github.com/CFSAN-Biostatistics/shigatyper/archive/refs/tags/conda-package-${SHIGATYPER_VER}.tar.gz && \ + tar -xf conda-package-${SHIGATYPER_VER}.tar.gz && \ + rm conda-package-${SHIGATYPER_VER}.tar.gz && \ + cd shigatyper-conda-package-${SHIGATYPER_VER} && \ + python3 setup.py install && \ + samtools faidx /usr/local/lib/python3.8/dist-packages/ShigaTyper-${SHIGATYPER_VER}-py3.8.egg/shigatyper/resources/ShigellaRef5.fasta + +# set the environment +ENV PATH="${PATH}:/shigatyper-conda-package-${SHIGATYPER_VER}:/minimap2-${MINIMAP2_VER}_x64-linux" \ + LC_ALL=C + +# WORKDIR sets working directory +WORKDIR /data + +# test layer +FROM app as test + +# fetch test data from ENA FTP +# SRR8186675 = https://www.ncbi.nlm.nih.gov/biosample/SAMN10040549 , expect Shigella boydii serotype 12 +# SRR7738178 = https://www.ncbi.nlm.nih.gov/biosample/SAMN09878976 , expect Shigella sonnei (I think Form II?) +RUN wget -P /data ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_1.fastq.gz && \ + wget -P /data ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_2.fastq.gz && \ + wget -P /data ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_1.fastq.gz && \ + wget -P /data ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_2.fastq.gz + +# actually run shigatyper on test fastq files; ls to see output filenames +RUN shigatyper --R1 /data/SRR8186675_1.fastq.gz --R2 /data/SRR8186675_2.fastq.gz --name SRR8186675 && \ + shigatyper --R1 /data/SRR7738178_1.fastq.gz --R2 /data/SRR7738178_2.fastq.gz --name SRR7738178 && \ + echo && \ + ls -lh && \ + shigatyper --version diff --git a/shigatyper/2.0.3/README.md b/shigatyper/2.0.3/README.md new file mode 100644 index 000000000..b0bacf5e8 --- /dev/null +++ b/shigatyper/2.0.3/README.md @@ -0,0 +1,31 @@ +# Shigatyper container + +Main tool : [shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) + +Additional tools: + +- samtools 1.9 +- bcftools 1.9 +- minimap2 2.24 +- python 3.8.10 + +## Example Usage + +```{bash} +# Paired-end reads +shigatyper.py --R1 SRX5006488_R1.fastq.gz --R2 SRX5006488_R2.fastq.gz +sample prediction ipaB +SRX5006488 Shigella boydii serotype 12 + + +# Single-end reads +shigatyper.py --SE SRX5006488.fastq.gz +sample prediction ipaB +SRX5006488-se Shigella boydii serotype 12 + + +# Oxford Nanopore reads +shigatyper.py --SE SRX7050861.fastq.gz --ont +sample prediction ipaB +SRX7050861-ont Shigella dysenteriae serotype 3 + +``` + +Better documentation can be found at [https://github.com/CFSAN-Biostatistics/shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) diff --git a/shigatyper/2.0.4/Dockerfile b/shigatyper/2.0.4/Dockerfile new file mode 100644 index 000000000..454036f00 --- /dev/null +++ b/shigatyper/2.0.4/Dockerfile @@ -0,0 +1,143 @@ +# global arg variables +ARG SHIGATYPER_VER="2.0.4" +ARG SAMTOOLSVER="1.9" +ARG BCFTOOLSVER="1.9" +ARG MINIMAP2_VER="2.24" + +### start of builder stage ### +FROM ubuntu:jammy as builder + +# re-instantiate these variables +ARG SHIGATYPER_VER +ARG SAMTOOLSVER +ARG BCFTOOLSVER +ARG MINIMAP2_VER + +# so that apt doesn't ask for input +ARG DEBIAN_FRONTEND=noninteractive + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update 
&& apt-get install -y --no-install-recommends \ + wget \ + git \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + make \ + python3 \ + python3-setuptools \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + curl \ + build-essential && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# shigatyper depends on samtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm -v samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# shigatyper depends on bcftools +RUN wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLSVER}/bcftools-${BCFTOOLSVER}.tar.bz2 && \ + tar -xjf bcftools-${BCFTOOLSVER}.tar.bz2 && \ + rm -v bcftools-${BCFTOOLSVER}.tar.bz2 && \ + cd bcftools-${BCFTOOLSVER} && \ + make && \ + make install + +# shigatyper depends on minimap2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxf - + +# set the environment +ENV PATH="${PATH}:/shigatyper-conda-package-${SHIGATYPER_VER}:/minimap2-${MINIMAP2_VER}_x64-linux" \ + LC_ALL=C + +### start of app stage ### +# using fresh ubuntu:jammy image as base for app stage +FROM ubuntu:jammy as app + +# re-instantiate thes variables +ARG SHIGATYPER_VER +ARG MINIMAP2_VER + +# LABELS must be contained in `app` stage to pass the GHActions checks +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="shigatyper" +LABEL software.version="${SHIGATYPER_VER}" +LABEL description="Determine Shigella serotype using Illumina (single or paired-end) or Oxford Nanopore reads!" 
+LABEL website="https://github.com/CFSAN-Biostatistics/shigatyper" +LABEL license="https://github.com/CFSAN-Biostatistics/shigatyper/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" + +# copy in samtools and bcftools executables into app stage +COPY --from=builder /usr/local/bin/* /usr/local/bin + +# copy in minimap2 executable into app stage +COPY --from=builder /minimap2-${MINIMAP2_VER}_x64-linux /usr/local/bin + +# ensuring samtools & bcftools dependencies are present (dependencies for running tools, not for compiling/installing tools) +RUN apt-get update && apt-get install --no-install-recommends -y \ + perl \ + zlib1g \ + libncurses5 \ + bzip2 \ + liblzma-dev \ + libcurl4-gnutls-dev \ + python3 \ + python3-setuptools \ + wget \ + ca-certificates \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get shigatyper code; install; index fasta file +# FYI - samtools faidx cmd has a * in the path due to version inconsistencies +RUN wget https://github.com/CFSAN-Biostatistics/shigatyper/archive/refs/tags/conda-package-${SHIGATYPER_VER}.tar.gz && \ + tar -xf conda-package-${SHIGATYPER_VER}.tar.gz && \ + rm -v conda-package-${SHIGATYPER_VER}.tar.gz && \ + cd shigatyper-conda-package-${SHIGATYPER_VER} && \ + python3 setup.py install && \ + samtools faidx /usr/local/lib/python3.10/dist-packages/ShigaTyper-*-py3.10.egg/resources/ShigellaRef5.fasta + +# ensuring final working directory is /data +WORKDIR /data + +# default command is to pull up help options; can be overridden of course +CMD ["shigatyper", "--help"] + +### start of test stage ### +# using app stage as base image for test stage +FROM app as test + +# fetch test data from ENA FTP +# SRR8186675 = https://www.ncbi.nlm.nih.gov/biosample/SAMN10040549 , expect Shigella boydii serotype 12 +# SRR7738178 = https://www.ncbi.nlm.nih.gov/biosample/SAMN09878976 , expect Shigella sonnei (I think Form II?) +# SRR8186651 = https://www.ncbi.nlm.nih.gov/biosample/SAMN10052840 , expect Shigella flexneri serotype 3a +RUN echo "downloading test FASTQ files from ENA's FTP, please be patient..." 
&& \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_1.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_2.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_1.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_2.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/001/SRR8186651/SRR8186651_1.fastq.gz && \ + wget -q -P /data/ ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/001/SRR8186651/SRR8186651_2.fastq.gz + +# actually run shigatyper on test fastq files; ls to see output filenames +RUN shigatyper --R1 /data/SRR8186675_1.fastq.gz --R2 /data/SRR8186675_2.fastq.gz --name SRR8186675 && \ + shigatyper --R1 /data/SRR7738178_1.fastq.gz --R2 /data/SRR7738178_2.fastq.gz --name SRR7738178 && \ + shigatyper --R1 /data/SRR8186651_1.fastq.gz --R2 /data/SRR8186651_2.fastq.gz --name SRR8186651 && \ + echo && \ + ls -lh && \ + shigatyper --version diff --git a/shigatyper/2.0.4/README.md b/shigatyper/2.0.4/README.md new file mode 100644 index 000000000..b797d78d6 --- /dev/null +++ b/shigatyper/2.0.4/README.md @@ -0,0 +1,35 @@ +# Shigatyper 2.0.4 container + +Main tool : [shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) + +Additional tools: + +- samtools 1.9 +- bcftools 1.9 +- minimap2 2.24 +- python 3.10.6 + +## Known Version Issue + +The 2.0.4 release of Shigatyper reports the incorrect version `2.0.3` when running `shigatyper --version`. This is [a known issue](https://github.com/CFSAN-Biostatistics/shigatyper/pull/16) and should be addressed in the next version release. Users should not rely upon the command for correctly reporting the version, but rather the docker image tag `2.0.4` for tracking the correct version. 
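For example, the mismatch can be seen by querying the container directly (a hypothetical invocation that assumes the image is published as `staphb/shigatyper:2.0.4`):

```bash
# the bundled tool mis-reports its own version in this release (prints 2.0.3 instead of 2.0.4),
# so record the docker image tag rather than this output for provenance
docker run --rm staphb/shigatyper:2.0.4 shigatyper --version
```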
+ +## Example Usage + +```{bash} +# Paired-end reads +shigatyper.py --R1 SRX5006488_R1.fastq.gz --R2 SRX5006488_R2.fastq.gz +sample prediction ipaB +SRX5006488 Shigella boydii serotype 12 + + +# Single-end reads +shigatyper.py --SE SRX5006488.fastq.gz +sample prediction ipaB +SRX5006488-se Shigella boydii serotype 12 + + +# Oxford Nanopore reads +shigatyper.py --SE SRX7050861.fastq.gz --ont +sample prediction ipaB +SRX7050861-ont Shigella dysenteriae serotype 3 + +``` + +Better documentation can be found at [https://github.com/CFSAN-Biostatistics/shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) diff --git a/shigatyper/2.0.5/Dockerfile b/shigatyper/2.0.5/Dockerfile new file mode 100644 index 000000000..e80daf0ef --- /dev/null +++ b/shigatyper/2.0.5/Dockerfile @@ -0,0 +1,143 @@ +# global arg variables +ARG SHIGATYPER_VER="2.0.5" +ARG SAMTOOLSVER="1.9" +ARG BCFTOOLSVER="1.9" +ARG MINIMAP2_VER="2.24" + +### start of builder stage ### +FROM ubuntu:jammy as builder + +# re-instantiate these variables +ARG SHIGATYPER_VER +ARG SAMTOOLSVER +ARG BCFTOOLSVER +ARG MINIMAP2_VER + +# so that apt doesn't ask for input +ARG DEBIAN_FRONTEND=noninteractive + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + git \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + gcc \ + make \ + python3 \ + python3-setuptools \ + bzip2 \ + gnuplot \ + ca-certificates \ + gawk \ + curl \ + build-essential && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# shigatyper depends on samtools +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLSVER}/samtools-${SAMTOOLSVER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLSVER}.tar.bz2 && \ + rm -v samtools-${SAMTOOLSVER}.tar.bz2 && \ + cd samtools-${SAMTOOLSVER} && \ + ./configure && \ + make && \ + make install + +# shigatyper depends on bcftools +RUN wget https://github.com/samtools/bcftools/releases/download/${BCFTOOLSVER}/bcftools-${BCFTOOLSVER}.tar.bz2 && \ + tar -xjf bcftools-${BCFTOOLSVER}.tar.bz2 && \ + rm -v bcftools-${BCFTOOLSVER}.tar.bz2 && \ + cd bcftools-${BCFTOOLSVER} && \ + make && \ + make install + +# shigatyper depends on minimap2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxf - + +# set the environment +ENV PATH="${PATH}:/minimap2-${MINIMAP2_VER}_x64-linux" \ + LC_ALL=C + +### start of app stage ### +# using fresh ubuntu:jammy image as base for app stage +FROM ubuntu:jammy as app + +# re-instantiate thes variables +ARG SHIGATYPER_VER +ARG MINIMAP2_VER + +# LABELS must be contained in `app` stage to pass the GHActions checks +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="shigatyper" +LABEL software.version="${SHIGATYPER_VER}" +LABEL description="Determine Shigella serotype using Illumina (single or paired-end) or Oxford Nanopore reads!" 
+LABEL website="https://github.com/CFSAN-Biostatistics/shigatyper" +LABEL license="https://github.com/CFSAN-Biostatistics/shigatyper/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" + +# copy in samtools and bcftools executables into app stage +COPY --from=builder /usr/local/bin/* /usr/local/bin + +# copy in minimap2 executable into app stage +COPY --from=builder /minimap2-${MINIMAP2_VER}_x64-linux /usr/local/bin + +# ensuring samtools & bcftools dependencies are present (dependencies for running tools, not for compiling/installing tools) +RUN apt-get update && apt-get install --no-install-recommends -y \ + perl \ + zlib1g \ + libncurses5 \ + bzip2 \ + liblzma-dev \ + libcurl4-gnutls-dev \ + python3 \ + python3-setuptools \ + wget \ + ca-certificates \ + && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# get shigatyper code; install; index fasta file +# FYI - samtools faidx cmd has a * in the path due to version inconsistencies +RUN wget https://github.com/CFSAN-Biostatistics/shigatyper/archive/refs/tags/${SHIGATYPER_VER}.tar.gz && \ + tar -xf ${SHIGATYPER_VER}.tar.gz && \ + rm -v ${SHIGATYPER_VER}.tar.gz && \ + cd shigatyper-${SHIGATYPER_VER} && \ + python3 setup.py install && \ + samtools faidx /usr/local/lib/python3.10/dist-packages/ShigaTyper-*-py3.10.egg/resources/ShigellaRef5.fasta + +# ensuring final working directory is /data +WORKDIR /data + +# default command is to pull up help options; can be overridden of course +CMD ["shigatyper", "--help"] + +### start of test stage ### +# using app stage as base image for test stage +FROM app as test + +# fetch test data from ENA FTP +# SRR8186675 = https://www.ncbi.nlm.nih.gov/biosample/SAMN10040549 , expect Shigella boydii serotype 12 +# SRR7738178 = https://www.ncbi.nlm.nih.gov/biosample/SAMN09878976 , expect Shigella sonnei (I think Form II?) +# SRR8186651 = https://www.ncbi.nlm.nih.gov/biosample/SAMN10052840 , expect Shigella flexneri serotype 3a +RUN echo "downloading test FASTQ files from ENA's FTP, please be patient..." 
&& \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_1.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/005/SRR8186675/SRR8186675_2.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_1.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR773/008/SRR7738178/SRR7738178_2.fastq.gz && \ + wget -q -P /data ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/001/SRR8186651/SRR8186651_1.fastq.gz && \ + wget -q -P /data/ ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR818/001/SRR8186651/SRR8186651_2.fastq.gz + +# actually run shigatyper on test fastq files; ls to see output filenames +RUN shigatyper --R1 /data/SRR8186675_1.fastq.gz --R2 /data/SRR8186675_2.fastq.gz --name SRR8186675 && \ + shigatyper --R1 /data/SRR7738178_1.fastq.gz --R2 /data/SRR7738178_2.fastq.gz --name SRR7738178 && \ + shigatyper --R1 /data/SRR8186651_1.fastq.gz --R2 /data/SRR8186651_2.fastq.gz --name SRR8186651 && \ + echo && \ + ls -lh && \ + shigatyper --version diff --git a/shigatyper/2.0.5/README.md b/shigatyper/2.0.5/README.md new file mode 100644 index 000000000..9c4bc78c2 --- /dev/null +++ b/shigatyper/2.0.5/README.md @@ -0,0 +1,31 @@ +# Shigatyper 2.0.5 container + +Main tool : [shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) + +Additional tools: + +- samtools 1.9 +- bcftools 1.9 +- minimap2 2.24 +- python 3.10.6 + +## Example Usage + +```{bash} +# Paired-end reads +shigatyper.py --R1 SRX5006488_R1.fastq.gz --R2 SRX5006488_R2.fastq.gz +sample prediction ipaB +SRX5006488 Shigella boydii serotype 12 + + +# Single-end reads +shigatyper.py --SE SRX5006488.fastq.gz +sample prediction ipaB +SRX5006488-se Shigella boydii serotype 12 + + +# Oxford Nanopore reads +shigatyper.py --SE SRX7050861.fastq.gz --ont +sample prediction ipaB +SRX7050861-ont Shigella dysenteriae serotype 3 + +``` + +Better documentation can be found at [https://github.com/CFSAN-Biostatistics/shigatyper](https://github.com/CFSAN-Biostatistics/shigatyper) diff --git a/shigeifinder/1.3.3/Dockerfile b/shigeifinder/1.3.3/Dockerfile new file mode 100644 index 000000000..9aa992f0e --- /dev/null +++ b/shigeifinder/1.3.3/Dockerfile @@ -0,0 +1,149 @@ +ARG SHIGEIFINDER_VER="1.3.3" +ARG SAMTOOLS_VER="1.10" +ARG BWA_VER="0.7.17" + +FROM ubuntu:focal as app + +ARG SHIGEIFINDER_VER +ARG SAMTOOLS_VER +ARG BWA_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="ShigEiFinder" +LABEL software.version=${SHIGEIFINDER_VER} +LABEL description="Cluster informed Shigella and EIEC serotyping tool from Illumina reads and assemblies" +LABEL website="https://github.com/LanLab/ShigEiFinder" +LABEL license="https://github.com/LanLab/ShigEiFinder/blob/main/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="curtis.kapsak@theiagen.com" + +# so that apt/tzdata doesn't ask for a timezone during build. 
Variable will not be in environment after building +ARG DEBIAN_FRONTEND=noninteractive + +# install dependencies via apt; cleanup apt garbage +# ncbi-blast+ from apt is v2.9.0 (min ver requirement for ShigEiFinder) +# python version is 3.8.10 +RUN apt-get update && apt-get install --no-install-recommends -y \ + make \ + gcc \ + g++ \ + python3 \ + python3-pip \ + python3-setuptools \ + zlib1g-dev \ + wget \ + ca-certificates \ + procps \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + bzip2 \ + gawk \ + gnuplot \ + ncbi-blast+ \ + unzip && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# install bwa +RUN mkdir /bwa && \ + cd /bwa && \ + wget https://github.com/lh3/bwa/releases/download/v${BWA_VER}/bwa-${BWA_VER}.tar.bz2 && \ + tar -xjf bwa-${BWA_VER}.tar.bz2 && \ + rm bwa-${BWA_VER}.tar.bz2 && \ + cd bwa-${BWA_VER} && \ + make + +# install samtools; no need to add to PATH, 'make install' does this for us +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ + rm samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ + ./configure && \ + make && \ + make install + +# install ShigEiFinder; make /data +RUN wget https://github.com/LanLab/ShigEiFinder/archive/refs/tags/v${SHIGEIFINDER_VER}.tar.gz && \ + tar -xvf v${SHIGEIFINDER_VER}.tar.gz && \ + rm v${SHIGEIFINDER_VER}.tar.gz && \ + cd ShigEiFinder-${SHIGEIFINDER_VER} && \ + python3 setup.py install && \ + mkdir /data + +# final working directory is /data +WORKDIR /data + +# set locale settings for singularity compatibility. Set PATH to include bwa +ENV LC_ALL=C \ + PATH="${PATH}:/bwa/bwa-${BWA_VER}" + +# test layer +FROM app as test + +ARG SHIGEIFINDER_VER + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# check dependencies and print help options +RUN shigeifinder --check && \ + shigeifinder --help + +# downloading a couple of representative genomes (FASTA) and run them through ShigEiFinder +# Shigella sonnei genome: https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_019947675.1/ +# BioSample: SAMN21386344 +ARG GENBANK_ACCESSION="GCA_019947675.1" +RUN datasets download genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip && \ + mkdir -v ${GENBANK_ACCESSION}-download && \ + unzip ${GENBANK_ACCESSION}.zip -d ${GENBANK_ACCESSION}-download && \ + rm ${GENBANK_ACCESSION}.zip && \ + mv -v ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}*.fna ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna && \ + shigeifinder -i ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna \ + --hits \ + -t 2 \ + --output shigeifinder.${GENBANK_ACCESSION}.out && \ + head -n 2 shigeifinder.${GENBANK_ACCESSION}.out + +# Shigella flexneri 2a str. 
301 +# https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/198214/ +ARG GENBANK_ACCESSION="GCF_000006925.2" +RUN datasets download genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip && \ + mkdir -v ${GENBANK_ACCESSION}-download && \ + unzip ${GENBANK_ACCESSION}.zip -d ${GENBANK_ACCESSION}-download && \ + rm ${GENBANK_ACCESSION}.zip && \ + mv -v ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}*.fna ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna && \ + shigeifinder -i ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna \ + --hits \ + -t 2 \ + --output shigeifinder.${GENBANK_ACCESSION}.out && \ + head -n 2 shigeifinder.${GENBANK_ACCESSION}.out + +# test with Shigella Sonnei FASTQs and run through ShigEiFinder (test bwa & samtools) +# https://www.ncbi.nlm.nih.gov/sra/SRX17216573[accn] +# https://www.ncbi.nlm.nih.gov/biosample/SAMN30499774 +### NOTE: ENA FTP CAN BE SLOW TO DOWNLOAD FROM, HAVE PATIENCE. ALSO, SHIGEIFINDER RUNS MUCH SLOWER ON FASTQ FILES COMPARED TO FASTA FILES +RUN wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR212/091/SRR21205791/SRR21205791_1.fastq.gz && \ + wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR212/091/SRR21205791/SRR21205791_2.fastq.gz && \ + shigeifinder -r \ + -i SRR21205791_1.fastq.gz SRR21205791_2.fastq.gz \ + --hits \ + --dratio \ + -t 2 \ + --output shigeifinder.SRR21205791.out && \ + head -n 2 shigeifinder.SRR21205791.out + +# testing single end mode with same sample as above ^ +RUN shigeifinder -r \ + -i SRR21205791_1.fastq.gz \ + --hits \ + --dratio \ + --single_end \ + -t 2 \ + --output shigeifinder.SRR21205791.single-end.out && \ + head -n 2 shigeifinder.SRR21205791.single-end.out \ No newline at end of file diff --git a/shigeifinder/1.3.3/README.md b/shigeifinder/1.3.3/README.md new file mode 100644 index 000000000..204befe3f --- /dev/null +++ b/shigeifinder/1.3.3/README.md @@ -0,0 +1,57 @@ +# ShigEiFinder docker image + +Main tool : [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) + +Additional tools: + +- bwa 0.7.17 +- samtools 1.10 +- python 3.8.10 +- ncbi-blast+ 2.9.0 + +Full documentation: https://github.com/LanLab/ShigEiFinder + +Publication: https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000704 + +> This is a tool that is used to identify differentiate Shigella/EIEC using cluster-specific genes and identify the serotype using O-antigen/H-antigen genes. This pipeline can serotype over 59 Shigella and 22 EIEC serotypes using either assembled whole genomes or Whole Genome Sequencing (WGS) reads. The results are output in a tabular format which if saved as a file can be opened in Excel or other tabular programs. 
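The usage examples below show `shigeifinder` as it would be called inside the container; when invoking the image directly, a Docker command along these lines should work (a sketch that assumes the image is published as `staphb/shigeifinder:1.3.3` and uses `assembly.fasta` as a stand-in for your own input):

```bash
# mount the current directory to /data (the image's working directory) and serotype an assembly
docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/shigeifinder:1.3.3 \
  shigeifinder -i /data/assembly.fasta --hits -t 2 --output /data/shigeifinder.assembly.out
```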
+ +## Example Usage + +```bash +# genome assembly FASTA as input +$ shigeifinder -i GCF_000006925.2 \ + --hits \ + -t 2 \ + --output shigeifinder.GCF_000006925.2.out + +$ head -n 2 shigeifinder.GCF_000006925.2.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +GCF_000006925 + 38 C3 SF2a SF1-5 + +############################################################## + +# paired end Illumina read FASTQs as input +$ shigeifinder -r \ + -i SRR21205791_1.fastq.gz SRR21205791_2.fastq.gz \ + --hits \ + --dratio \ + -t 2 \ + --output shigeifinder.SRR21205791.out + +$ head -n 2 shigeifinder.SRR21205791.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +SRR21205791 + 38 CSS SS SS + +# single end Illumina read FASTQs as input +$ shigeifinder -r \ + -i SRR21205791_1.fastq.gz \ + --hits \ + --dratio \ + --single_end \ + -t 2 \ + --output shigeifinder.SRR21205791.single-end.out + +$ head -n 2 shigeifinder.SRR21205791.single-end.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +SRR21205791_1 + 38 CSS SS SS +``` diff --git a/shigeifinder/1.3.5/Dockerfile b/shigeifinder/1.3.5/Dockerfile new file mode 100644 index 000000000..d6f1adbeb --- /dev/null +++ b/shigeifinder/1.3.5/Dockerfile @@ -0,0 +1,178 @@ +# global arg variables +ARG SHIGEIFINDER_VER="1.3.5" +ARG SAMTOOLS_VER="1.10" +ARG BWA_VER="0.7.17" + +### start of builder stage ### +FROM ubuntu:focal as builder + +# re-instantiate these variables +ARG SHIGEIFINDER_VER +ARG SAMTOOLS_VER +ARG BWA_VER + +# so that apt/tzdata doesn't ask for a timezone during build. Variable will not be in environment after building +ARG DEBIAN_FRONTEND=noninteractive + +# install dependencies via apt (a lot of this is for compiling C code); cleanup apt garbage +RUN apt-get update && apt-get install --no-install-recommends -y \ + make \ + gcc \ + g++ \ + zlib1g-dev \ + wget \ + ca-certificates \ + procps \ + libncurses5-dev \ + libbz2-dev \ + liblzma-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + libssl-dev \ + bzip2 \ + gawk \ + gnuplot && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# install bwa +RUN mkdir /bwa && \ + cd /bwa && \ + wget https://github.com/lh3/bwa/releases/download/v${BWA_VER}/bwa-${BWA_VER}.tar.bz2 && \ + tar -xjf bwa-${BWA_VER}.tar.bz2 && \ + rm -v bwa-${BWA_VER}.tar.bz2 && \ + cd bwa-${BWA_VER} && \ + make + +# install samtools; no need to add to PATH, 'make install' does this for us +RUN wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ + rm samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ + ./configure && \ + make && \ + make install + +### start of app stage ### +# using fresh ubuntu:jammy image as base for app stage +FROM ubuntu:focal as app + +# re-instantiate these variables +ARG SHIGEIFINDER_VER +ARG SAMTOOLS_VER +ARG BWA_VER + +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="ShigEiFinder" +LABEL software.version="${SHIGEIFINDER_VER}" +LABEL description="Cluster informed Shigella and EIEC serotyping tool from Illumina reads and assemblies" +LABEL website="https://github.com/LanLab/ShigEiFinder" +LABEL license="https://github.com/LanLab/ShigEiFinder/blob/main/LICENSE" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="curtis.kapsak@theiagen.com" + +# copy in samtools executables into app stage +COPY --from=builder /usr/local/bin/* /usr/local/bin + +# copy in bwa executable +COPY 
--from=builder /bwa/bwa-${BWA_VER}/bwa /usr/local/bin + +# install dependencies via apt; cleanup apt garbage +# ncbi-blast+ from apt is v2.9.0 (min ver requirement for ShigEiFinder) +# python version is 3.8.10 +RUN apt-get update && apt-get install --no-install-recommends -y \ + python3 \ + python3-pip \ + python3-setuptools \ + wget \ + ca-certificates \ + procps \ + libncurses5-dev \ + libbz2-dev \ + libcurl4-gnutls-dev \ + zlib1g-dev \ + bzip2 \ + ncbi-blast+ \ + unzip && \ + rm -rf /var/lib/apt/lists/* && apt-get autoclean + +# install ShigEiFinder; make /data +RUN wget https://github.com/LanLab/ShigEiFinder/archive/refs/tags/v${SHIGEIFINDER_VER}.tar.gz && \ + tar -xvf v${SHIGEIFINDER_VER}.tar.gz && \ + rm -v v${SHIGEIFINDER_VER}.tar.gz && \ + cd ShigEiFinder-${SHIGEIFINDER_VER} && \ + python3 setup.py install && \ + mkdir /data + +# set locale settings for singularity compatibility. Set PATH to include bwa +ENV LC_ALL=C + +# final working directory is /data +WORKDIR /data + +### start of test stage ### +# using app stage as base image for test stage +FROM app as test + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# downloading a couple of representative genomes (FASTA) and run them through ShigEiFinder +# Shigella sonnei genome: https://www.ncbi.nlm.nih.gov/data-hub/genome/GCA_019947675.1/ +# BioSample: SAMN21386344 +ARG GENBANK_ACCESSION="GCA_019947675.1" +RUN datasets download genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip && \ + mkdir -v ${GENBANK_ACCESSION}-download && \ + unzip ${GENBANK_ACCESSION}.zip -d ${GENBANK_ACCESSION}-download && \ + rm ${GENBANK_ACCESSION}.zip && \ + mv -v ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}*.fna ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna && \ + shigeifinder -i ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna \ + --hits \ + -t 2 \ + --output shigeifinder.${GENBANK_ACCESSION}.out && \ + head -n 2 shigeifinder.${GENBANK_ACCESSION}.out + +# Shigella flexneri 2a str. 301 +# https://www.ncbi.nlm.nih.gov/data-hub/taxonomy/198214/ +ARG GENBANK_ACCESSION="GCF_000006925.2" +RUN datasets download genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip && \ + mkdir -v ${GENBANK_ACCESSION}-download && \ + unzip ${GENBANK_ACCESSION}.zip -d ${GENBANK_ACCESSION}-download && \ + rm ${GENBANK_ACCESSION}.zip && \ + mv -v ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}*.fna ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna && \ + shigeifinder -i ${GENBANK_ACCESSION}-download/ncbi_dataset/data/${GENBANK_ACCESSION}/${GENBANK_ACCESSION}.genomic.fna \ + --hits \ + -t 2 \ + --output shigeifinder.${GENBANK_ACCESSION}.out && \ + head -n 2 shigeifinder.${GENBANK_ACCESSION}.out + +# test with Shigella Sonnei FASTQs and run through ShigEiFinder (test bwa & samtools) +# https://www.ncbi.nlm.nih.gov/sra/SRX17216573[accn] +# https://www.ncbi.nlm.nih.gov/biosample/SAMN30499774 +### NOTE: ENA FTP CAN BE SLOW TO DOWNLOAD FROM, HAVE PATIENCE. 
ALSO, SHIGEIFINDER RUNS MUCH SLOWER ON FASTQ FILES COMPARED TO FASTA FILES +RUN wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR212/091/SRR21205791/SRR21205791_1.fastq.gz && \ + wget -q ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR212/091/SRR21205791/SRR21205791_2.fastq.gz && \ + shigeifinder -r \ + -i SRR21205791_1.fastq.gz SRR21205791_2.fastq.gz \ + --hits \ + --dratio \ + -t 2 \ + --output shigeifinder.SRR21205791.out && \ + head -n 2 shigeifinder.SRR21205791.out + +# testing single end mode with same sample as above ^ +RUN shigeifinder -r \ + -i SRR21205791_1.fastq.gz \ + --hits \ + --dratio \ + --single_end \ + -t 2 \ + --output shigeifinder.SRR21205791.single-end.out && \ + head -n 2 shigeifinder.SRR21205791.single-end.out + +# check dependencies and print help options +RUN shigeifinder --check && \ + shigeifinder --help && \ + shigeifinder --version \ No newline at end of file diff --git a/shigeifinder/1.3.5/README.md b/shigeifinder/1.3.5/README.md new file mode 100644 index 000000000..204befe3f --- /dev/null +++ b/shigeifinder/1.3.5/README.md @@ -0,0 +1,57 @@ +# ShigEiFinder docker image + +Main tool : [ShigEiFinder](https://github.com/LanLab/ShigEiFinder) + +Additional tools: + +- bwa 0.7.17 +- samtools 1.10 +- python 3.8.10 +- ncbi-blast+ 2.9.0 + +Full documentation: https://github.com/LanLab/ShigEiFinder + +Publication: https://www.microbiologyresearch.org/content/journal/mgen/10.1099/mgen.0.000704 + +> This is a tool that is used to identify differentiate Shigella/EIEC using cluster-specific genes and identify the serotype using O-antigen/H-antigen genes. This pipeline can serotype over 59 Shigella and 22 EIEC serotypes using either assembled whole genomes or Whole Genome Sequencing (WGS) reads. The results are output in a tabular format which if saved as a file can be opened in Excel or other tabular programs. 
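As in the Dockerfile's test stage, `shigeifinder --check` confirms that the bundled dependencies are visible to the tool, which makes a quick pre-flight check before running real data (a sketch assuming the image is published as `staphb/shigeifinder:1.3.5`):

```bash
# verify dependencies and report the installed version from inside the container
docker run --rm staphb/shigeifinder:1.3.5 shigeifinder --check
docker run --rm staphb/shigeifinder:1.3.5 shigeifinder --version
```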
+ +## Example Usage + +```bash +# genome assembly FASTA as input +$ shigeifinder -i GCF_000006925.2 \ + --hits \ + -t 2 \ + --output shigeifinder.GCF_000006925.2.out + +$ head -n 2 shigeifinder.GCF_000006925.2.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +GCF_000006925 + 38 C3 SF2a SF1-5 + +############################################################## + +# paired end Illumina read FASTQs as input +$ shigeifinder -r \ + -i SRR21205791_1.fastq.gz SRR21205791_2.fastq.gz \ + --hits \ + --dratio \ + -t 2 \ + --output shigeifinder.SRR21205791.out + +$ head -n 2 shigeifinder.SRR21205791.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +SRR21205791 + 38 CSS SS SS + +# single end Illumina read FASTQs as input +$ shigeifinder -r \ + -i SRR21205791_1.fastq.gz \ + --hits \ + --dratio \ + --single_end \ + -t 2 \ + --output shigeifinder.SRR21205791.single-end.out + +$ head -n 2 shigeifinder.SRR21205791.single-end.out +#SAMPLE ipaH VIRULENCE_PLASMID CLUSTER SEROTYPE O_ANTIGEN H_ANTIGEN NOTES +SRR21205791_1 + 38 CSS SS SS +``` diff --git a/shovill/1.1.0/Dockerfile b/shovill/1.1.0/Dockerfile index f41d56b0a..30592b24d 100644 --- a/shovill/1.1.0/Dockerfile +++ b/shovill/1.1.0/Dockerfile @@ -1,18 +1,52 @@ -FROM ubuntu:xenial +# ephemeral environmental variables that go away after building the image +ARG SHOVILL_VER="1.1.0" +ARG SPADES_VER="3.15.5" +ARG SEQTK_VER="1.3" +ARG KMC_VER="3.1.1" +ARG LIGHTER_VER="1.1.1" +ARG TRIMMOMATIC_VER="0.39" +ARG BWA_VER="0.7.17" +# not sure if a more recent samtools is compatible or not, previously used samtools 1.10 +ARG SAMTOOLS_VER="1.16.1" +ARG SKESA_VER="2.3.0" +ARG MEGAHIT_VER="1.1.4" +ARG VELVET_VER="1.2.10" +ARG FLASH_VER="1.2.11" +# also not sure if this upgraded pilon version is compatible, previously used 1.22 +ARG PILON_VER="1.24" + +FROM ubuntu:xenial as app + +# reinstantiating variables so they are available in app FROM layer +ARG SHOVILL_VER +ARG SPADES_VER +ARG SEQTK_VER +ARG KMC_VER +ARG LIGHTER_VER +ARG TRIMMOMATIC_VER +ARG BWA_VER +ARG SAMTOOLS_VER +ARG SKESA_VER +ARG MEGAHIT_VER +ARG VELVET_VER +ARG FLASH_VER +ARG PILON_VER LABEL base.image="ubuntu:xenial" -LABEL container.version="2" +LABEL dockerfile.version="3" LABEL software="Shovill" LABEL software.version="1.1.0" LABEL description="faster than SPAdes de novo DBG genome assembler (with assembler options!)" LABEL website="https://github.com/tseemann/shovill" LABEL lisence="https://github.com/tseemann/shovill/blob/master/LICENSE" LABEL maintainer="Curtis Kapsak" -LABEL maintainer.email="pjx8@cdc.gov" +LABEL maintainer.email="kapsakcj@gmail.com" # install dependencies, cleanup apt garbage -RUN apt-get update && apt-get install -y python \ +RUN apt-get update && apt-get install -y --no-install-recommends \ + python \ wget \ + ca-certificates \ pigz \ zlib1g-dev \ make \ @@ -29,111 +63,110 @@ RUN apt-get update && apt-get install -y python \ libssl-dev \ libfindbin-libs-perl && \ apt-get clean && apt-get autoclean && \ - rm -rf /var/lib/apt/lists/* + rm -rfv /var/lib/apt/lists/* # SPAdes -ENV spadesVer=3.14.1 -RUN wget https://github.com/ablab/spades/releases/download/v${spadesVer}/SPAdes-${spadesVer}-Linux.tar.gz && \ - tar -xzf SPAdes-${spadesVer}-Linux.tar.gz && \ - rm SPAdes-${spadesVer}-Linux.tar.gz +RUN wget https://github.com/ablab/spades/releases/download/v${SPADES_VER}/SPAdes-${SPADES_VER}-Linux.tar.gz && \ + tar -xzf SPAdes-${SPADES_VER}-Linux.tar.gz && \ + rm -v SPAdes-${SPADES_VER}-Linux.tar.gz -# Seqtk 1.3 -RUN mkdir seqtk && \ 
+# Seqtk install +RUN mkdir -v seqtk && \ cd seqtk && \ - wget https://github.com/lh3/seqtk/archive/v1.3.tar.gz && \ - tar -zxf v1.3.tar.gz && \ - rm v1.3.tar.gz && \ - cd seqtk-1.3/ && \ + wget https://github.com/lh3/seqtk/archive/v${SEQTK_VER}.tar.gz && \ + tar -zxf v${SEQTK_VER}.tar.gz && \ + rm -v v${SEQTK_VER}.tar.gz && \ + cd seqtk-${SEQTK_VER}/ && \ make && \ make install # kmc RUN mkdir kmc && \ cd kmc && \ - wget https://github.com/refresh-bio/KMC/releases/download/v3.1.1/KMC3.1.1.linux.tar.gz && \ - tar -xzf KMC3.1.1.linux.tar.gz && \ - rm KMC3.1.1.linux.tar.gz - -# lighter 1.1.1 -RUN wget https://github.com/mourisl/Lighter/archive/v1.1.1.tar.gz && \ - tar -zxf v1.1.1.tar.gz && \ - rm -rf v1.1.1.tar.gz && \ - cd Lighter-1.1.1 && \ + wget https://github.com/refresh-bio/KMC/releases/download/v${KMC_VER}/KMC${KMC_VER}.linux.tar.gz && \ + tar -xzf KMC${KMC_VER}.linux.tar.gz && \ + rm -v KMC${KMC_VER}.linux.tar.gz + +# lighter +RUN wget https://github.com/mourisl/Lighter/archive/v${LIGHTER_VER}.tar.gz && \ + tar -zxf v${LIGHTER_VER}.tar.gz && \ + rm -rvf v${LIGHTER_VER}.tar.gz && \ + cd Lighter-${LIGHTER_VER} && \ make -# trimmomatic 0.38 +# trimmomatic RUN mkdir trimmomatic && \ cd trimmomatic && \ - wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.38.zip && \ - unzip Trimmomatic-0.38.zip && \ - rm -rf Trimmomatic-0.38.zip && \ - chmod +x Trimmomatic-0.38/trimmomatic-0.38.jar && \ + wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-${TRIMMOMATIC_VER}.zip && \ + unzip Trimmomatic-${TRIMMOMATIC_VER}.zip && \ + rm -rf Trimmomatic-${TRIMMOMATIC_VER}.zip && \ + chmod +x Trimmomatic-${TRIMMOMATIC_VER}/trimmomatic-${TRIMMOMATIC_VER}.jar && \ echo "#!/bin/bash" >> trimmomatic && \ - echo "exec java -jar /trimmomatic/Trimmomatic-0.38/trimmomatic-0.38.jar """"$""@"""" " >> trimmomatic && \ + echo "exec java -jar /trimmomatic/Trimmomatic-${TRIMMOMATIC_VER}/trimmomatic-${TRIMMOMATIC_VER}.jar """"$""@"""" " >> trimmomatic && \ chmod +x trimmomatic -# bwa (mem) 0.7.17 +# bwa (mem) install RUN mkdir bwa && \ cd bwa && \ - wget https://github.com/lh3/bwa/releases/download/v0.7.17/bwa-0.7.17.tar.bz2 && \ - tar -xjf bwa-0.7.17.tar.bz2 && \ - rm bwa-0.7.17.tar.bz2 && \ - cd bwa-0.7.17 && \ + wget https://github.com/lh3/bwa/releases/download/v${BWA_VER}/bwa-${BWA_VER}.tar.bz2 && \ + tar -xjf bwa-${BWA_VER}.tar.bz2 && \ + rm bwa-${BWA_VER}.tar.bz2 && \ + cd bwa-${BWA_VER} && \ make -# samtools 1.10 +# samtools install RUN mkdir samtools && \ cd samtools && \ - wget https://github.com/samtools/samtools/releases/download/1.10/samtools-1.10.tar.bz2 && \ - tar -xjf samtools-1.10.tar.bz2 && \ - rm samtools-1.10.tar.bz2 && \ - cd samtools-1.10 && \ + wget https://github.com/samtools/samtools/releases/download/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ + tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ + rm samtools-${SAMTOOLS_VER}.tar.bz2 && \ + cd samtools-${SAMTOOLS_VER} && \ ./configure && \ make && \ make install -# skesa 2.3.0 (skesa 2.4.0 binary works better on ubuntu:bionic, so not upgrading here) +# skesa 2.3.0 binary (skesa 2.4.0 binary works better on ubuntu:bionic, so not upgrading here) RUN mkdir skesa && \ cd skesa && \ - wget https://github.com/ncbi/SKESA/releases/download/v2.3.0/skesa.centos6.10 && \ + wget https://github.com/ncbi/SKESA/releases/download/v${SKESA_VER}/skesa.centos6.10 && \ mv skesa.centos6.10 skesa && \ chmod +x skesa # MEGAHIT 1.1.4 binary (I'm pretty sure these are binaries at this point) RUN mkdir megahit 
&& \ cd megahit && \ - wget https://github.com/voutcn/megahit/releases/download/v1.1.4/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin.tar.gz && \ - tar -xzf megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin.tar.gz && \ - rm megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin.tar.gz + wget https://github.com/voutcn/megahit/releases/download/v${MEGAHIT_VER}/megahit_v${MEGAHIT_VER}_LINUX_CPUONLY_x86_64-bin.tar.gz && \ + tar -xzf megahit_v${MEGAHIT_VER}_LINUX_CPUONLY_x86_64-bin.tar.gz && \ + rm megahit_v${MEGAHIT_VER}_LINUX_CPUONLY_x86_64-bin.tar.gz # Velvet 1.2.10 RUN mkdir velvet && \ cd velvet && \ - wget https://github.com/dzerbino/velvet/archive/v1.2.10.tar.gz && \ - tar -xzf v1.2.10.tar.gz && \ - rm -rf v1.2.10.tar.gz && \ - cd velvet-1.2.10 && \ + wget https://github.com/dzerbino/velvet/archive/v${VELVET_VER}.tar.gz && \ + tar -xzf v${VELVET_VER}.tar.gz && \ + rm -rf v${VELVET_VER}.tar.gz && \ + cd velvet-${VELVET_VER} && \ make # Flash 1.2.11 RUN mkdir flash && \ cd flash && \ - wget https://sourceforge.net/projects/flashpage/files/FLASH-1.2.11.tar.gz && \ - tar -zxf FLASH-1.2.11.tar.gz && \ - rm -rf FLASH-1.2.11.tar.gz && \ - cd FLASH-1.2.11 && \ + wget https://sourceforge.net/projects/flashpage/files/FLASH-${FLASH_VER}.tar.gz && \ + tar -zxf FLASH-${FLASH_VER}.tar.gz && \ + rm -rf FLASH-${FLASH_VER}.tar.gz && \ + cd FLASH-${FLASH_VER} && \ make -# pilon 1.22 +# pilon RUN mkdir pilon && \ cd pilon && \ - wget https://github.com/broadinstitute/pilon/releases/download/v1.22/pilon-1.22.jar && \ - chmod +x pilon-1.22.jar && \ + wget https://github.com/broadinstitute/pilon/releases/download/v${PILON_VER}/pilon-${PILON_VER}.jar && \ + chmod +x pilon-${PILON_VER}.jar && \ echo "#!/bin/bash" >> pilon && \ - echo "exec java -jar /pilon/pilon-1.22.jar """"$""@"""" " >> pilon && \ + echo "exec java -jar /pilon/pilon-${PILON_VER}.jar """"$""@"""" " >> pilon && \ chmod +x pilon -# Samclip +# Samclip (not going to pin a version, this is the recommended way of installing and there have not been updates since March 2020) RUN mkdir samclip && \ cd samclip && \ wget https://raw.githubusercontent.com/tseemann/samclip/master/samclip && \ @@ -142,28 +175,55 @@ RUN mkdir samclip && \ # aaannnddd finally install shovill v1.1.0 itself # extra perl module I had to install via apt-get: libfindbin-libs-perl # create /data for working directory -RUN mkdir shovill && \ - cd shovill && \ - wget https://github.com/tseemann/shovill/archive/v1.1.0.tar.gz && \ - tar -xzf v1.1.0.tar.gz && \ - rm v1.1.0.tar.gz && \ +RUN wget https://github.com/tseemann/shovill/archive/v${SHOVILL_VER}.tar.gz && \ + tar -xzf v${SHOVILL_VER}.tar.gz && \ + rm v${SHOVILL_VER}.tar.gz && \ mkdir /data -# set /data as working directory +# set /data as final working directory WORKDIR /data # set $PATH's ENV PATH="${PATH}:\ -/SPAdes-${spadesVer}-Linux/bin:\ +/SPAdes-${SPADES_VER}-Linux/bin:\ /kmc:\ -/Lighter-1.1.1:\ +/Lighter-${LIGHTER_VER}:\ /trimmomatic:\ -/bwa/bwa-0.7.17:\ +/bwa/bwa-${BWA_VER}:\ /skesa:\ -/megahit/megahit_v1.1.4_LINUX_CPUONLY_x86_64-bin:\ -/velvet/velvet-1.2.10:\ -/flash/FLASH-1.2.11:\ -/shovill/shovill-1.1.0/bin:\ +/megahit/megahit_v${MEGAHIT_VER}_LINUX_CPUONLY_x86_64-bin:\ +/velvet/velvet-${VELVET_VER}:\ +/flash/FLASH-${FLASH_VER}:\ +/shovill-${SHOVILL_VER}/bin:\ /pilon:\ /samclip"\ LC_ALL=C + +# test layer +FROM app as test + +# reinstantiating variable so it's available for cd cmd below +ARG SHOVILL_VER + +# so that the below commands are run with /bin/bash shell and not /bin/sh - needed for bash-specific tricks below +SHELL ["/bin/bash", "-c"] + +# 
test shamelessly stolen & modified from: https://github.com/tseemann/shovill/blob/master/.travis.yml +RUN cd /shovill-${SHOVILL_VER}/ && \ +kmc && \ +skesa --version && \ +! shovill --doesnotexist && \ +echo "TESTING SHOVILL + SPADES" && \ +shovill --outdir out.spades --assembler spades --R1 test/R1.fq.gz --R2 test/R2.fq.gz --nostitch --noreadcorr --nocorr && \ +grep '>' out.spades/contigs.fa && \ +echo "TESTING SHOVILL + MEGAHIT" && \ +shovill --outdir out.megahit --assembler megahit --R1 test/R1.fq.gz --R2 test/R2.fq.gz --trim && \ +grep '>' out.megahit/contigs.fa && \ +echo "TESTING SHOVILL + VELVET" && \ +shovill --outdir out.velvet --assembler velvet --R1 test/R1.fq.gz --R2 test/R2.fq.gz --ram 4 --noreadcorr --nocorr && \ +grep '>' out.velvet/contigs.fa && \ +echo "TESTING SHOVILL + SKESA" && \ +shovill --outdir out.skesa --assembler skesa --R1 test/R1.fq.gz --R2 test/R2.fq.gz --ram 4 --noreadcorr --nocorr && \ +grep '>' out.skesa/contigs.fa + +RUN shovill --help && shovill --check && shovill --version \ No newline at end of file diff --git a/shovill/1.1.0/README.md b/shovill/1.1.0/README.md new file mode 100644 index 000000000..b314705e0 --- /dev/null +++ b/shovill/1.1.0/README.md @@ -0,0 +1,46 @@ +# Shovill v1.1.0 docker image + +Main tool : [Shovill](https://github.com/tseemann/shovill) + +Additional tools: + +- SPAdes 3.15.5 +- SKESA 2.3.0 +- megahit 1.1.4 +- velvet 1.2.10 +- seqtk 1.3 +- kmc 3.1.1 +- lighter 1.1.1 +- trimmomatic 0.39 +- bwa 0.7.17 +- pilon 1.24 +- samclip +- perl 5.22.1 +- python 2.7.12 +- pigz + +Full documentation: [https://github.com/tseemann/shovill](https://github.com/tseemann/shovill) + +> *Assemble bacterial isolate genomes from Illumina paired-end reads* + +## NOTE ON DEC 2022 UPGRADES + +The Shovill 1.1.0 docker image was originally developed in May 2020 using this version of the dockerfile: [commit 615d1cfeb4](https://github.com/StaPH-B/docker-builds/blob/615d1cfeb464df4635b2efbe1b359351c5b5b0f7/shovill/1.1.0/Dockerfile) + +This older docker image will be maintained under this docker image tag: `staphb/shovill:1.1.0` + +The Shovill docker image has since been upgraded via [Pull Request #511](https://github.com/StaPH-B/docker-builds/pull/511), which upgraded the following tools in the shovill 1.1.0 dockerfile. Other smaller changes were also introduced (see the PR for details). + +- upgraded spades from 3.14.1 to 3.15.5 +- upgraded pilon from 1.22 to 1.24 +- upgraded samtools from 1.10 to 1.16.1 +- upgraded trimmomatic from 0.38 to 0.39 + +This newer docker image will be maintained under the docker image tags: `staphb/shovill:1.1.0-2022Dec` & `staphb/shovill:latest` in an effort to preserve the older docker image & allow users to more easily transition to the new docker image in their pipelines. + +### Example Usage + +```bash +# run shovill on the test FASTQ files bundled with shovill +shovill --outdir /data/out.spades --assembler spades --R1 /shovill-1.1.0/test/R1.fq.gz --R2 /shovill-1.1.0/test/R2.fq.gz +``` diff --git a/skani/0.2.0/Dockerfile b/skani/0.2.0/Dockerfile new file mode 100644 index 000000000..95bba6d94 --- /dev/null +++ b/skani/0.2.0/Dockerfile @@ -0,0 +1,47 @@ +ARG SKANI_VER="0.2.0" + +## Builder ## +FROM rust:1.67 as builder + +ARG SKANI_VER + +RUN wget https://github.com/bluenote-1577/skani/archive/refs/tags/v${SKANI_VER}.tar.gz &&\ + tar -xvf v${SKANI_VER}.tar.gz &&\ + cd skani-${SKANI_VER} &&\ + cargo install --path .
--root ~/.cargo &&\ + chmod +x /root/.cargo/bin/skani + +## App ## +FROM ubuntu:jammy as app + +ARG SKANI_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="skani" +LABEL software.version=${SKANI_VER} +LABEL description="skani is a program for calculating average nucleotide identity (ANI) from DNA sequences (contigs/MAGs/genomes) for ANI > ~80%." +LABEL website="https://github.com/bluenote-1577/skani" +LABEL license="https://github.com/bluenote-1577/skani/blob/main/LICENSE" +LABEL maintainer="Kutluhan Incekara" +LABEL maintainer.email="kutluhan.incekara@ct.gov" + +# copy app from builder stage +COPY --from=builder /root/.cargo/bin/skani /usr/local/bin/skani + +# default run command +CMD skani -h + +# singularity compatibility +ENV LC_ALL=C + +WORKDIR /data + +## Test ## +FROM app as test + +RUN apt-get update && apt-get install -y wget &&\ + wget https://github.com/bluenote-1577/skani/raw/v0.2.0/refs/e.coli-EC590.fasta &&\ + wget https://github.com/bluenote-1577/skani/raw/v0.2.0/refs/e.coli-K12.fasta + +RUN skani dist e.coli-EC590.fasta e.coli-K12.fasta \ No newline at end of file diff --git a/skani/0.2.0/README.md b/skani/0.2.0/README.md new file mode 100644 index 000000000..439299d7d --- /dev/null +++ b/skani/0.2.0/README.md @@ -0,0 +1,23 @@ +# skani container + +Main tool : [skani](https://github.com/bluenote-1577/skani) + +Full documentation: https://github.com/bluenote-1577/skani/wiki + +skani is a program for calculating average nucleotide identity (ANI) from DNA sequences (contigs/MAGs/genomes) for ANI > ~80%. + +## Example Usage + +Quick ANI calculation: +```bash +skani dist genome1.fa genome2.fa +``` +Memory-efficient database search: +```bash +skani sketch genomes/* -o database +skani search -d database query1.fa query2.fa ... +``` +All-to-all comparison: +```bash +skani triangle genomes/* +``` diff --git a/skani/0.2.1/Dockerfile b/skani/0.2.1/Dockerfile new file mode 100644 index 000000000..f2aa93a60 --- /dev/null +++ b/skani/0.2.1/Dockerfile @@ -0,0 +1,47 @@ +ARG SKANI_VER="0.2.1" + +## Builder ## +FROM rust:1.67 as builder + +ARG SKANI_VER + +RUN wget https://github.com/bluenote-1577/skani/archive/refs/tags/v${SKANI_VER}.tar.gz &&\ + tar -xvf v${SKANI_VER}.tar.gz &&\ + cd skani-${SKANI_VER} &&\ + cargo install --path . --root ~/.cargo &&\ + chmod +x /root/.cargo/bin/skani + +## App ## +FROM ubuntu:jammy as app + +ARG SKANI_VER + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="skani" +LABEL software.version=${SKANI_VER} +LABEL description="skani is a program for calculating average nucleotide identity (ANI) from DNA sequences (contigs/MAGs/genomes) for ANI > ~80%." 
+LABEL website="https://github.com/bluenote-1577/skani" +LABEL license="https://github.com/bluenote-1577/skani/blob/main/LICENSE" +LABEL maintainer="Kutluhan Incekara" +LABEL maintainer.email="kutluhan.incekara@ct.gov" + +# copy app from builder stage +COPY --from=builder /root/.cargo/bin/skani /usr/local/bin/skani + +# default run command +CMD skani -h + +# singularity compatibility +ENV LC_ALL=C + +WORKDIR /data + +## Test ## +FROM app as test + +RUN apt-get update && apt-get install -y wget &&\ + wget https://github.com/bluenote-1577/skani/raw/v0.2.0/refs/e.coli-EC590.fasta &&\ + wget https://github.com/bluenote-1577/skani/raw/v0.2.0/refs/e.coli-K12.fasta + +RUN skani dist e.coli-EC590.fasta e.coli-K12.fasta \ No newline at end of file diff --git a/skani/0.2.1/README.md b/skani/0.2.1/README.md new file mode 100644 index 000000000..e0d986aef --- /dev/null +++ b/skani/0.2.1/README.md @@ -0,0 +1,69 @@ + + +# skani container + +Main tool: [skani](https://github.com/bluenote-1577/skani) + +Code repository: https://github.com/bluenote-1577/skani + +Basic information on how to use this tool: +- executable: skani +- help: -h, --help +- version: -V, --version +- description: skani is a program for calculating average nucleotide identity (ANI) from DNA sequences (contigs/MAGs/genomes) for ANI > ~80%. + +Additional information: + +This container does not contain any database or reference genome. + +Full documentation: https://github.com/bluenote-1577/skani/wiki + +## Example Usage + +Quick ANI calculation: +```bash +skani dist genome1.fa genome2.fa +``` +Memory-efficient database search: +```bash +skani sketch genomes/* -o database +skani search -d database query1.fa query2.fa ... +``` +All-to-all comparison: +```bash +skani triangle genomes/* +``` + + + diff --git a/snippy/4.6.0_SC2/Dockerfile b/snippy/4.6.0_SC2/Dockerfile index 7929459bf..328a50963 100644 --- a/snippy/4.6.0_SC2/Dockerfile +++ b/snippy/4.6.0_SC2/Dockerfile @@ -1,9 +1,14 @@ -FROM ubuntu:bionic +ARG SNIPPY_VER="4.6.0" + +FROM ubuntu:bionic as app + +ARG SNIPPY_VER LABEL base.image="ubuntu:bionic" +LABEL dockerfile.version="2" LABEL container.version="1.0.0" LABEL software="Snippy" -LABEL software.version="4.6.0" +LABEL software.version="${SNIPPY_VER}" LABEL description="Rapid haploid variant calling and core genome alignment" LABEL website="https://github.com/tseemann/snippy" LABEL license="https://github.com/tseemann/snippy/blob/master/LICENSE" @@ -77,19 +82,38 @@ RUN apt-get update && apt-get -y --no-install-recommends install \ # wgsim 0.3.1-r13 - bundled for testing snippy -# Snippy 4.6.0, make /data, and grab reference genome -RUN wget https://github.com/tseemann/snippy/archive/v4.6.0.tar.gz && \ - tar -zxf v4.6.0.tar.gz && \ - rm v4.6.0.tar.gz && \ +# Snippy ${SNIPPY_VER}, make /data, and grab reference genome +RUN wget https://github.com/tseemann/snippy/archive/v${SNIPPY_VER}.tar.gz && \ + tar -zxf v${SNIPPY_VER}.tar.gz && \ + rm v${SNIPPY_VER}.tar.gz && \ mkdir /data /reference && \ curl -s ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/858/895/GCF_009858895.2_ASM985889v3/GCF_009858895.2_ASM985889v3_genomic.fna.gz | gzip -dc > /reference/MN908947_SC2_reference.fasta # set PATH and perl local settings ENV PATH="${PATH}:\ -/snippy-4.6.0/bin:\ -/snippy-4.6.0/binaries/linux:\ -/snippy-4.6.0/binaries/noarch" \ +/snippy-${SNIPPY_VER}/bin:\ +/snippy-${SNIPPY_VER}/binaries/linux:\ +/snippy-${SNIPPY_VER}/binaries/noarch" \ LC_ALL=C WORKDIR /data + +FROM app as test + +ARG SNIPPY_VER + +# run tests from 
https://github.com/tseemann/snippy/blob/master/.travis.yml +RUN realpath . && \ + snippy --version && \ + snippy --check && \ + snippy-core --version && \ + snippy-core --check && \ + cd /snippy-${SNIPPY_VER} && \ + make -C test + +WORKDIR /test +# Download SC2 fastqs for test, run Snippy 4.6.0 +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR918/006/ERR9187736/ERR9187736_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR918/006/ERR9187736/ERR9187736_2.fastq.gz && \ + snippy --cpus 2 --outdir results --ref /reference/MN908947_SC2_reference.fasta --R1 ERR9187736_1.fastq.gz --R2 ERR9187736_2.fastq.gz diff --git a/snippy/4.6.0_SC2/README.md b/snippy/4.6.0_SC2/README.md new file mode 100644 index 000000000..95224f765 --- /dev/null +++ b/snippy/4.6.0_SC2/README.md @@ -0,0 +1,53 @@ +# Snippy docker image + +> Snippy: Rapid haploid variant calling and core genome alignment + +Main tool : [Snippy](https://github.com/tseemann/snippy) + +Additional tools: +- perl 5.26.1 +- bioperl 1.7.2 +- hmmer +- python 2.7.17 +- python3 3.6.9 + +## Example Usage +```bash +# paired end Illumina reads with reference genome in fasta format +$ snippy \ + --ref CVOW01.fasta \ + --R1 SRR1157476_1.fastq.gz \ + --R2 SRR1157476_2.fastq.gz \ + --cpus 2 \ + --outdir results + +# Example of successful snippy run +[19:12:16] Creating extra output files: BED GFF CSV TXT HTML +[19:12:16] Identified 17 variants. +[19:12:16] Result folder: results +[19:12:16] Result files: +[19:12:16] * results/snps.aligned.fa +[19:12:16] * results/snps.bam +[19:12:16] * results/snps.bam.bai +[19:12:16] * results/snps.bed +[19:12:16] * results/snps.consensus.fa +[19:12:16] * results/snps.consensus.subs.fa +[19:12:16] * results/snps.csv +[19:12:16] * results/snps.filt.vcf +[19:12:16] * results/snps.gff +[19:12:16] * results/snps.html +[19:12:16] * results/snps.log +[19:12:16] * results/snps.raw.vcf +[19:12:16] * results/snps.subs.vcf +[19:12:16] * results/snps.tab +[19:12:16] * results/snps.txt +[19:12:16] * results/snps.vcf +[19:12:16] * results/snps.vcf.gz +[19:12:16] * results/snps.vcf.gz.csi +[19:12:16] Walltime used: 33 seconds +[19:12:16] Wishing you a life free of homopolymer errors. +[19:12:16] Done. 
+ +# snippy-core on multiple snippy results directories +$ snippy-core --prefix core results1 results2 results3 results4 +``` diff --git a/spades/3.15.4/Dockerfile b/spades/3.15.4/Dockerfile index 5456b3857..c350c7e6a 100644 --- a/spades/3.15.4/Dockerfile +++ b/spades/3.15.4/Dockerfile @@ -17,12 +17,13 @@ LABEL maintainer.email="kapsakcj@gmail.com" # python v3.8.10 is installed here; point 'python' to python3 RUN apt-get update && apt-get install --no-install-recommends -y python3 \ python3-distutils \ - wget && \ + wget \ + ca-certificates && \ apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 10 # install SPAdes binary; make /data -RUN wget http://cab.spbu.ru/files/release${SPAdesVer}/SPAdes-${SPAdesVer}-Linux.tar.gz && \ +RUN wget https://github.com/ablab/spades/releases/download/v${SPAdesVer}/SPAdes-${SPAdesVer}-Linux.tar.gz && \ tar -xzf SPAdes-${SPAdesVer}-Linux.tar.gz && \ rm -r SPAdes-${SPAdesVer}-Linux.tar.gz && \ mkdir /data diff --git a/spades/3.15.5/Dockerfile b/spades/3.15.5/Dockerfile index 02dbeb23c..a88fcd6e8 100644 --- a/spades/3.15.5/Dockerfile +++ b/spades/3.15.5/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:focal as app ARG SPADES_VER="3.15.5" LABEL base.image="ubuntu:focal" -LABEL dockerfile.version="1" +LABEL dockerfile.version="2" LABEL software="SPAdes" LABEL software.version="${SPADES_VER}" LABEL description="de novo DBG genome assembler" @@ -17,7 +17,9 @@ LABEL maintainer.email="kapsakcj@gmail.com" # python v3.8.10 is installed here; point 'python' to python3 RUN apt-get update && apt-get install --no-install-recommends -y python3 \ python3-distutils \ - wget && \ + wget \ + pigz \ + ca-certificates && \ apt-get autoclean && rm -rf /var/lib/apt/lists/* && \ update-alternatives --install /usr/bin/python python /usr/bin/python3 10 diff --git a/srst2/0.2.0-vibrio-230224/Dockerfile b/srst2/0.2.0-vibrio-230224/Dockerfile new file mode 100644 index 000000000..e3a15837c --- /dev/null +++ b/srst2/0.2.0-vibrio-230224/Dockerfile @@ -0,0 +1,95 @@ +FROM ubuntu:xenial as app + +# for easy upgrade later. 
ARG variables only persist at build time +# Main package version +ARG SRST2_VER=0.2.0 + +# Dependency versions +ARG BOWTIE2_VER=2.2.6-2 +ARG SAMTOOLS_VER=0.1.18 + +LABEL base.image="ubuntu:xenial" +LABEL dockerfile.version="1" +LABEL software="SRST2" +LABEL software.version="v0.2.0" +LABEL description="Short Read Sequence Typing for Bacterial Pathogens" +LABEL website="https://github.com/katholt/srst2" +LABEL license="https://github.com/katholt/srst2/blob/master/LICENSE.txt" +LABEL maintainer="Holly Halstead" +LABEL maintainer.email="holly.halstead@doh.wa.gov" +LABEL maintainer2="Inês Mendes" +LABEL maintainer2.email="ines.mendes@theiagen.com" + +# install dependencies; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ +python2.7 \ +python-scipy \ +python-biopython \ +make \ +libc6-dev \ +g++ \ +zlib1g-dev \ +build-essential \ +git \ +libx11-dev \ +xutils-dev \ +zlib1g-dev \ +bowtie2=${BOWTIE2_VER} \ +curl \ +libncurses5-dev \ +unzip \ +wget \ +locate \ +python-pip \ +python-setuptools && \ +apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# download samtools source code; unzip; compile; put executable in /usr/local/bin +RUN curl -O -L https://sourceforge.net/projects/samtools/files/samtools/${SAMTOOLS_VER}/samtools-${SAMTOOLS_VER}.tar.bz2 && \ +tar -xjf samtools-${SAMTOOLS_VER}.tar.bz2 && \ +rm samtools-${SAMTOOLS_VER}.tar.bz2 && \ +cd samtools-${SAMTOOLS_VER} && \ +make && \ +cp -v samtools /usr/local/bin + +# Install SRST2; make /data +RUN pip install biopython git+https://github.com/katholt/srst2.git@v${SRST2_VER} && \ +mkdir /data + +# add custom database to /vibrio-cholerae-db directory, make readable to all +ADD vibrio_230224.fasta /vibrio-cholerae-db/ + +# index custom database in /vibrio-cholerae-db directory; ensure files are readable to all users +RUN bowtie2-build /vibrio-cholerae-db/vibrio_230224.fasta /vibrio-cholerae-db/vibrio_230224.fasta && \ + samtools faidx /vibrio-cholerae-db/vibrio_230224.fasta && \ + chmod -R 755 /vibrio-cholerae-db + +# set final working directory +WORKDIR /data + +# test layer +FROM app as test + +# check help options +RUN srst2 --version && \ +getmlst.py -h && \ +slurm_srst2.py -h + +# test getmlst.py script as well as usage of srst2 for calling the ST on a Shigella sonnei isolate +# https://www.ebi.ac.uk/ena/browser/view/ERR024070 +RUN getmlst.py --species "Escherichia coli#1" && \ +wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR024/ERR024070/ERR024070_1.fastq.gz && \ +wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR024/ERR024070/ERR024070_2.fastq.gz && \ +srst2 --input_pe ERR024070*.fastq.gz --output shigella1 --log --save_scores --mlst_db Escherichia_coli#1.fasta --mlst_definitions profiles_csv --mlst_delimiter '_' && \ +ls shigella1__ERR024070.Escherichia_coli#1.pileup \ +shigella1__ERR024070.Escherichia_coli#1.scores \ +shigella1__ERR024070.Escherichia_coli#1.sorted.bam \ +shigella1__mlst__Escherichia_coli#1__results.txt + +# test for vibrio custom DB, print output summary +# https://www.ebi.ac.uk/ena/browser/view/SRR7062495 +RUN wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR706/005/SRR7062495/SRR7062495_1.fastq.gz && \ +wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR706/005/SRR7062495/SRR7062495_2.fastq.gz && \ +srst2 --input_pe SRR7062495*.fastq.gz --gene_db /vibrio-cholerae-db/vibrio_230224.fasta --output SRR7062495 && \ +ls SRR7062495__genes__vibrio_230224__results.txt && \ +cat SRR7062495__genes__vibrio_230224__results.txt diff --git a/srst2/0.2.0-vibrio-230224/README.md b/srst2/0.2.0-vibrio-230224/README.md new 
file mode 100644 index 000000000..290594272 --- /dev/null +++ b/srst2/0.2.0-vibrio-230224/README.md @@ -0,0 +1,77 @@ +# SRST2 container + +Main tool : [SRST2](https://github.com/katholt/srst2) 0.2.0 + +Additional tools: + +- Biopython 1.76 +- [Bowtie2](https://github.com/BenLangmead/bowtie2) 2.2.6-2 +- python 2.7 +- [SAMtools](https://github.com/samtools/samtools) 0.1.18 +- SciPy 0.16 + +Full documentation: [https://github.com/katholt/srst2](https://github.com/katholt/srst2) + +SRST2 performs short read sequence typing for bacterial pathogens when given Illumina sequence data, an MLST database, and/or a database of gene sequences such as resistance genes, virulence genes, etc. + +## Custom *Vibrio cholerae* database info + +This docker image includes a *Vibrio cholerae*-specific database of gene targets (traditionally used in PCR methods) for detecting O1 & O139 serotypes, toxin-production markers, and Biotype markers within the O1 serogroup ("El Tor" or "Classical" biotypes). These sequences were shared via personal communication with Dr. Christine Lee, of the National Listeria, Yersinia, Vibrio and Enterobacterales Reference Laboratory within the Enteric Diseases Laboratory Branch at CDC. + +The genes (and their purposes) included in the database are as follows: + +- `ctxA` - Cholera toxin, an indication of toxigenic *V. cholerae* +- `ompW` - outer membrane protein, a *V. cholerae* species marker (presence of any allele of this gene distinguishes *V. cholerae* from *V. parahaemolyticus* and *V. vulnificus*) +- `tcpA` - toxin co-pilus A, used to infer Biotype, either "El Tor" or "Classical" + - database includes an allele for each Biotype: `tcpA_classical` and `tcpA_ElTor` +- `toxR` - transcriptional activator (controls cholera toxin, pilus, and outer-membrane protein expression) - Species marker (allele distinguishes *V. cholerae* from *V. parahaemolyticus* and *V. vulnificus*) +- `wbeN` - O antigen encoding region - used to identify the O1 serogroup +- `wbfR` - O antigen encoding region - used to identify the O139 serogroup + +The database's FASTA file & index files are located within `/vibrio-cholerae-db/` in the container's file system and can be utilized via the example command below.
+ +## Basic usage - MLST + +### 1 - Gather your input files + +```bash +getmlst.py --species 'Escherichia coli#1' +``` + +### 2 - Run MLST + +```bash +srst2 --input_pe strainA_1.fastq.gz strainA_2.fastq.gz --output strainA_test --log --mlst_db Escherichia_coli#1.fasta --mlst_definitions profiles_csv --mlst_delimiter _ +``` + +### 3 - Check the outputs + +MLST results are output in: `strainA_test__mlst__Escherichia_coli#1__results.txt` + +## Basic usage - Vibrio characterization + +### 1 - Run srst2 + +```bash +srst2 --input_pe SRR7062495_1.fastq.gz SRR7062495_2.fastq.gz --gene_db /vibrio-cholerae-db/vibrio_230224.fasta --output SRR7062495_test +``` + +### 2 - Check the outputs + +Summary results are output in: `SRR7062495_test__genes__vibrio_230224__results.txt` and detailed results are found in `SRR7062495_test__fullgenes__vibrio_230224__results.txt` + +```bash +# summary +$ column -t -s $'\t' -n SRR7062495_test__genes__vibrio_230224__results.txt +Sample ctxA ompW tcpA_ElTor toxR wbeN_O1 +SRR7062495 ctxA_O395 ompW_O395* tcpA_ElTor_C6706* toxR_O395* wbeN_O1_INDRE + +# detailed results +$ column -t -s $'\t' -n SRR7062495_test__fullgenes__vibrio_230224__results.txt +Sample DB gene allele coverage depth diffs uncertainty divergence length maxMAF clusterid seqid annotation +SRR7062495 vibrio_230224 ctxA ctxA_O395 100.0 103.877 0.0 777 0.063 1 1 CP000627.1 +SRR7062495 vibrio_230224 ompW ompW_O395 100.0 78.414 6snp 0.917 654 0.04 2 3 CP000626.1 +SRR7062495 vibrio_230224 toxR toxR_O395 100.0 74.081 14snp 1.582 885 0.053 5 6 CP000627.1 +SRR7062495 vibrio_230224 tcpA_ElTor tcpA_ElTor_C6706 100.0 82.698 1snp 0.148 675 0.046 4 5 CP064350.1 +SRR7062495 vibrio_230224 wbeN_O1 wbeN_O1_INDRE 100.0 112.119 0.0 2478 0.091 6 7 +``` diff --git a/srst2/0.2.0-vibrio-230224/vibrio_230224.fasta b/srst2/0.2.0-vibrio-230224/vibrio_230224.fasta new file mode 100644 index 000000000..99f374107 --- /dev/null +++ b/srst2/0.2.0-vibrio-230224/vibrio_230224.fasta @@ -0,0 +1,16 @@ +>1__ctxA__ctxA_O395__1 CP000627.1 +ATGGTAAAGATAATATTTGTGTTTTTTATTTTCTTATCATCATTTTCATATGCAAATGATGATAAGTTATATCGGGCAGATTCTAGACCTCCTGATGAAATAAAGCAGTCAGGTGGTCTTATGCCAAGAGGACAGAGTGAGTACTTTGACCGAGGTACTCAAATGAATATCAACCTTTATGATCATGCAAGAGGAACTCAGACGGGATTTGTTAGGCACGATGATGGATATGTTTCCACCTCAATTAGTTTGAGAAGTGCCCACTTAGTGGGTCAAACTATATTGTCTGGTCATTCTACTTATTATATATATGTTATAGCCACTGCACCCAACATGTTTAACGTTAATGATGTATTAGGGGCATACAGTCCTCATCCAGATGAACAAGAAGTTTCTGCTTTAGGTGGGATTCCATACTCCCAAATATATGGATGGTATCGAGTTCATTTTGGGGTGCTTGATGAACAATTACATCGTAATAGGGGCTACAGAGATAGATATTACAGTAACTTAGATATTGCTCCAGCAGCAGATGGTTATGGATTGGCAGGTTTCCCTCCGGAGCATAGAGCTTGGAGGGAAGAGCCGTGGATTCATCATGCACCGCCGGGTTGTGGGAATGCTCCAAGATCATCGATGAGTAATACTTGCGATGAAAAAACCCAAAGTCTAGGTGTAAAATTCCTTGACGAATACCAATCTAAAGTTAAAAGACAAATATTTTCAGGCTATCAATCTGATATTGATACACATAATAGAATTAAGGATGAATTATGA +>2__ompW__ompW_RFB16__2 CP043556.1 +ATGAAACAAACCATTTGCGGCCTAGCCGTACTTGCAGCCCTAAGCTCCGCTCCTGTATTTGCTCACCAAGAAGGTGACTTTATTGTGCGCGCGGGTATTGCCTCGGTAGTACCTAATGACAGTAGCGATAAAGTGTTAAACACTCAAAGTGAGTTGGCAGTTAATAGCAATACCCAGTTAGGGTTAACGCTTGGCTATATGTTTACTGACAACATCAGTTTTGAAGTCCTTGCTGCTACGCCATTTTCACATAAGATTTCTACCTCTGGTGGTGAGTTAGGTAGCCTTGGTGATATTGGTGAAACAAAACATTTGCCACCTACCTTTATGGTCCAATACTACTTTGGTGAAGCTAATTCGACTTTCCGTCCATATGTTGGTGCGGGTTTGAATTACACCACTTTCTTTGATGAAAGCTTTAATAGTACGGGTACTAATAATGCATTGAGTGATTTAAAACTGGACGACTCATGGGGACTTGCTGCTAACGTTGGCTTTGATTATATGCTCAATGATAGCTGGTTCCTCAACGCTTCTGTGTGGTATGCCAATATTGAAACAACGGCAACCTACAAAGCAGGTGCAGATGCCAAATCCACGGATGTTGAAATCAATCCTTGGGTATTTATGATCGCGGGTGGTTATAAGTTCTAA +>2__ompW__ompW_O395__3 CP000626.1 
+ATGAAACAAACCATTTGCGGCCTAGCCGTACTTGCAGCCCTAAGCTCCGCTCCTGTATTTGCTCACCAAGAAGGTGACTTTATTGTGCGCGCGGGTATTGCCTCGGTAGTACCTAATGACAGTAGCGATAAAGTGTTAAACACTCAAAGTGAGTTGGCAGTTAATAGCAATACCCAGTTAGGGTTAACGCTTGGCTATATGTTTACTGACAACATCAGTTTTGAAGTCCTCGCTGCTACGCCATTTTCACATAAGATTTCTACCTCTGGTGGTGAGTTAGGTAGCCTTGGTGATATTGGTGAAACAAAACATTTGCCACCTACCTTTATGGTCCAATACTACTTTGGTGAAGCTAATTCGACTTTCCGTCCATATGTTGGTGCGGGTTTGAATTACACCACTTTCTTTGATGAAAGCTTTAATAGTACGGGTACTAATAATGCATTGAGTGATTTAAAACTGGACGACTCATGGGGACTTGCTGCTAACGTTGGCTTTGATTATATGCTCAATGATAGCTGGTTCCTCAACGCTTCTGTGTGGTATGCCAATATTGAAACAACGGCAACCTACAAAGCAGGTGCAGATGCCAAATCCACGGATGTTGAAATCAATCCTTGGGTATTTATGATCGCGGGTGGTTATAAGTTCTAA +>3__tcpA_classical__tcpA_classical_395__4 AF325733.1 +ATGCAATTATTAAAACAGCTTTTTAAGAAGAAATTTGTAAAAGAAGAACACGATAAGAAAACCGGTCAAGAGGGTATGACATTACTCGAAGTGATCATCGTTCTAGGCATTATGGGGGTGGTTTCGGCGGGGGTTGTTACTCTGGCGCAGCGTGCGATTGATTCGCAGAATATGACCAAGGCCGCGCAAAGTCTCAATAGTATCCAAGTTGCACTGACACAGACATACCGTGGTCTAGGTAATTATCCAGCAACAGCTGATGCGACAGCTGCTAGTAAGCTAACTTCAGGCTTGGTTAGTTTAGGTAAAATATCATCCGATGAGGCAAAAAACCCATTCATTGGTACAAATATGAATATTTTTTCATTTCCGCGTAATGCAGCAGCTAATAAAGCATTTGCAATTTCAGTGGATGGTCTGACACAGGCTCAATGCAAGACACTTATTACCAGTGTCGGTGATATGTTCCCATATATTGCAATCAAAGCTGGTGGCGCAGTAGCACTTGCAGATCTAGGTGATTTTGAGAATTCTGCAGCAGCGGCTGAGACAGGCGTTGGTGTGATCAAATCTATCGCTCCCGCTAGTAAGAATTTAGATCTAACGAACATCACTCACGTTGAGAAATTATGTAAAGGTACTGCTCCATTCGGCGTTGCATTTGGTAACAGCTAA +>4__tcpA_ElTor__tcpA_ElTor_C6706__5 CP064350.1 +ATGCAATTATTAAAACAGCTTTTTAAGAAGAAGTTTGTAAAAGAAGAACACGATAAGAAAACCGGTCAAGAGGGTATGACATTACTCGAAGTAATCATTGTTCTGGGTATTATGGGTGTGGTCTCAGCGGGTGTTGTTACGCTGGCTCAGCGTGCGATTGATTCGCAGAATATGACTAAGGCTGCGCAAAATCTAAACAGCGTGCAAATTGCAATGACACAAACTTATCGTAGTCTTGGTAATTATCCAGCTACCGCAAACGCAAATGCTGCTACACAGCTAGCTAATGGTTTGGTCAGCCTTGGTAAGGTTTCAGCTGATGAGGCAAAGAATCCTTTCACTGGTACAGCTATGGGGATTTTCTCATTTCCACGAAACTCTGCAGCGAATAAAGCATTCGCAATTACAGTCGGTGGCTTGACCCAAGCACAATGTAAGACTTTGGTTACAAGCGTAGGGGATATGTTTCCATTTATCAACGTGAAAGAAGGTGCTTTCGCTGCTGTCGCTGATCTTGGTGATTTCGAAACGAGTGTCGCAGATGCTGCTACTGGCGCTGGCGTAATTAAGTCCATTGCACCAGGAAGTGCCAACTTAAACCTAACTAATATCACGCATGTTGAGAAGCTTTGTACAGGAACTGCTCCATTCACAGTAGCTTTTGGTAACAGTTAA +>5__toxR__toxR_O395__6 CP000627.1 +ATGTTCGGATTAGGACACAACTCAAAAGAGATATCGATGAGTCATATTGGTACTAAATTCATTCTTGCTGAAAAATTTACCTTCGATCCCCTAAGCAATACTCTGATTGACAAAGAAGATAGTGAAGAGATCATTCGATTAGGCAGCAACGAAAGCCGAATTCTTTGGCTGCTGGCCCAACGTCCAAACGAGGTGATTTCTCGCAATGATTTGCATGACTTTGTTTGGCGAGAGCAAGGTTTTGAAGTCGATGATTCCAGCTTAACCCAAGCCATTTCGACTCTGCGCAAAATGCTCAAAGATTCGACAAAGTCCCCACAATACGTCAAAACGGTTCCGAAGCGCGGTTACCAATTGATCGCCCGAGTGGAAACGGTTGAAGAAGAGATGGCTCGCGAAAACGAAGCTGCTCATGACATCTCTCAGCCAGAATCTGTCAATGAATACGCAGAATCAAGCAGTGTGCCTTCATCAGCCACTGTAGTGAACACACCGCAGCCAGCCAATGTCGTGGCGAATAAATCGGCTCCAAACTTGGGGAATCGACTGTTTATTCTGATAGCGGTCTTACTTCCCCTCGCAGTATTACTGCTCACTAACCCAAGCCAATCCAGCTTTAAACCCCTAACGGTTGTCGATGGCGTAGCCGTCAATATGCCGAATAACCACCCTGATCTTTCAAATTGGCTACCGTCAATCGAACTGTGCGTTAAAAAATACAATGAAAAACATACTGGTGGACTCAAGCCGATAGAAGTGATTGCCACTGGTGGACAAAATAACCAGTTAACGCTGAATTACATTCACAGCCCTGAAGTTTCAGGGGAAAACATAACCTTACGCATCGTTGCTAACCCTAACGATGCCATCAAAGTGTGTGAGTAG +>6__wbeN_O1__wbeN_O1_INDRE__7 
+ATGCCTGTAAATAACGAAAATCTGACCAGTGTACTTGATGCTCGCCCTTTTGAATTATCAGAAGAGCAAAAATCTCCACTATTTAAAGCGAACTTACTTGCAGAGTTAGTACATCATTATCAATGCAACGAGATGTATCGCAAATTTTGTCAAAAAAACAAATTTGACCCTTTGGTATTTGATGGTGAGGTTGCAGATATTCCACCCATACCTGTGCACATCTTCAAAGCAATAGGACATAAATTATCTTCGGTAAGCGATGATACGATAAAAGCGAAGCTTCAATCTTCTGCTACCAGTGGCGTACCCAGTACCATATTGTTAGATAAGGTAACCGCTCGTCGACAGACTCGAGCAATGGCAAGAGTTATGCAGGAGGTGTTGGGGCCTAAACGTCGCCCGTTTTGCATTATGGATATTGATCCGACAAGCCCAAATGCCACTAACCTTGGGGCTCGTATTGCGGCGGTAAAAGGTTACCTAAACTTCGCCTCAACATCGAAGTATTTTATAGATGCTGATAGCCCAAGTGCTCCACTTGAATTTCTGGAGCAAAAGTTTGTTGAACATCTGAATTCACTTGCGAGTGAAGAGCCGCTCATAATTTTTGGATTCACGTTTGTACTTTATCACACGGTTTTTAAGACCCTTAAAGACAAGGGGATCTCGTTTCAATTGCCTAAAGGTTCTCAGGTTATTCATATTGGTGGTTGGAAAAAACTTGAGTCAGAGAAGGTGGATAAAATTACCTTTAATCGAGATATCGCCTCAGTATTGGGTATTTCTCCTGATGATGTTGTGGATATCTATGGTTTCACTGAACAGATGGGGCTTAATTACCCAGATTGTAAAGCAGGATGGAAACATATTCATGCCTATTCTGACGTAATTATTCGTGATGAATCGAACCTAGAAGTGTGTGGGCCAGGTAAAGTAGGCTTACTTGAGTTTGTAAGCCCACTACCGCATTCATATCCGGGGAATGTTGTACTTACAGATGACCTTGGTGTGATTGAAGAAAGTCTTTGTGAGTGTGGTAAAGCTGGAAAAAGATTCAAAGTCATTGGACGAGCAAAAAAAGCAGAAGTAAGAGGCTGTGGTGATGTTATGTCTGAGAAATTGACTAAAAAGCCATCGTATAAGCCACTTTCTCAACAAGAAGAGAGGTTGACTATCTACCACTCACCGATATTTCTCGATGATACTATGTCCGCATCTCAGCAGCTTGATCAAATCTTTTGTTCTTTAAAGAGGAAGCAAAAATGGCTGGCTAACCAACCATTAGAAGCTATTCTTGGTTTAATCAATGAAGCGCGCAAAAGCTGGTCGAGTACGCCGGAGCTTGACCCTTATCGACATACTGGATTGAACTTCCTAGCTGATTGGTGTGAACCCAATCGTTTGAAAAACCTGCTTGATTCAGCATTGAATGGTCAGCGAGCTTTTTTGGATAATTTTTTACCTCGTAAAGATATTAGCCATAGCTCTCAAAAAGCAATGCCAAGAGGTATCGTATCTCACTGGCTGTCGGGTAACGTACCGTTACTCGGCATGTTTGCGCTGGTACAGAGTATTTTAAGTAAAAATGCCAACATTCTGAAAGTTTCAGCAAGCGAATCGCAAGCTTTGCCAGTATTATTGGCGACTTTTAAAGGCCTTAGCTACACTACCCCAGGTGGTTACACTATCCACGGTGATGACTTATTAGGGACTCTCGCTGTTGTATATTTTGATCGACACCAAACTAAAATTGCAGAGAAGTTTTCGGCCAATGCTGATGTGCGTATAGCTTGGGGGGGACGAGAGGCAATCGAGTCTGTAAGTGGCCTTCCAAAGAAATATAATAGTCAAGATATCCTCTTTGGACCTAAGCTTTCTATGATGGTTGTTGGCAGCGATGCTCTAGACTCTGACAAGGCAATCAGAAAGTTGATTCGTCGGGCTGCAACTGACTCTAGTGTGTTCGATCAGTTTGCTTGCGCTTCTCCGCACACCATTTTTGTTGAGAAGGGCGGTCTAATAACACCTAAAGAGTTTGCAGAGAAGCTTGCCTCAGCAATGGATAAGGCTCTTGTACGCTTACCAACTCAAGTACCAGACATTGGGCAAGCAAATAAGATTCGCTCAAAGATAGCGGAATATGCATTTATTGGCGAATATTGGCATGACAAGCACTTACGTTGGACGGTGTTGTTTGATGAAGGGATAGAGCTTGTTGAGCCGACATATCAACGTGTTATTACAGTAAAAGCAGTTGATAATGTATTTGATGTAGTCGACAGTGTACATGAAGATATCCAAACGGTCGGGTTGGCGATGAATGGTGAAAAGCGTCTTCGTTTTGCTAACGAGATAATGTTAAAAGGTGCGATGCGATGTCCAGATGTCGGCTACATGACCCATTTTGATTCCCCATGGGATGGGGTTGTAGCGCTAGATAGAATGGTTCGTTGGGTAACTCTAGGAGGACCGCTGTGA +>7__wbfR_O139__wbfR_MO45__8 
+ATGTGCGGTGTAGCGGGTTTTATTAGTAAGCGTTTATCGCCGGTCGACTGTTTAACTTCCATGGTCGAAAGTATTATGCATCGTGGACCGAATGATAGTGGTCTATGGGTTGATGATGACTTTGGTGTCTGTTTAGCGCACGCACGCTTATCAATACAGGATTTAAGTTCAGCTGGGCATCAGCCGATGCATTCAAAATCTGAGCGCTATGTTATGATTTTTAATGGTGAAATATACAATCATTTAACATTGCGTGAAGAACTGATCGAGATTGTACCAAGTTACTGGAATGGTCATTCAGATACCGAAACCTTGTTGGCTGGTTTTGAAGTGTGGGGAATAGAACAGACCATACAAAAATGTGTCGGTATGTTTGCTATCGTCCTATGGGATAAAGTACTTAAACAGTTGATCTTGATTCGGGATCGATTTGGTGAGAAGCCTCTTTATTACGGGTGGCAGCGCGATACTTTTCTGTTTGCTTCTGAGTTAAAAGCGCTTAAAGCTCATCCCAGTTTTGAAGGCAGCATTAATCGTCAGGCGTTATCGCATTTTTTTCGTTTGAATTACATACCAACGCCCTTATCCATTTATGAAGGTATCTTCAAGTTAGAGCCGGGTGTTATTGCTGTCTTTTCTCACGAGGGGCAGTTGCTCTCTAAACAAACATTTTGGGATGCCAGTCATGCTGTTTCTCTGCAAAATTTTTCCGATCATGATGCCGTTGATAAATTAGATGACTTAATTAAGCAGTCTATTCAAGATCAAGCGTTATCGGATGTTCCGTTAGGGGCTTTTTTATCCGGAGGGGTTGATTCGTCCACTGTGGTGGGTATTTTACAATCCCTCTCTACTCGTCCGGTCAAAACCTTTACGATCGGGTTTGACCACGCGGATTTTAATGAAGCGAGTGAGGCCTCAGACGTTGCAAAACACTTAGGAACGGATCATGTCGAGTTAATTGTCAGTGCAGAAGATGCTCTAGCGATTATTAATCAGTTACCTGTTATGTACGATGAACCTTTTGCTGACGCCTCTCAAGTGCCTACGTTTCTGGTTTCGAAGCTGGCTAAAAAAGAGGTCACTGTATGCTTGTCTGGTGATGGGGGCGATGAACTGTTTTGTGGTTATAACCGCTATCATTACACTGCTAAAGTTTGGTCGTATTTAGAAAAAATTCCCTTTCCAATCCGAAAAATGCTCTCAGTCTTTTTGTTGACGCTTTCGCCATCTTCTTGGGATGTTTTAAGTAAAACTTTAGGTTTGAATACCAGATTACCAAATTTAGGCAATAAAATTCAAAAAGGTGCCCAAGCTTTAAAGGCAAGAGATATTGAAGACCTTTATACACGGGTTGTCTCCAACTGGGATCTAGATGAGCCTTTGGTTAAAAATACTGCGGTTGAGAAATTACCGTTTTTGTCTGACTTAACAGAACTTTCCCATCTTAATGACTTAGAAAAAATGATGTTGTGGGATAAGCAATCTTATCTAATGGACGATGTTTTAGTGAAAACAGATCGTGCTACGATGGCGTGTTCATTAGAAGGGCGGGTTCCCTTGTTAGACCACCGCATTGCTGAGTTTGCTGCCAGTTTGCCGATCCATTTGAAATACCGAGGTGGAAAGGGAAAGTGGCTTTTACGAGAAGTACTGTATCGTTATGTACCTAAAAAATTAATTGAAAGGCCAAAAAAAGGGTTTAGTTTACCCATCGCTGAATGGTTGAGAGGACCGCTAAAAGATTGGGCGAATGTTTTGCTGGATTCTGATCGTATTGATAAAGAAGGCTTTTTGTCGTCTGAATTGGTTCAAAAGAAGTGGCGTGAACATTTAGCGGGTAAACGAGATTGGTCGTCGCAGTTGTGGAGCGTTCTAATGTTCCAATTATGGCTTGAGAAAAACAAATGA \ No newline at end of file diff --git a/tbprofiler/4.4.0/Dockerfile b/tbprofiler/4.4.0/Dockerfile new file mode 100644 index 000000000..a4ed6f5a9 --- /dev/null +++ b/tbprofiler/4.4.0/Dockerfile @@ -0,0 +1,52 @@ +# FROM defines the base docker image. This command has to come first in the file +# The 'as' keyword lets you name the folowing stage. We use `app` for the production image +FROM --platform=linux/x86_64 mambaorg/micromamba:0.24.0 as app + +USER root +WORKDIR / +ARG TBPROFILER_VER="4.4.0" +ARG TBDB_VER="c2fb9a2" + +# LABEL instructions tag the image with metadata that might be important to the user +# Optional, but highly recommended +LABEL base.image="micromamba:0.24.0" +LABEL dockerfile.version="2" +LABEL software="tbprofiler" +LABEL software.version=$TBPROFILER_VER +LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database." 
+LABEL website="https://github.com/StaPH-B/docker-builds" +LABEL license="https://github.com/StaPH-B/docker-builds/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" + +# RUN executes code during the build +# Install dependencies via apt-get + +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + + RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ + tb-profiler=${TBPROFILER_VER} && \ + micromamba clean --all --yes + + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +# Version of database can be confirmed at ./TBProfiler-${TBPROFILER_VER}/db/tbdb.version.json +RUN tb-profiler update_tbdb + +ENV MAMBA_DOCKERFILE_ACTIVATE=1 +WORKDIR /data + +ENV PATH="/opt/conda/bin:${PATH}" + + +FROM app as test + +RUN mkdir test_run && \ + cd test_run && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt diff --git a/tbprofiler/4.4.0/README.md b/tbprofiler/4.4.0/README.md new file mode 100644 index 000000000..a475106cf --- /dev/null +++ b/tbprofiler/4.4.0/README.md @@ -0,0 +1,24 @@ +# TBProfiler Container +Main tool: [TBProfiler](https://github.com/jodyphelan/TBProfiler) + +The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database. It also predicts the number of reads supporting drug resistance variants as an insight into hetero-resistance. + +## Database +This tool relies on a database to run. The database that is included in the docker image is ```4738132```. This is from the Git repository https://github.com/jodyphelan/tbdb. This can be confirmed in the json file:``` ./TBProfiler-${TBPROFILER_VER}/db/tbdb.version.json ```: +``` +{"name": "tbdb", "commit": "c2fb9a2", "Author": "jodyphelan ", "Date": "Tue Oct 4 11:40:15 2022 +0100"} +``` + +# Example Usage +Run whole pipeline: +``` +tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt +``` +Make alternative database: +``` +tb-profiler create_db --prefix +tb-profiler load_library --prefix +``` + + +Better documentation can be found [here.](https://jodyphelan.gitbook.io/tb-profiler/) diff --git a/tbprofiler/4.4.2/Dockerfile b/tbprofiler/4.4.2/Dockerfile new file mode 100644 index 000000000..7471c8ceb --- /dev/null +++ b/tbprofiler/4.4.2/Dockerfile @@ -0,0 +1,59 @@ +# FROM defines the base docker image. This command has to come first in the file +# The 'as' keyword lets you name the folowing stage. We use `app` for the production image +FROM mambaorg/micromamba:1.3.0 as app + +USER root +WORKDIR / +ARG TBPROFILER_VER="4.4.2" +# this version is the shortened commit hash on the `master` branch here https://github.com/jodyphelan/tbdb/ +# this was the latest commit as of 2023-02-17 +ARG TBDB_VER="5f3c51e" + +# LABEL instructions tag the image with metadata that might be important to the user +# Optional, but highly recommended +LABEL base.image="micromamba:1.3.0" +LABEL dockerfile.version="1" +LABEL software="tbprofiler" +LABEL software.version="${TBPROFILER_VER}" +LABEL description="The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. 
These variants are then compared to a drug-resistance database." +LABEL website="https://github.com/jodyphelan/TBProfiler/" +LABEL license="https://github.com/jodyphelan/TBProfiler/blob/master/LICENSE" +LABEL maintainer="John Arnn" +LABEL maintainer.email="jarnn@utah.gov" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="kapsakcj@gmail.com" + +# Install dependencies via apt-get; cleanup apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install tb-profiler via bioconda; install into 'base' conda env + RUN micromamba install --yes --name base --channel conda-forge --channel bioconda \ + tb-profiler=${TBPROFILER_VER} && \ + micromamba clean --all --yes + +# this ARG is so that the conda env is activated for running the following command +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +# Version of database can be confirmed at /opt/conda/share/tbprofiler/tbdb.version.json +# can also run 'tb-profiler list_db' to find the same version info +RUN tb-profiler update_tbdb --commit ${TBDB_VER} + +# ensure conda environment is always active for the user +ENV MAMBA_DOCKERFILE_ACTIVATE=1 +WORKDIR /data + +# hardcode 'base' env bin into PATH, so conda env does not have to be "activated" at run time +ENV PATH="/opt/conda/bin:${PATH}" + +# test stage +FROM app as test + +# download some TB FASTQs and run through tb-profiler +RUN mkdir test_run && \ + cd test_run && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_1.fastq.gz && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR166/009/ERR1664619/ERR1664619_2.fastq.gz && \ + tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 2 -p ERR1664619 --txt diff --git a/tbprofiler/4.4.2/README.md b/tbprofiler/4.4.2/README.md new file mode 100644 index 000000000..1b65359c6 --- /dev/null +++ b/tbprofiler/4.4.2/README.md @@ -0,0 +1,55 @@ +# TBProfiler Container + +Main tool: [TBProfiler](https://github.com/jodyphelan/TBProfiler) + +The pipeline aligns reads to the H37Rv reference using bowtie2, BWA or minimap2 and then calls variants using bcftools. These variants are then compared to a drug-resistance database. It also predicts the number of reads supporting drug resistance variants as an insight into hetero-resistance. + +## Database + +This tool relies on a database to run. The version (AKA git commit hash) of the database that is included in the docker image is `5f3c51e`. This is from the GitHub repository https://github.com/jodyphelan/tbdb.
This can be confirmed in the json file: `/opt/conda/share/tbprofiler/tbdb.version.json`: + +```bash +$ grep 'commit' /opt/conda/share/tbprofiler/tbdb.version.json +{"name": "tbdb", "commit": "5f3c51e", "Merge": "b1a2549 abddb8e", "Author": "Jody Phelan ", "Date": "Thu Jan 19 10:47:32 2023 +0000"} +``` + +Additionally you can run the command `tb-profiler list_db` to list the same information + +```bash +$ tb-profiler list_db +tbdb 5f3c51e Jody Phelan Thu Jan 19 10:47:32 2023 +0000 /opt/conda/share/tbprofiler/tbdb +``` + +## Additional included tools/dependencies + +- bcftools 1.12 +- bedtools 2.30.0 +- bwa 0.7.17 +- freebayes 1.3.5 +- gatk4 4.3.0.0 +- kmc 3.2.1 +- pathogen-profiler 2.0.4 +- perl 5.32.1 +- python 3.9.9 +- samclip 0.4.0 +- samtools 1.12 +- snpeff 5.1 +- trimmomatic 0.39 + +## Example Usage + +Run whole pipeline on Illumina paired-end reads: + +```bash +tb-profiler profile -1 ERR1664619_1.fastq.gz -2 ERR1664619_2.fastq.gz -t 4 -p ERR1664619 --txt +``` + +Make alternative database: + +```bash +tb-profiler create_db --prefix +tb-profiler load_library --prefix +``` + + +Better documentation can be found [here.](https://jodyphelan.gitbook.io/tb-profiler/) diff --git a/tostadas/0.2.0-beta/Dockerfile b/tostadas/0.2.0-beta/Dockerfile new file mode 100644 index 000000000..ed97f4eb9 --- /dev/null +++ b/tostadas/0.2.0-beta/Dockerfile @@ -0,0 +1,57 @@ +FROM nfcore/base:2.1 as app +# nfcore/base is Debian + +# comes with warning about conda version +# ==> WARNING: A newer version of conda exists. <== +# current version: 4.9.2 +# latest version: 22.11.1 +# +# Please update conda by running +# +# $ conda update -n base -c defaults conda + +ARG TOSTADAS_VER="0.2.0-beta" + +LABEL authors="Cole and Ankush Gupta" +LABEL base.image="nfcore/base:2.1" +LABEL dockerfile.version="1" +LABEL software="tostadas" +LABEL software.version=$TOSTADAS_VER +LABEL description="Image for the TOSTADAS: Toolkit for Open Sequence Triage, Annotation and DAtabase Submission pipeline" +LABEL website="https://github.com/CDCgov/tostadas" +LABEL license="https://github.com/CDCgov/tostadas/LICENSE" +LABEL maintainer="Ankush Gupta" +LABEL maintainer.email="ankushkgupta@deloitte.com" +LABEL maintainer2="Kyle O'Connell" +LABEL maintainer2.email="kyoconnell@deloitte.com" +LABEL maintainer3="Cole Tindall" +LABEL maintainer3.email="ctindall@deloitte.com" + +# download tostadas repo, move to /tostadas, and create /data +RUN wget https://github.com/CDCgov/tostadas/archive/refs/tags/v${TOSTADAS_VER}.tar.gz && \ + tar -xvf v${TOSTADAS_VER}.tar.gz && \ + rm v${TOSTADAS_VER}.tar.gz && \ + mv tostadas-${TOSTADAS_VER} tostadas && \ + mkdir /data + +# install mamba +RUN conda install mamba -n base -c conda-forge + +# use mamba to install conda packages +RUN mamba env create -f tostadas/environment.yml && \ + mamba clean -a -y && \ + echo "source activate tostadas" > ~/.bashrc && \ + rm -rf /tostadas + +ENV PATH=/opt/conda/envs/tostadas/bin:/opt/conda/envs/env/bin:$PATH \ + LC_ALL=C.UTF-8 + +WORKDIR /data + +FROM app as test + +RUN mamba list && conda list + +RUN liftoff --version && \ + samtools --version && \ + python --version diff --git a/tostadas/0.2.0-beta/README.md b/tostadas/0.2.0-beta/README.md new file mode 100644 index 000000000..9cb921d3b --- /dev/null +++ b/tostadas/0.2.0-beta/README.md @@ -0,0 +1,37 @@ +# tostadas container + +Main tool : [tostadas](https://github.com/CDCgov/tostadas) + +Additional tools installed via conda: + - pip + - libgcc-ng=12.1.0=h8d9b700_16 + - libstdcxx-ng=12.1.0=ha89aaad_16 + - 
liftoff=1.6.3=pyhdfd78af_0 + - numpy=1.22.4=py39hc58783e_0 + - packaging=21.3=pyhd3eb1b0_0 + - pandas=1.4.2=py39h1832856_2 + - python-dateutil=2.8.2=pyhd8ed1ab_0 + - python_abi=3.9=2_cp39 + - pytz=2022.1=pyhd8ed1ab_0 + - pyvcf3=1.0.3=pyhdfd78af_0 + - pyyaml=6.0=py39hb9d737c_4 + - readline=8.1=h46c0cb4_0 + - requests=2.28.0=pyhd8ed1ab_1 + - samtools=1.15.1=h1170115_0 + - setuptools=62.3.2=py39hf3d152e_0 + - simplejson=3.17.6=py39hb9d737c_1 + - tbb=2021.5.0=h924138e_1 + - tbl2asn=25.7=h9ee0642_1 + - tk=8.6.12=h27826a3_0 + - tzdata=2022a=h191b570_0 + - ujson=5.3.0=py39h5a03fae_0 + - urllib3=1.26.9=pyhd8ed1ab_0 + - wgs2ncbi=1.1.2=pl5262hdfd78af_1 + - wheel=0.37.1=pyhd8ed1ab_0 + - xz=5.2.5=h516909a_1 + - yaml=0.2.5=h7f98852_2 + - openpyxl=3.0.10 + +Full documentation: https://github.com/CDCgov/tostadas + +[tostadas](https://github.com/CDCgov/tostadas) is a nextflow workflow for annotation and submission of MonkeyPox Virus consensus sequences. The resultant container is not intended to run independently of the workflow. diff --git a/trycycler/0.5.3/Dockerfile b/trycycler/0.5.3/Dockerfile new file mode 100644 index 000000000..7a2e898a2 --- /dev/null +++ b/trycycler/0.5.3/Dockerfile @@ -0,0 +1,86 @@ +ARG TRYCYCLER_VER=0.5.3 + +FROM ubuntu:jammy as app + +ARG TRYCYCLER_VER +ARG MASH_VER=2.3 +ARG MINIASM_VER=0.3 +ARG MINIMAP2_VER=2.24 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="Trycycler" +LABEL software.version="${TRYCYCLER_VER}" +LABEL description="A tool for generating consensus long-read assemblies for bacterial genomes" +LABEL website="https://github.com/rrwick/Trycycler" +LABEL license="https://github.com/rrwick/Trycycler/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# install prerequisites, cleanup apt garbage +# muscle version: 3.8.31 +ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + gcc \ + wget \ + curl \ + bzip2 \ + build-essential \ + procps \ + ca-certificates \ + libz-dev \ + muscle \ + r-base \ + gfortran \ + libblas-dev \ + liblapack-dev && \ + apt-get clean && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# minimap2 +# apt deps: curl bzip2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxvf - + +# mash +RUN wget https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar && \ + tar -xvf mash-Linux64-v${MASH_VER}.tar && \ + rm -rf mash-Linux64-v${MASH_VER}.tar + +# miniasm +RUN wget https://github.com/lh3/miniasm/archive/v${MINIASM_VER}.tar.gz && \ + mkdir miniasm && \ + tar -xzvf v${MINIASM_VER}.tar.gz -C miniasm --strip-components 1 && \ + rm v${MINIASM_VER}.tar.gz && \ + cd miniasm && \ + make + +# install R packages +RUN R -e "install.packages(c('ape',\ + 'phangorn'),\ + repos = 'http://cran.us.r-project.org')" + +# Trycycler +# apt deps: muscle gcc +RUN wget https://github.com/rrwick/Trycycler/archive/v${TRYCYCLER_VER}.tar.gz && \ + tar -xzf v${TRYCYCLER_VER}.tar.gz && \ + rm v${TRYCYCLER_VER}.tar.gz && \ + pip3 install ./Trycycler-${TRYCYCLER_VER} Pillow && \ + mkdir /data + +# set /data as working directory +WORKDIR /data + +# set env path variable for installed programs. LC_ALL for singularity compatibility. 
+ENV PATH="/mash-Linux64-v${MASH_VER}:/minimap2-${MINIMAP2_VER}_x64-linux:/miniasm:${PATH}"\ + LC_ALL=C + +FROM app as test + +ARG TRYCYCLER_VER + +WORKDIR /Trycycler-${TRYCYCLER_VER} + +RUN trycycler --help && trycycler --version + +RUN pytest \ No newline at end of file diff --git a/trycycler/0.5.3/README.md b/trycycler/0.5.3/README.md new file mode 100644 index 000000000..cac1ad7c5 --- /dev/null +++ b/trycycler/0.5.3/README.md @@ -0,0 +1,34 @@ +# trycycler container + +Main tool: [trycycler](https://github.com/rrwick/Trycycler) + +Additional tools: + +- [miniasm](https://github.com/lh3/miniasm) 0.3-r179 +- [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 +- [muscle](https://www.ebi.ac.uk/Tools/msa/muscle/) 3.8.1551 +- [mash](https://github.com/marbl/Mash) 2.3 +- R package ['ape'](https://cran.r-project.org/web/packages/ape/index.html) 5.7 +- R package ['phangorn'](https://cran.r-project.org/web/packages/phangorn/index.html) 2.11.1 + +Trycycler "tries" different assemblies to find the one most supported by long reads. + +## Example Usage + +```bash +trycycler subsample --reads reads.fastq --out_dir read_subsets +``` + +```bash +trycycler cluster --assemblies assemblies/*.fasta --reads reads.fastq --out_dir trycycler +``` + +```bash +trycycler reconcile --reads reads.fastq --cluster_dir trycycler/cluster_001 +``` + +```bash +trycycler consensus --cluster_dir trycycler/cluster_001 +``` + +It is highly recommended to read Trycycler's [wiki](https://github.com/rrwick/Trycycler/wiki) for full usability and reasoning. diff --git a/trycycler/0.5.4/Dockerfile b/trycycler/0.5.4/Dockerfile new file mode 100644 index 000000000..bffcb27aa --- /dev/null +++ b/trycycler/0.5.4/Dockerfile @@ -0,0 +1,89 @@ +ARG TRYCYCLER_VER=0.5.4 + +FROM ubuntu:jammy as app + +ARG TRYCYCLER_VER +ARG MASH_VER=2.3 +ARG MINIASM_VER=0.3 +ARG MINIMAP2_VER=2.24 + +LABEL base.image="ubuntu:jammy" +LABEL dockerfile.version="1" +LABEL software="Trycycler" +LABEL software.version="${TRYCYCLER_VER}" +LABEL description="A tool for generating consensus long-read assemblies for bacterial genomes" +LABEL website="https://github.com/rrwick/Trycycler" +LABEL license="https://github.com/rrwick/Trycycler/blob/master/LICENSE" +LABEL maintainer="Erin Young" +LABEL maintainer.email="eriny@utah.gov" + +# install prerequisites, cleanup apt garbage +# muscle version: 3.8.31 +ARG DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3 \ + python3-pip \ + gcc \ + wget \ + curl \ + bzip2 \ + build-essential \ + procps \ + ca-certificates \ + libz-dev \ + muscle \ + r-base \ + gfortran \ + libblas-dev \ + liblapack-dev && \ + apt-get clean && apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# minimap2 +# apt deps: curl bzip2 +RUN curl -L https://github.com/lh3/minimap2/releases/download/v${MINIMAP2_VER}/minimap2-${MINIMAP2_VER}_x64-linux.tar.bz2 | tar -jxvf - + +# mash +RUN wget https://github.com/marbl/Mash/releases/download/v${MASH_VER}/mash-Linux64-v${MASH_VER}.tar && \ + tar -xvf mash-Linux64-v${MASH_VER}.tar && \ + rm -rf mash-Linux64-v${MASH_VER}.tar + +# miniasm +RUN wget https://github.com/lh3/miniasm/archive/v${MINIASM_VER}.tar.gz && \ + mkdir miniasm && \ + tar -xzvf v${MINIASM_VER}.tar.gz -C miniasm --strip-components 1 && \ + rm v${MINIASM_VER}.tar.gz && \ + cd miniasm && \ + make + +# install R packages +RUN R -e "install.packages(c('ape',\ + 'phangorn'),\ + repos = 'http://cran.us.r-project.org')" + +# Trycycler +# apt deps: muscle gcc +RUN wget 
https://github.com/rrwick/Trycycler/archive/v${TRYCYCLER_VER}.tar.gz && \ + tar -xzf v${TRYCYCLER_VER}.tar.gz && \ + rm v${TRYCYCLER_VER}.tar.gz && \ + pip3 install ./Trycycler-${TRYCYCLER_VER} Pillow && \ + mkdir /data + +# set /data as working directory +WORKDIR /data + +# set env PATH variable for installed programs +# LC_ALL for singularity compatibility +# TERM set so that output is pretty during tests and so warnings about TERM not being set are silenced +ENV PATH="/mash-Linux64-v${MASH_VER}:/minimap2-${MINIMAP2_VER}_x64-linux:/miniasm:${PATH}"\ + LC_ALL=C \ + TERM=xterm-256color + +FROM app as test + +ARG TRYCYCLER_VER + +WORKDIR /Trycycler-${TRYCYCLER_VER} + +RUN trycycler --help && trycycler --version + +RUN pytest \ No newline at end of file diff --git a/trycycler/0.5.4/README.md b/trycycler/0.5.4/README.md new file mode 100644 index 000000000..ba19bdc7c --- /dev/null +++ b/trycycler/0.5.4/README.md @@ -0,0 +1,34 @@ +# trycycler container + +Main tool: [trycycler](https://github.com/rrwick/Trycycler) + +Additional tools: + +- [miniasm](https://github.com/lh3/miniasm) 0.3-r179 +- [minimap2](https://github.com/lh3/minimap2) 2.24-r1122 +- [muscle](https://www.ebi.ac.uk/Tools/msa/muscle/) 3.8.1551 (recommended version) +- [mash](https://github.com/marbl/Mash) 2.3 +- R package ['ape'](https://cran.r-project.org/web/packages/ape/index.html) 5.7 +- R package ['phangorn'](https://cran.r-project.org/web/packages/phangorn/index.html) 2.11.1 + +Trycycler "tries" different assemblies to find the one most supported by long reads. + +## Example Usage + +```bash +trycycler subsample --reads reads.fastq --out_dir read_subsets +``` + +```bash +trycycler cluster --assemblies assemblies/*.fasta --reads reads.fastq --out_dir trycycler +``` + +```bash +trycycler reconcile --reads reads.fastq --cluster_dir trycycler/cluster_001 +``` + +```bash +trycycler consensus --cluster_dir trycycler/cluster_001 +``` + +It is highly recommended to read Trycycler's [wiki](https://github.com/rrwick/Trycycler/wiki) for full usability and reasoning. diff --git a/vadr/1.5.1/Dockerfile b/vadr/1.5.1/Dockerfile new file mode 100644 index 000000000..76f6f845a --- /dev/null +++ b/vadr/1.5.1/Dockerfile @@ -0,0 +1,167 @@ +FROM ubuntu:focal as app + +# for easy upgrade later. 
LC_ALL set for singularity compatibility +ENV VADR_VERSION="1.5.1" \ + VADR_SARSCOV2_MODELS_VERSION="1.3-2" \ + VADR_MPXV_MODELS_VERSION="1.4.2-1" \ + VADR_RSV_MODELS_VER="1.5-2"\ + LC_ALL=C \ + VADRINSTALLDIR=/opt/vadr + +ENV VADRSCRIPTSDIR=$VADRINSTALLDIR/vadr \ + VADRMINISCRIPTSDIR=$VADRINSTALLDIR/vadr/miniscripts \ + VADRMODELDIR=$VADRINSTALLDIR/vadr-models \ + VADRINFERNALDIR=$VADRINSTALLDIR/infernal/binaries \ + VADREASELDIR=$VADRINSTALLDIR/infernal/binaries \ + VADRHMMERDIR=$VADRINSTALLDIR/hmmer/binaries \ + VADRBIOEASELDIR=$VADRINSTALLDIR/Bio-Easel \ + VADRSEQUIPDIR=$VADRINSTALLDIR/sequip \ + VADRBLASTDIR=$VADRINSTALLDIR/ncbi-blast/bin \ + VADRFASTADIR=$VADRINSTALLDIR/fasta/bin \ + VADRMINIMAP2DIR=$VADRINSTALLDIR/minimap2 + +ENV PERL5LIB=$VADRSCRIPTSDIR:$VADRSEQUIPDIR:$VADRBIOEASELDIR/blib/lib:$VADRBIOEASELDIR/blib/arch:$PERL5LIB \ + PATH=$VADRSCRIPTSDIR:$VADRMINISCRIPTSDIR:$PATH + +# metadata - optional, but highly recommended +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="VADR" +LABEL software.version="${VADR_VERSION}" +LABEL description="Classification and annotation of viral sequences based on RefSeq annotation" +LABEL website="https://github.com/ncbi/vadr" +LABEL license="https://github.com/ncbi/vadr/blob/master/LICENSE" +LABEL maintainer="Anders Goncalves da Silva" +LABEL maintainer.email="andersgs@gmail.com" +LABEL maintainer2="Curtis Kapsak" +LABEL maintainer2.email="curtis.kapsak@theiagen.com" + +# install dependencies via apt-get. Clean up apt garbage +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + perl \ + curl \ + unzip \ + build-essential \ + autoconf \ + libinline-c-perl \ + liblwp-protocol-https-perl \ + zip \ + unzip \ + procps \ + zlib1g-dev && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install VADR +# download entire VADR source code from GitHub release +# use vadr-install.sh script to install VADR into $VADRINSTALLDIR (set to /opt/vadr) +# this script grabs files from tagged release and sets things up in /opt/vadr/vadr +# last step is to delete the original source code that is a duplicate (/opt/vadr/vadr-$VADR_VERSION) +RUN mkdir -p ${VADRINSTALLDIR} && \ + cd ${VADRINSTALLDIR} && \ + wget https://github.com/ncbi/vadr/archive/refs/tags/vadr-${VADR_VERSION}.tar.gz && \ + mkdir vadr-${VADR_VERSION} && tar -xzf vadr-${VADR_VERSION}.tar.gz -C vadr-${VADR_VERSION} --strip-components 1 && \ + rm vadr-${VADR_VERSION}.tar.gz && \ + bash vadr-${VADR_VERSION}/vadr-install.sh linux && \ + rm -rf vadr-${VADR_VERSION}/ && \ + mkdir /data + +# install the latest sarscov2 and mpxv models +# copy calici model files into VADRMODELDIR to allow VADR tests to pass completely +# cleanup duplicate copies of model files +RUN wget -O vadr-models-sarscov2.tar.gz https://ftp.ncbi.nlm.nih.gov/pub/nawrocki/vadr-models/sarscov2/${VADR_SARSCOV2_MODELS_VERSION}/vadr-models-sarscov2-${VADR_SARSCOV2_MODELS_VERSION}.tar.gz && \ + wget -O vadr-models-mpxv.tar.gz https://ftp.ncbi.nlm.nih.gov/pub/nawrocki/vadr-models/mpxv/${VADR_MPXV_MODELS_VERSION}/vadr-models-mpxv-${VADR_MPXV_MODELS_VERSION}.tar.gz && \ + tar -xf vadr-models-sarscov2.tar.gz && \ + tar -xf vadr-models-mpxv.tar.gz && \ + mkdir -vp ${VADRMODELDIR} && \ + cp -nv /vadr-models-sarscov2-${VADR_SARSCOV2_MODELS_VERSION}/* ${VADRMODELDIR} && \ + cp -nv /vadr-models-mpxv-${VADR_MPXV_MODELS_VERSION}/* ${VADRMODELDIR} && \ + rm -rf /vadr-models-sarscov2* && \ + rm -rf /vadr-models-mpxv* && \ + cp -nv ${VADRINSTALLDIR}/vadr-models-calici/* 
${VADRMODELDIR} && \ + rm -rf ${VADRINSTALLDIR}/vadr-models-calici/ + +# download RSV VADR models; copy model files into VADRMODELDIR +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/nawrocki/vadr-models/rsv/${VADR_RSV_MODELS_VER}/vadr-models-rsv-${VADR_RSV_MODELS_VER}.tar.gz && \ + tar -xf /vadr-models-rsv-${VADR_RSV_MODELS_VER}.tar.gz && \ + rm -v /vadr-models-rsv-${VADR_RSV_MODELS_VER}.tar.gz && \ + cp -nvr /vadr-models-rsv-${VADR_RSV_MODELS_VER}/* ${VADRMODELDIR} && \ + rm -rfv /vadr-models-rsv-${VADR_RSV_MODELS_VER} + +# Virus model files other than sarscov2 will need to be made available to vadr either in +# the $VADRMODELDIR or another path can be specified using the 'v-annotate.pl -mdir' option. +# These files will need to be mounted into the container at runtime, e.g. 'docker run -v' option. + +# set working directory +WORKDIR /data + +FROM app as test + +# download B.1.1.7 genome from Utah +ADD https://raw.githubusercontent.com/StaPH-B/docker-builds/master/tests/SARS-CoV-2/SRR13957123.consensus.fa /test-data/SRR13957123.consensus.fa + +# print help options (which prints version at top) +# run test script included w VADR +# test terminal N trimming script +# run v-annotate.pl on trimmed B.1.1.7 genome +RUN v-annotate.pl -h && \ + /opt/vadr/vadr/testfiles/do-install-tests-local.sh && \ + /opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \ + /test-data/SRR13957123.consensus.fa \ + --minlen 50 \ + --maxlen 30000 \ + > /test-data/SRR13957123.consensus.trimmed.fasta && \ + v-annotate.pl --noseqnamemax --glsearch -s -r --nomisc \ + --mkey sarscov2 --lowsim5seq 6 --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn \ + "/test-data/SRR13957123.consensus.trimmed.fasta" \ + "SRR13957123-vadr-outdir" && \ + ls SRR13957123-vadr-outdir + +# install ncbi datasets tool (pre-compiled binary); place in $PATH +RUN wget https://ftp.ncbi.nlm.nih.gov/pub/datasets/command-line/LATEST/linux-amd64/datasets && \ + chmod +x datasets && \ + mv -v datasets /usr/local/bin + +# download assembly for a MPXV from the UK +# run VADR trimming script and v-annotate.pl +# link to GenBank accession: https://www.ncbi.nlm.nih.gov/nuccore/OP022171 +ARG GENBANK_ACCESSION="OP022171.1" +RUN datasets download virus genome accession ${GENBANK_ACCESSION} --filename ${GENBANK_ACCESSION}.zip && \ + unzip ${GENBANK_ACCESSION}.zip && rm ${GENBANK_ACCESSION}.zip && \ + mv -v ncbi_dataset/data/genomic.fna ncbi_dataset/data/${GENBANK_ACCESSION}.genomic.fna && \ + fasta-trim-terminal-ambigs.pl /data/ncbi_dataset/data/${GENBANK_ACCESSION}.genomic.fna \ + --minlen 50 \ + --maxlen 210000 \ + >/data/${GENBANK_ACCESSION}.trimmed.fasta && \ +v-annotate.pl --split --cpu 2 \ + --glsearch -s -r \ + --nomisc \ + --mkey mpxv \ + --r_lowsimok \ + --r_lowsimxd 100 \ + --r_lowsimxl 2000 \ + --alt_pass discontn,dupregin \ + --minimap2 \ + --s_overhang 150 \ + /data/${GENBANK_ACCESSION}.trimmed.fasta \ + ${GENBANK_ACCESSION}-mpxv-vadr-test-output + +### COMMENTING OUT RSV TEST BELOW SINCE THIS TEST CAN CONSUME UPWARDS OF 30GB RAM ### +### it runs fine when you have that much RAM available, but not in GHActions runners that are limited to 7GB RAM ### + +# download a test RSV genome, run through VADR using RSV models +# example commands taken from VADR RSV guide: https://github.com/ncbi/vadr/wiki/RSV-annotation +# RUN echo "testing RSV functionality..." 
&& \ +# wget https://ftp.ncbi.nlm.nih.gov/pub/nawrocki/vadr-models/rsv/rsv.r10.fa && \ +# fasta-trim-terminal-ambigs.pl rsv.r10.fa \ +# --minlen 50 \ +# --maxlen 15500 \ +# >/data/rsv.r10.trimmed.fasta && \ +# v-annotate.pl --split \ +# -r \ +# -xnocomp \ +# -mkey rsv \ +# /data/rsv.r10.trimmed.fasta \ +# rsv-vadr-test-output + \ No newline at end of file diff --git a/vadr/1.5.1/README.md b/vadr/1.5.1/README.md new file mode 100644 index 000000000..d2b7a1003 --- /dev/null +++ b/vadr/1.5.1/README.md @@ -0,0 +1,72 @@ +# VADR container + +Main tool : [VADR](https://github.com/ncbi/vadr) + +VADR is a suite of tools for classifying and analyzing sequences homologous to a set of reference models of viral genomes or gene families. It has been mainly tested for analysis of Norovirus, Dengue, and SARS-CoV-2 virus sequences in preparation for submission to the GenBank database. + +You can find [additional information on the SARS-CoV-2 models used by VADR here](https://github.com/ncbi/vadr/wiki/Coronavirus-annotation#sarscov2models). The models are downloaded from the [NCBI FTP server](https://ftp.ncbi.nlm.nih.gov/pub/nawrocki/vadr-models/sarscov2/). + +Additional tools: + +- perl v5.22.1 +- infernal v1.1.4 +- hmmer v3.3.2 +- ncbi-blast+ v2.12.0 +- fasta v36.3.8h (the tool, not the file format) +- minimap2 v2.24 + +Available VADR models: + +- sarscov2 v1.3-2 +- Mpox (AKA MPXV, formerly known as "Monkeypox") v1.4.2-1 +- Norovirus and other Caliciviridae +- Dengue virus and other Flaviviridae +- RSV v1.5-2 + +## FYI + +- Mpox FYIs + - **Note:** Support for Mpox (MPXV) genome annotation was added to the VADR software (July 2022) and is under active development. Things may change quickly. See the documentation linked below for the latest information on the state of MPXV annotation with VADR. + - Also be aware that some Mpox sequences may take up to **30 minutes** to annotate, depending on how divergent they are from the RefSeq NC_063383 sequence. Some sequences may only take a minute or so. +- Most of the VADR model files are located at `/opt/vadr/vadr-models` in the container filesystem and this path is stored in the globally accessible bash variable `$VADRMODELDIR`. For most applications, there is no need to specify `v-annotate.pl --mdir /path/to/model/files` since `$VADRMODELDIR` is set in the environment. + - The exception is that Dengue and other Flaviviridae model files are located at `/opt/vadr/vadr-models-flavi/` within the container filesystem. To use these models, please specify the 2 options: `v-annotate.pl --mdir /opt/vadr/vadr-models-flavi/ --mkey flavi`. A full example command can be found below. Model files that are not bundled into the image can also be mounted into the container at runtime and passed to `v-annotate.pl` with `--mdir`; see the sketch just below this list.
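+ +As a rough illustration of the point above about `$VADRMODELDIR` and `--mdir`, the sketch below shows one way to use model files kept outside the image by mounting a host directory into the container at runtime. This is not an official command: the host path, mount point, model key, input FASTA, and output directory name are placeholders, and it assumes the image is tagged `staphb/vadr:1.5.1` as with other StaPH-B images. + +```bash +# hypothetical example: run v-annotate.pl with model files stored on the host +# /path/to/host-models, mymodel, input.trimmed.fasta, and my-vadr-outdir are placeholders +docker run --rm -u $(id -u):$(id -g) \ + -v $PWD:/data \ + -v /path/to/host-models:/host-models \ + staphb/vadr:1.5.1 \ + v-annotate.pl --mdir /host-models --mkey mymodel \ + /data/input.trimmed.fasta \ + my-vadr-outdir +```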
+ +### VADR Documentation + +- [Full documentation](https://github.com/ncbi/vadr#vadr-documentation-) +- [Docs on Coronavirus annotation](https://github.com/ncbi/vadr/wiki/Coronavirus-annotation) +- [Docs on Mpox annotation](https://github.com/ncbi/vadr/wiki/Monkeypox-virus-annotation) +- [Docs on Dengue and other Flaviviridae annotation](https://github.com/ncbi/vadr/wiki/Available-VADR-model-files#dengue-virus-and-other-flaviviridae-refseq-vadr-models) +- [Docs on RSV annotation](https://github.com/ncbi/vadr/wiki/RSV-annotation) + +## Example Usage + +```bash +# trim terminal Ns from my input genome (VADR requires this as the first step) +# for MPXV, adjust maxlen to 210000 +/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \ + /data/SRR13957123.consensus.fa \ + --minlen 50 \ + --maxlen 30000 \ + > /data/SRR13957123.consensus.trimmed.fasta + +# run v-annotate.pl using the sarscov2 model to annotate a trimmed input genome +v-annotate.pl --noseqnamemax --glsearch -s -r --nomisc \ + --mkey sarscov2 --lowsim5seq 6 \ + --lowsim3seq 6 --alt_fail lowscore,insertnn,deletinn \ + /data/SRR13957123.consensus.trimmed.fasta \ + SRR13957123-vadr-outdir + +# run v-annotate.pl using mpxv model to annotate a trimmed input genome +v-annotate.pl --split --cpu 8 --glsearch -s -r --nomisc --mkey mpxv \ + --r_lowsimok --r_lowsimxd 100 --r_lowsimxl 2000 --alt_pass discontn,dupregin \ + --minimap2 --s_overhang 150 \ + mpxv.consensus.trimmed.fasta \ + mpxv-vadr-1.5-test-output + +# run v-annotate.pl using Flaviviridae model to annotate a Dengue viral genome +v-annotate.pl --split --cpu 1 --group Dengue --nomisc --noprotid \ + --mdir /opt/vadr/vadr-models-flavi/ --mkey flavi \ + GCF_000862125.1_ViralProj15306_genomic.fna \ + dengue-test-outdir +``` diff --git a/virulencefinder/2.0.4/Dockerfile b/virulencefinder/2.0.4/Dockerfile new file mode 100644 index 000000000..0e7d1abaf --- /dev/null +++ b/virulencefinder/2.0.4/Dockerfile @@ -0,0 +1,105 @@ +ARG VIRULENCEFINDER_VER="2.0.4" +# Database not properly versioned, so using most recent commit made on 2023-05-03 +# see here: https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/f678bdc15283aed3a45f66050d2eb3a6c9651f3f +ARG VIRULENCEFINDER_DB_COMMIT_HASH="f678bdc15283aed3a45f66050d2eb3a6c9651f3f" + +FROM ubuntu:focal as app + +# re-instantiating for use in the app layer +ARG VIRULENCEFINDER_VER +ARG VIRULENCEFINDER_DB_COMMIT_HASH + +# metadata +LABEL base.image="ubuntu:focal" +LABEL dockerfile.version="1" +LABEL software="VirulenceFinder" +LABEL software.version="${VIRULENCEFINDER_VER}" +LABEL description="Tool for identifying the virulence genes in E.
coli, Enterococcus, Staphylococcus aureus, & Listeria from reads or assemblies" +LABEL website="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" +LABEL license="https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/" +LABEL maintainer="Curtis Kapsak" +LABEL maintainer.email="kapsakcj@gmail.com" + +# install dependencies; cleanup apt garbage +# ncbi-blast+ v2.9.0 (ubuntu:focal), min required version is 2.8.1 +# python3 v3.8.10, min required version is 3.5 +RUN apt-get update && apt-get install -y --no-install-recommends \ + wget \ + ca-certificates \ + procps \ + git \ + ncbi-blast+ \ + python3 \ + python3-pip \ + python3-setuptools \ + python3-dev \ + gcc \ + make \ + libz-dev \ + dos2unix \ + unzip && \ + apt-get autoclean && rm -rf /var/lib/apt/lists/* + +# install python dependencies +RUN pip3 install biopython==1.73 tabulate==0.7.7 cgecore==1.5.5 + +# Install kma +# apt deps: libz-dev (for compiling) +RUN git clone --branch 1.0.1 --depth 1 https://bitbucket.org/genomicepidemiology/kma.git && \ + cd kma && \ + make && \ + mv -v kma* /usr/local/bin/ + +# download VIRULENCEFINDER database using a specific commit hash to aid in reproducibility +# index database w/ kma +# NOTE: files HAVE to go into '/database' since that is the default location expected by virulencefinder.py +# dos2unix on the FASTA files to ensure they have LF line endings +RUN mkdir /database && \ + git clone https://bitbucket.org/genomicepidemiology/virulencefinder_db.git /database && \ + cd /database && \ + git checkout ${VIRULENCEFINDER_DB_COMMIT_HASH} && \ + dos2unix *.fsa && \ + python3 INSTALL.py kma_index + +# install virulencefinder to specific tag/version; make /data +RUN git clone --branch ${VIRULENCEFINDER_VER} https://bitbucket.org/genomicepidemiology/virulencefinder.git && \ + mkdir /data + +# set $PATH and locale settings for singularity compatibility +ENV PATH="/virulencefinder:${PATH}" \ + LC_ALL=C.UTF-8 + +# set final working directory for production docker image (app layer only) +WORKDIR /data + +# default command is to pull up help options for virulencefinder +CMD [ "virulencefinder.py", "-h"] + +### START OF TEST STAGE ### +FROM app as test + +# set working directory for test layer +WORKDIR /test + +# download an example assembly; test with VirulenceFinder +# Escherichia coli complete genome (Unicycler assembly) +# GenBank Nucleotide entry: https://www.ncbi.nlm.nih.gov/nuccore/CP113091.1/ +# BioSample:SAMN08799860 +RUN mkdir -v /test/asm-input && \ + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/012/224/845/GCA_012224845.2_ASM1222484v2/GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + gunzip GCA_012224845.2_ASM1222484v2_genomic.fna.gz && \ + virulencefinder.py -i /test/GCA_012224845.2_ASM1222484v2_genomic.fna -x -o /test/asm-input && \ + cat /test/asm-input/results_tab.tsv + +# download Illumina reads for the same sample ^ and test reads as input into VirulenceFinder +RUN mkdir /test/reads-input && \ + wget ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR690/006/SRR6903006/SRR6903006_1.fastq.gz && \ + virulencefinder.py -i SRR6903006_1.fastq.gz -mp kma -x -o /test/reads-input && \ + cat /test/reads-input/results_tab.tsv + +# test using FASTA supplied with VirulenceFinder code; print help options +# expect to see hits to astA and 2 stx genes; unfortunately it finds astA and 3 stx genes (that don't match) +# issue created here: https://bitbucket.org/genomicepidemiology/virulencefinder/issues/11/test-results-do-not-match-expected-results +RUN cd /virulencefinder/test && \
+ virulencefinder.py -i test.fsa -o . -mp blastn -x -q && \ + virulencefinder.py --help diff --git a/virulencefinder/2.0.4/README.md b/virulencefinder/2.0.4/README.md new file mode 100644 index 000000000..a9831fc7f --- /dev/null +++ b/virulencefinder/2.0.4/README.md @@ -0,0 +1,119 @@ +# VirulenceFinder Docker Image + +A docker image that contains VirulenceFinder, a tool for identifying virulence factors in E. coli isolates from reads or assemblies. + +[Link to StaPH-B DockerHub repository](https://hub.docker.com/r/staphb/virulencefinder) + +Main tool: + +- Main Code Repo: [https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/master/) +- VirulenceFinder database: [https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/](https://bitbucket.org/genomicepidemiology/virulencefinder_db/src/master/) +- You may be familiar with the web version of VirulenceFinder: [https://cge.food.dtu.dk/services/VirulenceFinder/](https://cge.food.dtu.dk/services/VirulenceFinder/) + +Additional tools: + +- python 3.8.10 +- biopython 1.73 +- [kma](https://bitbucket.org/genomicepidemiology/kma/src/master/) 1.0.1 +- ncbi-blast+ 2.9.0 + +## Version information + +VirulenceFinder version: 2.0.4 [https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/](https://bitbucket.org/genomicepidemiology/virulencefinder/src/2.0.4/) made on 2020-02-06 + +VirulenceFinder database version: commit `f678bdc15283aed3a45f66050d2eb3a6c9651f3f` made on 2023-05-03. [Link to commit history](https://bitbucket.org/genomicepidemiology/virulencefinder_db/commits/) + +## Requirements + +- Docker or Singularity +- E. coli raw reads (fastq.gz) or assembly (fasta) + - Illumina, Ion Torrent, Roche 454, SOLiD, Oxford Nanopore, and PacBio reads are supported. (I've only tested Illumina reads) + +## Usage + +```bash +usage: virulencefinder.py [-h] -i INFILE [INFILE ...] [-o OUTDIR] [-tmp TMP_DIR] [-mp METHOD_PATH] [-p DB_PATH] [-d DATABASES] [-l MIN_COV] [-t THRESHOLD] [-x] [-q] + +optional arguments: + -h, --help show this help message and exit + -i INFILE [INFILE ...], --infile INFILE [INFILE ...] + FASTA or FASTQ input files. + -o OUTDIR, --outputPath OUTDIR + Path to blast output + -tmp TMP_DIR, --tmp_dir TMP_DIR + Temporary directory for storage of the results from the external software. + -mp METHOD_PATH, --methodPath METHOD_PATH + Path to method to use (kma or blastn) + -p DB_PATH, --databasePath DB_PATH + Path to the databases + -d DATABASES, --databases DATABASES + Databases chosen to search in - if non is specified all is used + -l MIN_COV, --mincov MIN_COV + Minimum coverage + -t THRESHOLD, --threshold THRESHOLD + Minimum threshold for identity + -x, --extented_output + Give extented output with allignment files, template and query hits in fasta and a tab seperated file with gene profile results + -q, --quiet +``` + +## Notes and Recommendations + +- You do not need to supply a database or use the `-p` or `-d` flags + - Database is included in the image and is in the default/expected location within the image filesystem: `/database` + - (*NOT RECOMMENDED*) If you do need to use your own database, you will need to first index it with `kma` and use the `virulencefinder.py -p` flag. You can find instructions for this on the VirulenceFinder Bitbucket README. `kma` is included in this docker image for database indexing. + - VirulenceFinder does **NOT** create an output directory when you use the `-o` flag.
You MUST create it beforehand or it will throw an error. + - **Default % Identity threshold: 90%**. Adjust with `-t 0.95` + - **Default % coverage threshold: 60%**. Adjust with `-l 0.70` + - Use the `-x` flag (extended output) if you want the traditional/legacy VirulenceFinder output files `results_tab.tsv results.txt Virulence_genes.fsa Hit_in_genome_seq.fsa`. Otherwise you will need to parse the default output file `data.json` for results + - (*RECOMMENDED*) Use raw reads due to the increased sensitivity (without loss of specificity) and the additional information gleaned from KMA output (specifically the depth metric). You also save time from having to assemble the genome first. [CITATION NEEDED, PROBABLY THE KMA PAPER] +- Querying reads: + - This will run VirulenceFinder with `kma` (instead of ncbi-blast+) + - Only one of the PE read files is necessary. There is likely little benefit to using both R1 and R2. It will take longer to run if you use both R1 and R2 files. +- Querying assemblies: + - This will run VirulenceFinder with `ncbi-blast+` + - VirulenceFinder does not clean up after itself. `tmp/` (which contains 7 different `.xml` files) will exist in the specified output directory + +## Example Usage: Docker + +```bash +# download the image +$ docker pull staphb/virulencefinder:2.0.4 + +# input files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz + +# make output directories +$ mkdir output-dir-reads output-dir-asm + +# query reads, mount PWD to /data inside container (broken into two lines for readability) +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.4 \ + virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# query assembly +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.4 \ + virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm +``` + +## Example Usage: Singularity + +```bash +# download the image +$ singularity build virulencefinder.2.0.4.sif docker://staphb/virulencefinder:2.0.4 + +# files are in my PWD +$ ls +E-coli.skesa.fasta E-coli.R1.fastq.gz E-coli.R2.fastq.gz + +# make output directories +$ mkdir output-dir-reads output-dir-asm + +# query reads; mount PWD to /data inside container +$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ + virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-reads + +# assembly +$ singularity exec --no-home -B $PWD:/data virulencefinder.2.0.4.sif \ + virulencefinder.py -i /data/E-coli.skesa.fasta -o /data/output-dir-asm +```
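+ +The notes above about creating the output directory ahead of time, adjusting the identity/coverage thresholds, and using `-x` for the legacy output files can be combined into a single run. The sketch below is only an illustration of those notes: the read file name matches the examples above, the output directory name is arbitrary, and the `-t`/`-l` values are the example values from the notes, not recommendations. + +```bash +# make the output directory first (VirulenceFinder will not create it for you) +$ mkdir output-dir-custom + +# query reads with a 95% identity threshold, 70% coverage threshold, and extended output +$ docker run --rm -u $(id -u):$(id -g) -v $PWD:/data staphb/virulencefinder:2.0.4 \ + virulencefinder.py -i /data/E-coli.R1.fastq.gz -o /data/output-dir-custom \ + -t 0.95 -l 0.70 -x + +# the traditional tab-delimited results are written to the output directory +$ cat output-dir-custom/results_tab.tsv +```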