diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 901f7e7..72543f1 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -23,8 +23,7 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
- if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- python setup.py install
+ python -m pip install .
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
diff --git a/.gitignore b/.gitignore
index bd6ad26..40797be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,6 @@ dmypy.json
# PyCharm
.idea/
+
+# Version file generated by setuptools_scm at build time
+_version.py
\ No newline at end of file
diff --git a/README.md b/README.md
index c892d77..84b499b 100644
--- a/README.md
+++ b/README.md
@@ -33,8 +33,8 @@ unzip eva-sub-cli.zip && mv eva-sub-cli-* eva-sub-cli
Then install the library and its dependencies as follows (e.g. in a virtual environment):
```bash
cd eva-sub-cli
-pip install -r requirements.txt
-python setup.py install
+# Activate your virtual environment
+python -m pip install .
```
To check it is installed correctly, you can run:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 0575701..608aa79 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -25,4 +25,4 @@ RUN curl -L "https://github.com/nextflow-io/nextflow/releases/download/v${NXF_VE
COPY . /opt/
-RUN pip install .
+RUN python -m pip install .
diff --git a/eva_sub_cli/VERSION b/eva_sub_cli/VERSION
deleted file mode 100644
index ca91a08..0000000
--- a/eva_sub_cli/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.4.dev0
diff --git a/eva_sub_cli/__init__.py b/eva_sub_cli/__init__.py
index 5cac71f..718a001 100644
--- a/eva_sub_cli/__init__.py
+++ b/eva_sub_cli/__init__.py
@@ -7,8 +7,6 @@
ETC_DIR = os.path.join(PACKAGE_DIR, 'etc')
LSRI_CLIENT_ID = "aa0fcc42-096a-4f9d-b871-aceb1a97d174"
-__version__ = open(os.path.join(PACKAGE_DIR, 'VERSION')).read().strip()
-
SUB_CLI_CONFIG_FILE = ".eva_sub_cli_config.yml"
# Environment variable
@@ -16,3 +14,14 @@
ENA_WEBIN_ACCOUNT_VAR = 'ENA_WEBIN_ACCOUNT'
ENA_WEBIN_PASSWORD_VAR = 'ENA_WEBIN_PASSWORD'
+
+try:
+    # If setuptools_scm is installed, ask it for the version of the tree one level above this package
+    from setuptools_scm import get_version
+    __version__ = get_version(root='..', relative_to=__file__)
+    del get_version
+except Exception:  # not a bare except: KeyboardInterrupt/SystemExit must still propagate
+    # setuptools_scm is missing or could not determine a version: fall back to the generated _version file
+    from ._version import version as __version__
+
+
diff --git a/eva_sub_cli/executables/__init__.py b/eva_sub_cli/executables/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/bin/check_fasta_insdc.py b/eva_sub_cli/executables/check_fasta_insdc.py
similarity index 99%
rename from bin/check_fasta_insdc.py
rename to eva_sub_cli/executables/check_fasta_insdc.py
index 8b2c727..6635dca 100644
--- a/bin/check_fasta_insdc.py
+++ b/eva_sub_cli/executables/check_fasta_insdc.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
import argparse
import gzip
import hashlib
@@ -186,6 +184,3 @@ def main():
results = assess_fasta(args.input_fasta, analyses, metadata_insdc)
write_result_yaml(args.output_yaml, results)
-
-if __name__ == "__main__":
- main()
diff --git a/bin/check_metadata_semantics.py b/eva_sub_cli/executables/check_metadata_semantics.py
similarity index 94%
rename from bin/check_metadata_semantics.py
rename to eva_sub_cli/executables/check_metadata_semantics.py
index 07705ec..68bf75e 100644
--- a/bin/check_metadata_semantics.py
+++ b/eva_sub_cli/executables/check_metadata_semantics.py
@@ -16,7 +16,3 @@ def main():
checker = SemanticMetadataChecker(metadata)
checker.check_all()
checker.write_result_yaml(args.output_yaml)
-
-
-if __name__ == "__main__":
- main()
diff --git a/bin/eva-sub-cli.py b/eva_sub_cli/executables/cli.py
similarity index 91%
rename from bin/eva-sub-cli.py
rename to eva_sub_cli/executables/cli.py
index 8e52d42..9a62683 100755
--- a/bin/eva-sub-cli.py
+++ b/eva_sub_cli/executables/cli.py
@@ -1,6 +1,6 @@
-#!/usr/bin/env python
import sys
+import eva_sub_cli
from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException
@@ -13,8 +13,8 @@
from argparse import ArgumentParser
from ebi_eva_common_pyutils.logger import logging_config
-from eva_sub_cli import main
-from eva_sub_cli.main import VALIDATE, SUBMIT, DOCKER, NATIVE
+from eva_sub_cli import orchestrator
+from eva_sub_cli.orchestrator import VALIDATE, SUBMIT, DOCKER, NATIVE
from eva_sub_cli.file_utils import is_submission_dir_writable
@@ -36,15 +36,9 @@ def validate_command_line_arguments(args, argparser):
sys.exit(1)
-def get_version():
- base_dir = os.path.abspath(os.path.dirname(main.__file__))
- version = open(os.path.join(base_dir, 'VERSION')).read().strip()
- return f'{version}'
-
-
-if __name__ == "__main__":
+def main():
argparser = ArgumentParser(prog='eva-sub-cli', description='EVA Submission CLI - validate and submit data to EVA')
- argparser.add_argument('--version', action='version', version=f'%(prog)s {get_version()}')
+ argparser.add_argument('--version', action='version', version=f'%(prog)s {eva_sub_cli.__version__}')
argparser.add_argument('--submission_dir', required=True, type=str,
help='Full path to the directory where all processing will be done '
'and submission info is/will be stored')
@@ -83,7 +77,7 @@ def get_version():
try:
# Pass on all the arguments
- main.orchestrate_process(**args.__dict__)
+ orchestrator.orchestrate_process(**args.__dict__)
except FileNotFoundError as fne:
print(fne)
except SubmissionNotFoundException as snfe:
diff --git a/bin/samples_checker.py b/eva_sub_cli/executables/samples_checker.py
similarity index 99%
rename from bin/samples_checker.py
rename to eva_sub_cli/executables/samples_checker.py
index 54e2ae6..0e99efd 100644
--- a/bin/samples_checker.py
+++ b/eva_sub_cli/executables/samples_checker.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
import argparse
import gzip
import json
@@ -149,7 +148,3 @@ def main():
args = arg_parser.parse_args()
logging_config.add_stdout_handler()
check_sample_name_concordance(args.metadata_json, args.vcf_files, args.output_yaml)
-
-
-if __name__ == "__main__":
- main()
diff --git a/bin/xlsx2json.py b/eva_sub_cli/executables/xlsx2json.py
similarity index 99%
rename from bin/xlsx2json.py
rename to eva_sub_cli/executables/xlsx2json.py
index 767adaa..473046d 100644
--- a/bin/xlsx2json.py
+++ b/eva_sub_cli/executables/xlsx2json.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
import argparse
import datetime
import json
@@ -368,7 +367,3 @@ def main():
parser.add_error(e)
finally:
parser.save_errors(args.errors_yaml)
-
-
-if __name__ == "__main__":
- main()
diff --git a/eva_sub_cli/jinja_templates/html_report.html b/eva_sub_cli/jinja_templates/html_report.html
index e32f800..dade5de 100644
--- a/eva_sub_cli/jinja_templates/html_report.html
+++ b/eva_sub_cli/jinja_templates/html_report.html
@@ -42,6 +42,7 @@
Validation Report
+ eva-sub-cli v{{cli_version}}
diff --git a/eva_sub_cli/main.py b/eva_sub_cli/orchestrator.py
similarity index 100%
rename from eva_sub_cli/main.py
rename to eva_sub_cli/orchestrator.py
diff --git a/eva_sub_cli/report.py b/eva_sub_cli/report.py
index 7640105..f85b73f 100644
--- a/eva_sub_cli/report.py
+++ b/eva_sub_cli/report.py
@@ -3,6 +3,8 @@
from jinja2 import Environment, FileSystemLoader
+import eva_sub_cli
+
current_dir = os.path.dirname(__file__)
@@ -22,6 +24,7 @@ def generate_html_report(validation_results, validation_date, submission_dir, vc
loader=FileSystemLoader(os.path.join(current_dir, 'jinja_templates'))
).get_template('html_report.html')
rendered_template = template.render(
+ cli_version=eva_sub_cli.__version__,
logo_data=get_logo_data(),
project_title=project_title,
validation_date=validation_date,
diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
index 1e2a973..9795531 100755
--- a/eva_sub_cli/validators/validator.py
+++ b/eva_sub_cli/validators/validator.py
@@ -580,7 +580,6 @@ def get_vcf_fasta_analysis_mapping(self):
else:
self.error('Error building validation report : Metadata file not present')
-
def create_reports(self):
report_html = generate_html_report(self.results, self.validation_date, self.submission_dir,
self.get_vcf_fasta_analysis_mapping(), self.project_title)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..c686ff7
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,50 @@
+[build-system]
+requires = [
+ "setuptools >= 64",
+ "setuptools_scm[toml] >= 8",
+ "setuptools_scm_git_archive",
+ "wheel >= 0.29.0",
+]
+build-backend = 'setuptools.build_meta'
+
+[project]
+name = 'eva_sub_cli'
+description = 'EBI EVA - validation and submission command line tool'
+dynamic = ["version", 'requires-python', 'dependencies']
+readme = 'README.md'
+classifiers = [
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Console',
+ 'Intended Audience :: Science/Research',
+ 'Natural Language :: English',
+ 'Operating System :: POSIX :: Linux',
+ 'Operating System :: MacOS :: MacOS X',
+ 'Programming Language :: Python :: 3.6',
+ 'Programming Language :: Python :: 3.7',
+ 'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3.9',
+ 'Programming Language :: Python :: 3.10',
+ 'Topic :: Scientific/Engineering :: Bio-Informatics',
+ 'Topic :: Communications :: File Sharing',
+    'License :: OSI Approved :: Apache Software License',
+]
+
+[project.scripts]
+'eva-sub-cli.py'='eva_sub_cli.executables.cli:main'
+'check_fasta_insdc.py'='eva_sub_cli.executables.check_fasta_insdc:main'
+'check_metadata_semantics.py'='eva_sub_cli.executables.check_metadata_semantics:main'
+'samples_checker.py'='eva_sub_cli.executables.samples_checker:main'
+'xlsx2json.py'='eva_sub_cli.executables.xlsx2json:main'
+
+[tool.setuptools]
+packages = ['eva_sub_cli', 'eva_sub_cli.exceptions', 'eva_sub_cli.executables', 'eva_sub_cli.validators']
+
+[tool.setuptools.package-data]
+'eva_sub_cli'=['nextflow/*', 'etc/*', 'jinja_templates/*']
+
+[tool.setuptools.dynamic]
+dependencies = {file = 'requirements.txt'}
+
+[tool.setuptools_scm]
+write_to = 'eva_sub_cli/_version.py'
+
diff --git a/setup.cfg b/setup.cfg
index bbff137..2ebefc5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,3 @@
-[metadata]
-description-file = README.md
[zest.releaser]
tag-format = v{version}
@@ -12,3 +10,6 @@ ignore =
tests/*/*/*/*
.gitlab-ci.yml
+[options]
+python_requires = >=3.6
+
diff --git a/setup.py b/setup.py
index 762d87a..1abbd06 100644
--- a/setup.py
+++ b/setup.py
@@ -1,30 +1,4 @@
-import glob
-from distutils.core import setup
-from os.path import join, abspath, dirname
-from setuptools import find_packages
+import setuptools
-base_dir = abspath(dirname(__file__))
-requirements_txt = join(base_dir, 'requirements.txt')
-requirements = [l.strip() for l in open(requirements_txt) if l and not l.startswith('#')]
-
-version = open(join(base_dir, 'eva_sub_cli', 'VERSION')).read().strip()
-
-setup(
- name='eva_sub_cli',
- packages=find_packages(),
- package_data={'eva_sub_cli': ['nextflow/*', 'etc/*', 'VERSION', 'jinja_templates/*']},
- version=version,
- license='Apache',
- description='EBI EVA - validation and submission command line tool',
- url='https://github.com/EBIvariation/eva-sub-cli',
- keywords=['ebi', 'eva', 'python', 'submission', 'validation'],
- install_requires=requirements,
- classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: Science/Research',
- 'Topic :: Communications :: File Sharing',
- 'License :: OSI Approved :: Apache Software License',
- 'Programming Language :: Python :: 3'
- ],
- scripts=glob.glob(join(dirname(__file__), 'bin', '*.py'))
-)
+if __name__ == "__main__":
+ setuptools.setup()
diff --git a/tests/resources/validation_reports/expected_report.html b/tests/resources/validation_reports/expected_report.html
index 51861b6..46df980 100644
--- a/tests/resources/validation_reports/expected_report.html
+++ b/tests/resources/validation_reports/expected_report.html
@@ -19,4 +19,4 @@
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
.info { background-color: #dadada; }
- .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | | Project Title | In sheet "Project", column "Project Title" is not populated |
Project | | Description | In sheet "Project", column "Description" is not populated |
Project | | Tax ID | In sheet "Project", column "Tax ID" is not populated |
Project | | Center | In sheet "Project", column "Center" is not populated |
Analysis | 2 | Analysis Title | In sheet "Analysis", row "2", column "Analysis Title" is not populated |
Analysis | 2 | Description | In sheet "Analysis", row "2", column "Description" is not populated |
Analysis | 2 | Experiment Type | In sheet "Analysis", row "2", column "Experiment Type" is not populated |
Analysis | 2 | Reference | In sheet "Analysis", row "2", column "Reference" is not populated |
Sample | 3 | Sample Accession | In sheet "Sample", row "3", column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
+ .error-list, .no-show { display: none; }Project Summary
General details about the project
Project Title: My cool project
Validation Date: 2023-08-31 12:34:56
Submission Directory: /test/submission/dir
▶ Files mapping
VCF File | Fasta File | Analysis |
---|
input_fail.vcf | input_fail.fa | A |
input_pass.vcf | input_pass.fa | B |
input_test.vcf | input_test.fa | could not be linked |
Metadata validation results
Ensures that required fields are present and values are formatted correctly. For requirements, please refer to the
EVA website.
▶ ❌ Metadata validation check
Full report: /path/to/metadata/metadata_spreadsheet_validation.txt
Sheet | Row | Column | Description |
---|
Files | | | Sheet "Files" is missing |
Project | | Project Title | In sheet "Project", column "Project Title" is not populated |
Project | | Description | In sheet "Project", column "Description" is not populated |
Project | | Tax ID | In sheet "Project", column "Tax ID" is not populated |
Project | | Center | In sheet "Project", column "Center" is not populated |
Analysis | 2 | Analysis Title | In sheet "Analysis", row "2", column "Analysis Title" is not populated |
Analysis | 2 | Description | In sheet "Analysis", row "2", column "Description" is not populated |
Analysis | 2 | Experiment Type | In sheet "Analysis", row "2", column "Experiment Type" is not populated |
Analysis | 2 | Reference | In sheet "Analysis", row "2", column "Reference" is not populated |
Sample | 3 | Sample Accession | In sheet "Sample", row "3", column "Sample Accession" is not populated |
VCF validation results
Checks whether each file is compliant with the
VCF specification. Also checks whether the variants' reference alleles match against the reference assembly.
input_fail.vcf
▶ ❌ Assembly check: 26/36 (72.22%)
First 10 errors per category are below. Full report: /path/to/assembly_failed/report
Category | Error |
---|
Parsing Error | The assembly checking could not be completed: Contig 'chr23' not found in assembly report |
mismatch error | Chromosome 1, position 35549, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35595, reference allele 'G' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35618, reference allele 'G' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35626, reference allele 'A' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35639, reference allele 'T' does not match the reference sequence, expected 'c' |
mismatch error | Chromosome 1, position 35643, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35717, reference allele 'T' does not match the reference sequence, expected 'g' |
mismatch error | Chromosome 1, position 35819, reference allele 'T' does not match the reference sequence, expected 'a' |
mismatch error | Chromosome 1, position 35822, reference allele 'T' does not match the reference sequence, expected 'c' |
▶ ❌ VCF check: 1 critical errors, 1 non-critical errors
First 10 errors per category are below. Full report: /path/to/vcf_failed/report
Category | Error |
---|
critical error | Line 4: Error in meta-data section. |
non-critical error | Sample #11, field AD does not match the meta specification Number=R (expected 2 value(s)). AD=.. |
input_passed.vcf
✔ Assembly check: 247/247 (100.0%)
✔ VCF check: 0 critical errors, 0 non-critical errors
Sample name concordance check
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
▶ ❌ Analysis A: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleA1, SampleA2 , SampleA3, SampleA4, SampleA5 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | A1Sample , A2Sample, A3Sample, A4Sample, A5Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- A1Sample•
- •A2Sample
- A3Sample
- A4Sample
- A5Sample
- A6Sample
- A7Sample
- A8Sample
- A9Sample
- A10Sample
Hide ✔ Analysis B: Sample names in metadata match with those in VCF files
▶ ❌ Analysis C: Sample names in metadata do not match with those in VCF files
Category | First 5 Errors For Category | Link To View All Errors |
---|
Samples described in the metadata but not in the VCF files | SampleC1 , SampleC2, SampleC3, SampleC4 | Show All Errors For Category |
Samples in the VCF files but not described in the metadata | C1Sample , C2Sample, C3Sample, C4Sample | Show All Errors For Category |
All Errors For Category - Samples in the VCF files but not described in the metadata:
- C1Sample•
- •C2Sample
- C3Sample
- C4Sample
HideReference genome INSDC check
Checks that the reference sequences in the FASTA file used to call the variants are accessioned in INSDC. Also checks if the reference assembly accession in the metadata matches the one determined from the FASTA file.
metadata_asm_match.fa
✔ All sequences are INSDC accessioned
✔ Analysis A: Assembly accession in metadata is compatible
metadata_asm_not_found.fa
✔ All sequences are INSDC accessioned
▶ ❌ No assembly accession found in metadata
Full report: /path/to/metadata_asm_not_found.yml
Category | Accessions |
---|
Assembly accession found in metadata | Not found |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_asm_not_match.fa
✔ All sequences are INSDC accessioned
▶ ❌ Analysis B: Assembly accession in metadata is not compatible
Full report: /path/to/metadata_asm_not_match.yml
Category | Accessions |
---|
Assembly accession found in metadata | GCA_2 |
Assembly accession(s) compatible with FASTA | GCA_1 |
metadata_error.fa
Warning: The following results may be incomplete due to problems with external services. Please try again later for complete results.
Error message: 500 Server Error: Internal Server Error for url: https://www.ebi.ac.uk/eva/webservices/contig-alias/v1/chromosomes/md5checksum/hjfdoijsfc47hfg0gh9qwjrve
✔ All sequences are INSDC accessioned
✔ Analysis C: Assembly accession in metadata is compatible
not_all_insdc.fa
▶ ❌ Some sequences are not INSDC accessioned
First 10 sequences not in INSDC. Full report: /path/to/not_all_insdc_check.yml
Sequence name | Refget md5 |
---|
2 | hjfdoijsfc47hfg0gh9qwjrve |
✔ Analysis A: Assembly accession in metadata is compatible
\ No newline at end of file
diff --git a/tests/test_check_fasta_insdc.py b/tests/test_check_fasta_insdc.py
index 116e6cc..d3447a6 100644
--- a/tests/test_check_fasta_insdc.py
+++ b/tests/test_check_fasta_insdc.py
@@ -5,7 +5,7 @@
import pytest
import requests as requests
-from bin.check_fasta_insdc import assess_fasta, get_analyses_and_reference_genome_from_metadata
+from eva_sub_cli.executables.check_fasta_insdc import assess_fasta, get_analyses_and_reference_genome_from_metadata
class TestFastaChecker(TestCase):
@@ -20,8 +20,8 @@ def test_get_analysis_and_reference_genome_from_metadata(self):
def test_assess_fasta_is_insdc(self):
input_fasta = os.path.join(self.resource_dir, 'fasta_files', 'Saccharomyces_cerevisiae_I.fa')
- with patch('bin.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
- patch('bin.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
+ with patch('eva_sub_cli.executables.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
+ patch('eva_sub_cli.executables.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
m_get_refget.return_value = {'sequence_name': 'chr1'}
m_get_assemblies.return_value = {'GCA_000146045.2'}
results = assess_fasta(input_fasta, ['analysis'], None)
@@ -30,8 +30,8 @@ def test_assess_fasta_is_insdc(self):
'sequences': [{'sequence_name': 'I', 'sequence_md5': '6681ac2f62509cfc220d78751b8dc524', 'insdc': True}],
'possible_assemblies': {'GCA_000146045.2'}
}
- with patch('bin.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
- patch('bin.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
+ with patch('eva_sub_cli.executables.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
+ patch('eva_sub_cli.executables.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
m_get_refget.return_value = None
m_get_assemblies.return_value = set()
results = assess_fasta(input_fasta, ['analysis'], None)
@@ -42,8 +42,8 @@ def test_assess_fasta_is_insdc(self):
def test_assess_fasta_matches_metadata(self):
input_fasta = os.path.join(self.resource_dir, 'fasta_files', 'Saccharomyces_cerevisiae_I.fa')
- with patch('bin.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
- patch('bin.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
+ with patch('eva_sub_cli.executables.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
+ patch('eva_sub_cli.executables.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
m_get_refget.return_value = {'sequence_name': 'I'}
m_get_assemblies.return_value = {'GCA_000146045.2'}
results = assess_fasta(input_fasta, ['analysis'], 'GCA_000146045.2')
@@ -69,8 +69,8 @@ def test_assess_fasta_matches_metadata(self):
def test_assess_fasta_http_error(self):
input_fasta = os.path.join(self.resource_dir, 'fasta_files', 'Saccharomyces_cerevisiae_I.fa')
- with patch('bin.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
- patch('bin.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
+ with patch('eva_sub_cli.executables.check_fasta_insdc.get_refget_metadata', autospec=True) as m_get_refget, \
+ patch('eva_sub_cli.executables.check_fasta_insdc._get_containing_assemblies_paged', autospec=True) as m_get_assemblies:
m_get_refget.return_value = {'sequence_name': 'I'}
m_get_assemblies.side_effect = requests.HTTPError('500 Internal Server Error')
results = assess_fasta(input_fasta, ['analysis'], None)
diff --git a/tests/test_main.py b/tests/test_orchestrator.py
similarity index 82%
rename from tests/test_main.py
rename to tests/test_orchestrator.py
index 2a19196..1d39590 100644
--- a/tests/test_main.py
+++ b/tests/test_orchestrator.py
@@ -10,12 +10,12 @@
from eva_sub_cli import SUB_CLI_CONFIG_FILE
from eva_sub_cli.exceptions.submission_not_found_exception import SubmissionNotFoundException
from eva_sub_cli.exceptions.submission_status_exception import SubmissionStatusException
-from eva_sub_cli.main import orchestrate_process, VALIDATE, SUBMIT, DOCKER, check_validation_required
+from eva_sub_cli.orchestrator import orchestrate_process, VALIDATE, SUBMIT, DOCKER, check_validation_required
from eva_sub_cli.submit import SUB_CLI_CONFIG_KEY_SUBMISSION_ID
from eva_sub_cli.validators.validator import READY_FOR_SUBMISSION_TO_EVA
-class TestMain(unittest.TestCase):
+class TestOrchestrator(unittest.TestCase):
project_title = 'Example Project'
resource_dir = os.path.join(os.path.dirname(__file__), 'resources')
test_sub_dir = os.path.join(resource_dir, 'test_sub_dir')
@@ -68,10 +68,10 @@ def test_check_validation_required(self):
check_validation_required(tasks, sub_config)
def test_orchestrate_validate(self):
- with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
- patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.get_vcf_files') as m_get_vcf, \
+ patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
m_get_project_title_and_create_vcf_files_mapping.return_value = self.project_title, self.mapping_file
orchestrate_process(self.test_sub_dir, None, None, self.metadata_json,
self.metadata_xlsx, tasks=[VALIDATE], executor=DOCKER)
@@ -84,11 +84,11 @@ def test_orchestrate_validate(self):
m_docker_validator().validate_and_report.assert_called_once_with()
def test_orchestrate_validate_submit(self):
- with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
- patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \
- patch('eva_sub_cli.main.StudySubmitter') as m_submitter:
+ with patch('eva_sub_cli.orchestrator.get_vcf_files') as m_get_vcf, \
+ patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator, \
+ patch('eva_sub_cli.orchestrator.StudySubmitter') as m_submitter:
# Empty config
config = WritableConfig()
m_config.return_value = config
@@ -111,11 +111,11 @@ def test_orchestrate_validate_submit(self):
submitter.submit.assert_called_once_with()
def test_orchestrate_submit_no_validate(self):
- with patch('eva_sub_cli.main.get_vcf_files') as m_get_vcf, \
- patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator, \
- patch('eva_sub_cli.main.StudySubmitter') as m_submitter:
+ with patch('eva_sub_cli.orchestrator.get_vcf_files') as m_get_vcf, \
+ patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.get_project_title_and_create_vcf_files_mapping') as m_get_project_title_and_create_vcf_files_mapping, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator, \
+ patch('eva_sub_cli.orchestrator.StudySubmitter') as m_submitter:
# Empty config
m_config.return_value = {READY_FOR_SUBMISSION_TO_EVA: True}
m_get_project_title_and_create_vcf_files_mapping.return_value = self.project_title, self.mapping_file
@@ -133,8 +133,8 @@ def test_orchestrate_submit_no_validate(self):
submitter.submit.assert_called_once_with()
def test_orchestrate_with_vcf_files(self):
- with patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
orchestrate_process( self.test_sub_dir, self.vcf_files, self.reference_fasta, self.metadata_json,
self.metadata_xlsx, tasks=[VALIDATE], executor=DOCKER)
# Mapping file was created from the vcf and assembly files
@@ -152,8 +152,8 @@ def test_orchestrate_with_vcf_files(self):
def test_orchestrate_with_metadata_json_without_asm_report(self):
- with patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
orchestrate_process(self.test_sub_dir, None, None, self.metadata_json,
None, tasks=[VALIDATE], executor=DOCKER)
# Mapping file was created from the metadata_json
@@ -172,8 +172,8 @@ def test_orchestrate_with_metadata_json_without_asm_report(self):
def test_orchestrate_with_metadata_json_with_asm_report(self):
shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test_with_asm_report.json'), self.metadata_json)
- with patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
orchestrate_process(self.test_sub_dir, None, None, self.metadata_json, None,
tasks=[VALIDATE], executor=DOCKER)
# Mapping file was created from the metadata_json
@@ -193,8 +193,8 @@ def test_orchestrate_with_metadata_json_with_asm_report(self):
def test_orchestrate_vcf_files_takes_precedence_over_metadata(self):
shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test_with_asm_report.json'), self.metadata_json)
- with patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
orchestrate_process(self.test_sub_dir, self.vcf_files, self.reference_fasta, self.metadata_json,
None, tasks=[VALIDATE], executor=DOCKER, resume=False)
# Mapping file was created from the metadata_json
@@ -215,8 +215,8 @@ def test_orchestrate_vcf_files_takes_precedence_over_metadata(self):
def test_orchestrate_with_metadata_xlsx(self):
shutil.copy(os.path.join(self.resource_dir, 'EVA_Submission_test.xlsx'), self.metadata_xlsx)
- with patch('eva_sub_cli.main.WritableConfig') as m_config, \
- patch('eva_sub_cli.main.DockerValidator') as m_docker_validator:
+ with patch('eva_sub_cli.orchestrator.WritableConfig') as m_config, \
+ patch('eva_sub_cli.orchestrator.DockerValidator') as m_docker_validator:
orchestrate_process(self.test_sub_dir, None, None, None, self.metadata_xlsx,
tasks=[VALIDATE], executor=DOCKER)
# Mapping file was created from the metadata_xlsx
diff --git a/tests/test_report.py b/tests/test_report.py
index 4f55182..4b55ee1 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -2,6 +2,7 @@
import datetime
from unittest import TestCase
+import eva_sub_cli
from eva_sub_cli.report import generate_html_report
validation_results = {
@@ -190,7 +191,10 @@ def test_generate_html_report(self):
open_file.write(report)
with open(self.expected_report) as open_html:
- assert report == open_html.read()
+ expected_report_text = open_html.read()
+ # Inject the version in the expected report
+ expected_report_text = expected_report_text.replace('cligeneratedversion', eva_sub_cli.__version__)
+ assert report == expected_report_text
# Remove output file if assert passes
if os.path.exists('report.html'):
diff --git a/tests/test_samples_checker.py b/tests/test_samples_checker.py
index 8e63a70..3b37c20 100644
--- a/tests/test_samples_checker.py
+++ b/tests/test_samples_checker.py
@@ -3,7 +3,7 @@
import yaml
-from bin.samples_checker import check_sample_name_concordance
+from eva_sub_cli.executables.samples_checker import check_sample_name_concordance
class TestSampleChecker(TestCase):
diff --git a/tests/test_xlsx2json.py b/tests/test_xlsx2json.py
index 84a6c63..79948f7 100644
--- a/tests/test_xlsx2json.py
+++ b/tests/test_xlsx2json.py
@@ -6,7 +6,7 @@
import yaml
from eva_sub_cli import ETC_DIR
-from bin.xlsx2json import XlsxParser, create_xls_template_from_yaml
+from eva_sub_cli.executables.xlsx2json import XlsxParser, create_xls_template_from_yaml
class TestXlsReader(TestCase):