Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3275: Improvements to validation report #9

Merged
merged 2 commits into from
Sep 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ jobs:
strategy:
matrix:
python-version: [3.9]
nextflow-version: [21.10.5]

steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies ${{ matrix.nextflow-version }}
- name: Install dependencies and build docker image
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

# PyCharm
.idea/
4 changes: 2 additions & 2 deletions cli/docker_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,11 +163,11 @@ def verify_container_is_running(self):
return False

def verify_container_is_stopped(self):
container_stop_cmd_ouptut = run_command_with_output(
container_stop_cmd_output = run_command_with_output(
"check if container is stopped",
f"{self.docker_path} ps -a"
)
if container_stop_cmd_ouptut is not None and self.container_name in container_stop_cmd_ouptut:
if container_stop_cmd_output is not None and self.container_name in container_stop_cmd_output:
logger.info(f"Container ({self.container_name}) is in stop state")
return True
else:
Expand Down
124 changes: 68 additions & 56 deletions cli/jinja_templates/file_validation.html
Original file line number Diff line number Diff line change
@@ -1,61 +1,73 @@

{% macro file_validation_report(validation_results, file_name) -%}
<ul>
{% for check_type, check_per_file in validation_results.items() %}
{% set result = check_per_file.get(file_name, {}) %}
{% if check_type == "assembly_check" %}
{% set nb_match = result.get("match", 0) %}
{% set nb_total = result.get("total", 0) %}
{% set match_percentage = nb_match / nb_total * 100 %}
{% if result.get("nb_mismatch", 0) > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "fail collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "pass" %}
{% endif %}
<li class='{{ row_class }}'>{{ icon }} Assembly check: {{ nb_match }}/{{ nb_total }} ({{ match_percentage|round(2) }}%)</li>
{% set mismatch_list = result.get("mismatch_list") %}
{% if mismatch_list %}
<div class='error-list'>
<ul>
{% for error in mismatch_list %}
<li><strong>{{ check_type }} error:</strong> {{ error }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% elif check_type == "vcf_check" %}
{% set critical_count = result.get("critical_count", 0) %}
{% set error_count = result.get("error_count", 0) %}
{% set warning_count = result.get("warning_count", 0) %}
{% if critical_count > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "fail collapsible" %}
{% elif error_count > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "warn collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "pass" %}
{% endif %}
<li class='{{ row_class }}'>{{ icon }} VCF check: {{ critical_count }} critical errors {{ error_count }} non critical error {{ warning_count }} warning </li>
{% set critical_list = result.get("critical_list") %}
{% set error_list = result.get("error_list") %}
{% for check_type, check_per_file in validation_results.items() %}
{% set result = check_per_file.get(file_name, {}) %}
{% if check_type == "assembly_check" %}
{% set nb_match = result.get("match", 0) %}
{% set nb_total = result.get("total", 0) %}
{% set match_percentage = nb_match / nb_total * 100 %}
{% if result.get("nb_mismatch", 0) > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "report-section fail collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "report-section pass" %}
{% endif %}
<div class='{{ row_class }}'>{{ icon }} Assembly check: {{ nb_match }}/{{ nb_total }} ({{ match_percentage|round(2) }}%)</div>
{% set mismatch_list = result.get("mismatch_list") %}
{% if mismatch_list %}
<div class="error-list">
<div class="error-description">First 10 errors per category are below. <strong>Full report:</strong> {{ result.get('report_path', '') }}</div>
<table>
<tr>
<th>Category</th><th>Error</th>
</tr>
{% for error in mismatch_list[:10] %}
<tr>
<td><strong>mismatch error</strong></td><td> {{ error }}</td>
</tr>
{% endfor %}
</table>
</div>
{% endif %}
{% elif check_type == "vcf_check" %}
{% set critical_count = result.get("critical_count", 0) %}
{% set error_count = result.get("error_count", 0) %}
{% set warning_count = result.get("warning_count", 0) %}
{% if critical_count > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "report-section fail collapsible" %}
{% elif error_count > 0 %}
{% set icon = "&#10060;" %}
{% set row_class = "report-section warn collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "report-section pass" %}
{% endif %}
<div class='{{ row_class }}'>{{ icon }} VCF check: {{ critical_count }} critical errors, {{ error_count }} non-critical errors, {{ warning_count }} warnings </div>
{% set critical_list = result.get("critical_list") %}
{% set error_list = result.get("error_list") %}

{% if critical_list or error_list %}
<div class='error-list'>
<ul>
{% for error in critical_list %}
<li><strong>{{ check_type }} error:</strong> {{ error }}</li>
{% endfor %}
{% for error in error_list %}
<li><strong>{{ check_type }} error:</strong> {{ error }}</li>
{% endfor %}
</ul>
</div>
{% endif %}
{% if critical_list or error_list%}
<div class="error-list">
<div class="error-description">First 10 errors per category are below. <strong>Full report:</strong> {{ result.get('report_path', '') }}</div>
<table>
<tr>
<th>Category</th><th>Error</th>
</tr>
{% for error in critical_list[:10] %}
<tr>
<td><strong>critical error</strong></td><td> {{ error }}</td>
</tr>
{% endfor %}
{% for error in error_list[:10] %}
<tr>
<td><strong>non-critical error</strong></td><td> {{ error }}</td>
</tr>
{% endfor %}
</table>
</div>
{% endif %}
{% endfor %}
</ul>
{% endif %}
{% endfor %}
{%- endmacro %}
80 changes: 65 additions & 15 deletions cli/jinja_templates/html_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,86 @@
<meta charset="UTF-8">
<title>Validation Report</title>
<style>
table { border-collapse: collapse; }
body { font-family: Verdana, sans-serif; }

.logo { float: left; padding: 20px; }
.title { padding: 8px; }
header { overflow: auto; }
header::after { clear: both; }

section { padding: 20px 20px 10px 40px; }
.description { padding-bottom: 10px}
.error-description { padding: 10px 0px 10px}

.report-section { padding: 12px; border: 1px solid white; }
.active, .collapsible:hover, .fail { background-color: #e6a3ae; }

table { border-collapse: collapse; padding: 10px; }
th, td { border: 1px solid black; padding: 8px; text-align: left; }
th { background-color: lightgrey; }
tr.fail { background-color: #FFB6C1; }
tr.pass { background-color: #90EE90; }
.fail { background-color: #FFB6C1; }
.pass { background-color: #90EE90; }
.error-list { display: none; }
</style>
</head>
<body>
<img src="data:image/png;base64,{{ logo_data }}" width="100" height="100">
<h1>Validation Report</h1>
<h2>Metadata validation results</h2>
{{ metadata_validation_report(validation_results) }}

{% for file_name in file_names %}
<h2>Validation results for file {{ file_name }}</h2>
{{ file_validation_report(validation_results, file_name) }}
{% endfor %}
<header>
<div class="logo">
<img src="data:image/png;base64,{{ logo_data }}" width="100" height="100" alt="EVA logo">
</div>
<div class="title">
{% if project_title %}
<h1>Validation Report: {{ project_title }}</h1>
{% else %}
<h1>Validation Report</h1>
{% endif %}
Generated at {{ validation_date }}
</div>
</header>

<section>
<h2>Metadata validation results</h2>
<div class="description">
Ensures that required fields are present and values are formatted correctly.
For requirements, please refer to the <a href="https://www.ebi.ac.uk/eva/?Submit-Data">EVA website</a>.
</div>
{{ metadata_validation_report(validation_results) }}
</section>

<section>
<h2>VCF validation results</h2>
<div class="description">
Checks whether each file is compliant with the <a href="http://samtools.github.io/hts-specs/VCFv4.3.pdf">VCF specification</a>.
Also checks whether the variants' reference alleles match against the reference assembly.
</div>
{% for file_name in file_names %}
<h3>{{ file_name }}</h3>
{{ file_validation_report(validation_results, file_name) }}
{% endfor %}
</section>

<h2>Sample name concordance check</h2>
{{ sample_name_check_report(validation_results)}}
<section>
<h2>Sample name concordance check</h2>
<div class="description">
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Checks whether information in the metadata is concordant with that contained in the VCF files, in particular sample names.
Checks whether information in the metadata (e.g. sample names) is concordant with that contained in the VCF files.

</div>
{{ sample_name_check_report(validation_results)}}
</section>

<script>
let collapsibles = document.querySelectorAll('.collapsible');
for (let collapsible of collapsibles) {
collapsible.addEventListener('click', function() {
this.classList.toggle('active');
let content = this.nextElementSibling;
if (content.style.display === 'block') { content.style.display = 'none'; }
else { content.style.display = 'block'; }
if (content.style.display === 'block') {
content.style.display = 'none';
}
else {
content.style.display = 'block';
content.scrollIntoView({block: 'nearest'});
}
});
}
</script>
Expand Down
27 changes: 17 additions & 10 deletions cli/jinja_templates/metadata_validation.html
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@

{% macro metadata_validation_report(validation_results) -%}
{% set json_errors = validation_results.get('metadata_check', {}).get('json_errors', []) %}
{% set results = validation_results.get('metadata_check', {}) %}
{% set json_errors = results.get('json_errors', []) %}
{% if json_errors %}
{% set icon = "&#10060;" %}
{% set row_class = "fail collapsible" %}
{% set row_class = "report-section fail collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "pass" %}
{% set row_class = "report-section pass" %}
{% endif %}
<li class='{{ row_class }}'>{{ icon }} Metadata validation check </li>
<div class='{{ row_class }}'>{{ icon }} Metadata validation check </div>
{% if json_errors %}
<div class='error-list'>
<ul>
{% for error in json_errors %}
<li><strong> {{ error.get('property') }} : {{ error.get('description') }} </strong></li>
{% endfor %}
</ul>
<div class="error-list">
<div class="error-description"><strong>Full report:</strong> {{ results.get('report_path', '') }}</div>
<table>
<tr>
<th>Property</th><th>Error</th>
</tr>
{% for error in json_errors %}
<tr>
<td><strong>{{ error.get('property') }}</strong></td><td> {{ error.get('description') }}</td>
</tr>
{% endfor %}
</table>
</div>
{% endif %}
{%- endmacro %}
26 changes: 18 additions & 8 deletions cli/jinja_templates/sample_name_check.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,28 @@
{% for analysis, results_for_analysis in results.get('results_per_analysis', {}).items() %}
{% if results_for_analysis.get('difference') %}
{% set icon = "&#10060;" %}
{% set row_class = "fail collapsible" %}
{% set row_class = "report-section fail collapsible" %}
{% else %}
{% set icon = "&#10004;" %}
{% set row_class = "pass" %}
{% set row_class = "report-section pass" %}
{% endif %}
<li class='{{ row_class }}'>{{ icon }} {{ analysis }}: Sample names concordance check </li>
<div class='{{ row_class }}'>{{ icon }} {{ analysis }}: Sample names concordance check </div>
{% if results_for_analysis.get('difference') %}
<div class='error-list'>
<ul>
<li><strong> List of samples described in the metadata but not in the VCF files:</strong> {{ results_for_analysis.get('more_metadata_submitted_files')|join(", ") }}</li>
<li><strong> List of samples used in the VCF files but not described in the metadata:</strong> {{ results_for_analysis.get('more_submitted_files_metadata')|join(", ") }}</li>
</ul>
<div class="error-list">
<div class="error-description">First 10 errors per category are below. <strong>Full report:</strong> {{ results.get('report_path', '') }}</div>
<table>
<tr>
<th>Category</th><th>Error</th>
</tr>
<tr>
<td><strong>Samples described in the metadata but not in the VCF files</strong></td>
<td>{{ results_for_analysis.get('more_metadata_submitted_files')[:10]|join(", ") }}</td>
</tr>
<tr>
<td><strong>Samples in the VCF files but not described in the metadata</strong></td>
<td>{{ results_for_analysis.get('more_submitted_files_metadata')[:10]|join(", ") }}</td>
</tr>
</table>
</div>
{% endif %}
{% endfor %}
Expand Down
13 changes: 7 additions & 6 deletions cli/report.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import base64
import os.path
from pprint import pprint

from jinja2 import Template, Environment, FileSystemLoader
from jinja2 import Environment, FileSystemLoader
from minify_html import minify_html

current_dir = os.path.dirname(__file__)
Expand All @@ -14,17 +13,19 @@ def get_logo_data():
return logo_data


def generate_html_report(validation_results):
def generate_html_report(validation_results, validation_date, project_title=None):
file_names = sorted(set([file_name
for check in validation_results if check in ["vcf_check", "assembly_check"]
for file_name in validation_results[check]
]))
for check in validation_results if check in ["vcf_check", "assembly_check"]
for file_name in validation_results[check]
]))

template = Environment(
loader=FileSystemLoader(os.path.join(current_dir, 'jinja_templates'))
).get_template('html_report.html')
rendered_template = template.render(
logo_data=get_logo_data(),
project_title=project_title,
validation_date=validation_date,
file_names=file_names,
validation_results=validation_results,
)
Expand Down
Loading