From ef4c75030c9cb6473bea724d06e904192eb31d7e Mon Sep 17 00:00:00 2001 From: tcezard Date: Wed, 6 Nov 2024 10:01:37 +0000 Subject: [PATCH 1/6] When checking the accessioning QC we do not need to count star * allele --- eva_submission/nextflow/accession_and_load.nf | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 07e963b..0aa581c 100644 --- a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -240,10 +240,13 @@ process accession_vcf { # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation. # TODO revert once accessioning pipeline properly registers structural variants - # First grep finds the "Structural variant" reported by the accessioning process, remove the duplicates and count + # First grep finds the "Structural variant" reported by the accessioning process, remove the duplicates, remove the * alleles and count + SV_IN_ACCESSION=\$(grep 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | grep -v "alternate='*'" | cut -d ' ' -f 10- | sort -u | wc -l) # Second grep count the reported number of missing variants in the Accessioning report - [[ \$(grep 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | cut -d ' ' -f 10- | sort -u| wc -l) \ - == \$(grep -oP '\\d+(?= unaccessioned variants need to be checked)' ${params.logs_dir}/${log_filename}.log) ]] + SV_IN_QC_REPORT=\$(grep ' variants that were not found in the accession report' ${params.logs_dir}/${log_filename}.log | sed 's/, AbstractVariant/\n AbstractVariant/g' | grep -v "alternate='*'" | wc -l) + echo "SV_IN_ACCESSION \$SV_IN_ACCESSION" + echo "SV_IN_QC_REPORT \$SV_IN_QC_REPORT" + [[ \$SV_IN_ACCESSION == \$SV_IN_QC_REPORT ]] echo "done" > 
${accessioned_filename}.tmp """ } From dba5fc7fe5fced1823c0a9cc65aa875aa1850516 Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 7 Nov 2024 11:03:48 +0000 Subject: [PATCH 2/6] Ensure grep is not the last command because it exits with 1 when nothing is found --- eva_submission/nextflow/accession_and_load.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 0aa581c..41b3b29 100644 --- a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -243,7 +243,7 @@ process accession_vcf { # First grep finds the "Structural variant" reported by the accessioning process, remove the duplicates, remove the * alleles and count SV_IN_ACCESSION=\$(grep 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | grep -v "alternate='*'" | cut -d ' ' -f 10- | sort -u | wc -l) # Second grep count the reported number of missing variants in the Accessioning report - SV_IN_QC_REPORT=\$(grep ' variants that were not found in the accession report' ${params.logs_dir}/${log_filename}.log | sed 's/, AbstractVariant/\n AbstractVariant/g' | grep -v "alternate='*'" | wc -l) + SV_IN_QC_REPORT=\$(grep ' variants that were not found in the accession report' ${params.logs_dir}/${log_filename}.log | sed 's/, AbstractVariant/\\n AbstractVariant/g' | grep -v "alternate='*'" | wc -l) echo "SV_IN_ACCESSION \$SV_IN_ACCESSION" echo "SV_IN_QC_REPORT \$SV_IN_QC_REPORT" [[ \$SV_IN_ACCESSION == \$SV_IN_QC_REPORT ]] From 709a06616fe3ebfc8c6e69b8a81dd51b02f4a8df Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 7 Nov 2024 11:12:28 +0000 Subject: [PATCH 3/6] Always exit the java command with 0 --- eva_submission/nextflow/accession_and_load.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 41b3b29..407a21f 100644 --- 
a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -236,7 +236,7 @@ process accession_vcf { """ - (java -Xmx${task.memory.toGiga()-1}G -jar $params.jar.accession_pipeline --spring.config.location=file:$params.accession_job_props $pipeline_parameters) || \ + (java -Xmx${task.memory.toGiga()-1}G -jar $params.jar.accession_pipeline --spring.config.location=file:$params.accession_job_props $pipeline_parameters) || true # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation. # TODO revert once accessioning pipeline properly registers structural variants From 89812739662daeb773d5bb92d2024f4a512aa2da Mon Sep 17 00:00:00 2001 From: tcezard Date: Thu, 7 Nov 2024 11:22:46 +0000 Subject: [PATCH 4/6] update the comment --- eva_submission/nextflow/accession_and_load.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 407a21f..6d807af 100644 --- a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -242,7 +242,7 @@ process accession_vcf { # TODO revert once accessioning pipeline properly registers structural variants # First grep finds the "Structural variant" reported by the accessioning process, remove the duplicates, remove the * alleles and count SV_IN_ACCESSION=\$(grep 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | grep -v "alternate='*'" | cut -d ' ' -f 10- | sort -u | wc -l) - # Second grep count the reported number of missing variants in the Accessioning report + # Second grep count the number of missing variants in the Accessioning report after removing the * alleles SV_IN_QC_REPORT=\$(grep ' variants that were not found in the accession report' ${params.logs_dir}/${log_filename}.log | sed 's/, 
AbstractVariant/\\n AbstractVariant/g' | grep -v "alternate='*'" | wc -l) echo "SV_IN_ACCESSION \$SV_IN_ACCESSION" echo "SV_IN_QC_REPORT \$SV_IN_QC_REPORT" From c829973af830e5f18dcab574313fb9773563753f Mon Sep 17 00:00:00 2001 From: tcezard Date: Wed, 13 Nov 2024 15:13:54 +0000 Subject: [PATCH 5/6] Keep the java exit code to check if successful and only check the QC if the java is not successful. The QC check only passes if there are missing variants and there are the same number of missing variants in accessioning and in the report --- eva_submission/nextflow/accession_and_load.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 6d807af..027d642 100644 --- a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -236,17 +236,27 @@ process accession_vcf { """ - (java -Xmx${task.memory.toGiga()-1}G -jar $params.jar.accession_pipeline --spring.config.location=file:$params.accession_job_props $pipeline_parameters) || true + (java -Xmx${task.memory.toGiga()-1}G -jar $params.jar.accession_pipeline --spring.config.location=file:$params.accession_job_props $pipeline_parameters) || java_exit_code=\$? + # need this line to ensure we do not get unbound variable when the java process is successfull + if [ \${java_exit_code:-"Not set"} == "Not set" ]; then java_exit_code=0; fi # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation. # TODO revert once accessioning pipeline properly registers structural variants + + grep -oP '\\d+(?= unaccessioned variants need to be checked)' ${params.logs_dir}/${log_filename}.log || grep_exit_code=\$? 
+ if [ \${grep_exit_code:-"Not set"} == "Not set" ]; then grep_exit_code=0; fi + # First grep finds the "Structural variant" reported by the accessioning process, remove the duplicates, remove the * alleles and count SV_IN_ACCESSION=\$(grep 'Skipped processing structural variant' ${params.logs_dir}/${log_filename}.log | grep -v "alternate='*'" | cut -d ' ' -f 10- | sort -u | wc -l) # Second grep count the number of missing variants in the Accessioning report after removing the * alleles SV_IN_QC_REPORT=\$(grep ' variants that were not found in the accession report' ${params.logs_dir}/${log_filename}.log | sed 's/, AbstractVariant/\\n AbstractVariant/g' | grep -v "alternate='*'" | wc -l) + + echo "java_exit_code \$java_exit_code" + echo "grep_exit_code \$grep_exit_code" echo "SV_IN_ACCESSION \$SV_IN_ACCESSION" echo "SV_IN_QC_REPORT \$SV_IN_QC_REPORT" - [[ \$SV_IN_ACCESSION == \$SV_IN_QC_REPORT ]] + # If the java is successful OR QC reports missing variants (only valid reason for failure) and QC has the same number of skipped variant as the accession + [[ \$java_exit_code == 0 ]] || ([[ \$grep_exit_code == 0 ]] && [[ \$SV_IN_ACCESSION == \$SV_IN_QC_REPORT ]]) echo "done" > ${accessioned_filename}.tmp """ } From 78563dc511480a56f52975d124a20b7b52213d2e Mon Sep 17 00:00:00 2001 From: tcezard Date: Wed, 13 Nov 2024 15:18:05 +0000 Subject: [PATCH 6/6] fix typo --- eva_submission/nextflow/accession_and_load.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/eva_submission/nextflow/accession_and_load.nf b/eva_submission/nextflow/accession_and_load.nf index 027d642..33576e1 100644 --- a/eva_submission/nextflow/accession_and_load.nf +++ b/eva_submission/nextflow/accession_and_load.nf @@ -237,7 +237,7 @@ process accession_vcf { """ (java -Xmx${task.memory.toGiga()-1}G -jar $params.jar.accession_pipeline --spring.config.location=file:$params.accession_job_props $pipeline_parameters) || java_exit_code=\$? 
- # need this line to ensure we do not get unbound variable when the java process is successfull + # need this line to ensure we do not get unbound variable when the java process is successful if [ \${java_exit_code:-"Not set"} == "Not set" ]; then java_exit_code=0; fi # If accessioning fails due to missing variants, but the only missing variants are structural variants, # then we should treat this as a success from the perspective of the automation.