From 7d45dc8033d051961be43434796b76b8dd733382 Mon Sep 17 00:00:00 2001 From: Paul Sud <41386393+paul-sud@users.noreply.github.com> Date: Tue, 8 Feb 2022 12:17:00 -0800 Subject: [PATCH] PIP-1593-filter-type-value (#231) --- accession/accession_steps.py | 2 +- accession_steps/atac_steps.json | 12 ++++++++++++ accession_steps/dnase_no_footprints_steps.json | 14 ++++++++++++++ accession_steps/dnase_steps.json | 18 ++++++++++++++++++ accession_steps/jsonnet/atac_chip.jsonnet | 2 ++ .../tf_chip_bwa_control_fastqs_steps.json | 12 ++++++++++++ accession_steps/tf_chip_pbam_steps.json | 12 ++++++++++++ .../tf_chip_peak_call_only_steps.json | 12 ++++++++++++ accession_steps/tf_chip_steps.json | 12 ++++++++++++ 9 files changed, 95 insertions(+), 1 deletion(-) diff --git a/accession/accession_steps.py b/accession/accession_steps.py index b8e742b7..fcac57cf 100644 --- a/accession/accession_steps.py +++ b/accession/accession_steps.py @@ -111,4 +111,4 @@ def __init__(self, file_params: Dict[str, Any]): "maybe_preferred_default", False ) self.filter_type: Optional[str] = file_params.get("filter_type") - self.filter_value: Optional[int] = file_params.get("filter_value") + self.filter_value: Optional[Union[float, int]] = file_params.get("filter_value") diff --git a/accession_steps/atac_steps.json b/accession_steps/atac_steps.json index 618e34cc..3aedff0f 100644 --- a/accession_steps/atac_steps.json +++ b/accession_steps/atac_steps.json @@ -207,6 +207,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "atac_replication", @@ -249,6 +251,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "atac_replication", @@ -297,6 +301,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "atac_replication", @@ -320,6 +326,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -340,6 +348,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -363,6 +373,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], diff --git a/accession_steps/dnase_no_footprints_steps.json b/accession_steps/dnase_no_footprints_steps.json index 75218d3a..4e16d9f7 100644 --- a/accession_steps/dnase_no_footprints_steps.json +++ b/accession_steps/dnase_no_footprints_steps.json @@ -84,6 +84,8 @@ ], "file_format": "starch", "filekey": "analysis.tenth_of_one_percent_peaks_starch", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "output_type": "peaks", "quality_metrics": [] } @@ -109,6 +111,8 @@ "file_format": "bed", "file_format_type": "bed3+", "filekey": "analysis.five_percent_allcalls_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "FDR cut rate", "quality_metrics": [ "five_percent_allcalls_qc" @@ -156,6 +160,8 @@ "file_format": "bigBed", "file_format_type": "bed3+", "filekey": "analysis.five_percent_allcalls_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "FDR cut rate", "quality_metrics": [] } @@ -181,6 +187,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "analysis.five_percent_narrowpeaks_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "peaks", "quality_metrics": [ "five_percent_narrowpeaks_qc" @@ -208,6 +216,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "analysis.tenth_of_one_percent_narrowpeaks_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "maybe_preferred_default": true, "output_type": "peaks", "quality_metrics": [ @@ -231,6 +241,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "analysis.tenth_of_one_percent_narrowpeaks_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "maybe_preferred_default": true, "output_type": "peaks", "quality_metrics": [] @@ -252,6 +264,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "analysis.five_percent_narrowpeaks_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "peaks", "quality_metrics": [] } diff --git a/accession_steps/dnase_steps.json b/accession_steps/dnase_steps.json index 1683d392..e45fc2b5 100644 --- a/accession_steps/dnase_steps.json +++ b/accession_steps/dnase_steps.json @@ -84,6 +84,8 @@ ], "file_format": "starch", "filekey": "analysis.tenth_of_one_percent_peaks_starch", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "output_type": "peaks", "quality_metrics": [] } @@ -109,6 +111,8 @@ "file_format": "bed", "file_format_type": "bed3+", "filekey": "analysis.five_percent_allcalls_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "FDR cut rate", "quality_metrics": [ "five_percent_allcalls_qc" @@ -145,6 +149,8 @@ "file_format": "bed", "file_format_type": "bed3+", "filekey": "analysis.one_percent_footprints_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 1.0, "output_type": "footprints", "quality_metrics": [ "footprints_quality_metric" @@ -168,6 +174,8 @@ "file_format": "bigBed", "file_format_type": "bed3+", "filekey": "analysis.one_percent_footprints_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 1.0, "output_type": "footprints", "quality_metrics": [] } @@ -213,6 +221,8 @@ "file_format": "bigBed", "file_format_type": "bed3+", "filekey": "analysis.five_percent_allcalls_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "FDR cut rate", "quality_metrics": [] } @@ -238,6 +248,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "analysis.five_percent_narrowpeaks_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "peaks", "quality_metrics": [ "five_percent_narrowpeaks_qc" @@ -265,6 +277,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "analysis.tenth_of_one_percent_narrowpeaks_bed_gz", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "maybe_preferred_default": true, "output_type": "peaks", "quality_metrics": [ @@ -288,6 +302,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "analysis.tenth_of_one_percent_narrowpeaks_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 0.1, "maybe_preferred_default": true, "output_type": "peaks", "quality_metrics": [] @@ -309,6 +325,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "analysis.five_percent_narrowpeaks_bigbed", + "filter_type": "false discovery rate percentage", + "filter_value": 5.0, "output_type": "peaks", "quality_metrics": [] } diff --git a/accession_steps/jsonnet/atac_chip.jsonnet b/accession_steps/jsonnet/atac_chip.jsonnet index e5503dd0..c005c81b 100644 --- a/accession_steps/jsonnet/atac_chip.jsonnet +++ b/accession_steps/jsonnet/atac_chip.jsonnet @@ -149,6 +149,8 @@ local shared_file_props_no_qc = { file_format_type: file_format_type, output_type: 'IDR thresholded peaks', + filter_type: 'IDR', + filter_value: 0.05, }, local shared_file_props = shared_file_props_no_qc { quality_metrics: (if is_atac then ['atac_replication', 'atac_peak_enrichment'] else [ diff --git a/accession_steps/tf_chip_bwa_control_fastqs_steps.json b/accession_steps/tf_chip_bwa_control_fastqs_steps.json index 4ecb95da..83249ede 100644 --- a/accession_steps/tf_chip_bwa_control_fastqs_steps.json +++ b/accession_steps/tf_chip_bwa_control_fastqs_steps.json @@ -311,6 +311,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -372,6 +374,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -435,6 +439,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -461,6 +467,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -484,6 +492,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -508,6 +518,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], diff --git a/accession_steps/tf_chip_pbam_steps.json b/accession_steps/tf_chip_pbam_steps.json index 150bdcc7..cba32810 100644 --- a/accession_steps/tf_chip_pbam_steps.json +++ b/accession_steps/tf_chip_pbam_steps.json @@ -193,6 +193,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -254,6 +256,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -317,6 +321,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -343,6 +349,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -366,6 +374,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -390,6 +400,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], diff --git a/accession_steps/tf_chip_peak_call_only_steps.json b/accession_steps/tf_chip_peak_call_only_steps.json index c8be1197..3d8eb66a 100644 --- a/accession_steps/tf_chip_peak_call_only_steps.json +++ b/accession_steps/tf_chip_peak_call_only_steps.json @@ -152,6 +152,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -213,6 +215,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -276,6 +280,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -302,6 +308,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -325,6 +333,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -349,6 +359,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], diff --git a/accession_steps/tf_chip_steps.json b/accession_steps/tf_chip_steps.json index 3ece38f9..e87373a8 100644 --- a/accession_steps/tf_chip_steps.json +++ b/accession_steps/tf_chip_steps.json @@ -232,6 +232,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -293,6 +295,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -356,6 +360,8 @@ "file_format": "bed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks", "quality_metrics": [ "chip_replication", @@ -382,6 +388,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -405,6 +413,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ], @@ -429,6 +439,8 @@ "file_format": "bigBed", "file_format_type": "narrowPeak", "filekey": "bfilt_idr_peak_bb", + "filter_type": "IDR", + "filter_value": 0.05, "output_type": "IDR thresholded peaks" } ],