Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fold80 Bug Fix #1972

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
3 changes: 2 additions & 1 deletion src/main/java/picard/analysis/CollectRawWgsMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ public RawWgsMetrics() {

public RawWgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualityDepthHistogramNonZero
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -135,7 +136,7 @@ public RawWgsMetrics(final IntervalList intervals,
final int coverageCap,
final Histogram<Integer> unfilteredBaseQHistogram,
final int sampleSize) {
super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
super(intervals, highQualityDepthHistogram, highQualityDepthHistogramNonZero,unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ public WgsMetricsWithNonZeroCoverage() {

public WgsMetricsWithNonZeroCoverage(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualitDepthHistogramNonZero,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -116,7 +117,7 @@ public WgsMetricsWithNonZeroCoverage(final IntervalList intervals,
final int coverageCap,
final Histogram<Integer> unfilteredBaseQHistogram,
final int sampleSize) {
super(intervals, highQualityDepthHistogram, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
super(intervals, highQualityDepthHistogram, highQualitDepthHistogramNonZero, unfilteredDepthHistogram, pctExcludedByAdapter, pctExcludedByMapq, pctExcludedByDupes, pctExcludedByPairing, pctExcludedByBaseq,
pctExcludedByOverlap, pctExcludedByCapping, pctTotal, coverageCap, unfilteredBaseQHistogram, sampleSize);
}
}
Expand Down Expand Up @@ -170,6 +171,7 @@ protected int doWork() {
@Override
protected WgsMetrics generateWgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> highQualityDepthHistogramNonZero,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
final double pctExcludedByMapq,
Expand All @@ -185,6 +187,7 @@ protected WgsMetrics generateWgsMetrics(final IntervalList intervals,
return new WgsMetricsWithNonZeroCoverage(
intervals,
highQualityDepthHistogram,
highQualityDepthHistogramNonZeroFinal,
unfilteredDepthHistogram,
pctExcludedByAdapter,
pctExcludedByMapq,
Expand All @@ -207,7 +210,7 @@ protected WgsMetricsCollector getCollector(final int coverageCap, final Interval

protected class WgsMetricsWithNonZeroCoverageCollector extends WgsMetricsCollector {
Histogram<Integer> highQualityDepthHistogram;
Histogram<Integer> highQualityDepthHistogramNonZero;
Histogram<Integer> highQualityDepthHistogramNonZeroFinal;

public WgsMetricsWithNonZeroCoverageCollector(final CollectWgsMetricsWithNonZeroCoverage metrics,
final int coverageCap, final IntervalList intervals) {
Expand All @@ -222,7 +225,7 @@ public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
final CountingFilter mapqFilter,
final CountingPairedFilter pairFilter) {
highQualityDepthHistogram = getDepthHistogram();
highQualityDepthHistogramNonZero = getDepthHistogramNonZero();
highQualityDepthHistogramNonZeroFinal = getDepthHistogramNonZero();

// calculate metrics the same way as in CollectWgsMetrics
final WgsMetricsWithNonZeroCoverage metrics = (WgsMetricsWithNonZeroCoverage) getMetrics(dupeFilter, adapterFilter, mapqFilter, pairFilter);
Expand All @@ -239,7 +242,7 @@ public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
file.addMetric(metrics);
file.addMetric(metricsNonZero);
file.addHistogram(highQualityDepthHistogram);
file.addHistogram(highQualityDepthHistogramNonZero);
file.addHistogram(highQualityDepthHistogramNonZeroFinal);

if (includeBQHistogram) {
addBaseQHistogram(file);
Expand All @@ -260,7 +263,7 @@ private Histogram<Integer> getDepthHistogramNonZero() {
}

public boolean areHistogramsEmpty() {
return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZero.isEmpty());
return (highQualityDepthHistogram.isEmpty() || highQualityDepthHistogramNonZeroFinal.isEmpty());
}
}
}
27 changes: 22 additions & 5 deletions src/main/java/picard/analysis/WgsMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import picard.PicardException;
import picard.util.MathUtil;
import picard.util.help.HelpConstants;
import java.util.stream.LongStream;

/** Metrics for evaluating the performance of whole genome sequencing experiments. */
@DocumentedFeature(groupName = HelpConstants.DOC_CAT_METRICS, summary = HelpConstants.DOC_CAT_METRICS_SUMMARY)
Expand All @@ -46,6 +47,9 @@ public class WgsMetrics extends MergeableMetricBase {
@MergingIsManual
protected final Histogram<Integer> highQualityDepthHistogram;

/** Count of sites with a given depth of coverage. Excludes bases with quality below MINIMUM_BASE_QUALITY and 0 coverage.*/
@MergingIsManual
protected final Histogram<Integer> highQualityDepthHistogramNonZero;
/** Count of sites with a given depth of coverage. Includes all but quality 2 bases */
@MergingIsManual
protected final Histogram<Integer> unfilteredDepthHistogram;
Expand All @@ -68,6 +72,7 @@ public class WgsMetrics extends MergeableMetricBase {
public WgsMetrics() {
intervals = null;
highQualityDepthHistogram = null;
highQualityDepthHistogramNonZero = null;
unfilteredDepthHistogram = null;
unfilteredBaseQHistogram = null;
theoreticalHetSensitivitySampleSize = -1;
Expand All @@ -77,6 +82,7 @@ public WgsMetrics() {
/**
* Create an instance of this metric that is mergeable.
*
* @param highQualityDepthHistogramNonZero the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY or with 0 coverage.
* @param highQualityDepthHistogram the count of genomic positions observed for each observed depth. Excludes bases with quality below MINIMUM_BASE_QUALITY.
* @param unfilteredDepthHistogram the depth histogram that includes all but quality 2 bases.
* @param pctExcludedByAdapter the fraction of aligned bases that were filtered out because they were in reads with 0 mapping quality that looked like adapter sequence.
Expand All @@ -92,6 +98,7 @@ public WgsMetrics() {
* @param theoreticalHetSensitivitySampleSize the sample size used for theoretical het sensitivity sampling.
*/
public WgsMetrics(final IntervalList intervals,
final Histogram<Integer> highQualityDepthHistogramNonZero,
final Histogram<Integer> highQualityDepthHistogram,
final Histogram<Integer> unfilteredDepthHistogram,
final double pctExcludedByAdapter,
Expand All @@ -107,6 +114,7 @@ public WgsMetrics(final IntervalList intervals,
final int theoreticalHetSensitivitySampleSize) {
this.intervals = intervals.uniqued();
this.highQualityDepthHistogram = highQualityDepthHistogram;
this.highQualityDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count");
this.unfilteredDepthHistogram = unfilteredDepthHistogram;
this.unfilteredBaseQHistogram = unfilteredBaseQHistogram;
this.coverageCap = coverageCap;
Expand Down Expand Up @@ -320,12 +328,21 @@ public void calculateDerivedFields() {
PCT_90X = MathUtil.sum(depthHistogramArray, 90, depthHistogramArray.length) / (double) GENOME_TERRITORY;
PCT_100X = MathUtil.sum(depthHistogramArray, 100, depthHistogramArray.length) / (double) GENOME_TERRITORY;



long maxDepth = 0;
for (final Histogram.Bin<Integer> bin : highQualityDepthHistogram.values()) {
maxDepth = Math.max((int) bin.getIdValue(), maxDepth);
final int depth = (int) bin.getIdValue();
if (depth > 0) {
highQualityDepthHistogramNonZero.increment(depth,bin.getValue());
}
}
LongStream.range(1, maxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0));
// This roughly measures by how much we must over-sequence so that xx% of bases have coverage at least as deep as the current mean coverage:
if (highQualityDepthHistogram.getCount() > 0) {
FOLD_80_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.2);
FOLD_90_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.1);
FOLD_95_BASE_PENALTY = MEAN_COVERAGE / highQualityDepthHistogram.getPercentile(0.05);
if (highQualityDepthHistogramNonZero.getCount() > 0) {
FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2);
FOLD_90_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.1);
FOLD_95_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.05);
} else {
FOLD_80_BASE_PENALTY = 0;
FOLD_90_BASE_PENALTY = 0;
Expand Down
15 changes: 11 additions & 4 deletions src/main/java/picard/analysis/directed/TargetMetricsCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,10 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
// give it the bin label "coverage_or_base_quality" to make clear that in the metrics file the coverage and base quality histograms share the same bin column on the left
private final Histogram<Integer> highQualityDepthHistogram = new Histogram<>("coverage_or_base_quality", "high_quality_coverage_count");

// histogram of depths. does not include bases with quality less than MINIMUM_BASE_QUALITY (default 20) or bases with zero coverage.
// we use this for the calculation of the FOLD_80_BASE_PENALTY metric
private final Histogram<Integer> highQualitDepthHistogramNonZero = new Histogram<>("coverage_or_base_quality", "high_quality_non_zero_coverage_count");

// histogram of base qualities. includes all but quality 2 bases. we use this histogram to calculate theoretical het sensitivity.
private final Histogram<Integer> unfilteredDepthHistogram = new Histogram<>("coverage_or_base_quality", "unfiltered_coverage_count");

Expand Down Expand Up @@ -708,6 +712,7 @@ public void finish() {
private void calculateTargetCoverageMetrics() {

LongStream.range(0, hqMaxDepth).forEach(i -> highQualityDepthHistogram.increment((int) i, 0));
LongStream.range(1, hqMaxDepth).forEach(i -> highQualityDepthHistogramNonZero.increment((int) i, 0));

int zeroCoverageTargets = 0;

Expand Down Expand Up @@ -738,7 +743,9 @@ private void calculateTargetCoverageMetrics() {
totalCoverage += depth;
highQualityDepthHistogram.increment(depth, 1);
minDepth = Math.min(minDepth, depth);

if (depth > 0) {
highQualityDepthHistogramNonZero.increment(depth, 1);
}
// Add to the "how many target bases at at-least X" calculations.
for (int i = 0; i < targetBasesDepth.length; i++) {
if (depth >= targetBasesDepth[i]) targetBases[i]++;
Expand All @@ -750,16 +757,16 @@ private void calculateTargetCoverageMetrics() {
if (targetBases[0] != highQualityCoverageByTarget.keySet().stream().mapToInt(Interval::length).sum()) {
throw new PicardException("the number of target bases with at least 0x coverage does not equal the number of target bases");
}

metrics.MEAN_TARGET_COVERAGE = (double) totalCoverage / metrics.TARGET_TERRITORY;
metrics.MEAN_TARGET_COVERAGE = highQualityDepthHistogram.getMean();
metrics.MEDIAN_TARGET_COVERAGE = highQualityDepthHistogram.getMedian();
metrics.MAX_TARGET_COVERAGE = hqMaxDepth;
// Use Math.min() to account for edge case where highQualityCoverageByTarget is empty (minDepth=Long.MAX_VALUE)
metrics.MIN_TARGET_COVERAGE = Math.min(minDepth, hqMaxDepth);

// compute the coverage value such that 80% of target bases have better coverage than it i.e. 20th percentile
// this roughly measures how much we must sequence extra such that 80% of target bases have coverage at least as deep as the current mean coverage
metrics.FOLD_80_BASE_PENALTY = metrics.MEAN_TARGET_COVERAGE / highQualityDepthHistogram.getPercentile(0.2);
metrics.FOLD_80_BASE_PENALTY = highQualityDepthHistogramNonZero.getMean() / highQualityDepthHistogramNonZero.getPercentile(0.2);
metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size();

// Store the "how many bases at at-least X" calculations.
Expand Down
Loading