Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make truncating the histogram optional in CollectInsertSizeMetrics #1779

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/main/java/picard/analysis/CollectInsertSizeMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
"truncated to a shorter range of sizes, the MIN_HISTOGRAM_WIDTH will enforce a minimum range.", optional=true)
public Integer MIN_HISTOGRAM_WIDTH = null;

@Argument(shortName = "TR", doc="Do not truncate the insert size histogram.", optional = true)
public boolean DO_NOT_TRUNCATE_HISTOGRAMS = false;

@Argument(shortName="M", doc="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this " +
"percentage of overall reads. (Range: 0 to 1).")
public float MINIMUM_PCT = 0.05f;
Expand Down Expand Up @@ -146,6 +149,10 @@ protected String[] customCommandLineValidation() {
IOUtil.assertFileIsWritable(OUTPUT);
IOUtil.assertFileIsWritable(Histogram_FILE);

if (DO_NOT_TRUNCATE_HISTOGRAMS){
HISTOGRAM_WIDTH = Integer.MAX_VALUE;
}

//Delegate actual collection to InsertSizeMetricCollector
multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT,
HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,15 @@ public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file)
}

// Trim the Histogram down to get rid of outliers that would make the chart useless.
final Histogram<Integer> trimmedHistogram = histogram; // alias it
trimmedHistogram.trimByWidth(getWidthToTrimTo(metrics));
histogram.trimByWidth(getWidthToTrimTo(metrics));

if (!trimmedHistogram.isEmpty()) {
metrics.MEAN_INSERT_SIZE = trimmedHistogram.getMean();
metrics.STANDARD_DEVIATION = trimmedHistogram.getStandardDeviation();
// Compute metrics that are sensitive to outliers after trimming.
if (!histogram.isEmpty()) {
metrics.MEAN_INSERT_SIZE = histogram.getMean();
metrics.STANDARD_DEVIATION = histogram.getStandardDeviation();
}

file.addHistogram(trimmedHistogram);
file.addHistogram(histogram);
file.addMetric(metrics);
}
}
Expand Down
68 changes: 65 additions & 3 deletions src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
import picard.PicardException;
import picard.cmdline.CommandLineProgramTest;
import picard.util.RExecutor;

Expand Down Expand Up @@ -248,6 +249,70 @@ public void testHistogramWidthIsSetProperly() throws IOException {
Assert.assertEquals(output.getAllHistograms().size(), 5);
}

@Test
public void testSkipTruncatingHistogram() throws IOException {
final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true);
final int readLength = 10;
setBuilder.setReadLength(readLength);
final int minInsertSize = 100;
final int maxInsertSize = 400;
int readCount = 0;
final int start = 1000;
final String cigar = readLength + "M";
for (int j = minInsertSize; j < maxInsertSize; j++) {
setBuilder.addPair("read:" + readCount++, 0, start, start + j - readLength, false, false,
cigar, cigar, false, true, 60);
}

// Add an outlier
int outlierInsertSize = 2000;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

final

setBuilder.addPair("outlierRead", 0, start, start + outlierInsertSize - readLength,
false, false, cigar, cigar, false, true, 60);

final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam");
testSamFile.deleteOnExit();
final File testSamFileIndex = new File(testSamFile.getParentFile(),testSamFile.getName().replace("bam$","bai"));
testSamFileIndex.deleteOnExit();
try ( final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
.makeBAMWriter(setBuilder.getHeader(), false, testSamFile)) {
setBuilder.forEach(writer::addAlignment);
}

final File outfile = File.createTempFile("test", ".insert_size_metrics");
final File pdf = File.createTempFile("test", ".pdf");
outfile.deleteOnExit();
pdf.deleteOnExit();
final String[] args = new String[]{
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"Histogram_FILE=" + pdf.getAbsolutePath(),
"TR=" + true
};

Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));

Assert.assertEquals(output.getAllHistograms().get(0).get(outlierInsertSize).getValue(), 1.0);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

run with other argument to show that this argument makes a difference


// Run again with truncation enabled
final File outfileWithTruncation = File.createTempFile("test", ".insert_size_metrics");
outfileWithTruncation.deleteOnExit();
final String[] argsWithTruncation = new String[]{
"INPUT=" + testSamFile.getAbsolutePath(),
"OUTPUT=" + outfileWithTruncation.getAbsolutePath(),
"Histogram_FILE=" + pdf.getAbsolutePath()
};

Assert.assertEquals(runPicardCommandLine(argsWithTruncation), 0);
final MetricsFile<InsertSizeMetrics, Comparable<?>> outputWithTruncation = new MetricsFile<>();
outputWithTruncation.read(new FileReader(outfileWithTruncation));

// Check that the outlier has been removed when truncation is enabled
Assert.assertFalse(outputWithTruncation.getAllHistograms().get(0).containsKey(outlierInsertSize));

}

@Test
public void testMultipleOrientationsForHistogram() throws IOException {
final File output = new File("testdata/picard/analysis/directed/CollectInsertSizeMetrics", "multiple_orientation.sam.insert_size_metrics");
Expand Down Expand Up @@ -291,8 +356,6 @@ public void testWidthOfMetrics() throws IOException {
testSamFileIndex.deleteOnExit();


new File(testSamFile.getAbsolutePath().replace(".bam$",".bai")).deleteOnExit();

final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true);
setBuilder.setReadLength(10);

Expand Down Expand Up @@ -326,7 +389,6 @@ public void testWidthOfMetrics() throws IOException {
setBuilder.forEach(writer::addAlignment);
writer.close();


final File outfile = File.createTempFile("test", ".insert_size_metrics");
final File pdf = File.createTempFile("test", ".pdf");
outfile.deleteOnExit();
Expand Down