Skip to content

Commit

Permalink
Added mapping quality concordance functionality to CompareSAMs (#1617)
Browse files Browse the repository at this point in the history
- If the argument COMPARE_MQ is set to true, CompareSAMs will produce a histogram that reflects concordance of mapping qualities between two alignment files.
- Added separate tests for all CompareSAM test cases to check the mapping quality concordance.
  • Loading branch information
michaelgatzen authored Jan 25, 2021
1 parent 70dd405 commit 4a9c42d
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ public class SAMComparisonArgumentCollection {
public boolean LENIENT_UNKNOWN_MQ_ALIGNMENT;

@Argument(doc = "When running in LENIENT_LOW_MQ_ALIGNMENT mode, reads which have mapping quality below this value will be counted as matches. " +
"if LENIENT_LOW_MQ_ALIGNMENT is false (default), then this argument has no effect.")
"if LENIENT_LOW_MQ_ALIGNMENT is false (default), then this argument has no effect.")
public int LOW_MQ_THRESHOLD = 3;

@Argument(doc = "If set to true, generate a histogram for mapping quality concordance between the two SAM files and write it to the output metrics file. " +
"In this histogram, an entry of 10 at bin \"20,30\" means that 10 reads in the left file have mapping quality 20 while those same reads have mapping quality 30 in the right file. " +
"The reads are associated based solely on read names and not the mapped position. Only primary alignments are included, but all duplicate reads are counted individually.",
optional = true)
public boolean COMPARE_MQ = false;
}
18 changes: 17 additions & 1 deletion src/main/java/picard/sam/util/SamComparison.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import htsjdk.samtools.*;
import htsjdk.samtools.metrics.Header;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.IterableAdapter;
import htsjdk.samtools.util.SortingCollection;
import picard.PicardException;
Expand All @@ -29,6 +30,10 @@ public final class SamComparison {

private final SAMComparisonArgumentCollection samComparisonArgumentCollection;

// Ideally, this should be a Histogram<Pair<Integer, Integer>>, however, the MetricsFile class cannot read
// arbitrary types, therefore, it must be converted to a String, which may be slower
private final Histogram<String> mappingQualityHistogram = new Histogram<>();

private SortingCollection<SAMRecord> markDuplicatesCheckLeft;
private SortingCollection<SAMRecord> markDuplicatesCheckRight;

Expand Down Expand Up @@ -65,10 +70,13 @@ public void writeReport(final File output) {
}

public void writeReport(final File output, final List<Header> headers) {
final MetricsFile<SamComparisonMetric, ?> comparisonMetricFile = new MetricsFile<>();
final MetricsFile<SamComparisonMetric, String> comparisonMetricFile = new MetricsFile<>();

headers.forEach(comparisonMetricFile::addHeader);
comparisonMetricFile.addAllMetrics(Collections.singletonList(comparisonMetric));
if (samComparisonArgumentCollection.COMPARE_MQ) {
comparisonMetricFile.addHistogram(mappingQualityHistogram);
}
comparisonMetricFile.write(output);
}

Expand Down Expand Up @@ -397,6 +405,10 @@ private boolean alignmentsMatch(final SAMRecord s1, final SAMRecord s2) {
(samComparisonArgumentCollection.LENIENT_UNKNOWN_MQ_ALIGNMENT && s1.getMappingQuality() == SAMRecord.UNKNOWN_MAPPING_QUALITY && s2.getMappingQuality() == SAMRecord.UNKNOWN_MAPPING_QUALITY));
}

private void compareAndUpdateMappingQualityConcordance(final SAMRecord s1, final SAMRecord s2) {
mappingQualityHistogram.increment(String.format("%d,%d", s1.getMappingQuality(), s2.getMappingQuality()));
}

/**
* Compare the mapping information for two SAMRecords. Makes comparison of alignments, and also catalogs duplicate marking differences.
*/
Expand All @@ -407,6 +419,10 @@ private void tallyAlignmentRecords(final SAMRecord s1, final SAMRecord s2) {
catalogDuplicateDifferences(s1, s2);
final AlignmentComparison comp = compareAlignmentRecords(s1, s2);
comparisonMetric.updateMetric(comp);

if (samComparisonArgumentCollection.COMPARE_MQ) {
compareAndUpdateMappingQualityConcordance(s1, s2);
}
}

private void catalogDuplicateDifferences(final SAMRecord s1, final SAMRecord s2) {
Expand Down
84 changes: 79 additions & 5 deletions src/test/java/picard/sam/CompareSAMsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
package picard.sam;

import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.Histogram;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
Expand All @@ -32,10 +33,11 @@
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;

public class CompareSAMsTest extends CommandLineProgramTest {
private static final File TEST_FILES_DIR = new File("testdata/picard/sam/CompareSAMs");
Expand Down Expand Up @@ -83,7 +85,8 @@ public Object[][] compareSamsTestData() {

@Test(dataProvider = "compareSams")
public void testComparisons(final String f1, final String f2, final ArrayList<String> args, final boolean areEqual) throws IOException {
final Path tmpOutput = Files.createTempFile("compareSam", ".tsv");
final File tmpOutput = File.createTempFile("compareSam", ".tsv");
tmpOutput.deleteOnExit();
final String in1 = new File(TEST_FILES_DIR, f1).getAbsolutePath();
final String in2 = new File(TEST_FILES_DIR, f2).getAbsolutePath();
ArrayList<String> commandArgs = new ArrayList<>(
Expand All @@ -98,7 +101,8 @@ public void testComparisons(final String f1, final String f2, final ArrayList<St
}
Assert.assertEquals(runPicardCommandLine(commandArgs) == 0, areEqual);
final MetricsFile<SamComparisonMetric, Comparable<?>> metricsOutput = new MetricsFile<>();
metricsOutput.read(new FileReader(tmpOutput.toFile()));
metricsOutput.read(new FileReader(tmpOutput));
Assert.assertEquals(metricsOutput.getNumHistograms(), 0);

//swap order of input files
commandArgs = new ArrayList<>(
Expand All @@ -112,12 +116,82 @@ public void testComparisons(final String f1, final String f2, final ArrayList<St
commandArgs.addAll(args);
}
Assert.assertEquals(runPicardCommandLine(commandArgs) == 0, areEqual);
metricsOutput.read(new FileReader(tmpOutput.toFile()));
metricsOutput.read(new FileReader(tmpOutput));
Assert.assertEquals(metricsOutput.getNumHistograms(), 0);

Assert.assertEquals(metricsOutput.getMetrics().get(0).LEFT_FILE, in1);
Assert.assertEquals(metricsOutput.getMetrics().get(0).RIGHT_FILE, in2);

Assert.assertEquals(metricsOutput.getMetrics().get(1).LEFT_FILE, in2);
Assert.assertEquals(metricsOutput.getMetrics().get(1).RIGHT_FILE, in1);
}

@DataProvider(name="compareSamsMQConcordance")
public Object[][] compareSamsMQConcordanceTestData() {
return new Object[][] {
{"genomic_sorted.sam", "unsorted.sam", null},
{"genomic_sorted.sam", "chr21.sam", null},
{"genomic_sorted.sam", "bigger_seq_dict.sam", null},
{"genomic_sorted.sam", "genomic_sorted.sam", new Object[][] { {"20,20", 1}, {"30,30", 1}}},
{"genomic_sorted.sam", "has_non_primary.sam", new Object[][] { {"20,20", 1}, {"30,30", 1}}},
{"genomic_sorted_5.sam", "genomic_sorted_5_plus.sam", new Object[][] { {"20,20", 1}, {"30,30", 4}}},
{"group_same_coord.sam", "group_same_coord_diff_order.sam", new Object[][] { {"20,20", 1}, {"30,30", 2}}},
{"genomic_sorted_same_position.sam", "genomic_sorted_same_position.sam", new Object[][] { {"0,0", 2}}},
{"group_same_coord.sam", "diff_coords.sam", new Object[][] { {"20,20", 1}, {"30,30", 4}}},
{"genomic_sorted.sam", "unmapped_first.sam", new Object[][] { {"20,0", 1}, {"30,30", 1}}},
{"genomic_sorted.sam", "unmapped_second.sam", new Object[][] { {"30,0", 1}, {"20,20", 1}}},
{"unmapped_first.sam", "unmapped_second.sam", new Object[][] { {"0,20", 1}, {"30,0", 1}}},
{"unmapped_first.sam", "unmapped_first.sam", new Object[][] { {"0,0", 1}, {"30,30", 1}}},
{"genomic_sorted.sam", "genomic_sorted_sam_v1.6.sam", new Object[][] { {"20,20", 1}, {"30,30", 1}}},
{"unsorted.sam", "unsorted.sam", new Object[][] { {"20,20", 1}, {"30,30", 1}}},
{"unsorted.sam", "unsorted2.sam", new Object[][] { {"20,20", 1}}},
{"duplicate_base.sam", "duplicate_four_mismatch_strict.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base.sam", "duplicate_four_mismatch_lenient_one_align_differ.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base.sam", "duplicate_two_mismatch_lenient.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base.sam", "duplicate_four_mismatch_lenient.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base.sam", "duplicate_four_mismatch_strict.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base_queryname.sam", "duplicate_four_mismatch_strict_queryname.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base_queryname.sam", "duplicate_four_mismatch_lenient_one_align_differ_queryname.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base_queryname.sam", "duplicate_two_mismatch_lenient_queryname.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base_queryname.sam", "duplicate_four_mismatch_lenient_queryname.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"duplicate_base_queryname.sam", "duplicate_four_mismatch_strict_queryname.sam", new Object[][] { {"20,20", 2}, {"30,30", 12}}},
{"genomic_sorted.sam", "mq0_2.sam", new Object[][] { {"20,0", 1}, {"30,30", 1}}},
{"mq0_1.sam", "mq0_2.sam", new Object[][] { {"0,0", 1}, {"30,30", 1}}}
};
}

@Test(dataProvider = "compareSamsMQConcordance")
public void testMQConcordance(final String f1, final String f2, final Object[][] expectedMQConcordance) throws IOException {
final File tmpOutput = File.createTempFile("compareSam", ".tsv");
tmpOutput.deleteOnExit();
final String in1 = new File(TEST_FILES_DIR, f1).getAbsolutePath();
final String in2 = new File(TEST_FILES_DIR, f2).getAbsolutePath();
final ArrayList<String> commandArgs = new ArrayList<>(
Arrays.asList(
in1,
in2,
"O=" + tmpOutput,
"COMPARE_MQ=true"
)
);

runPicardCommandLine(commandArgs);

final MetricsFile<SamComparisonMetric, String> metricsOutput = new MetricsFile<>();
metricsOutput.read(new FileReader(tmpOutput));

// If the files cannot be compared (e.g. if their sort order differs) then there should be no histogram in the metrics file.
if (expectedMQConcordance == null) {
Assert.assertEquals(metricsOutput.getNumHistograms(), 0);
} else {
Assert.assertEquals(metricsOutput.getNumHistograms(), 1);

final Map<String, Integer> expectedMQConcordanceMap = Stream.of(expectedMQConcordance).collect(Collectors.toMap(data -> (String) data[0], data -> (Integer) data[1]));
final Histogram<String> expectedMQConcordanceHistogram = new Histogram<>();
expectedMQConcordanceMap.forEach(expectedMQConcordanceHistogram::increment);

final Histogram<String> mqConcordanceHistogram = metricsOutput.getHistogram();
Assert.assertEquals(mqConcordanceHistogram, expectedMQConcordanceHistogram);
}
}
}

0 comments on commit 4a9c42d

Please sign in to comment.