Skip to content

Commit

Permalink
despite efforts to the contrary, FingerprintChecker cannot fingerprin…
Browse files Browse the repository at this point in the history
…ting sam files that have a mismatching dictionary from the one stated in the haplotype map (it works for vcfs). This is because when fingerprinting sam files, the SamLocusIterator is given an IntervalList to query, and this IL has a mismatching dictionary which isn't supported.

This PR resolves this by creating a new IL just before querying the Iterator using the proper dictionary.
  • Loading branch information
yfarjoun committed Apr 3, 2024
1 parent 5b0b4c0 commit 0f038a1
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 1 deletion.
10 changes: 9 additions & 1 deletion src/main/java/picard/fingerprint/FingerprintChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,15 @@ public Map<FingerprintIdDetails, Fingerprint> fingerprintSamFile(final Path samF
log.info(String.format("Reading an indexed file (%s)", samFile.toUri().toString()));
}

final SamLocusIterator iterator = new SamLocusIterator(in, this.haplotypes.getIntervalList(), in.hasIndex());
// fingerprinting allows that headers differ, but SamLocusIterator doesn't allow the dictionary of the query
// interval list to differ from that of the samfile, leading to an exception thrown.
// At this point we already know that the dictionary of the haplotypes is a proper prefix of that of the sam file
// So we just swap out the header in the interval list.
final IntervalList il = this.haplotypes.getIntervalList();
final IntervalList newIl = new IntervalList(in.getFileHeader());
newIl.addall(il.getIntervals());

final SamLocusIterator iterator = new SamLocusIterator(in, newIl, in.hasIndex());
iterator.setEmitUncoveredLoci(true);
iterator.setMappingQualityScoreCutoff(this.minimumMappingQuality);
iterator.setQualityScoreCutoff(this.minimumBaseQuality);
Expand Down
26 changes: 26 additions & 0 deletions src/test/java/picard/fingerprint/CrosscheckFingerprintsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public class CrosscheckFingerprintsTest extends CommandLineProgramTest {

private final File TEST_DATA_DIR = new File("testdata/picard/fingerprint/");
private final File HAPLOTYPE_MAP = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.txt");
private final File HAPLOTYPE_MAP_SHORT = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.short_dictionary.txt");
private final File HAPLOTYPE_MAP_FOR_CRAMS = new File(TEST_DATA_DIR, "Homo_sapiens_assembly19.haplotype_database.subset.shifted.for.crams.txt");

private final File NA12891_r1_sam = new File(TEST_DATA_DIR, "NA12891.over.fingerprints.r1.sam");
Expand Down Expand Up @@ -201,6 +202,31 @@ public void testCrossCheckRGs(final File file1, final File file2, final boolean
doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch);
}

@Test(dataProvider = "bamFilesRGs")
public void testCrossCheckRGs_with_short_dictionary(final File file1, final File file2, final boolean expectAllMatch, final int expectedRetVal, final int expectedNMetrics) throws IOException {

File metrics = File.createTempFile("Fingerprinting", "NA1291.RG.crosscheck_metrics");
metrics.deleteOnExit();

final List<String> args = new ArrayList<>(Arrays.asList("INPUT=" + file1.getAbsolutePath(),
"INPUT=" + file2.getAbsolutePath(),
"OUTPUT=" + metrics.getAbsolutePath(),
"LOD_THRESHOLD=" + -2.0,
"EXPECT_ALL_GROUPS_TO_MATCH=" + expectAllMatch)
);

if (file1.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
args.add("R=" + referenceForCrams);
args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_FOR_CRAMS);
} else {
args.add("HAPLOTYPE_MAP=" + HAPLOTYPE_MAP_SHORT);
}

doTest(args.toArray(new String[0]), metrics, expectedRetVal, expectedNMetrics, CrosscheckMetric.DataType.READGROUP, expectAllMatch);
}



@DataProvider(name = "cramsWithNoReference")
public Object[][] cramsWithNoReference() {
return new Object[][]{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
@HD VN:1.4 GO:none SO:coordinate
@SQ SN:1 LN:249250621 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:1b22b98cdeb4a9304cb5d48026a85128 SP:Homo Sapiens
@SQ SN:2 LN:243199373 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:a0d9851da00400dec1098a9255ac712e SP:Homo Sapiens
@SQ SN:3 LN:198022430 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:fdfd811849cc2fadebc929bb925902e5 SP:Homo Sapiens
@SQ SN:4 LN:191154276 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:23dccd106897542ad87d2765d28a19a1 SP:Homo Sapiens
@SQ SN:5 LN:180915260 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:0740173db9ffd264d728f32784845cd7 SP:Homo Sapiens
@SQ SN:6 LN:171115067 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:1d3a93a248d92a729ee764823acbbc6b SP:Homo Sapiens
@SQ SN:7 LN:159138663 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:618366e953d6aaad97dbe4777c29375e SP:Homo Sapiens
@SQ SN:8 LN:146364022 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:96f514a9929e410c6651697bded59aec SP:Homo Sapiens
@SQ SN:9 LN:141213431 AS:GRCh37 UR:http://www.broadinstitute.org/ftp/pub/seq/references/Homo_sapiens_assembly19.fasta M5:3e273117f15e0a400f01055d9f393768 SP:Homo Sapiens
#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS
1 14804874 rs7555566 A G 0.223794
3 17077268 rs17272796 C T 0.623026
4 57194525 rs6834736 C G 0.512884
5 156355375 rs2862058 A G 0.349127
7 4490854 rs314605 C T 0.786367

0 comments on commit 0f038a1

Please sign in to comment.