Skip to content

Commit

Permalink
Merge pull request #643 from CAWarmerdam/master
Browse files Browse the repository at this point in the history
Bug fixes and minor improvements to Genotype-IO and GH
  • Loading branch information
PatrickDeelen authored Dec 15, 2022
2 parents ce7e048 + d3d8071 commit 362406c
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Genotype-Harmonizer/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>Genotype-Harmonizer</artifactId>
<version>1.4.25-SNAPSHOT</version>
<version>1.4.26-SNAPSHOT</version>
<name>Genotype Harmonizer</name>
<packaging>jar</packaging>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -441,20 +441,20 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException {
+ "\" is not a supported genotype field.");
}

boolean raiseExceptionIfUnavailable = true;
boolean forcePreferredGenotypeFormat = true;
if (genotypeFormatArguments.length > 2) {
if (genotypeFormatArguments[2].equals("suppress")) {
System.out.println("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen.");
LOGGER.warn("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen.");
raiseExceptionIfUnavailable = false;
forcePreferredGenotypeFormat = false;
}
}

nonFinalVcfGenotypeFormatSupplier = new VcfGenotypeFormatSupplier(
VcfGenotypeFormat.valueOf(genotypeFormatArguments[0]),
genotypeFormatArguments.length > 1 ?
genotypeFormatArguments[1] : genotypeFormatArguments[0],
raiseExceptionIfUnavailable);
forcePreferredGenotypeFormat);
}

vcfGenotypeFormatSupplier = nonFinalVcfGenotypeFormatSupplier;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,8 +301,7 @@ private void addMetaData(File bgenFile, BgenixWriter bgenixWriter) throws IOExce
byte[] firstBytes = new byte[1000];
randomAccessBgenFile.read(firstBytes, 0, 1000);

//Add current time in int.
System.out.println((System.currentTimeMillis() / 1000L));
// Add current time in int.
// Create and write new metadata.
BgenixMetadata m = new BgenixMetadata(
bgenFile.getName(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ public class VcfGenotypeData extends AbstractRandomAccessGenotypeData implements
private final LinkedHashSet<VcfGenotypeFormat> genotypeProbabilitiesFieldPrecedence;
private final LinkedHashSet<VcfGenotypeFormat> genotypeCallFieldPrecedence;
private final LinkedHashSet<VcfGenotypeFormat> genotypeDosageFieldPrecedence;
private final LinkedHashSet<VcfGenotypeFormat> haplotypeProbabilitiesFieldPresedence;


/**
Expand Down Expand Up @@ -138,6 +139,8 @@ public VcfGenotypeData(File bzipVcfFile, File tabixIndexFile, int cacheSize, dou
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.GT, VcfGenotypeFormat.GP, VcfGenotypeFormat.DS));
genotypeDosageFieldPrecedence =
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.DS, VcfGenotypeFormat.GP, VcfGenotypeFormat.GT));
haplotypeProbabilitiesFieldPresedence =
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS));

genotypeFormatSupplier = new VcfGenotypeFormatSupplier();
}
Expand Down Expand Up @@ -321,15 +324,12 @@ public boolean arePhasedProbabilitiesPresent(GeneticVariant variant) {
LinkedHashSet<VcfGenotypeFormat> haplotypeProbabilitiesFields = getVcfHaplotypeFormats(variant);

// If the requested format is set and present for this variant base decision on this format
VcfGenotypeFormat genotypeFormat = genotypeFormatSupplier.getVcfGenotypeFormat(
return genotypeFormatSupplier.vcfGenotypeFormatReadable(
vcfRecord, haplotypeProbabilitiesFields);

return (genotypeFormat != null);
}

private LinkedHashSet<VcfGenotypeFormat> getVcfHaplotypeFormats(GeneticVariant variant) {
LinkedHashSet<VcfGenotypeFormat> haplotypeProbabilitiesFields =
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS));
LinkedHashSet<VcfGenotypeFormat> haplotypeProbabilitiesFields = haplotypeProbabilitiesFieldPresedence;

if (variant.hasPhasedGenotypes()) {
haplotypeProbabilitiesFields.add(VcfGenotypeFormat.GT);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.molgenis.genotype.vcf.VcfGenotypeField;

import org.apache.commons.lang3.StringUtils;
import org.molgenis.genotype.GenotypeDataException;
import org.molgenis.vcf.VcfRecord;

Expand All @@ -15,7 +14,7 @@
public class VcfGenotypeFormatSupplier {
private VcfGenotypeFormat preferredGenotypeFormat;
private String preferredGenotypeFormatIdentifier;
private boolean raiseExceptionIfUnavailable;
private boolean forcePreferredGenotypeFormat;

public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat) {
this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false);
Expand All @@ -29,11 +28,11 @@ public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, bool
this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false);
}

public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean raiseExceptionIfUnavailable) {
public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean forcePreferredGenotypeFormat) {

this.preferredGenotypeFormat = preferredGenotypeFormat;
this.preferredGenotypeFormatIdentifier = formatIdentifier;
this.raiseExceptionIfUnavailable = raiseExceptionIfUnavailable;
this.forcePreferredGenotypeFormat = forcePreferredGenotypeFormat;
}

public VcfGenotypeFormatSupplier() {
Expand All @@ -55,31 +54,96 @@ public VcfGenotypeFormat getVcfGenotypeFormat(

List<String> formatIdentifiers = Arrays.asList(vcfRecord.getFormat());

if (preferredGenotypeFormat != null
&& genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)
&& formatIdentifiers.contains(this.getGenotypeFormatIdentifier(preferredGenotypeFormat))) {
return preferredGenotypeFormat;
}

if (this.raiseExceptionIfUnavailable) {
throw new GenotypeDataException(String.format(
"Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
"Available format fields: %s",
preferredGenotypeFormatIdentifier,
String.join(", ", vcfRecord.getIdentifiers()),
vcfRecord.getChromosome(), vcfRecord.getPosition(),
String.join(", ", vcfRecord.getFormat())));
// Check if the preferred genotype format is set
if (preferredGenotypeFormat != null) {
// If it is set, check if it is available, and, if it is not, if we should write exceptions or not.
if (genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)
&& isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) {
return preferredGenotypeFormat;
} else if (this.forcePreferredGenotypeFormat) {
if (!isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) {
throw new GenotypeDataException(String.format(
"Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
"Available format fields: %s",
preferredGenotypeFormatIdentifier,
String.join(", ", vcfRecord.getIdentifiers()),
vcfRecord.getChromosome(), vcfRecord.getPosition(),
String.join(", ", vcfRecord.getFormat())));
} else if (!genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)) {
throw new GenotypeDataException(String.format(
"Preferred genotype format field (%s) cannot be used. " +
"Requested to load vcf record %n%s (%s:%s). " +
"Possible format fields: %s",
preferredGenotypeFormatIdentifier,
String.join(", ", vcfRecord.getIdentifiers()),
vcfRecord.getChromosome(), vcfRecord.getPosition(),
String.join(", ", Arrays.toString(genotypeDosageFieldPrecedence.toArray()))));
}
}
}

for (VcfGenotypeFormat genotypeFormat: genotypeDosageFieldPrecedence) {
if (formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat))) {
if (isGenotypeFormatPresent(formatIdentifiers, genotypeFormat)) {
return genotypeFormat;
}
}

return null;
}

/**
 * Reports whether a genotype format can be read from the given VCF record.
 *
 * <p>Decision rules, in order:
 * <ol>
 *   <li>If a preferred genotype format is set and its identifier is listed among the record's
 *       format identifiers, the result is whether the preferred format is part of
 *       {@code genotypeDosageFieldPrecedence}.</li>
 *   <li>If a preferred genotype format is set, is not listed for the record, and the preferred
 *       format is forced, a {@link GenotypeDataException} is thrown.</li>
 *   <li>Otherwise (no preferred format, or a non-forced preferred format that is not listed),
 *       the result is {@code true} when at least one format of
 *       {@code genotypeDosageFieldPrecedence} is listed for the record.</li>
 * </ol>
 *
 * @param vcfRecord record, row, within a VCF file, corresponding to a particular variant.
 * @param genotypeDosageFieldPrecedence LinkedHashSet that lists all formats that can be read,
 *                                      in order of precedence (high precedence to low precedence).
 * @return {@code true} if a format is readable according to the rules above, {@code false} otherwise.
 * @throws GenotypeDataException if the preferred genotype format is forced but not listed
 *                               among the format identifiers of the record.
 */
public boolean vcfGenotypeFormatReadable(
        VcfRecord vcfRecord,
        LinkedHashSet<VcfGenotypeFormat> genotypeDosageFieldPrecedence) {

    List<String> formatIdentifiers = Arrays.asList(vcfRecord.getFormat());

    if (preferredGenotypeFormat != null) {
        // The preferred format is listed for this record: readable only if the caller accepts it.
        if (isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) {
            return genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat);
        }

        // The preferred format is missing; when it is forced there is no fallback.
        if (this.forcePreferredGenotypeFormat) {
            throw new GenotypeDataException(String.format(
                    "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
                            "Available format fields: %s",
                    preferredGenotypeFormatIdentifier,
                    String.join(", ", vcfRecord.getIdentifiers()),
                    vcfRecord.getChromosome(), vcfRecord.getPosition(),
                    String.join(", ", vcfRecord.getFormat())));
        }
    }

    // Fallback: any format from the precedence set that is listed for this record will do.
    return genotypeDosageFieldPrecedence.stream()
            .anyMatch(genotypeFormat -> isGenotypeFormatPresent(formatIdentifiers, genotypeFormat));
}

/**
 * Checks whether the identifier of the preferred genotype format is listed among the
 * format identifiers of the given VCF record.
 *
 * <p>NOTE(review): no null check is done on the preferred genotype format here; the outcome
 * for an unset preferred format depends on {@code getGenotypeFormatIdentifier} - confirm.
 *
 * @param vcfRecord record whose format identifiers are inspected.
 * @return {@code true} if the preferred format's identifier is listed for the record.
 */
public boolean isPreferredGenotypeFormatPresent(VcfRecord vcfRecord) {
    return isGenotypeFormatPresent(Arrays.asList(vcfRecord.getFormat()), preferredGenotypeFormat);
}

/**
 * Checks whether the identifier that this supplier associates with the given genotype format
 * occurs in a list of format identifiers.
 *
 * @param formatIdentifiers format identifiers to search in.
 * @param genotypeFormat genotype format whose identifier is looked up.
 * @return {@code true} if the identifier is contained in {@code formatIdentifiers}.
 */
private boolean isGenotypeFormatPresent(List<String> formatIdentifiers, VcfGenotypeFormat genotypeFormat) {
    return formatIdentifiers.contains(getGenotypeFormatIdentifier(genotypeFormat));
}

/**
 * Returns the preferred genotype format of this supplier.
 *
 * @return the preferred genotype format; other methods of this class check it against
 *         {@code null}, so callers should presumably expect {@code null} when no preference is set.
 */
public VcfGenotypeFormat getPreferredGenotypeFormat() {
    return this.preferredGenotypeFormat;
}
Expand Down

0 comments on commit 362406c

Please sign in to comment.