diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml
index d425765da..10d4f976e 100644
--- a/Genotype-Harmonizer/pom.xml
+++ b/Genotype-Harmonizer/pom.xml
@@ -7,7 +7,7 @@
4.0.0
Genotype-Harmonizer
- 1.4.25-SNAPSHOT
+ 1.4.26-SNAPSHOT
Genotype Harmonizer
jar
diff --git a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java
index 37b9c09c6..e96cf3a7a 100644
--- a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java
+++ b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java
@@ -441,12 +441,12 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException {
+ "\" is not a supported genotype field.");
}
- boolean raiseExceptionIfUnavailable = true;
+ boolean forcePreferredGenotypeFormat = true;
if (genotypeFormatArguments.length > 2) {
if (genotypeFormatArguments[2].equals("suppress")) {
System.out.println("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen.");
LOGGER.warn("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen.");
- raiseExceptionIfUnavailable = false;
+ forcePreferredGenotypeFormat = false;
}
}
@@ -454,7 +454,7 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException {
VcfGenotypeFormat.valueOf(genotypeFormatArguments[0]),
genotypeFormatArguments.length > 1 ?
genotypeFormatArguments[1] : genotypeFormatArguments[0],
- raiseExceptionIfUnavailable);
+ forcePreferredGenotypeFormat);
}
vcfGenotypeFormatSupplier = nonFinalVcfGenotypeFormatSupplier;
diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java
index aaaf4bf33..fc0dee0fe 100644
--- a/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java
+++ b/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java
@@ -301,8 +301,7 @@ private void addMetaData(File bgenFile, BgenixWriter bgenixWriter) throws IOExce
byte[] firstBytes = new byte[1000];
randomAccessBgenFile.read(firstBytes, 0, 1000);
- //Add current time in int.
- System.out.println((System.currentTimeMillis() / 1000L));
+ // Add current time in int.
// Create and write new metadata.
BgenixMetadata m = new BgenixMetadata(
bgenFile.getName(),
diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java
index 9738ecc3a..8454bbc2d 100644
--- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java
+++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java
@@ -65,6 +65,7 @@ public class VcfGenotypeData extends AbstractRandomAccessGenotypeData implements
private final LinkedHashSet genotypeProbabilitiesFieldPrecedence;
private final LinkedHashSet genotypeCallFieldPrecedence;
private final LinkedHashSet genotypeDosageFieldPrecedence;
+ private final LinkedHashSet haplotypeProbabilitiesFieldPresedence;
/**
@@ -138,6 +139,8 @@ public VcfGenotypeData(File bzipVcfFile, File tabixIndexFile, int cacheSize, dou
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.GT, VcfGenotypeFormat.GP, VcfGenotypeFormat.DS));
genotypeDosageFieldPrecedence =
new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.DS, VcfGenotypeFormat.GP, VcfGenotypeFormat.GT));
+ haplotypeProbabilitiesFieldPresedence =
+ new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS));
genotypeFormatSupplier = new VcfGenotypeFormatSupplier();
}
@@ -321,15 +324,12 @@ public boolean arePhasedProbabilitiesPresent(GeneticVariant variant) {
LinkedHashSet haplotypeProbabilitiesFields = getVcfHaplotypeFormats(variant);
// If the requested format is set and present for this variant base decision on this format
- VcfGenotypeFormat genotypeFormat = genotypeFormatSupplier.getVcfGenotypeFormat(
+ return genotypeFormatSupplier.vcfGenotypeFormatReadable(
vcfRecord, haplotypeProbabilitiesFields);
-
- return (genotypeFormat != null);
}
private LinkedHashSet getVcfHaplotypeFormats(GeneticVariant variant) {
- LinkedHashSet haplotypeProbabilitiesFields =
- new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS));
+ // Work on a copy of the shared precedence set: GT may be added below for phased variants,
+ // and that addition must not persist in the field that is reused for every other variant.
+ LinkedHashSet haplotypeProbabilitiesFields = new LinkedHashSet<>(haplotypeProbabilitiesFieldPresedence);
if (variant.hasPhasedGenotypes()) {
haplotypeProbabilitiesFields.add(VcfGenotypeFormat.GT);
diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java
index 8c761c9be..d63b93e42 100644
--- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java
+++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java
@@ -1,6 +1,5 @@
package org.molgenis.genotype.vcf.VcfGenotypeField;
-import org.apache.commons.lang3.StringUtils;
import org.molgenis.genotype.GenotypeDataException;
import org.molgenis.vcf.VcfRecord;
@@ -15,7 +14,7 @@
public class VcfGenotypeFormatSupplier {
private VcfGenotypeFormat preferredGenotypeFormat;
private String preferredGenotypeFormatIdentifier;
- private boolean raiseExceptionIfUnavailable;
+ private boolean forcePreferredGenotypeFormat;
public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat) {
this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false);
@@ -29,11 +28,11 @@ public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, bool
this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false);
}
- public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean raiseExceptionIfUnavailable) {
+ public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean forcePreferredGenotypeFormat) {
this.preferredGenotypeFormat = preferredGenotypeFormat;
this.preferredGenotypeFormatIdentifier = formatIdentifier;
- this.raiseExceptionIfUnavailable = raiseExceptionIfUnavailable;
+ this.forcePreferredGenotypeFormat = forcePreferredGenotypeFormat;
}
public VcfGenotypeFormatSupplier() {
@@ -55,24 +54,36 @@ public VcfGenotypeFormat getVcfGenotypeFormat(
List formatIdentifiers = Arrays.asList(vcfRecord.getFormat());
- if (preferredGenotypeFormat != null
- && genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)
- && formatIdentifiers.contains(this.getGenotypeFormatIdentifier(preferredGenotypeFormat))) {
- return preferredGenotypeFormat;
- }
-
- if (this.raiseExceptionIfUnavailable) {
- throw new GenotypeDataException(String.format(
- "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
- "Available format fields: %s",
- preferredGenotypeFormatIdentifier,
- String.join(", ", vcfRecord.getIdentifiers()),
- vcfRecord.getChromosome(), vcfRecord.getPosition(),
- String.join(", ", vcfRecord.getFormat())));
+ // Check if the preferred genotype format is set
+ if (preferredGenotypeFormat != null) {
+ // If it is set, use it when it is both allowed and available; if it is not, check whether an exception should be thrown.
+ if (genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)
+ && isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) {
+ return preferredGenotypeFormat;
+ } else if (this.forcePreferredGenotypeFormat) {
+ if (!isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) {
+ throw new GenotypeDataException(String.format(
+ "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
+ "Available format fields: %s",
+ preferredGenotypeFormatIdentifier,
+ String.join(", ", vcfRecord.getIdentifiers()),
+ vcfRecord.getChromosome(), vcfRecord.getPosition(),
+ String.join(", ", vcfRecord.getFormat())));
+ } else if (!genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)) {
+ throw new GenotypeDataException(String.format(
+ "Preferred genotype format field (%s) cannot be used. " +
+ "Requested to load vcf record %n%s (%s:%s). " +
+ "Possible format fields: %s",
+ preferredGenotypeFormatIdentifier,
+ String.join(", ", vcfRecord.getIdentifiers()),
+ vcfRecord.getChromosome(), vcfRecord.getPosition(),
+ String.join(", ", Arrays.toString(genotypeDosageFieldPrecedence.toArray()))));
+ }
+ }
}
for (VcfGenotypeFormat genotypeFormat: genotypeDosageFieldPrecedence) {
- if (formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat))) {
+ if (isGenotypeFormatPresent(formatIdentifiers, genotypeFormat)) {
return genotypeFormat;
}
}
@@ -80,6 +91,59 @@ public VcfGenotypeFormat getVcfGenotypeFormat(
return null;
}
+ /**
+  * Tells whether genotype data of a VCF record can be read in one of the given formats.
+  *
+  * @param vcfRecord record (row) within a VCF file, corresponding to a particular variant.
+  * @param genotypeDosageFieldPrecedence LinkedHashSet that lists all formats that can be read,
+  * in order of precedence (high precedence to low precedence).
+  * @return If a preferred genotype format is set and its identifier is among the format fields
+  * of the record: whether the precedence set contains the preferred format.
+  * In every other case that does not throw: true if at least one format of the
+  * precedence set is among the format fields of the record, false otherwise.
+  * @throws GenotypeDataException if a preferred genotype format is set, is not among the format
+  * fields of the record, and the preferred format is forced.
+  */
+ public boolean vcfGenotypeFormatReadable(
+         VcfRecord vcfRecord,
+         LinkedHashSet genotypeDosageFieldPrecedence) {
+
+     List recordFormats = Arrays.asList(vcfRecord.getFormat());
+     boolean preferredFormatSet = (preferredGenotypeFormat != null);
+
+     // A preferred format that the record offers settles the answer on its own:
+     // it is readable only when the supplied precedence set contains it.
+     if (preferredFormatSet && isGenotypeFormatPresent(recordFormats, preferredGenotypeFormat)) {
+         return genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat);
+     }
+
+     // Reaching this point with a preferred format set means the record does not offer it;
+     // fail when the preferred format is forced.
+     if (preferredFormatSet && this.forcePreferredGenotypeFormat) {
+         throw new GenotypeDataException(String.format(
+                 "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " +
+                         "Available format fields: %s",
+                 preferredGenotypeFormatIdentifier,
+                 String.join(", ", vcfRecord.getIdentifiers()),
+                 vcfRecord.getChromosome(), vcfRecord.getPosition(),
+                 String.join(", ", vcfRecord.getFormat())));
+     }
+
+     // Otherwise any format of the precedence set that the record offers is sufficient.
+     boolean readable = false;
+     for (VcfGenotypeFormat candidateFormat : genotypeDosageFieldPrecedence) {
+         if (isGenotypeFormatPresent(recordFormats, candidateFormat)) {
+             readable = true;
+             break;
+         }
+     }
+
+     return readable;
+ }
+
+ /**
+  * Checks whether the preferred genotype format is listed among the format fields of a VCF record.
+  *
+  * @param vcfRecord record whose format fields, as returned by getFormat(), are inspected.
+  * @return true if a preferred genotype format is set and its identifier is among the format
+  * fields of the record; false otherwise, including when no preferred format is set.
+  */
+ public boolean isPreferredGenotypeFormatPresent(VcfRecord vcfRecord) {
+     // Without a preferred format there is nothing to look up; answer false instead of
+     // handing null to the identifier lookup.
+     if (preferredGenotypeFormat == null) {
+         return false;
+     }
+     List formatIdentifiers = Arrays.asList(vcfRecord.getFormat());
+     return isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat);
+ }
+
+ /**
+  * Checks whether the identifier of a genotype format occurs in a list of format identifiers.
+  *
+  * @param formatIdentifiers format identifiers to search in.
+  * @param genotypeFormat genotype format whose identifier, as given by
+  * getGenotypeFormatIdentifier, is searched for.
+  * @return true if the identifier is contained in the list, false otherwise.
+  */
+ private boolean isGenotypeFormatPresent(List formatIdentifiers, VcfGenotypeFormat genotypeFormat) {
+     return formatIdentifiers.contains(
+             getGenotypeFormatIdentifier(genotypeFormat));
+ }
+
public VcfGenotypeFormat getPreferredGenotypeFormat() {
return preferredGenotypeFormat;
}