From 79f8df9593440350963d42bb7ff8bb3728ccf499 Mon Sep 17 00:00:00 2001 From: Robert Warmerdam Date: Thu, 3 Nov 2022 14:00:45 +0100 Subject: [PATCH 1/3] Changed mvn plugin configurations in Genotype-Harmonizer and Genotype-IO to use '--release 8' instead of '--source 1.8 --target 1.8'. This resolves issues with Java APIs that changed over versions (e.g. ByteBuffer.flip()). Also version bump in both modules. --- .../GenotypeHarmonizerParamaters.java | 6 +-- .../genotype/vcf/VcfGenotypeData.java | 10 ++--- .../VcfGenotypeFormatSupplier.java | 40 ++++++++++++++++--- 3 files changed, 43 insertions(+), 13 deletions(-) diff --git a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java index 37b9c09c6..e96cf3a7a 100644 --- a/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java +++ b/Genotype-Harmonizer/src/main/java/nl/umcg/deelenp/genotypeharmonizer/GenotypeHarmonizerParamaters.java @@ -441,12 +441,12 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException { + "\" is not a supported genotype field."); } - boolean raiseExceptionIfUnavailable = true; + boolean forcePreferredGenotypeFormat = true; if (genotypeFormatArguments.length > 2) { if (genotypeFormatArguments[2].equals("suppress")) { System.out.println("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen."); LOGGER.warn("WARNING: requested to supress exceptions if preferred genotype format is unavailable. For those variants the default will be chosen."); - raiseExceptionIfUnavailable = false; + forcePreferredGenotypeFormat = false; } } @@ -454,7 +454,7 @@ public GenotypeHarmonizerParamaters(String... args) throws ParseException { VcfGenotypeFormat.valueOf(genotypeFormatArguments[0]), genotypeFormatArguments.length > 1 ? genotypeFormatArguments[1] : genotypeFormatArguments[0], - raiseExceptionIfUnavailable); + forcePreferredGenotypeFormat); } vcfGenotypeFormatSupplier = nonFinalVcfGenotypeFormatSupplier; diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java index 9738ecc3a..8f91658af 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java @@ -65,6 +65,7 @@ public class VcfGenotypeData extends AbstractRandomAccessGenotypeData implements private final LinkedHashSet genotypeProbabilitiesFieldPrecedence; private final LinkedHashSet genotypeCallFieldPrecedence; private final LinkedHashSet genotypeDosageFieldPrecedence; + private final LinkedHashSet haplotypeProbabilitiesFieldPresedence; /** @@ -138,6 +139,8 @@ public VcfGenotypeData(File bzipVcfFile, File tabixIndexFile, int cacheSize, dou new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.GT, VcfGenotypeFormat.GP, VcfGenotypeFormat.DS)); genotypeDosageFieldPrecedence = new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.DS, VcfGenotypeFormat.GP, VcfGenotypeFormat.GT)); + haplotypeProbabilitiesFieldPresedence = + new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS)); genotypeFormatSupplier = new VcfGenotypeFormatSupplier(); } @@ -321,15 +324,12 @@ public boolean arePhasedProbabilitiesPresent(GeneticVariant variant) { LinkedHashSet haplotypeProbabilitiesFields = getVcfHaplotypeFormats(variant); // If the requested format is set and present for this variant base decision on this format - VcfGenotypeFormat genotypeFormat = genotypeFormatSupplier.getVcfGenotypeFormat( + return genotypeFormatSupplier.VcfGenotypeFormatReadable( vcfRecord, haplotypeProbabilitiesFields); - - return (genotypeFormat != null); } private LinkedHashSet getVcfHaplotypeFormats(GeneticVariant variant) { - LinkedHashSet haplotypeProbabilitiesFields = - new LinkedHashSet<>(Arrays.asList(VcfGenotypeFormat.HP, VcfGenotypeFormat.ADS)); + LinkedHashSet haplotypeProbabilitiesFields = haplotypeProbabilitiesFieldPresedence; if (variant.hasPhasedGenotypes()) { haplotypeProbabilitiesFields.add(VcfGenotypeFormat.GT); diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java index 8c761c9be..ee946a9c9 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java @@ -1,6 +1,5 @@ package org.molgenis.genotype.vcf.VcfGenotypeField; -import org.apache.commons.lang3.StringUtils; import org.molgenis.genotype.GenotypeDataException; import org.molgenis.vcf.VcfRecord; @@ -15,7 +14,7 @@ public class VcfGenotypeFormatSupplier { private VcfGenotypeFormat preferredGenotypeFormat; private String preferredGenotypeFormatIdentifier; - private boolean raiseExceptionIfUnavailable; + private boolean forcePreferredGenotypeFormat; public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat) { this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false); @@ -29,11 +28,11 @@ public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, bool this(preferredGenotypeFormat, preferredGenotypeFormat.toString(), false); } - public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean raiseExceptionIfUnavailable) { + public VcfGenotypeFormatSupplier(VcfGenotypeFormat preferredGenotypeFormat, String formatIdentifier, boolean forcePreferredGenotypeFormat) { this.preferredGenotypeFormat = preferredGenotypeFormat; this.preferredGenotypeFormatIdentifier = formatIdentifier; - this.raiseExceptionIfUnavailable = raiseExceptionIfUnavailable; + this.forcePreferredGenotypeFormat = forcePreferredGenotypeFormat; } public VcfGenotypeFormatSupplier() { @@ -61,7 +60,7 @@ public VcfGenotypeFormat getVcfGenotypeFormat( return preferredGenotypeFormat; } - if (this.raiseExceptionIfUnavailable) { + if (this.forcePreferredGenotypeFormat) { throw new GenotypeDataException(String.format( "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " + "Available format fields: %s", @@ -80,6 +79,37 @@ public VcfGenotypeFormat getVcfGenotypeFormat( return null; } + /** + * @param vcfRecord record, row, within a VCF file. corresponding to a particular variant. + * @param genotypeDosageFieldPrecedence LinkedHashSet that lists all formats that can be read, + * in order of precedence (high precedence to low precedence). + * @return If there is a preferred genotype format supplied, this method only returns true if the + * preferred genotype format is available from the vcf record and the list of + * possible formats that can be read according to the genotype field precedence hash set. + * If a preferred genotype format is not supplied, this method will return true if one of + * the genotype field formats from the precedence list can be read from the vcf record. + * If nothing matches these conditions, false is returned. + */ + public boolean VcfGenotypeFormatReadable( + VcfRecord vcfRecord, + LinkedHashSet genotypeDosageFieldPrecedence) { + + List formatIdentifiers = Arrays.asList(vcfRecord.getFormat()); + + if (preferredGenotypeFormat != null) { + return genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat) + && formatIdentifiers.contains(this.getGenotypeFormatIdentifier(preferredGenotypeFormat)); + } + + for (VcfGenotypeFormat genotypeFormat: genotypeDosageFieldPrecedence) { + if (formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat))) { + return true; + } + } + + return false; + } + public VcfGenotypeFormat getPreferredGenotypeFormat() { return preferredGenotypeFormat; } From c3588bd179594423653e3a1bb03d078920525bbe Mon Sep 17 00:00:00 2001 From: Robert Warmerdam Date: Thu, 15 Dec 2022 15:50:56 +0100 Subject: [PATCH 2/3] Removed unnecessary print line from the bgen writer. Renamed erroneously named method. Resolved issues still present in the vcf field picker --- .../genotype/bgen/BgenGenotypeWriter.java | 3 +- .../genotype/vcf/VcfGenotypeData.java | 2 +- .../VcfGenotypeFormatSupplier.java | 72 ++++++++++++++----- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java b/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java index aaaf4bf33..fc0dee0fe 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/bgen/BgenGenotypeWriter.java @@ -301,8 +301,7 @@ private void addMetaData(File bgenFile, BgenixWriter bgenixWriter) throws IOExce byte[] firstBytes = new byte[1000]; randomAccessBgenFile.read(firstBytes, 0, 1000); - //Add current time in int. - System.out.println((System.currentTimeMillis() / 1000L)); + // Add current time in int. // Create and write new metadata. BgenixMetadata m = new BgenixMetadata( bgenFile.getName(), diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java index 8f91658af..8454bbc2d 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeData.java @@ -324,7 +324,7 @@ public boolean arePhasedProbabilitiesPresent(GeneticVariant variant) { LinkedHashSet haplotypeProbabilitiesFields = getVcfHaplotypeFormats(variant); // If the requested format is set and present for this variant base decision on this format - return genotypeFormatSupplier.VcfGenotypeFormatReadable( + return genotypeFormatSupplier.vcfGenotypeFormatReadable( vcfRecord, haplotypeProbabilitiesFields); } diff --git a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java index ee946a9c9..d63b93e42 100644 --- a/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java +++ b/Genotype-IO/src/main/java/org/molgenis/genotype/vcf/VcfGenotypeField/VcfGenotypeFormatSupplier.java @@ -54,24 +54,36 @@ public VcfGenotypeFormat getVcfGenotypeFormat( List formatIdentifiers = Arrays.asList(vcfRecord.getFormat()); - if (preferredGenotypeFormat != null - && genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat) - && formatIdentifiers.contains(this.getGenotypeFormatIdentifier(preferredGenotypeFormat))) { - return preferredGenotypeFormat; - } - - if (this.forcePreferredGenotypeFormat) { - throw new GenotypeDataException(String.format( - "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " + - "Available format fields: %s", - preferredGenotypeFormatIdentifier, - String.join(", ", vcfRecord.getIdentifiers()), - vcfRecord.getChromosome(), vcfRecord.getPosition(), - String.join(", ", vcfRecord.getFormat()))); + // Check if the preferred genotype format is set + if (preferredGenotypeFormat != null) { + // If it is set, check if it is available, and, if it is not, if we should write exceptions or not. + if (genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat) + && isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) { + return preferredGenotypeFormat; + } else if (this.forcePreferredGenotypeFormat) { + if (!isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) { + throw new GenotypeDataException(String.format( + "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " + + "Available format fields: %s", + preferredGenotypeFormatIdentifier, + String.join(", ", vcfRecord.getIdentifiers()), + vcfRecord.getChromosome(), vcfRecord.getPosition(), + String.join(", ", vcfRecord.getFormat()))); + } else if (!genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat)) { + throw new GenotypeDataException(String.format( + "Preferred genotype format field (%s) cannot be used. " + + "Requested to load vcf record %n%s (%s:%s). " + + "Possible format fields: %s", + preferredGenotypeFormatIdentifier, + String.join(", ", vcfRecord.getIdentifiers()), + vcfRecord.getChromosome(), vcfRecord.getPosition(), + String.join(", ", Arrays.toString(genotypeDosageFieldPrecedence.toArray())))); + } + } } for (VcfGenotypeFormat genotypeFormat: genotypeDosageFieldPrecedence) { - if (formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat))) { + if (isGenotypeFormatPresent(formatIdentifiers, genotypeFormat)) { return genotypeFormat; } } @@ -90,19 +102,32 @@ public VcfGenotypeFormat getVcfGenotypeFormat( * the genotype field formats from the precedence list can be read from the vcf record. * If nothing matches these conditions, false is returned. */ - public boolean VcfGenotypeFormatReadable( + public boolean vcfGenotypeFormatReadable( VcfRecord vcfRecord, LinkedHashSet genotypeDosageFieldPrecedence) { List formatIdentifiers = Arrays.asList(vcfRecord.getFormat()); + // Test if the preferred genotype format is present + // Test if we should suppress exception if this is not the case + // Test if the + if (preferredGenotypeFormat != null) { - return genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat) - && formatIdentifiers.contains(this.getGenotypeFormatIdentifier(preferredGenotypeFormat)); + if (isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat)) { + return genotypeDosageFieldPrecedence.contains(preferredGenotypeFormat); + } else if (this.forcePreferredGenotypeFormat) { + throw new GenotypeDataException(String.format( + "Preferred genotype format field (%s) is unavailable for vcf record: %n%s (%s:%s). " + + "Available format fields: %s", + preferredGenotypeFormatIdentifier, + String.join(", ", vcfRecord.getIdentifiers()), + vcfRecord.getChromosome(), vcfRecord.getPosition(), + String.join(", ", vcfRecord.getFormat()))); + } } for (VcfGenotypeFormat genotypeFormat: genotypeDosageFieldPrecedence) { - if (formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat))) { + if (isGenotypeFormatPresent(formatIdentifiers, genotypeFormat)) { return true; } } @@ -110,6 +135,15 @@ public boolean VcfGenotypeFormatReadable( return false; } + public boolean isPreferredGenotypeFormatPresent(VcfRecord vcfRecord) { + List formatIdentifiers = Arrays.asList(vcfRecord.getFormat()); + return isGenotypeFormatPresent(formatIdentifiers, preferredGenotypeFormat); + } + + private boolean isGenotypeFormatPresent(List formatIdentifiers, VcfGenotypeFormat genotypeFormat) { + return formatIdentifiers.contains(this.getGenotypeFormatIdentifier(genotypeFormat)); + } + public VcfGenotypeFormat getPreferredGenotypeFormat() { return preferredGenotypeFormat; } From d3d80718e48b813eb8807e04d28e78dd3ede23ed Mon Sep 17 00:00:00 2001 From: Robert Warmerdam Date: Thu, 15 Dec 2022 16:02:05 +0100 Subject: [PATCH 3/3] Version bump for genotype harmonizer --- Genotype-Harmonizer/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Genotype-Harmonizer/pom.xml b/Genotype-Harmonizer/pom.xml index d425765da..10d4f976e 100644 --- a/Genotype-Harmonizer/pom.xml +++ b/Genotype-Harmonizer/pom.xml @@ -7,7 +7,7 @@ 4.0.0 Genotype-Harmonizer - 1.4.25-SNAPSHOT + 1.4.26-SNAPSHOT Genotype Harmonizer jar