Skip to content

Commit

Permalink
Update Allele to use AlleleProtoFormatter for toString() method
Browse files Browse the repository at this point in the history
Update AlleleProtoFormatter to account for AN = 0 AlleleProto.Frequencies and display the pre-calculated AF
Update DbSnpAlleleParser and TopMedAlleleParser to use new AlleleProto.Frequency objects
Update Hg19Config to include dbSNP resource to enable addition of TopMED allele frequencies
  • Loading branch information
julesjacobsen committed Feb 7, 2024
1 parent 176e5e6 commit 6b8027f
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 113 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ public class AlleleData {
private AlleleData() {
}

/**
 * Creates an {@link AlleleProto.Frequency} from a pre-calculated allele frequency, for sources
 * which report a frequency directly rather than allele counts (AC/AN).
 *
 * @param frequencySource the source which reported the frequency
 * @param freq            the allele frequency expressed as a percentage; values above 100 are rejected
 * @return a new {@code AlleleProto.Frequency} with the source and frequency fields set
 * @throws IllegalArgumentException if {@code freq} is greater than 100
 */
public static AlleleProto.Frequency frequencyOf(AlleleProto.FrequencySource frequencySource, float freq) {
    // Exactly 100% is accepted, so the message must say "less than or equal to" - this also
    // matches the wording used by the AC/AN overload.
    if (freq > 100f) {
        throw new IllegalArgumentException(frequencySource + " AF=" + freq + " must be less than or equal to 100%!");
    }
    return AlleleProto.Frequency.newBuilder()
            .setFrequencySource(frequencySource)
            .setFrequency(freq)
            .build();
}

public static AlleleProto.Frequency frequencyOf(AlleleProto.FrequencySource frequencySource, int ac, int an) {
if (ac > an) {
throw new IllegalArgumentException(frequencySource + " AC=" + ac + " must be less than or equal to AN=" + an);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public static String formatFrequency(AlleleProto.Frequency frequency) {
+ "|" +
frequency.getHom()
+ "|" +
numberFormat.format(Frequency.percentageFrequency(frequency.getAc(), frequency.getAn()));
numberFormat.format(frequency.getAn() == 0 ? frequency.getFrequency() : Frequency.percentageFrequency(frequency.getAc(), frequency.getAn()));
}

public static String formatPathScores(List<AlleleProto.PathogenicityScore> pathogenicityScores) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ public Map<String, AlleleResource> hg19AlleleResources() {
alleleResources.put("gnomad-genome", gnomadGenomeAlleleResource());
alleleResources.put("gnomad-exome", gnomadExomeAlleleResource());
alleleResources.put("gnomad-mito", gnomadMitoAlleleResource());
// TOPMed removed as this is now part of gnomAD v2.1
// TOPMed removed as this is now part of gnomAD v2.1 (which release of TOPMed?)
// TOPMed removed as this is now part of dbSNP
alleleResources.put("dbsnp", dbSnpAlleleResource());
// dbSNP was previously removed as it mostly adds a lot of empty data with only rsids; it is re-enabled above to provide the TOPMed allele frequencies
alleleResources.put("uk10k", uk10kAlleleResource());
// ExAC removed as this is part of gnomad-exomes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.exomiser.core.proto.AlleleProtoFormatter;

import java.util.*;

Expand Down Expand Up @@ -181,51 +182,15 @@ public int hashCode() {
@Override
public String toString() {
return "Allele{" +
"chr=" + chr +
", pos=" + pos +
", ref='" + ref + '\'' +
", alt='" + alt + '\'' +
", rsId='" + rsId + '\'' +
", clinVarData='" + clinVarData + '\'' +
", values=" + values + '\'' +
", frequencies=" + printFrequencies(frequencies) +
", pathogenicityScores=" + printPathScores(pathogenicityScores) +
'}';
}

private String printFrequencies(List<AlleleProto.Frequency> frequencies) {
StringBuilder stringBuilder = new StringBuilder("{");
for (int i = 0; i < frequencies.size(); i++) {
AlleleProto.Frequency frequency = frequencies.get(i);
stringBuilder.append(frequency.getFrequencySource());
stringBuilder.append("=");
stringBuilder.append(frequency.getAc());
stringBuilder.append("|");
stringBuilder.append(frequency.getAn());
stringBuilder.append("|");
stringBuilder.append(frequency.getHom());
if (i < frequencies.size() - 1) {
stringBuilder.append(", ");
}
}
stringBuilder.append("}");
return stringBuilder.toString();
}

private String printPathScores(List<AlleleProto.PathogenicityScore> pathogenicityScores) {
StringBuilder stringBuilder = new StringBuilder("{");
for (int i = 0; i < pathogenicityScores.size(); i++) {
AlleleProto.PathogenicityScore pathogenicityScore = pathogenicityScores.get(i);
stringBuilder.append(pathogenicityScore.getPathogenicitySource());
stringBuilder.append("=");
stringBuilder.append(pathogenicityScore.getScore());
if (i < pathogenicityScores.size() - 1) {
stringBuilder.append(", ");
}
}
stringBuilder.append("}");
return stringBuilder.toString();
"chr=" + chr +
", pos=" + pos +
", ref='" + ref + '\'' +
", alt='" + alt + '\'' +
", rsId='" + rsId + '\'' +
", clinVarData='" + clinVarData + '\'' +
", values=" + values + '\'' +
", frequencies=" + AlleleProtoFormatter.formatFrequencies(frequencies) +
", pathogenicityScores=" + AlleleProtoFormatter.formatPathScores(pathogenicityScores) +
'}';
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@

package org.monarchinitiative.exomiser.data.genome.model.parsers;

import org.monarchinitiative.exomiser.core.proto.AlleleData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.exomiser.data.genome.model.Allele;
import org.monarchinitiative.exomiser.data.genome.model.AlleleProperty;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -40,17 +41,17 @@ public class DbSnpAlleleParser extends VcfAlleleParser {

@Override
List<Allele> parseInfoField(List<Allele> alleles, String info) {
Map<AlleleProperty, List<String>> minorAlleleFrequencies = parseMinorAlleleFrequencies(info);
Map<AlleleProto.FrequencySource, List<String>> minorAlleleFrequencies = parseMinorAlleleFrequencies(info);

for (Map.Entry<AlleleProperty, List<String>> entry : minorAlleleFrequencies.entrySet()) {
AlleleProperty alleleProperty = entry.getKey();
for (Map.Entry<AlleleProto.FrequencySource, List<String>> entry : minorAlleleFrequencies.entrySet()) {
AlleleProto.FrequencySource alleleProperty = entry.getKey();
List<String> alleleMafs = entry.getValue();
for (int i = 0; i < alleleMafs.size(); i++) {
String maf = alleleMafs.get(i);
if (!maf.equals(".")) {
float freq = 100f * Float.parseFloat(maf);
Allele allele = alleles.get(i);
allele.addValue(alleleProperty, freq);
allele.addFrequency(AlleleData.frequencyOf(alleleProperty, freq));
}
}
}
Expand All @@ -60,17 +61,17 @@ List<Allele> parseInfoField(List<Allele> alleles, String info) {
// ##INFO=<ID=CAF,Number=.,Type=String,Description="An ordered, comma delimited list of allele frequencies based on 1000Genomes, starting with the reference allele followed by alternate alleles as ordered in the ALT column. Where a 1000Genomes alternate allele is not in the dbSNPs alternate allele set, the allele is added to the ALT column. The minor allele is the second largest value in the list, and was previuosly reported in VCF as the GMAF. This is the GMAF reported on the RefSNP and EntrezSNP pages and VariationReporter">
// also in b151
// ##INFO=<ID=TOPMED,Number=.,Type=String,Description="An ordered, comma delimited list of allele frequencies based on TOPMed, starting with the reference allele followed by alternate alleles as ordered in the ALT column. The TOPMed minor allele is the second largest value in the list.">
private Map<AlleleProperty, List<String>> parseMinorAlleleFrequencies(String info) {
EnumMap<AlleleProperty, List<String>> mafMap = new EnumMap<>(AlleleProperty.class);
private Map<AlleleProto.FrequencySource, List<String>> parseMinorAlleleFrequencies(String info) {
EnumMap<AlleleProto.FrequencySource, List<String>> mafMap = new EnumMap<>(AlleleProto.FrequencySource.class);
String[] infoFields = info.split(";");
for (String infoField : infoFields) {
if (infoField.startsWith("CAF=")) {
String frequencyValues = getFrequencyValues(infoField);
mafMap.put(AlleleProperty.KG, parseFreqField(frequencyValues));
mafMap.put(AlleleProto.FrequencySource.KG, parseFreqField(frequencyValues));
}
if (infoField.startsWith("TOPMED=")) {
String frequencyValues = getFrequencyValues(infoField);
mafMap.put(AlleleProperty.TOPMED, parseFreqField(frequencyValues));
mafMap.put(AlleleProto.FrequencySource.TOPMED, parseFreqField(frequencyValues));
}
// newer b152+ format has all the frequency data in the FREQ field which requires further parsing
if (infoField.startsWith("FREQ=")) {
Expand All @@ -81,9 +82,9 @@ private Map<AlleleProperty, List<String>> parseMinorAlleleFrequencies(String inf
String frequencyValues = source.substring(colonPos + 1);
switch (sourceId) {
case "1000Genomes":
mafMap.put(AlleleProperty.KG, parseFreqField(frequencyValues));
mafMap.put(AlleleProto.FrequencySource.KG, parseFreqField(frequencyValues));
case "TOPMED":
mafMap.put(AlleleProperty.TOPMED, parseFreqField(frequencyValues));
mafMap.put(AlleleProto.FrequencySource.TOPMED, parseFreqField(frequencyValues));
// case "TWINSUK":
// // https://twinsuk.ac.uk/about-us/what-is-twinsuk/
// mafMap.put(AlleleProperty.TWINSUK, parseFreqField(frequencyValues));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,15 @@

package org.monarchinitiative.exomiser.data.genome.model.parsers;

import org.monarchinitiative.exomiser.core.proto.AlleleData;
import org.monarchinitiative.exomiser.data.genome.model.Allele;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.List;

import static org.monarchinitiative.exomiser.data.genome.model.AlleleProperty.TOPMED;
import static org.monarchinitiative.exomiser.core.proto.AlleleProto.FrequencySource.TOPMED;

/**
* Parser for TOPMED VCF files downloaded from
Expand Down Expand Up @@ -56,8 +57,8 @@ List<Allele> parseInfoField(List<Allele> alleles, String info) {
String freqValue = alleleFrequencyValues.get(i);
if (!freqValue.isEmpty() && !".".equals(freqValue)) {
try {
Float freq = 100f * Float.parseFloat(freqValue);
allele.addValue(TOPMED, freq);
float freq = 100f * Float.parseFloat(freqValue);
allele.addFrequency(AlleleData.frequencyOf(TOPMED, freq));
} catch (NumberFormatException ex) {
// swallow these
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ public void testResources() {
alleleResources.put("gnomad-genome", instance.gnomadGenomeAlleleResource());
alleleResources.put("gnomad-exome", instance.gnomadExomeAlleleResource());
alleleResources.put("gnomad-mito", instance.gnomadMitoAlleleResource());
// alleleResources.put("dbsnp", instance.dbSnpAlleleResource());
alleleResources.put("dbsnp", instance.dbSnpAlleleResource());
alleleResources.put("uk10k", instance.uk10kAlleleResource());
// exac removed as this is part of gnomad
alleleResources.put("esp", instance.espAlleleResource());
Expand Down
Loading

0 comments on commit 6b8027f

Please sign in to comment.