diff --git a/.circleci/config.yml b/.circleci/config.yml index 86976a57a..90b35e73a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,4 +1,4 @@ -version: 2 +version: 2.1 jobs: build: working_directory: ~/exomiser @@ -11,7 +11,9 @@ jobs: - image: cimg/openjdk:17.0.7 steps: - checkout - - run: chmod +x mvnw + - run: + name: make executable + command: chmod +x mvnw - restore_cache: key: exomiser-{{ arch }}-{{ checksum "pom.xml" }} - run: ./mvnw clean install -Dmaven.javadoc.skip=true -B -V diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 5cc773c4e..0e2d08f81 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -20,9 +20,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up JDK 17 - uses: actions/setup-java@v3 + uses: actions/setup-java@v4 with: java-version: '17' distribution: 'temurin' diff --git a/exomiser-cli/pom.xml b/exomiser-cli/pom.xml index 13caf5017..df24df4b5 100644 --- a/exomiser-cli/pom.xml +++ b/exomiser-cli/pom.xml @@ -32,7 +32,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA diff --git a/exomiser-core/pom.xml b/exomiser-core/pom.xml index 550715ad8..a6fa88fc8 100644 --- a/exomiser-core/pom.xml +++ b/exomiser-core/pom.xml @@ -30,7 +30,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA @@ -139,6 +139,12 @@ jakarta.annotation jakarta.annotation-api + + org.rocksdb + rocksdbjni + 7.6.0 + test + diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acgs2020Classifier.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acgs2020Classifier.java index dbd1f1c8b..c884165bf 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acgs2020Classifier.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acgs2020Classifier.java @@ -29,8 +29,10 @@ * Implementation of the ACGS v4 guidelines, Table 3 from * https://www.acgs.uk.com/media/11631/uk-practice-guidelines-for-variant-classification-v4-01-2020.pdf * + * @deprecated Guidelines now suggest using the {@link Acmg2020PointsBasedClassifier} * @since 13.1.0 */ +@Deprecated(since = "14.1.0", forRemoval = true) public final class Acgs2020Classifier implements AcmgEvidenceClassifier { public AcmgClassification classify(AcmgEvidence acmgEvidence) { diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015Classifier.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015Classifier.java index c521839cd..24c2e7026 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015Classifier.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015Classifier.java @@ -29,8 +29,10 @@ * Implementation of the 2015 ACMG Standards guidelines for interpretation of sequence variants (Richards et al. doi:10.1038/gim.2015.30). * Table 5 'Rules for combining criteria to classify sequence variants' * + * @deprecated Guidelines now suggest using the {@link Acmg2020PointsBasedClassifier} * @since 13.1.0 */ +@Deprecated(since = "14.1.0", forRemoval = true) public final class Acmg2015Classifier implements AcmgEvidenceClassifier { public AcmgClassification classify(AcmgEvidence acmgEvidence) { diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssigner.java index 8ddb92860..167fcede4 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssigner.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssigner.java @@ -22,29 +22,24 @@ import de.charite.compbio.jannovar.annotation.VariantEffect; import de.charite.compbio.jannovar.mendel.ModeOfInheritance; -import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraint; -import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraints; import org.monarchinitiative.exomiser.core.analysis.util.InheritanceModeAnalyser; import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; import org.monarchinitiative.exomiser.core.genome.dao.ClinVarDao; import org.monarchinitiative.exomiser.core.model.*; import org.monarchinitiative.exomiser.core.model.Pedigree.Individual; +import org.monarchinitiative.exomiser.core.model.frequency.Frequency; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource; import org.monarchinitiative.exomiser.core.model.pathogenicity.*; import org.monarchinitiative.exomiser.core.phenotype.ModelPhenotypeMatch; import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; import org.monarchinitiative.exomiser.core.proto.AlleleProto; -import org.monarchinitiative.svart.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; -import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.regex.Matcher; -import java.util.regex.Pattern; import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*; @@ -98,8 +93,6 @@ public class Acmg2015EvidenceAssigner implements AcmgEvidenceAssigner { AlleleProtoAdaptor.toAlleleKey(3, 15_645_186, "G", "C") ); - // e.g. p.(Lys567Thr) - private static final Pattern MISSENSE_HGVS_P = Pattern.compile("p\\.\\((?[A-Z][a-z]{2}\\d+)(?[A-Z][a-z]{2})\\)"); private final ClinVarDao clinVarDao; @@ -151,31 +144,31 @@ public AcmgEvidence assignVariantAcmgEvidence(VariantEvaluation variantEvaluatio // BA1 is supposed to be used as a filtering criterion where no other evidence need be considered. return acmgEvidenceBuilder.build(); } + // TODO: MT specific rules // PM2 "Absent from controls (or at extremely low frequency if recessive) in Exome Sequencing Project, 1000 Genomes Project, or Exome Aggregation Consortium" assignPM2(acmgEvidenceBuilder, frequencyData, modeOfInheritance); + // BS1 "Allele frequency is greater than expected for disorder" + assignBS1(acmgEvidenceBuilder, variantEvaluation, frequencyData, variantEvaluation.getPathogenicityData().clinVarData()); + // BS2 "Observed in a healthy adult individual for a recessive (homozygous), dominant (heterozygous), or X-linked (hemizygous) disorder, with full penetrance expected at an early age" + assignBS2(acmgEvidenceBuilder, variantEvaluation, modeOfInheritance, knownDiseases, frequencyData, variantEvaluation.getPathogenicityData().clinVarData()); + // PVS1 "null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single or multiexon deletion) in a gene where LOF is a known mechanism of disease" + // "Recommendations for interpreting the loss of function PVS1 ACMG/AMP variant criterion" https://doi.org/10.1002/humu.23626 + AcmgPVS1EvidenceAssigner.assignPVS1(acmgEvidenceBuilder, variantEvaluation, probandSex, modeOfInheritance, knownDiseases); - boolean hasCompatibleDiseaseMatches = !compatibleDiseaseMatches.isEmpty(); + // Apply PS1, PM5, PM1 to missense and inframe indels + AcmgMissenseInFrameIndelEvidenceAssigner.assignMissenseEvidence(acmgEvidenceBuilder, variantEvaluation, modeOfInheritance, knownDiseases, clinVarDao); - // PVS1 "null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single or multiexon deletion) in a gene where LOF is a known mechanism of disease" - assignPVS1(acmgEvidenceBuilder, variantEvaluation, modeOfInheritance, knownDiseases); - - // ignore non-missense, truncating, splice or mitochondrial variants - if (isMissenseOrInframeIndel(variantEvaluation.getVariantEffect()) && variantEvaluation.contigId() != 25) { - // ensure region is within contig bounds - Contig contig = variantEvaluation.contig(); - var upStream = Math.max(1, variantEvaluation.start() - 25); - var downStream = Math.min(contig.length(), variantEvaluation.start() + 25); - GenomicInterval genomicInterval = GenomicInterval.of(contig, Strand.POSITIVE, Coordinates.oneBased(upStream, downStream)); - var localClinVarData = clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval); - // PS1 "Same amino acid change as a previously established pathogenic variant regardless of nucleotide change" - // PM5 "Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before" - assignPS1PM5(acmgEvidenceBuilder, variantEvaluation, localClinVarData); - // PM1 "Located in a mutational hot spot and/or critical and well-established functional domain (e.g., active site of an enzyme) without benign variation" - assignPM1(acmgEvidenceBuilder, variantEvaluation, localClinVarData); - // TODO: PM1/BP3 "In-frame deletions/insertions in a repetitive region without a known function" - requires domain information. - } + // TODO: MISSENSE_VARIANT should be assessed for splicing score if they are also a SPLICE_REGION_VARIANT then the max of these two outcomes should be used. + // Note that these can conflict e.g. a MISSENSE with a low REVEL score can have a BP4 assigned yet have a SPLICE_AI > 0.2 and therefore a PP3 if assessed + // as a SPLICE_REGION_VARIANT. This could also effect the PS1 assignment if a splice variant has been assigned a PP3 and there are other CLinVar variants in the region. + // e.g. 22-51019849-C-T missense_variant|splice_region_variant MODERATE NC_000022.10:g.51019849C>T ENST00000406938.2(CHKB):c.581G>A (CHKB)p.(Arg194Gln) 1638bp 395aa + // apply PVS1, PS1, PP3, BP4, BP7 to splice region variants according to "Using the ACMG/AMP framework to capture + // evidence related to predicted and observed impact on splicing: Recommendations from the ClinGen SVI Splicing Subgroup" + // https://doi.org/10.1016/j.ajhg.2023.06.002 + AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, modeOfInheritance, knownDiseases, clinVarDao); + boolean hasCompatibleDiseaseMatches = !compatibleDiseaseMatches.isEmpty(); if (pedigree.containsId(probandId)) { Individual proband = pedigree.getIndividualById(probandId); // PS2 "De novo (both maternity and paternity confirmed) in a patient with the disease and no family history" @@ -194,7 +187,6 @@ public AcmgEvidence assignVariantAcmgEvidence(VariantEvaluation variantEvaluatio // PP4 "Patient’s phenotype or family history is highly specific for a disease with a single genetic etiology" assignPP4(acmgEvidenceBuilder, compatibleDiseaseMatches); - PathogenicityData pathogenicityData = variantEvaluation.getPathogenicityData(); ClinVarData clinVarData = variantEvaluation.getPathogenicityData().clinVarData(); if (!clinVarData.isEmpty()) { // PP5 "Reputable source recently reports variant as pathogenic, but the evidence is not available to the laboratory to perform an independent evaluation" @@ -202,92 +194,45 @@ public AcmgEvidence assignVariantAcmgEvidence(VariantEvaluation variantEvaluatio // BP6 "Reputable source recently reports variant as benign, but the evidence is not available to the laboratory to perform an independent evaluation" assignBP6(acmgEvidenceBuilder, clinVarData); } - // PP3 "Multiple lines of computational evidence support a deleterious effect on the gene or gene product (conservation, evolutionary, splicing impact, etc.)" - // BP4 "Multiple lines of computational evidence suggest no impact on gene or gene product (conservation, evolutionary, splicing impact, etc.)" - // TODO: internalise in PathogenicityData and create proper cutoff values adding isPath/isbenign to PathogenicityScore - assignPP3orBP4(acmgEvidenceBuilder, pathogenicityData); return acmgEvidenceBuilder.build(); } - /** - * PVS1 "null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single or multiexon deletion) in a gene where LOF is a known mechanism of disease" - */ - private void assignPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ModeOfInheritance modeOfInheritance, List knownDiseases) { - // TODO: add new method - gene.getAssociatedDiseases() - // also need ClinvarCache.getClinvarDataForGene(GeneIdentifier geneIdentifier) - - // Certain types of variants (e.g., nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single exon or multiexon - // deletion) can often be assumed to disrupt gene function by leading to a complete absence of the gene product by lack of tran- - // scription or nonsense-mediated decay of an altered transcript. One must, however, exercise caution when classifying these - // variants as pathogenic by considering the following principles: - // (i) When classifying such variants as pathogenic, one must ensure that null variants are a known mechanism of - // pathogenicity consistent with the established inheritance pattern for the disease. For example, there are genes for - // which only heterozygous missense variants cause disease and null variants are benign in a heterozygous state (e.g., - // many hypertrophic cardiomyopathy genes). A novel heterozygous nonsense variant in the MYH7 gene would - // not be considered pathogenic for dominant hypertrophic cardiomyopathy based solely on this evidence, whereas a - // novel heterozygous nonsense variant in the CFTR gene would likely be considered a recessive pathogenic variant. - // Caveats: - // • Beware of genes where LOF is not a known disease mechanism (e.g., GFAP, MYH7) - // • Use caution interpreting LOF variants at the extreme 3′ end of a gene - // • Use caution with splice variants that are predicted to lead to exon skipping but leave the remainder of the protein intact - // • Use caution in the presence of multiple transcripts - - GeneConstraint geneContraint = GeneConstraints.geneConstraint(variantEvaluation.getGeneSymbol()); - // Should this be using the hasCompatibleDiseaseMatches variable? - boolean inGeneWithKnownDiseaseAssociations = !knownDiseases.isEmpty(); - if (inGeneWithKnownDiseaseAssociations && isLossOfFunctionEffect(variantEvaluation.getVariantEffect()) - && (geneContraint != null && geneContraint.isLossOfFunctionIntolerant()) - ) { - if (variantEvaluation.hasTranscriptAnnotations()) { - TranscriptAnnotation transcriptAnnotation = variantEvaluation.getTranscriptAnnotations().get(0); - if (predictedToLeadToNmd(transcriptAnnotation)) { - acmgEvidenceBuilder.add(PVS1); - } else { - // Not predicted to lead to NMD? Downgrade to STRONG - acmgEvidenceBuilder.add(PVS1, Evidence.STRONG); - } - } else { - // shouldn't happen that there are no transcript annotations, but just in case... - acmgEvidenceBuilder.add(PVS1, Evidence.STRONG); + private void assignBS1(AcmgEvidence.Builder acmgEvidenceBuilder, Variant variant, FrequencyData frequencyData, ClinVarData clinVarData) { + boolean isReportedPorLP = clinVarData.starRating() >= 1 && isPathOrLikelyPath(clinVarData.getPrimaryInterpretation()); + if ((acmgEvidenceBuilder.contains(BA1) || acmgEvidenceBuilder.contains(PM2)) && !isReportedPorLP) { + float maxNonFounderPopFreq = frequencyData.maxFreqForPopulation(FrequencySource.NON_FOUNDER_POPS); + if (variant.contigId() == 25 && maxNonFounderPopFreq > 0.5) { + // hard cutoff for MT based on AMP/ACMG "Specifications of the ACMG/AMP standards and guidelines + //for mitochondrial DNA variant interpretation" doi:10.1002/humu.24107 + acmgEvidenceBuilder.add(BS1); + } else if (maxNonFounderPopFreq > 1.5) { + // InterVar uses a cutoff of 1% but this is half our default max for AR filtering (2%). More empirical + // gene-specific cut-offs can be generated from the max frequencies seen for the most common P/LP clinvar + // variant - i.e. GeneStats required + acmgEvidenceBuilder.add(BS1); } } } - private boolean isLossOfFunctionEffect(VariantEffect variantEffect) { - return variantEffect == VariantEffect.START_LOST - || variantEffect == VariantEffect.STOP_LOST - || variantEffect == VariantEffect.STOP_GAINED - || variantEffect == VariantEffect.FRAMESHIFT_ELONGATION - || variantEffect == VariantEffect.FRAMESHIFT_TRUNCATION - || variantEffect == VariantEffect.FRAMESHIFT_VARIANT - || variantEffect == VariantEffect.SPLICE_ACCEPTOR_VARIANT - || variantEffect == VariantEffect.SPLICE_DONOR_VARIANT - || variantEffect == VariantEffect.EXON_LOSS_VARIANT; - } - - private boolean predictedToLeadToNmd(TranscriptAnnotation transcriptAnnotation) { - // predicted to lead to NMD if in last exon or last 50bp of penultimate exon, or in single exon transcript - boolean notInLastExon = transcriptAnnotation.getRank() < transcriptAnnotation.getRankTotal(); - boolean isSingleExonTranscript = transcriptAnnotation.getRankTotal() == 1; - return transcriptAnnotation.getRankType() == TranscriptAnnotation.RankType.EXON && (notInLastExon || isSingleExonTranscript); - } - - private boolean compatibleWithRecessive(ModeOfInheritance modeOfInheritance) { - if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_RECESSIVE) { - return true; - } - return probandSex == Individual.Sex.FEMALE && modeOfInheritance == ModeOfInheritance.X_RECESSIVE; - } - - private boolean compatibleWithDominant(ModeOfInheritance modeOfInheritance) { - if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_DOMINANT) { - return true; - } - if (probandSex == Individual.Sex.MALE && (modeOfInheritance == ModeOfInheritance.X_RECESSIVE || modeOfInheritance == ModeOfInheritance.X_DOMINANT)) { - return true; + // BS2 "Observed in a healthy adult individual for a recessive (homozygous), dominant (heterozygous), or X-linked (hemizygous) disorder, with full penetrance expected at an early age" + private void assignBS2(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ModeOfInheritance modeOfInheritance, List diseases, FrequencyData frequencyData, ClinVarData clinVarData) { + // TODO: need GENE MOI + //We first determine the mode of inheritance of the gene, then compares the allele count (see allele frequency for quality checks) to the corresponding threshold: + // + // recessive or X-linked genes: allele count greater than 2, + // dominant genes: allele count greater than 5. + // + //Rule BS2 is not evaluated if rule BA1 was triggered, to avoid double-counting the same evidence, and for performance we disable BS2 if rule PM2 triggered. + int alleleCount = frequencyData.frequencies().stream().filter(frequency -> FrequencySource.NON_FOUNDER_POPS.contains(frequency.source())).mapToInt(Frequency::homs).sum(); + if (!(acmgEvidenceBuilder.contains(BA1) || acmgEvidenceBuilder.contains(PM2))) { + if (alleleCount > 2 && (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_RECESSIVE || modeOfInheritance == ModeOfInheritance.X_RECESSIVE || modeOfInheritance == ModeOfInheritance.X_DOMINANT)) { + acmgEvidenceBuilder.add(BS2); + } + if (alleleCount > 5 && modeOfInheritance == ModeOfInheritance.AUTOSOMAL_DOMINANT) { + acmgEvidenceBuilder.add(BS2); + } } - return (probandSex == Individual.Sex.FEMALE || probandSex == Individual.Sex.UNKNOWN) && modeOfInheritance == ModeOfInheritance.X_DOMINANT; } /** @@ -340,97 +285,14 @@ private boolean inCis(SampleGenotype thisVariantGenotype, SampleGenotype otherVa && thisVariantGenotype.equals(otherVariantGenotype); } - - // PM1 "Located in a mutational hot spot and/or critical and well-established functional domain (e.g., active site of an enzyme) without benign variation" - private void assignPM1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Map localClinVarData) { - // TODO - need UniProt domain / site info and clinvar counts - // can upgrade to STRONG - // https://www.cell.com/ajhg/fulltext/S0002-9297(22)00461-X suggests to limit the combined evidence from PM1 and PP3 to strong - int pathCount = 0; - int vusCount = 0; - int benignCount = 0; - for (var entry : localClinVarData.entrySet()) { - ClinVarData clinVarData = entry.getValue(); - ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); - if (isMissenseOrInframeIndel(clinVarData.getVariantEffect())) { - if (isPathOrLikelyPath(primaryInterpretation)) { - pathCount++; - } - if (primaryInterpretation == ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) { - vusCount++; - } - if (isBenignOrLikelyBenign(primaryInterpretation)) { - benignCount++; - } - } - } - logger.debug("PM1 evidence for region {} {}:{}-{} Paths: {} VUSs: {} Benigns: {}", variantEvaluation.getGenomeAssembly(), variantEvaluation.contigId(), variantEvaluation.start() - 25, variantEvaluation.end() + 25, pathCount, vusCount, benignCount); - if (pathCount >= 4 && benignCount == 0) { - // could do funkier thing to score other path variants by severity/star rating and distance to variant - if (pathCount > vusCount) { - acmgEvidenceBuilder.add(PM1); - logger.debug("{} -> {}", variantEvaluation, PM1); - } else { - acmgEvidenceBuilder.add(PM1, Evidence.SUPPORTING); - logger.debug("{} -> {}_Supporting", variantEvaluation, PM1); - } - } - } - - private void assignPS1PM5(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Map localClinVarData) { - if (variantEvaluation.getTranscriptAnnotations().isEmpty()) { - return; - } - String variantProteinChange = variantEvaluation.getTranscriptAnnotations().get(0).getHgvsProtein(); - Matcher queryVariantMatcher = MISSENSE_HGVS_P.matcher(variantProteinChange); - boolean matches = queryVariantMatcher.matches(); - String aaRefPos = matches ? queryVariantMatcher.group("refPos") : ""; - String aaAlt = matches ? queryVariantMatcher.group("alt") : ""; - for (var entry : localClinVarData.entrySet()) { - ClinVarData clinVarData = entry.getValue(); - ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); - if (isMissenseOrInframeIndel(clinVarData.getVariantEffect()) && isPathOrLikelyPath(primaryInterpretation)) { - GenomicVariant clinVarVariant = entry.getKey(); - if (Math.abs(clinVarVariant.distanceTo(variantEvaluation)) <= 2 && GenomicVariant.compare(clinVarVariant, variantEvaluation) != 0) { - // within codon so check for same AA change or different AA change - Matcher clinvarMatcher = MISSENSE_HGVS_P.matcher(clinVarData.getHgvsProtein()); - if (clinvarMatcher.matches()) { - String clnAaRefPos = clinvarMatcher.group("refPos"); - String clnAaAlt = clinvarMatcher.group("alt"); - if (aaRefPos.equals(clnAaRefPos)) { - if (aaAlt.equals(clnAaAlt)) { - // PS1 "Same amino acid change as a previously established pathogenic variant regardless of nucleotide change" - Evidence evidence; - if (clinVarData.starRating() >= 2) { - evidence = Evidence.STRONG; - } else if (clinVarData.starRating() == 1) { - evidence = Evidence.MODERATE; - } else { - evidence = Evidence.SUPPORTING; - } - logger.debug("{} -> {}_{}", clinVarData.getHgvsProtein(), PS1, evidence); - acmgEvidenceBuilder.add(PS1, evidence); - } else { - Evidence evidence = clinVarData.starRating() >= 2 ? Evidence.MODERATE : Evidence.SUPPORTING; - logger.debug("{} -> {}_{}", clinVarData.getHgvsProtein(), PM5, evidence); - // PM5 "Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before" - acmgEvidenceBuilder.add(PM5, evidence); - } - } - } - } - } - } - } - - private boolean isMissenseOrInframeIndel(VariantEffect variantEffect) { - return variantEffect == VariantEffect.MISSENSE_VARIANT || variantEffect == VariantEffect.INFRAME_DELETION || variantEffect == VariantEffect.INFRAME_INSERTION; - } - /** * PP5 "Reputable source recently reports variant as pathogenic, but the evidence is not available to the laboratory to perform an independent evaluation" */ private void assignPP5(AcmgEvidence.Builder acmgEvidenceBuilder, ClinVarData clinVarData) { + // avoid double-counting by not assigning if PS1 already assigned + if (acmgEvidenceBuilder.contains(PS1)) { + return; + } ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); boolean pathOrLikelyPath = isPathOrLikelyPath(primaryInterpretation); if (pathOrLikelyPath && clinVarData.starRating() == 1) { @@ -457,8 +319,11 @@ private void assignBP6(AcmgEvidence.Builder acmgEvidenceBuilder, ClinVarData cli boolean benignOrLikelyBenign = isBenignOrLikelyBenign(primaryInterpretation); if (benignOrLikelyBenign && clinVarData.starRating() == 1) { acmgEvidenceBuilder.add(BP6); - } else if (benignOrLikelyBenign && clinVarData.starRating() >= 2) { + } else if (benignOrLikelyBenign && clinVarData.starRating() == 2) { acmgEvidenceBuilder.add(BP6, Evidence.STRONG); + } else if (benignOrLikelyBenign && clinVarData.starRating() >= 3) { + // expert panel or practice guidelines + acmgEvidenceBuilder.add(BP6, Evidence.VERY_STRONG); } } @@ -515,8 +380,8 @@ private boolean possibleDeNovo(SampleGenotype ancestorGenotype, SampleGenotype p private void assignPM2(AcmgEvidence.Builder acmgEvidenceBuilder, FrequencyData frequencyData, ModeOfInheritance modeOfInheritance) { // allow local frequency occurrences as these are unverifiable as to their size or content. Also do not use isRepresentedInDatabase() // as this will exclude anything with an rsID which could be a ClinVar variant not seen in any population database. - boolean absentFromDatabase = frequencyData.isEmpty() || (frequencyData.size() == 1 && frequencyData.containsFrequencySource(FrequencySource.LOCAL)); - boolean atVeryLowFrequencyIfRecessive = modeOfInheritance.isRecessive() && frequencyData.maxFreqForPopulation(FrequencySource.NON_FOUNDER_POPS) < 0.01f; + boolean absentFromDatabase = frequencyData.maxFreq() == 0f || (frequencyData.size() == 1 && frequencyData.containsFrequencySource(FrequencySource.LOCAL)); + boolean atVeryLowFrequencyIfRecessive = (modeOfInheritance.isRecessive() || modeOfInheritance == ModeOfInheritance.ANY) && frequencyData.maxFreqForPopulation(FrequencySource.NON_FOUNDER_POPS) < 0.01f; if (absentFromDatabase || atVeryLowFrequencyIfRecessive) { acmgEvidenceBuilder.add(PM2, Evidence.SUPPORTING); } @@ -555,102 +420,6 @@ private void assignPM6(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluati } } - /** - * PP3 "Multiple lines of computational evidence support a deleterious effect on the gene or gene product (conservation, evolutionary, splicing impact, etc.)" - * BP4 "Multiple lines of computational evidence suggest no impact on gene or gene product (conservation, evolutionary, splicing impact, etc.)" - */ - private void assignPP3orBP4(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityData pathogenicityData) { - // These approaches are broadly similar in recommending just using one predictor and REVEL is consistently - // seen to out-perform other predictors. In our testing we also found that the REVEL-only approach worked best. - - // updated from "Evidence-based calibration of computational tools for missense variant pathogenicity - // classification and ClinGen recommendations for clinical use of PP3/BP4 criteria" - // https://www.biorxiv.org/content/10.1101/2022.03.17.484479v1 - // - var revelScore = pathogenicityData.pathogenicityScore(PathogenicitySource.REVEL); - if (revelScore != null) { - assignRevelBasedPP3BP4Classification(acmgEvidenceBuilder, revelScore); - } else { - // See "Assessing performance of pathogenicity predictors using clinically relevant variant datasets" - // http://dx.doi.org/10.1136/jmedgenet-2020-107003 - assignEnsembleBasedPP3BP4Classification(acmgEvidenceBuilder, pathogenicityData); - } - } - - /* - * Updated classification from "Evidence-based calibration of computational tools for missense variant pathogenicity - * classification and ClinGen recommendations for clinical use of PP3/BP4 criteria" - * https://www.biorxiv.org/content/10.1101/2022.03.17.484479v1 - * - * This method provided much better - */ - private void assignRevelBasedPP3BP4Classification(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityScore revelScore) { - var revel = revelScore.getRawScore(); - // Taken from table 2 of https://www.biorxiv.org/content/10.1101/2022.03.17.484479v1 - // P_Strong P_Moderate P_Supporting B_Supporting B_Moderate B_Strong B_Very Strong - // ≥ 0.932 [0.773, 0.932) [0.644, 0.773) (0.183, 0.290] (0.016, 0.183] (0.003, 0.016] ≤ 0.003 - // PATHOGENIC categories - - if (revel >= 0.932f) { - acmgEvidenceBuilder.add(PP3, Evidence.STRONG); - } else if (revel < 0.932f && revel >= 0.773f) { - acmgEvidenceBuilder.add(PP3, Evidence.MODERATE); - } else if (revel < 0.773f && revel >= 0.644f) { - acmgEvidenceBuilder.add(PP3, Evidence.SUPPORTING); - } - // BENIGN categories - else if (revel > 0.183f && revel <= 0.290f) { - acmgEvidenceBuilder.add(BP4, Evidence.SUPPORTING); - } else if (revel > 0.016f && revel <= 0.183f) { - acmgEvidenceBuilder.add(BP4, Evidence.MODERATE); - } else if (revel > 0.003f && revel <= 0.016f) { - acmgEvidenceBuilder.add(BP4, Evidence.STRONG); - } else if (revel <= 0.003f) { - acmgEvidenceBuilder.add(BP4, Evidence.VERY_STRONG); - } - } - - /* - * Ensemble-based approach suggested in - * See "Assessing performance of pathogenicity predictors using clinically relevant variant datasets" - * http://dx.doi.org/10.1136/jmedgenet-2020-107003 - */ - private void assignEnsembleBasedPP3BP4Classification(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityData pathogenicityData) { - int numBenign = 0; - int numPathogenic = 0; - List pathogenicityScores = pathogenicityData.pathogenicityScores(); - for (PathogenicityScore pathogenicityScore : pathogenicityScores) { - if (isPathogenic(pathogenicityScore)) { - numPathogenic++; - } else { - numBenign++; - } - } - if (pathogenicityScores.size() > 1 && numPathogenic > numBenign) { - acmgEvidenceBuilder.add(PP3); - } - if (pathogenicityScores.size() > 1 && numBenign > numPathogenic) { - acmgEvidenceBuilder.add(BP4); - } - } - - private boolean isPathogenic(PathogenicityScore pathogenicityScore) { - if (pathogenicityScore instanceof SiftScore score) { - return score.getRawScore() < SiftScore.SIFT_THRESHOLD; - } - if (pathogenicityScore instanceof MutationTasterScore score) { - return score.getScore() > MutationTasterScore.MTASTER_THRESHOLD; - } - if (pathogenicityScore instanceof PolyPhenScore score) { - return score.getScore() > PolyPhenScore.POLYPHEN_PROB_DAMAGING_THRESHOLD; - } - if (pathogenicityScore instanceof CaddScore score) { - // 95-99% most deleterious. - return score.getRawScore() >= 13.0f; - } - return pathogenicityScore.getScore() > 0.5f; - } - /** * PP4 "Patient’s phenotype or family history is highly specific for a disease with a single genetic etiology" */ @@ -659,8 +428,10 @@ private void assignPP4(AcmgEvidence.Builder acmgEvidenceBuilder, List diseaseModelPhenotypeMatch : compatibleDiseaseMatches) { humanGenePhenotypeScoreForMoi = Math.max(humanGenePhenotypeScoreForMoi, diseaseModelPhenotypeMatch.getScore()); } - if (humanGenePhenotypeScoreForMoi >= 0.6) { - acmgEvidenceBuilder.add(PP4); + if (humanGenePhenotypeScoreForMoi >= 0.7f) { + acmgEvidenceBuilder.add(PP4, Evidence.MODERATE); + } else if (humanGenePhenotypeScoreForMoi < 0.7f && humanGenePhenotypeScoreForMoi >= 0.51f) { + acmgEvidenceBuilder.add(PP4, Evidence.SUPPORTING); } } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgCriterion.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgCriterion.java index 9d6b328c0..a72284b03 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgCriterion.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgCriterion.java @@ -79,12 +79,12 @@ public static Evidence parseValue(String displayString) { } catch (IllegalArgumentException ex) { // swallow and try the display string } - return switch (displayString) { - case "StandAlone" -> STAND_ALONE; - case "VeryStrong" -> VERY_STRONG; - case "Strong" -> STRONG; - case "Moderate" -> MODERATE; - case "Supporting" -> SUPPORTING; + return switch (displayString.toLowerCase()) { + case "standalone" -> STAND_ALONE; + case "verystrong" -> VERY_STRONG; + case "strong" -> STRONG; + case "moderate" -> MODERATE; + case "supporting" -> SUPPORTING; default -> throw new IllegalArgumentException("Unrecognised evidence value '" + displayString + "'"); }; } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidence.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidence.java index 46657d01d..57ce72482 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidence.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidence.java @@ -246,6 +246,24 @@ public String toString() { return '[' + stringJoiner.toString() + ']'; } + public static AcmgEvidence parseAcmgEvidence(String criteria) { + if (criteria == null || criteria.isEmpty()) { + return AcmgEvidence.empty(); + } + AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder(); + for (String criterion : criteria.replace("[", "").replace("]", "").split("[, ]+")) { + String[] criteriaModifier = criterion.trim().split("_"); + AcmgCriterion acmgCriterion = AcmgCriterion.valueOf(criteriaModifier[0]); + if (criteriaModifier.length == 2) { + AcmgCriterion.Evidence modifier = AcmgCriterion.Evidence.parseValue(criteriaModifier[1]); + acmgEvidenceBuilder.add(acmgCriterion, modifier); + } else { + acmgEvidenceBuilder.add(acmgCriterion); + } + } + return acmgEvidenceBuilder.build(); + } + public static class Builder { private final EnumMap evidence = new EnumMap<>(AcmgCriterion.class); @@ -264,6 +282,19 @@ public boolean contains(AcmgCriterion acmgCriterion) { return evidence.containsKey(acmgCriterion); } + public boolean containsWithEvidence(AcmgCriterion acmgCriterion, Evidence evidenceStrength) { + return evidence.containsKey(acmgCriterion) && evidence.get(acmgCriterion) == evidenceStrength; + } + + public void remove(AcmgCriterion acmgCriterion) { + evidence.remove(acmgCriterion); + } + + @Nullable + public Evidence evidenceForCategory(AcmgCriterion acmgCriterion) { + return evidence.get(acmgCriterion); + } + public AcmgEvidence build() { return evidence.isEmpty() ? EMPTY : new AcmgEvidence(evidence); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgMissenseInFrameIndelEvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgMissenseInFrameIndelEvidenceAssigner.java new file mode 100644 index 000000000..2f3b436b6 --- /dev/null +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgMissenseInFrameIndelEvidenceAssigner.java @@ -0,0 +1,294 @@ +package org.monarchinitiative.exomiser.core.analysis.util.acmg; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import de.charite.compbio.jannovar.mendel.ModeOfInheritance; +import org.monarchinitiative.exomiser.core.genome.dao.ClinVarDao; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; +import org.monarchinitiative.exomiser.core.model.VariantEvaluation; +import org.monarchinitiative.exomiser.core.model.pathogenicity.*; +import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; +import org.monarchinitiative.svart.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*; + +class AcmgMissenseInFrameIndelEvidenceAssigner { + + private static final Logger logger = LoggerFactory.getLogger(AcmgMissenseInFrameIndelEvidenceAssigner.class); + + // e.g. p.(Lys567Thr) + private static final Pattern MISSENSE_HGVS_P = Pattern.compile("p\\.\\((?[A-Z][a-z]{2}\\d+)(?[A-Z][a-z]{2})\\)"); + + private AcmgMissenseInFrameIndelEvidenceAssigner() { + // static utility class + } + + static void assignMissenseEvidence(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ModeOfInheritance modeOfInheritance, List knownDiseases, ClinVarDao clinVarDao) { + // ignore non-missense, truncating, splice or mitochondrial variants + if (isMissenseOrInframeIndel(variantEvaluation.getVariantEffect()) && variantEvaluation.contigId() != 25) { + // ensure region is within contig bounds + Contig contig = variantEvaluation.contig(); + var upStream = Math.max(1, variantEvaluation.start() - 25); + var downStream = Math.min(contig.length(), variantEvaluation.start() + 25); + GenomicInterval genomicInterval = GenomicInterval.of(contig, Strand.POSITIVE, Coordinates.oneBased(upStream, downStream)); + var localClinVarData = clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval); + // PS1 "Same amino acid change as a previously established pathogenic variant regardless of nucleotide change" + // PM5 "Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before" + assignPS1PM5(acmgEvidenceBuilder, variantEvaluation, localClinVarData); + // PM1 "Located in a mutational hot spot and/or critical and well-established functional domain (e.g., active site of an enzyme) without benign variation" + assignPM1(acmgEvidenceBuilder, variantEvaluation, localClinVarData); + // TODO: BP3 "In-frame deletions/insertions in a repetitive region without a known function" - requires domain information. + + // PP2 "Missense variant in a gene that has a low rate of benign missense variation and in which missense variants are a common mechanism of disease." + // BP1 "Missense variant in a gene for which primarily truncating variants are known to cause disease." + assignPP2orBP1(acmgEvidenceBuilder, variantEvaluation, clinVarDao); + // PP3 "Multiple lines of computational evidence support a deleterious effect on the gene or gene product (conservation, evolutionary, splicing impact, etc.)" + // BP4 "Multiple lines of computational evidence suggest no impact on gene or gene product (conservation, evolutionary, splicing impact, etc.)" + assignPP3orBP4(acmgEvidenceBuilder, variantEvaluation.getPathogenicityData()); + } + } + + // PP2 - Missense variant in a gene that has a low rate of benign missense variation and in which missense variants are a common mechanism of disease. + // BP1 - Missense variant in a gene for which primarily truncating variants are known to cause disease. + private static void assignPP2orBP1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) { + GeneStatistics geneStatistics = clinVarDao.getGeneStatistics(variantEvaluation.getGeneSymbol()); + boolean missenseOrInframeIndel = isMissenseOrInframeIndel(variantEvaluation.getVariantEffect()); + if (missenseOrInframeIndel && isPP2Gene(geneStatistics)) { + acmgEvidenceBuilder.add(PP2); + } else if (missenseOrInframeIndel && isBP1Gene(geneStatistics)) { + acmgEvidenceBuilder.add(BP1); + } + } + + // a gene that has a low rate of benign missense variation and in which missense variants are a common mechanism of disease. + private static boolean isPP2Gene(GeneStatistics geneStatistics) { + int missensePathCount = geneStatistics.missensePathCount(); + int missenseVusCount = geneStatistics.missenseVusCount(); + int missenseBenignCount = geneStatistics.missenseBenignCount(); + return missensePathCount / (float) geneStatistics.pathCount() >= 0.75 && (missensePathCount / (float) missenseBenignCount >= 0.9) && ((float) missenseBenignCount / (missenseBenignCount + missenseVusCount + missensePathCount) <= 0.05); + } + + // a gene for which primarily truncating variants are known to cause disease. + private static boolean isBP1Gene(GeneStatistics geneStatistics) { + return geneStatistics.lofPathCount() / (float) geneStatistics.pathCount() >= 0.75; + } + + // PS1 "Same amino acid change as a previously established pathogenic variant regardless of nucleotide change" + // PM5 "Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before" + private static void assignPS1PM5(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Map localClinVarData) { + if (variantEvaluation.getTranscriptAnnotations().isEmpty()) { + return; + } + String variantProteinChange = variantEvaluation.getTranscriptAnnotations().get(0).getHgvsProtein(); + Matcher queryVariantMatcher = MISSENSE_HGVS_P.matcher(variantProteinChange); + boolean matches = queryVariantMatcher.matches(); + String aaRefPos = matches ? queryVariantMatcher.group("refPos") : ""; + String aaAlt = matches ? queryVariantMatcher.group("alt") : ""; + for (var entry : localClinVarData.entrySet()) { + ClinVarData clinVarData = entry.getValue(); + ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); + if (isMissenseOrInframeIndel(clinVarData.getVariantEffect()) && isPathOrLikelyPath(primaryInterpretation)) { + GenomicVariant clinVarVariant = entry.getKey(); + if (Math.abs(clinVarVariant.distanceTo(variantEvaluation)) <= 2 && GenomicVariant.compare(clinVarVariant, variantEvaluation) != 0) { + // within codon so check for same AA change or different AA change + Matcher clinvarMatcher = MISSENSE_HGVS_P.matcher(clinVarData.getHgvsProtein()); + if (clinvarMatcher.matches()) { + String clnAaRefPos = clinvarMatcher.group("refPos"); + String clnAaAlt = clinvarMatcher.group("alt"); + if (aaRefPos.equals(clnAaRefPos)) { + if (aaAlt.equals(clnAaAlt)) { + // PS1 "Same amino acid change as a previously established pathogenic variant regardless of nucleotide change" + AcmgCriterion.Evidence evidence; + if (clinVarData.starRating() >= 2) { + evidence = AcmgCriterion.Evidence.STRONG; + } else if (clinVarData.starRating() == 1) { + evidence = AcmgCriterion.Evidence.MODERATE; + } else { + evidence = AcmgCriterion.Evidence.SUPPORTING; + } + logger.debug("{} -> {}_{}", clinVarData.getHgvsProtein(), PS1, evidence); + acmgEvidenceBuilder.add(PS1, evidence); + } else { + AcmgCriterion.Evidence evidence = clinVarData.starRating() >= 2 ? AcmgCriterion.Evidence.MODERATE : AcmgCriterion.Evidence.SUPPORTING; + logger.debug("{} -> {}_{}", clinVarData.getHgvsProtein(), PM5, evidence); + // PM5 "Novel missense change at an amino acid residue where a different missense change determined to be pathogenic has been seen before" + acmgEvidenceBuilder.add(PM5, evidence); + } + } + } + } + } + } + } + + // PM1 "Located in a mutational hot spot and/or critical and well-established functional domain (e.g., active site of an enzyme) without benign variation" + private static void assignPM1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Map localClinVarData) { + // TODO - need UniProt domain / site info and clinvar counts + // can upgrade to STRONG + // https://www.cell.com/ajhg/fulltext/S0002-9297(22)00461-X suggests to limit the combined evidence from PM1 and PP3 to strong + int pathCount = 0; + int vusCount = 0; + int benignCount = 0; + for (var entry : localClinVarData.entrySet()) { + ClinVarData clinVarData = entry.getValue(); + ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); + if (isMissenseOrInframeIndel(clinVarData.getVariantEffect())) { + if (isPathOrLikelyPath(primaryInterpretation)) { + pathCount++; + } + if (primaryInterpretation == ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) { + vusCount++; + } + if (isBenignOrLikelyBenign(primaryInterpretation)) { + benignCount++; + } + } + } + logger.debug("PM1 evidence for region {} {}:{}-{} Paths: {} VUSs: {} Benigns: {}", variantEvaluation.getGenomeAssembly(), variantEvaluation.contigId(), variantEvaluation.start() - 25, variantEvaluation.end() + 25, pathCount, vusCount, benignCount); + if (pathCount >= 4 && benignCount == 0) { + // could do funkier thing to score other path variants by severity/star rating and distance to variant + if (pathCount > vusCount) { + acmgEvidenceBuilder.add(PM1); + logger.debug("{} -> {}", variantEvaluation, PM1); + } else { + acmgEvidenceBuilder.add(PM1, Evidence.SUPPORTING); + logger.debug("{} -> {}_Supporting", variantEvaluation, PM1); + } + } + } + + private static boolean isMissenseOrInframeIndel(VariantEffect variantEffect) { + return variantEffect == VariantEffect.MISSENSE_VARIANT || variantEffect == VariantEffect.INFRAME_DELETION || variantEffect == VariantEffect.INFRAME_INSERTION; + } + + private static boolean isBenignOrLikelyBenign(ClinVarData.ClinSig clinSig) { + return clinSig == ClinVarData.ClinSig.BENIGN || clinSig == ClinVarData.ClinSig.LIKELY_BENIGN || clinSig == ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN; + } + + private static boolean isPathOrLikelyPath(ClinVarData.ClinSig clinSig) { + return clinSig == ClinVarData.ClinSig.PATHOGENIC || clinSig == ClinVarData.ClinSig.LIKELY_PATHOGENIC || clinSig == ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; + } + + + /** + * PP3 "Multiple lines of computational evidence support a deleterious effect on the gene or gene product (conservation, evolutionary, splicing impact, etc.)" + * BP4 "Multiple lines of computational evidence suggest no impact on gene or gene product (conservation, evolutionary, splicing impact, etc.)" + */ + private static void assignPP3orBP4(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityData pathogenicityData) { + // These approaches are broadly similar in recommending just using one predictor and REVEL is consistently + // seen to out-perform other predictors. In our testing we also found that the REVEL-only approach worked best. + + // updated from "Evidence-based calibration of computational tools for missense variant pathogenicity + // classification and ClinGen recommendations for clinical use of PP3/BP4 criteria" + // https://www.biorxiv.org/content/10.1101/2022.03.17.484479v1 + // + var revelScore = pathogenicityData.pathogenicityScore(PathogenicitySource.REVEL); + if (revelScore != null) { + assignRevelBasedPP3BP4Classification(acmgEvidenceBuilder, revelScore); + } else { + // See "Assessing performance of pathogenicity predictors using clinically relevant variant datasets" + // http://dx.doi.org/10.1136/jmedgenet-2020-107003 + assignEnsembleBasedPP3BP4Classification(acmgEvidenceBuilder, pathogenicityData); + } + } + + /* + * Updated classification from "Evidence-based calibration of computational tools for missense variant pathogenicity + * classification and ClinGen recommendations for clinical use of PP3/BP4 criteria" + * https://doi.org/10.1016/j.ajhg.2022.10.013 + * + * This method provided much better + */ + private static void assignRevelBasedPP3BP4Classification(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityScore revelScore) { + var revel = revelScore.getRawScore(); + // Taken from table 2 of https://doi.org/10.1016/j.ajhg.2022.10.013 + // P_Strong P_Moderate P_Supporting B_Supporting B_Moderate B_Strong B_Very Strong + // ≥ 0.932 [0.773, 0.932) [0.644, 0.773) (0.183, 0.290] (0.016, 0.183] (0.003, 0.016] ≤ 0.003 + // PATHOGENIC categories + + if (revel >= 0.932f) { + acmgEvidenceBuilder.add(PP3, Evidence.STRONG); + } else if (revel < 0.932f && revel >= 0.773f) { + acmgEvidenceBuilder.add(PP3, Evidence.MODERATE); + } else if (revel < 0.773f && revel >= 0.644f) { + acmgEvidenceBuilder.add(PP3, Evidence.SUPPORTING); + } + // BENIGN categories + // Cap at SUPPORTING as this leads to huge VUS -> LB + else if (revel <= 0.290f) { + acmgEvidenceBuilder.add(BP4, Evidence.SUPPORTING); + } +// else if (revel > 0.183f && revel <= 0.290f) { +// acmgEvidenceBuilder.add(BP4, Evidence.SUPPORTING); +// } else if (revel > 0.016f && revel <= 0.183f) { +// acmgEvidenceBuilder.add(BP4, Evidence.MODERATE); +// } else if (revel > 0.003f && revel <= 0.016f) { +// acmgEvidenceBuilder.add(BP4, Evidence.STRONG); +// } else if (revel <= 0.003f) { +// acmgEvidenceBuilder.add(BP4, Evidence.VERY_STRONG); +// } + + // PM1 & PP3 should only reach a max of STRONG i.e. 4 points + // "Furthermore, it is challenging to separate these shared attributes for tools such as MutPred2 and VEST4 that + // implicitly incorporate some notion of structural and functional importance to each variant position. To address + // this potential overlap or double-counting of PP3/BP4 and PM1, we recommend that laboratories limit the sum of + // the evidence strength of PP3 and PM1 to strong. This would allow PP3 to be invoked as supporting or moderate + // along with PM1 to be invoked as moderate, which would be the same as limiting the sum of PP3 and PM1 to 4 points + // in the Bayes points implementation." + Evidence pm1Ev = acmgEvidenceBuilder.evidenceForCategory(PM1); + Evidence pp3Ev = acmgEvidenceBuilder.evidenceForCategory(PP3); + if (pp3Ev == Evidence.STRONG && pm1Ev != null) { + // defer to PP3 Strong as this should be a lot more sophisticated than the PM1 assignment criteria + logger.debug("Removing PM1_{} as PP3_{} is at threshold of {}", pm1Ev, pp3Ev, Evidence.STRONG); + acmgEvidenceBuilder.remove(PM1); + } + } + + /* + * Ensemble-based approach suggested in + * See "Assessing performance of pathogenicity predictors using clinically relevant variant datasets" + * http://dx.doi.org/10.1136/jmedgenet-2020-107003 + */ + private static void assignEnsembleBasedPP3BP4Classification(AcmgEvidence.Builder acmgEvidenceBuilder, PathogenicityData pathogenicityData) { + int numBenign = 0; + int numPathogenic = 0; + List pathogenicityScores = pathogenicityData.pathogenicityScores(); + for (PathogenicityScore pathogenicityScore : pathogenicityScores) { + if (isPathogenic(pathogenicityScore)) { + numPathogenic++; + } else { + numBenign++; + } + } + if (pathogenicityScores.size() > 1 && numPathogenic > numBenign) { + acmgEvidenceBuilder.add(PP3); + } + if (pathogenicityScores.size() > 1 && numBenign > numPathogenic) { + acmgEvidenceBuilder.add(BP4); + } + } + + private static boolean isPathogenic(PathogenicityScore pathogenicityScore) { + if (pathogenicityScore instanceof SiftScore score) { + return score.getRawScore() < SiftScore.SIFT_THRESHOLD; + } + if (pathogenicityScore instanceof MutationTasterScore score) { + return score.getScore() > MutationTasterScore.MTASTER_THRESHOLD; + } + if (pathogenicityScore instanceof PolyPhenScore score) { + return score.getScore() > PolyPhenScore.POLYPHEN_PROB_DAMAGING_THRESHOLD; + } + if (pathogenicityScore instanceof CaddScore score) { + // 95-99% most deleterious. + return score.getRawScore() >= 13.0f; + } + return pathogenicityScore.getScore() > 0.5f; + } + +} diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java new file mode 100644 index 000000000..710b9fc60 --- /dev/null +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java @@ -0,0 +1,168 @@ +package org.monarchinitiative.exomiser.core.analysis.util.acmg; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import de.charite.compbio.jannovar.mendel.ModeOfInheritance; +import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraint; +import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraints; +import org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex; +import org.monarchinitiative.exomiser.core.model.TranscriptAnnotation; +import org.monarchinitiative.exomiser.core.model.VariantEvaluation; +import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; + +import java.util.List; + +import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.PVS1; + +class AcmgPVS1EvidenceAssigner { + + private AcmgPVS1EvidenceAssigner() { + } + + /** + * Applies PVS1 according to guidelines in Recommendations for interpreting the loss of function PVS1 ACMG/AMP variant criterion. + *

+ * PVS1 "null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single or multiexon deletion) in a gene where LOF is a known mechanism of disease" + */ + static void assignPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Sex probandSex, ModeOfInheritance + modeOfInheritance, List knownDiseases) { + // TODO: add new method - gene.getAssociatedDiseases() + // also need ClinvarCache.getClinvarDataForGene(GeneIdentifier geneIdentifier) + + // Certain types of variants (e.g., nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single exon or multiexon + // deletion) can often be assumed to disrupt gene function by leading to a complete absence of the gene product by lack of tran- + // scription or nonsense-mediated decay of an altered transcript. One must, however, exercise caution when classifying these + // variants as pathogenic by considering the following principles: + // (i) When classifying such variants as pathogenic, one must ensure that null variants are a known mechanism of + // pathogenicity consistent with the established inheritance pattern for the disease. For example, there are genes for + // which only heterozygous missense variants cause disease and null variants are benign in a heterozygous state (e.g., + // many hypertrophic cardiomyopathy genes). A novel heterozygous nonsense variant in the MYH7 gene would + // not be considered pathogenic for dominant hypertrophic cardiomyopathy based solely on this evidence, whereas a + // novel heterozygous nonsense variant in the CFTR gene would likely be considered a recessive pathogenic variant. + // Caveats: + // • Beware of genes where LOF is not a known disease mechanism (e.g., GFAP, MYH7) + // • Use caution interpreting LOF variants at the extreme 3′ end of a gene + // • Use caution with splice variants that are predicted to lead to exon skipping but leave the remainder of the protein intact + // • Use caution in the presence of multiple transcripts + + GeneConstraint geneConstraint = GeneConstraints.geneConstraint(variantEvaluation.getGeneSymbol()); + // Should this be using the hasCompatibleDiseaseMatches variable? + boolean inGeneWithKnownDiseaseAssociations = !knownDiseases.isEmpty(); + // TODO should modify the final strength according to ClinGen/GenCC known D2G validity - Table 1 of 10.1002/humu.23626 + VariantEffect variantEffect = variantEvaluation.getVariantEffect(); + if (inGeneWithKnownDiseaseAssociations + && isLossOfFunctionEffect(variantEffect) + && (geneConstraint != null && geneConstraint.isLossOfFunctionIntolerant()) + ){ + switch (variantEffect) { + case STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT -> + assignNonsenseOrFrameshiftPVS1(acmgEvidenceBuilder, variantEvaluation); + case SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT -> + assignCanonicalSpliceDonorOrAcceptorPVS1(acmgEvidenceBuilder, variantEvaluation); + case TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT -> + assignDeletionPVS1(acmgEvidenceBuilder, variantEvaluation); + case START_LOST -> assignInitiationCodonPVS1(acmgEvidenceBuilder, variantEvaluation); + default -> { // do not assign PVS1 + } + } + } + } + + private static void assignNonsenseOrFrameshiftPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) { + if (predictedToLeadToNmd(variantEvaluation)) { + acmgEvidenceBuilder.add(PVS1); + } else { + acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG); + } + } + + private static void assignCanonicalSpliceDonorOrAcceptorPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) { + if (predictedToLeadToNmd(variantEvaluation)) { + acmgEvidenceBuilder.add(PVS1); + } else { + acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG); + } + } + + private static void assignDeletionPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) { + // full gene deletion -> PVS1 (Full gene deletion of haploinsufficient gene should be considered P (PVS1_StandAlone?) given absence of conflicting data) + VariantEffect variantEffect = variantEvaluation.getVariantEffect(); + if (variantEffect == VariantEffect.TRANSCRIPT_ABLATION || variantEffect == VariantEffect.EXON_LOSS_VARIANT && predictedToLeadToNmd(variantEvaluation)) { + // deletion of entire transcript + acmgEvidenceBuilder.add(PVS1); + } else { + acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG); + } + } + + private static void assignInitiationCodonPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) { + acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.MODERATE); + } + + private static boolean isLossOfFunctionEffect(VariantEffect variantEffect) { + return isNonsenseOrFrameshift(variantEffect) + || isCanonicalSpliceDonorOrAcceptor(variantEffect) + || isGeneOrExonDeletion(variantEffect) + || isStartLost(variantEffect); + } + + private static boolean isNonsenseOrFrameshift(VariantEffect variantEffect) { + return variantEffect == VariantEffect.STOP_LOST + || variantEffect == VariantEffect.STOP_GAINED + || variantEffect == VariantEffect.FRAMESHIFT_ELONGATION + || variantEffect == VariantEffect.FRAMESHIFT_TRUNCATION + || variantEffect == VariantEffect.FRAMESHIFT_VARIANT; + } + + private static boolean isCanonicalSpliceDonorOrAcceptor(VariantEffect variantEffect) { + return variantEffect == VariantEffect.SPLICE_ACCEPTOR_VARIANT || variantEffect == VariantEffect.SPLICE_DONOR_VARIANT; + } + + private static boolean isGeneOrExonDeletion(VariantEffect variantEffect) { + return variantEffect == VariantEffect.EXON_LOSS_VARIANT || variantEffect == VariantEffect.TRANSCRIPT_ABLATION; + } + + private static boolean isStartLost(VariantEffect variantEffect) { + return variantEffect == VariantEffect.START_LOST; + } + + private static boolean predictedToLeadToNmd(VariantEvaluation variantEvaluation) { + if (variantEvaluation.hasTranscriptAnnotations()) { + TranscriptAnnotation transcriptAnnotation = variantEvaluation.getTranscriptAnnotations().get(0); + return predictedToLeadToNmd(transcriptAnnotation); + } + return false; + } + + // this is a crude method as we don't have the actual transcript coordinates here, so we'll only be able to apply the + // last exon and single exon gene rules + private static boolean predictedToLeadToNmd(TranscriptAnnotation transcriptAnnotation) { + // predicted to lead to NMD if not in last exon or last 50bp of penultimate exon, or is in multi-exon transcript. + // In other words a variant does NOT trigger NMD if it is located: + // - in the last exon + // - in the last 50 bases of the penultimate exon + // - in an exon larger than 400 bases + // - within the first 150 (CDS) bases of the transcription start site + // - in a single-exon gene + boolean notInLastExon = transcriptAnnotation.getRank() < transcriptAnnotation.getRankTotal(); // will be false for single exon genes where rank == rankTotal + VariantEffect variantEffect = transcriptAnnotation.getVariantEffect(); + boolean isExonicOrCanonicalSpliceSite = (transcriptAnnotation.getRankType() == TranscriptAnnotation.RankType.EXON) || variantEffect == VariantEffect.SPLICE_ACCEPTOR_VARIANT || variantEffect == VariantEffect.SPLICE_DONOR_VARIANT; + return isExonicOrCanonicalSpliceSite && notInLastExon; + } + + private static boolean compatibleWithRecessive(Sex probandSex, ModeOfInheritance modeOfInheritance) { + if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_RECESSIVE) { + return true; + } + return probandSex == Sex.FEMALE && modeOfInheritance == ModeOfInheritance.X_RECESSIVE; + } + + private static boolean compatibleWithDominant(Sex probandSex, ModeOfInheritance modeOfInheritance) { + if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_DOMINANT) { + return true; + } + if (probandSex == Sex.MALE && (modeOfInheritance == ModeOfInheritance.X_RECESSIVE || modeOfInheritance == ModeOfInheritance.X_DOMINANT)) { + return true; + } + return (probandSex == Sex.FEMALE || probandSex == Sex.UNKNOWN) && modeOfInheritance == ModeOfInheritance.X_DOMINANT; + } +} diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java new file mode 100644 index 000000000..9dd1a7d09 --- /dev/null +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java @@ -0,0 +1,229 @@ +package org.monarchinitiative.exomiser.core.analysis.util.acmg; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import de.charite.compbio.jannovar.mendel.ModeOfInheritance; +import org.monarchinitiative.exomiser.core.genome.dao.ClinVarDao; +import org.monarchinitiative.exomiser.core.model.VariantEvaluation; +import org.monarchinitiative.exomiser.core.model.pathogenicity.*; +import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; +import org.monarchinitiative.svart.*; + +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static de.charite.compbio.jannovar.annotation.VariantEffect.*; +import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*; + +class AcmgSpliceEvidenceAssigner { + + private AcmgSpliceEvidenceAssigner() { + } + + /** + * Applies PVS1, PS1, PP3, BP4, BP7 to splice region variants according to "Using the ACMG/AMP framework to capture + * evidence related to predicted and observed impact on splicing: Recommendations from the ClinGen SVI Splicing Subgroup" + * https://doi.org/10.1016/j.ajhg.2023.06.002 + * @param acmgEvidenceBuilder + * @param variantEvaluation + * @param modeOfInheritance + * @param knownDiseases + * @param clinVarDao + */ + static void assignSpliceEvidence(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ModeOfInheritance modeOfInheritance, List knownDiseases, ClinVarDao clinVarDao) { + VariantEffect variantEffect = variantEvaluation.getVariantEffect(); + PathogenicityData pathogenicityData = variantEvaluation.getPathogenicityData(); + if (isSpliceDonorAcceptorSpliceVariant(variantEffect)) { + // PVS1 decision tree - this should have been independently added by the PVS1EvidenceAssigner class + // add PS1 modifier if PVS1 was assigned + assignDonorAcceptorPS1(acmgEvidenceBuilder, variantEvaluation, clinVarDao); + } else if (isNonDonorAcceptorSpliceRegionVariant(variantEffect)) { + // PP3+PS1 or BP4+BP7 + // The database only contains SpliceAI scores > 0.1. This means that any SNP with no score in the database can be considered as + // not impacting splicing. + PathogenicityScore spliceAiScore = pathogenicityData.pathogenicityScore(PathogenicitySource.SPLICE_AI); + if (variantEvaluation.variantType() == VariantType.SNV) { + float score = spliceAiScore == null ? 0 : spliceAiScore.getScore(); + assignSpliceAiBasedPP3Classification(acmgEvidenceBuilder, score); + if (acmgEvidenceBuilder.contains(PP3)) { + assignNonDonorAcceptorPS1(acmgEvidenceBuilder, variantEvaluation, clinVarDao); + } else if (acmgEvidenceBuilder.contains(BP4)) { + assignSilentIntronicBP7(acmgEvidenceBuilder, variantEvaluation); + } + } + } + // Note - this is independent of the Splicing guidelines BP7 criteria which is for + // BP7 "A synonymous (silent) variant for which splicing prediction algorithms predict no impact to the splice consensus sequence nor the creation of a new splice site AND the nucleotide is not highly conserved" + ClinVarData clinVarData = pathogenicityData.clinVarData(); + assignSynonymousBP7(acmgEvidenceBuilder, variantEvaluation, pathogenicityData, clinVarData); + } + + private static boolean assignSpliceAiBasedPP3Classification(AcmgEvidence.Builder acmgEvidenceBuilder, float spliceAiScore) { + // "Using the ACMG/AMP framework to capture evidence related to predicted and observed impact on splicing: Recommendations from the ClinGen SVI Splicing Subgroup" + // DOI: 10.1016/j.ajhg.2023.06.002 + // variants outside of the +/-1,2 canonical splice acceptor/donor with a spliceAI: + // score >= 0.2 can be assigned PP3 + // score < 0.1 can be assigned BP4 + if (spliceAiScore >= 0.2) { + acmgEvidenceBuilder.add(PP3); + return true; + } else if (spliceAiScore < 0.1) { + acmgEvidenceBuilder.add(BP4); + return true; + } + return false; + } + + private static void assignNonDonorAcceptorPS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) { + var localClinVarData = getClinVarDataSurroundingVariant(variantEvaluation, clinVarDao); + Set ps1Evidence = EnumSet.noneOf(Evidence.class); + for (var entry : localClinVarData.entrySet()) { + GenomicVariant clinVarVariant = entry.getKey(); + ClinVarData clinVarData = entry.getValue(); + ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); + if (isNonDonorAcceptorSpliceRegionVariant(clinVarData.getVariantEffect()) && isPathOrLikelyPath(primaryInterpretation)) { + // same position in comparison to VUA + if (clinVarVariant.contains(variantEvaluation)) { + if (isPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.STRONG); + } else if (isLikelyPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.MODERATE); + } + } else { + // same splice region + if (isPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.MODERATE); + } else if (isLikelyPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.SUPPORTING); + } + } + } + } + addPs1Evidence(acmgEvidenceBuilder, ps1Evidence); + } + + private static final Pattern CDS_INTRONIC_HGVS = Pattern.compile("c\\.\\d+(?[+-])(?\\d+)[A-Z]+>[A-Z]+"); + private static final int DONOR_REGION_END = 6; + private static final int ACCEPTOR_REGION_START = 20; + private static void assignSilentIntronicBP7(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) { + // (C) Silent (excluding last 3 nucleotides of exon and first nucleotide of exon) and intronic variants + // at or beyond the +7 and −21 positions (conservative designation for donor/acceptor splice region) or otherwise + // at or beyond the +7 and −4 positions (less conservative designation for the minimal donor/acceptor splice region). + + // currently we can't use the actual transcript to calculate the variant's position in the intron/exon, + // so we need to use the HGVS expression instead. + if (variantEvaluation.hasTranscriptAnnotations()) { + String hgvsc = variantEvaluation.getTranscriptAnnotations().get(0).getHgvsCdna(); + Matcher matcher = CDS_INTRONIC_HGVS.matcher(hgvsc); + if (matcher.matches()) { + String intronEnd = matcher.group("intronEnd"); + int position = Integer.parseInt(matcher.group("intronPos")); + if ("+".equals(intronEnd) && position > DONOR_REGION_END || "-".equals(intronEnd) && position > ACCEPTOR_REGION_START) { + acmgEvidenceBuilder.add(BP7); + } + } + } + } + + // BP7 "A synonymous (silent) variant for which splicing prediction algorithms predict no impact to the splice consensus sequence nor the creation of a new splice site AND the nucleotide is not highly conserved" + private static void assignSynonymousBP7(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, PathogenicityData pathogenicityData, ClinVarData clinVarData) { + boolean isReportedPorLP = clinVarData.starRating() >= 1 && isPathOrLikelyPath(clinVarData.getPrimaryInterpretation()); + if (variantEvaluation.getVariantEffect() == SYNONYMOUS_VARIANT && !isReportedPorLP) { + PathogenicityScore spliceAiScore = pathogenicityData.pathogenicityScore(PathogenicitySource.SPLICE_AI); + if (spliceAiScore == null || spliceAiScore.getScore() < 0.1) { + acmgEvidenceBuilder.add(BP7); + } + } + } + + private static void assignDonorAcceptorPS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) { + Evidence pvs1Evidence = acmgEvidenceBuilder.evidenceForCategory(PVS1); + if (pvs1Evidence == null) { + return; + } + // find other clinvar variants in same splice region... + var localClinVarData = getClinVarDataSurroundingVariant(variantEvaluation, clinVarDao); + for (var entry : localClinVarData.entrySet()) { + ClinVarData clinVarData = entry.getValue(); + ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation(); + // Table 2. PS1 code weights for variants with same predicted splicing event as known (likely) pathogenic variant + VariantEffect clinVarVariantEffect = clinVarData.getVariantEffect(); + if (isSpliceVariant(clinVarVariantEffect) && isPathOrLikelyPath(primaryInterpretation)) { + Set ps1Evidence = EnumSet.noneOf(Evidence.class); + if (pvs1Evidence == Evidence.VERY_STRONG) { + // same position in comparison to VUA + if (isSpliceDonorAcceptorSpliceVariant(clinVarVariantEffect) && isPath(primaryInterpretation) || + isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isLikelyPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.SUPPORTING); + } + } + else if (pvs1Evidence == Evidence.STRONG || pvs1Evidence == Evidence.MODERATE || pvs1Evidence == Evidence.SUPPORTING) { + // same splice region as VUA + if (isSpliceDonorAcceptorSpliceVariant(clinVarVariantEffect) && isPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.STRONG); + } else if (isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.MODERATE); + } else if (isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isLikelyPath(primaryInterpretation)) { + ps1Evidence.add(Evidence.SUPPORTING ); + } + } + addPs1Evidence(acmgEvidenceBuilder, ps1Evidence); + } + } + } + + private static Map getClinVarDataSurroundingVariant(VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) { + // ensure region is within contig bounds + Contig contig = variantEvaluation.contig(); + var upStream = Math.max(1, variantEvaluation.start() - 25); + var downStream = Math.min(contig.length(), variantEvaluation.start() + 25); + GenomicInterval genomicInterval = GenomicInterval.of(contig, Strand.POSITIVE, Coordinates.oneBased(upStream, downStream)); + return clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval); + } + + private static boolean isPath(ClinVarData.ClinSig clinSig) { + return clinSig == ClinVarData.ClinSig.PATHOGENIC || clinSig == ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; + } + + private static boolean isLikelyPath(ClinVarData.ClinSig clinSig) { + return clinSig == ClinVarData.ClinSig.LIKELY_PATHOGENIC; + } + + private static boolean isPathOrLikelyPath(ClinVarData.ClinSig clinSig) { + return clinSig == ClinVarData.ClinSig.PATHOGENIC || clinSig == ClinVarData.ClinSig.LIKELY_PATHOGENIC || clinSig == ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; + } + + /** + * Returns true if the variantEffect is a SPLICE_REGION, SPLICE_DONOR or SPLICE_ACCEPTOR type. + * + * @param variantEffect + * @return + */ + private static boolean isSpliceVariant(VariantEffect variantEffect) { + return isSpliceDonorAcceptorSpliceVariant(variantEffect) || isNonDonorAcceptorSpliceRegionVariant(variantEffect); + } + + private static boolean isNonDonorAcceptorSpliceRegionVariant(VariantEffect variantEffect) { + return variantEffect == SPLICE_REGION_VARIANT; + } + + private static boolean isSpliceDonorAcceptorSpliceVariant(VariantEffect variantEffect) { + return variantEffect == SPLICE_DONOR_VARIANT || variantEffect == SPLICE_ACCEPTOR_VARIANT; + } + + private static void addPs1Evidence(AcmgEvidence.Builder acmgEvidenceBuilder, Set ps1Evidence) { + Evidence maxPs1Strength = null; + if (ps1Evidence.contains(Evidence.STRONG)) { + maxPs1Strength = Evidence.STRONG; + } else if (ps1Evidence.contains(Evidence.MODERATE)) { + maxPs1Strength = Evidence.MODERATE; + } else if (ps1Evidence.contains(Evidence.SUPPORTING)) { + maxPs1Strength = Evidence.SUPPORTING; + } + + if (maxPs1Strength != null) { + acmgEvidenceBuilder.add(PS1, maxPs1Strength); + } + } + +} diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java index 84743ff79..8e86c6eb3 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java @@ -20,6 +20,8 @@ package org.monarchinitiative.exomiser.core.genome; +import jakarta.annotation.Nonnull; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; import org.monarchinitiative.exomiser.core.model.*; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource; @@ -97,17 +99,22 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) { + public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) { return variantDataService.findClinVarRecordsOverlappingInterval(genomicInterval); } + + @Override + public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) { + return variantDataService.getGeneStatistics(geneSymbol); + } } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java index ab36e4b5a..01ae4bc2d 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java @@ -37,6 +37,7 @@ import java.util.*; import java.util.stream.Stream; +import static de.charite.compbio.jannovar.annotation.VariantEffect.MNV; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.toUnmodifiableList; @@ -173,8 +174,11 @@ private List buildTranscriptAnnotations(List a private TranscriptAnnotation toTranscriptAnnotation(Annotation annotation) { AnnotationLocation annoLoc = annotation.getAnnoLoc(); + // add transcript and other variant effects to TranscriptAnnotation? +// annotation.getTranscript() return TranscriptAnnotation.builder() - .variantEffect(getVariantEffectOrDefault(annotation.getMostPathogenicVarType(), VariantEffect.SEQUENCE_VARIANT)) + .variantEffect(getVariantEffectOrDefault(annotation.getEffects(), VariantEffect.SEQUENCE_VARIANT)) +// .variantEffects(annotation.getEffects()) .accession(TranscriptModelUtil.getTranscriptAccession(annotation.getTranscript())) .geneSymbol(buildGeneSymbol(annotation)) .hgvsGenomic((annotation.getGenomicNTChange() == null) ? "" : annotation.getGenomicNTChangeStr()) @@ -199,8 +203,17 @@ private TranscriptAnnotation.RankType getRankType(RankType annoLocRankType) { return TranscriptAnnotation.RankType.UNDEFINED; } - private VariantEffect getVariantEffectOrDefault(VariantEffect annotatedEffect, VariantEffect defaultEffect) { - return annotatedEffect == null ? defaultEffect : annotatedEffect; + private VariantEffect getVariantEffectOrDefault(Collection annotatedEffects, VariantEffect defaultEffect) { + if (annotatedEffects.isEmpty()) { + return defaultEffect; + } + // remove MNV from annotated effects. Jannovar labels block substitutions as MNVs which will override more damaging + // effects such as STOP_LOSS, STOP_GAIN, SPLICE_DONOR, SPLICE_ACCEPTOR and prevent potential assignment of PVS1. + // This could be solved by changing TranscriptAnnotation to have a Set and methods to interrogate + // the state of this e.g. transcriptAnnotation.isStopLoss(), .isStopGain(). n.b. this will also affect + // ClinVarData and VariantEvaluation which currently only use a single VariantEffect field. + return annotatedEffects.size() == 1 ? annotatedEffects.iterator().next() : + annotatedEffects.stream().filter(effect -> effect != MNV).findFirst().orElse(defaultEffect); } private int getDistFromNearestGene(Annotation annotation) { diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java index 32e925266..b4b7b91d6 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java @@ -21,7 +21,8 @@ package org.monarchinitiative.exomiser.core.genome; -import de.charite.compbio.jannovar.annotation.VariantEffect; +import jakarta.annotation.Nonnull; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; import org.monarchinitiative.exomiser.core.genome.dao.*; import org.monarchinitiative.exomiser.core.model.Variant; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; @@ -156,11 +157,10 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set pathogenicitySources, P } @Override - public ClinVarData getClinVarData(Variant variant) { + public ClinVarData getClinVarData(@Nonnull Variant variant) { return clinVarDao.getClinVarData(variant); } @Override - public ClinVarData getClinVarData(GenomicVariant genomicVariant) { + public ClinVarData getClinVarData(@Nonnull GenomicVariant genomicVariant) { return clinVarDao.getClinVarData(genomicVariant); } @Override - public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) { + public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) { return clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval); } + @Override + public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) { + return clinVarDao.getGeneStatistics(geneSymbol); + } + public static Builder builder() { return new Builder(); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java index 6ad0f2d4f..5b8d8d755 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java @@ -1,8 +1,9 @@ package org.monarchinitiative.exomiser.core.genome.dao; +import jakarta.annotation.Nonnull; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; import org.monarchinitiative.exomiser.core.model.Variant; import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; -import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.svart.GenomicInterval; import org.monarchinitiative.svart.GenomicVariant; @@ -16,9 +17,11 @@ */ public interface ClinVarDao { - ClinVarData getClinVarData(Variant variant); + ClinVarData getClinVarData(@Nonnull Variant variant); - ClinVarData getClinVarData(GenomicVariant genomicVariant); + ClinVarData getClinVarData(@Nonnull GenomicVariant genomicVariant); - Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval); + Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval); + + GeneStatistics getGeneStatistics(@Nonnull String geneSymbol); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java index c897599d6..00776b74f 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java @@ -1,7 +1,9 @@ package org.monarchinitiative.exomiser.core.genome.dao; +import de.charite.compbio.jannovar.annotation.VariantEffect; import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil; import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor; import org.monarchinitiative.exomiser.core.model.Variant; @@ -12,10 +14,15 @@ import org.slf4j.LoggerFactory; import javax.annotation.Nonnull; +import java.time.Duration; +import java.time.Instant; +import java.util.HashMap; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; +import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS; + /** * @since 14.0.0 */ @@ -24,9 +31,30 @@ public class ClinVarDaoMvStore implements ClinVarDao { private static final Logger logger = LoggerFactory.getLogger(ClinVarDaoMvStore.class); private final MVMap clinVarMap; + private final Map geneStats; public ClinVarDaoMvStore(MVStore mvStore) { clinVarMap = MvStoreUtil.openClinVarMVMap(mvStore); + geneStats = calculateGeneStatistics(clinVarMap); + } + + private Map calculateGeneStatistics(MVMap clinVarMap) { + Instant start = Instant.now(); + Map geneStatisticsBuilders = new LinkedHashMap<>(1900); + for (Map.Entry entry : clinVarMap.entrySet()) { + AlleleProto.ClinVar clinVar = entry.getValue(); + ClinVarData.ReviewStatus reviewStatus = AlleleProtoAdaptor.toReviewStatus(clinVar.getReviewStatus()); + if (reviewStatus != CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS && reviewStatus.starRating() >= 1) { + VariantEffect variantEffect = AlleleProtoAdaptor.toVariantEffect(clinVar.getVariantEffect()); + ClinVarData.ClinSig clinSig = AlleleProtoAdaptor.toClinSig(clinVar.getPrimaryInterpretation()); + geneStatisticsBuilders.compute(clinVar.getGeneSymbol(), (k, v) -> (v == null) ? GeneStatistics.builder(k, variantEffect, clinSig) : v.put(variantEffect, clinSig)); + } + } + final Map stats = new HashMap<>(geneStatisticsBuilders.size()); + geneStatisticsBuilders.forEach((k, v) -> stats.put(k, v.build())); + Instant end = Instant.now(); + logger.info("Created {} ClinVar gene stats in {} ms", stats.size(), Duration.between(start, end).toMillis()); + return stats; } @Override @@ -45,7 +73,7 @@ private ClinVarData getClinVarData(@Nonnull AlleleProto.AlleleKey alleleKey) { } @Override - public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) { + public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) { Contig contig = genomicInterval.contig(); int chr = genomicInterval.contigId(); @@ -80,6 +108,11 @@ public Map findClinVarRecordsOverlappingInterval(Ge return results; } + @Override + public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) { + return geneStats.getOrDefault(geneSymbol, GeneStatistics.builder(geneSymbol).build()); + } + private String broadFormat(AlleleProto.AlleleKey alleleKey) { return alleleKey.getChr() + "-" + alleleKey.getPosition() + "-" + alleleKey.getRef() + "-" + alleleKey.getAlt(); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java index d9e12d579..39695073f 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java @@ -284,7 +284,7 @@ private static Set toClinSigSet(List proto return converted; } - private static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) { + public static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) { return switch (protoClinSig) { case BENIGN -> ClinVarData.ClinSig.BENIGN; case BENIGN_OR_LIKELY_BENIGN -> ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN; @@ -301,11 +301,14 @@ private static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) { case OTHER -> ClinVarData.ClinSig.OTHER; case PROTECTIVE -> ClinVarData.ClinSig.PROTECTIVE; case RISK_FACTOR -> ClinVarData.ClinSig.RISK_FACTOR; + case UNCERTAIN_RISK_ALLELE -> ClinVarData.ClinSig.UNCERTAIN_RISK_ALLELE; + case LIKELY_RISK_ALLELE -> ClinVarData.ClinSig.LIKELY_RISK_ALLELE; + case ESTABLISHED_RISK_ALLELE -> ClinVarData.ClinSig.ESTABLISHED_RISK_ALLELE; case NOT_PROVIDED, UNRECOGNIZED -> ClinVarData.ClinSig.NOT_PROVIDED; }; } - private static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus protoReviewStatus) { + public static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus protoReviewStatus) { return switch (protoReviewStatus) { case NO_ASSERTION_PROVIDED -> ClinVarData.ReviewStatus.NO_ASSERTION_PROVIDED; case NO_ASSERTION_CRITERIA_PROVIDED -> ClinVarData.ReviewStatus.NO_ASSERTION_CRITERIA_PROVIDED; @@ -319,7 +322,7 @@ private static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus prot }; } - private static VariantEffect toVariantEffect(AlleleProto.VariantEffect clinVarVariantEffect) { + public static VariantEffect toVariantEffect(AlleleProto.VariantEffect clinVarVariantEffect) { return switch (clinVarVariantEffect) { case SEQUENCE_VARIANT -> VariantEffect.SEQUENCE_VARIANT; case CHROMOSOME_NUMBER_VARIATION -> VariantEffect.CHROMOSOME_NUMBER_VARIATION; diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java new file mode 100644 index 000000000..8ffe93c6d --- /dev/null +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java @@ -0,0 +1,273 @@ +package org.monarchinitiative.exomiser.core.model; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; + +import java.util.EnumMap; +import java.util.Map; +import java.util.StringJoiner; + +import static de.charite.compbio.jannovar.annotation.VariantEffect.*; + +/** + * Data class for collating ClinVar pathogenicity assertions against variant effect, primarily for the application of + * ACMG criteria BP1 and PP2. + * + * @since 14.1.0 + */ +public class GeneStatistics { + + private static final int PATH_INDEX = 0; + private static final int VUS_INDEX = 1; + private static final int BENIGN_INDEX = 2; + + private static final VariantEffect[] LOF_EFFECTS = { + STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT, + SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT, + TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT, + START_LOST + }; + + private final String geneSymbol; + private final Map clinVarCounts; + + private GeneStatistics(String geneSymbol, Map clinVarCounts) { + this.geneSymbol = geneSymbol; + this.clinVarCounts = clinVarCounts; + } + + /** + * @return The gene symbol for the gene related to these statistics + */ + public String geneSymbol() { + return geneSymbol; + } + + /** + * @return The sum of all pathogenic / likely pathogenic variants associated with this gene. + */ + public int pathCount() { + return totalCountForIndex(PATH_INDEX); + } + + /** + * @return The sum of all variants of uncertain significance (VUS) associated with this gene. + */ + public int vusCount() { + return totalCountForIndex(VUS_INDEX); + } + + /** + * @return The sum of all benign / likely benign variants associated with this gene. + */ + public int benignCount() { + return totalCountForIndex(BENIGN_INDEX); + } + + private int totalCountForIndex(int i) { + int count = 0; + for (Map.Entry variantEffectEntry : clinVarCounts.entrySet()) { + count += variantEffectEntry.getValue()[i]; + } + return count; + } + + /** + * The sum of all pathogenic / likely pathogenic Loss Of Function variants associated with this gene. These are + * defined here as: + *

+     *      STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+     *      SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+     *      TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+     *      START_LOST
+     * 
+ * + * @return The sum of all pathogenic / likely pathogenic Loss Of Function variants associated with this gene. + */ + public int lofPathCount() { + return pathCount(LOF_EFFECTS); + } + + /** + * The sum of all variants of uncertain significance (VUS) Loss Of Function variants associated with this gene. These are + * defined here as: + *
+     *      STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+     *      SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+     *      TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+     *      START_LOST
+     * 
+ * + * @return The sum of all variants of uncertain significance (VUS) Loss Of Function variants associated with this gene. + */ + public int lofVusCount() { + return vusCount(LOF_EFFECTS); + } + + /** + * The sum of all benign / likely benign Loss Of Function variants associated with this gene. These are defined here + * as: + *
+     *      STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+     *      SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+     *      TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+     *      START_LOST
+     * 
+ * + * @return The sum of all benign / likely benign Loss Of Function variants associated with this gene. + */ + public int lofBenignCount() { + return benignCount(LOF_EFFECTS); + } + + /** + * @return The sum of all pathogenic / likely pathogenic missense variants associated with this gene. + */ + public int missensePathCount() { + return pathCount(VariantEffect.MISSENSE_VARIANT); + } + + /** + * @return The sum of all VUS missense variants associated with this gene. + */ + public int missenseVusCount() { + return vusCount(VariantEffect.MISSENSE_VARIANT); + } + + /** + * @return The sum of all benign / likely benign missense variants associated with this gene. + */ + public int missenseBenignCount() { + return benignCount(VariantEffect.MISSENSE_VARIANT); + } + + /** + * @return The sum of all pathogenic / likely pathogenic variants for the given variantEffect associated with this gene. + */ + public int pathCount(VariantEffect variantEffect) { + return effectIndexCount(variantEffect, PATH_INDEX); + } + + /** + * @return The sum of all VUS variants for the given variantEffect associated with this gene. + */ + public int vusCount(VariantEffect variantEffect) { + return effectIndexCount(variantEffect, VUS_INDEX); + } + + /** + * @return The sum of all benign / likely benign variants for the given variantEffect associated with this gene. + */ + public int benignCount(VariantEffect variantEffect) { + return effectIndexCount(variantEffect, BENIGN_INDEX); + } + + /** + * @return The sum of all pathogenic / likely pathogenic variants for the given variantEffects associated with this gene. + */ + public int pathCount(VariantEffect... variantEffect) { + return effectIndexCounts(variantEffect, PATH_INDEX); + } + + /** + * @return The sum of all VUS variants for the given variantEffects associated with this gene. + */ + public int vusCount(VariantEffect... variantEffect) { + return effectIndexCounts(variantEffect, VUS_INDEX); + } + + /** + * @return The sum of all benign / likely benign variants for the given variantEffects associated with this gene. + */ + public int benignCount(VariantEffect... variantEffect) { + return effectIndexCounts(variantEffect, BENIGN_INDEX); + } + + private int effectIndexCounts(VariantEffect[] variantEffect, int index) { + int count = 0; + for (int i = 0; i < variantEffect.length; i++) { + count += effectIndexCount(variantEffect[i], index); + } + return count; + } + + private int effectIndexCount(VariantEffect variantEffect, int i) { + return clinVarCounts.getOrDefault(variantEffect, new int[3])[i]; + } + + @Override + public String toString() { + StringBuilder statsBuilder = new StringBuilder("{"); + clinVarCounts.forEach((key, value) -> statsBuilder.append(key).append("=").append("[").append(prettyStats(value)).append("], ")); + if (statsBuilder.length() > 1) { + statsBuilder.delete(statsBuilder.length() - 2, statsBuilder.length()); + } + statsBuilder.append("}"); + return "GeneStatistics{" + + "geneSymbol=" + geneSymbol + ',' + + " counts=" + statsBuilder + + '}'; + } + + private static final String[] categories = {"P", "VUS", "B"}; + + private String prettyStats(int[] counts) { + StringJoiner countStr = new StringJoiner(", "); + for (int i = 0; i < 3; i++) { + countStr.add(categories[i] + "=" + counts[i]); + } + return countStr.toString(); + } + + public static Builder builder(String geneSymbol) { + return new Builder(geneSymbol); + } + + public static Builder builder(String geneSymbol, VariantEffect variantEffect, ClinVarData.ClinSig clinSig) { + return new Builder(geneSymbol, variantEffect, clinSig); + } + + public static class Builder { + + private final String geneSymbol; + private final Map geneStats = new EnumMap<>(VariantEffect.class); + + public Builder(String geneSymbol) { + this.geneSymbol = geneSymbol; + } + + public Builder(String geneSymbol, VariantEffect variantEffect, ClinVarData.ClinSig clinSig) { + this.geneSymbol = geneSymbol; + put(variantEffect, clinSig); + } + + public Builder put(VariantEffect variantEffect, ClinVarData.ClinSig clinSig) { + int countIndex = getCountIndex(clinSig); + if (countIndex != -1) { + int[] counts = geneStats.get(variantEffect); + if (counts == null) { + counts = new int[3]; + counts[countIndex]++; + geneStats.put(variantEffect, counts); + } else { + counts[countIndex]++; + } + } + return this; + } + + private static int getCountIndex(ClinVarData.ClinSig clinSig) { + return switch (clinSig) { + case PATHOGENIC, PATHOGENIC_OR_LIKELY_PATHOGENIC, LIKELY_PATHOGENIC -> PATH_INDEX; + case UNCERTAIN_SIGNIFICANCE -> VUS_INDEX; + case LIKELY_BENIGN, BENIGN_OR_LIKELY_BENIGN, BENIGN -> BENIGN_INDEX; + default -> -1; + }; + } + + public GeneStatistics build() { + return new GeneStatistics(geneSymbol, geneStats); + } + } + +} diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java index 7ba62d7af..372668572 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java @@ -38,6 +38,9 @@ public enum RankType { private final VariantEffect variantEffect; +// private final Set variantEffects; + + private final String geneSymbol; private final String accession; @@ -75,6 +78,10 @@ public VariantEffect getVariantEffect() { return variantEffect; } +// public Set getVariantEffects() { +// return variantEffects; +// } + public String getGeneSymbol() { return geneSymbol; } @@ -148,6 +155,8 @@ public static class Builder { private VariantEffect variantEffect = VariantEffect.SEQUENCE_VARIANT; +// private Set variantEffects = EnumSet.noneOf(VariantEffect.class); + private String geneSymbol = ""; private String accession = ""; diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java index d597fb17d..4719c550f 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java @@ -37,6 +37,10 @@ public class ClinVarData { private static final ClinVarData EMPTY = new Builder().build(); + /** + * Based on the categories from the ClinVar docs and + * what actually shows up in the VCF file. + */ public enum ClinSig { // ACMG/AMP-based PATHOGENIC, @@ -47,6 +51,11 @@ public enum ClinSig { BENIGN_OR_LIKELY_BENIGN, BENIGN, CONFLICTING_PATHOGENICITY_INTERPRETATIONS, + // As recommended by ClinGen (https://pubmed.ncbi.nlm.nih.gov/38054408/) for variants with decreased penetrance + // for Mendelian diseases. + UNCERTAIN_RISK_ALLELE, + LIKELY_RISK_ALLELE, + ESTABLISHED_RISK_ALLELE, //Non-ACMG-based AFFECTS, ASSOCIATION, @@ -83,11 +92,16 @@ public int starRating() { public static ReviewStatus parseReviewStatus(String reviewStatus) { return switch (reviewStatus.replace("_", " ")) { - case "no assertion criteria provided" -> NO_ASSERTION_CRITERIA_PROVIDED; - case "no interpretation for the single variant", "no interpretation for the individual variant" -> - NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT; + case "no assertion provided", + "no assertion criteria provided", + "no classification provided", + "no classifications from unflagged records" -> NO_ASSERTION_CRITERIA_PROVIDED; + case "no interpretation for the single variant", + "no interpretation for the individual variant", + "no classification for the single variant"-> NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT; case "criteria provided, single submitter" -> CRITERIA_PROVIDED_SINGLE_SUBMITTER; - case "criteria provided, conflicting interpretations" -> CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS; + case "criteria provided, conflicting interpretations", + "criteria provided, conflicting classifications" -> CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS; case "criteria provided, multiple submitters, no conflicts" -> CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS; case "reviewed by expert panel" -> REVIEWED_BY_EXPERT_PANEL; diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java index 6c42cd088..335886ac5 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java @@ -139,6 +139,8 @@ public boolean hasPredictedScore(PathogenicitySource pathogenicitySource) { * @param pathogenicitySource * @return */ + + @Nullable public PathogenicityScore pathogenicityScore(PathogenicitySource pathogenicitySource) { return pathogenicityScores.get(pathogenicitySource); } diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java index c03557550..7fa2a99d1 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java @@ -1,7 +1,24 @@ package org.monarchinitiative.exomiser.core.model.pathogenicity; +/** + * The SpliceAI (Jaganathan et al, Cell 2019) max delta score. + * This is the max score from the DL/DG/AL/AG scores >= minScore. The min score for this was set to 0.2, so scores lower + * than this will likely not be present in the database, unless guidance changes later. + *
+ * Delta score of a variant, defined as the maximum of (DS_AG, DS_AL, DS_DG, DS_DL), ranges from 0 to 1 and can be interpreted as the probability of the variant being splice-altering. In the paper, a detailed characterization is provided for 0.2 (high recall), 0.5 (recommended), and 0.8 (high precision) cutoffs. Delta position conveys information about the location where splicing changes relative to the variant position (positive values are downstream of the variant, negative values are upstream). + *
+ * + * These annotations are free for academic and not-for-profit use; other use requires a commercial license from Illumina, Inc. + * + * https://github.com/Illumina/SpliceAI + **/ public class SpliceAiScore extends BasePathogenicityScore { + public static float NON_SPLICEOGENIC_SCORE = 0.1f; + public static float PERMISSIVE_SPLICEOGENIC_SCORE = 0.2f; + public static float DEFAULT_SPLICEOGENIC_SCORE = 0.5f; + public static float HIGH_CONFIDENCE_SPLICEOGENIC_SCORE = 0.8f; + public static SpliceAiScore of(float score) { return new SpliceAiScore(score); } diff --git a/exomiser-core/src/main/proto/allele.proto b/exomiser-core/src/main/proto/allele.proto index 9a06eabf8..b60781860 100644 --- a/exomiser-core/src/main/proto/allele.proto +++ b/exomiser-core/src/main/proto/allele.proto @@ -161,6 +161,11 @@ message ClinVar { OTHER = 12; PROTECTIVE = 13; RISK_FACTOR = 14; + // As recommended by ClinGen (https://pubmed.ncbi.nlm.nih.gov/38054408/) for variants with decreased penetrance + // for Mendelian diseases. + UNCERTAIN_RISK_ALLELE = 15; + LIKELY_RISK_ALLELE = 16; + ESTABLISHED_RISK_ALLELE = 17; } enum ReviewStatus { diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java index a60df8c2e..5866028fd 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java @@ -27,8 +27,8 @@ import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.CsvSource; import org.monarchinitiative.exomiser.core.genome.TestFactory; -import org.monarchinitiative.exomiser.core.genome.TestGenomeDataService; import org.monarchinitiative.exomiser.core.genome.TestVariantDataService; +import org.monarchinitiative.exomiser.core.genome.VariantDataService; import org.monarchinitiative.exomiser.core.model.*; import org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Status; import org.monarchinitiative.exomiser.core.model.frequency.Frequency; @@ -42,7 +42,9 @@ import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; import org.monarchinitiative.exomiser.core.prioritisers.model.InheritanceMode; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -52,6 +54,7 @@ import static org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex.FEMALE; import static org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex.MALE; import static org.monarchinitiative.exomiser.core.model.Pedigree.justProband; +import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*; class Acmg2015EvidenceAssignerTest { @@ -60,31 +63,61 @@ private Acmg2015EvidenceAssigner acmgEvidenceAssigner(String probandId, Pedigree return new Acmg2015EvidenceAssigner(probandId, pedigree, TestVariantDataService.stub()); } + private Acmg2015EvidenceAssigner acmgEvidenceAssigner(String probandId, Pedigree pedigree, Map expectedClinVarData) { + return new Acmg2015EvidenceAssigner(probandId, pedigree, TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build()); + } + @Test void throwsExceptionWithMismatchedIds() { assertThrows(IllegalArgumentException.class, () -> acmgEvidenceAssigner("Zaphod", justProband("Ford", MALE))); } - - @Test - void testAssignsPVS1() { + + @ParameterizedTest + @CsvSource({ + // nonsense, middle-exon NMD predicted + "STOP_LOST, 2, VERY_STRONG", + "STOP_GAINED, 2, VERY_STRONG", + "FRAMESHIFT_ELONGATION, 2, VERY_STRONG", + "FRAMESHIFT_TRUNCATION, 2, VERY_STRONG", + "FRAMESHIFT_VARIANT, 2, VERY_STRONG", + // nonsense, last exon - NMD escape + "STOP_LOST, 5, STRONG", + "STOP_GAINED, 5, STRONG", + "FRAMESHIFT_ELONGATION, 5, STRONG", + "FRAMESHIFT_TRUNCATION, 5, STRONG", + "FRAMESHIFT_VARIANT, 5, STRONG", + // splice donor/acceptor site, middle-exon NMD predicted + "SPLICE_DONOR_VARIANT, 2, VERY_STRONG", + "SPLICE_ACCEPTOR_VARIANT, 2, VERY_STRONG", + // splice donor/acceptor site, last exon - NMD escape + "SPLICE_DONOR_VARIANT, 5, STRONG", + "SPLICE_ACCEPTOR_VARIANT, 5, STRONG", + // transcript ablation / exon loss + "TRANSCRIPT_ABLATION, 2, VERY_STRONG", // sufficient as a STAND_ALONE in the case of HI genes and no conflicting evidence + "EXON_LOSS_VARIANT, 2, VERY_STRONG", // NMD + "EXON_LOSS_VARIANT, 5, STRONG", // NMD escape + // start lost + "START_LOST, 1, MODERATE", + }) + void testAssignsPVS1(VariantEffect variantEffect, int exonNumber, AcmgCriterion.Evidence evidence) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel // requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1 TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder() - .variantEffect(VariantEffect.START_LOST) + .variantEffect(variantEffect) .rankType(TranscriptAnnotation.RankType.EXON) - .rank(1) + .rank(exonNumber) .rankTotal(5) .build(); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") .geneSymbol("PTEN") .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment - .variantEffect(VariantEffect.START_LOST) + .variantEffect(variantEffect) .annotations(List.of(transcriptAnnotation)) .build(); Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of()); - assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PVS1).build())); + assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PVS1, evidence).build())); } @ParameterizedTest @@ -113,7 +146,7 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", probandSex)); // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder() - .variantEffect(VariantEffect.START_LOST) + .variantEffect(VariantEffect.SPLICE_DONOR_VARIANT) .rankType(TranscriptAnnotation.RankType.EXON) .rank(1) .rankTotal(5) @@ -121,7 +154,7 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") .geneSymbol("PTEN") .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment - .variantEffect(VariantEffect.START_LOST) + .variantEffect(VariantEffect.SPLICE_DONOR_VARIANT) .annotations(List.of(transcriptAnnotation)) .build(); Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(diseaseInheritanceMode).diseaseType(Disease.DiseaseType.DISEASE).build(); @@ -131,6 +164,107 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease assertThat(acmgEvidence, equalTo(expected)); } + @ParameterizedTest + @CsvSource({ + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 3, PS1, STRONG", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 2, PS1, STRONG", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 1, PS1, MODERATE", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 0, PS1, SUPPORTING", + + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 3, PM5, MODERATE", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 2, PM5, MODERATE", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 1, PM5, SUPPORTING", + "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 0, PM5, SUPPORTING", + }) + void testAssignsPS1orPM5(VariantEffect variantEffect, String hgvsP, String clinVarHgvsP, ClinVarData.ClinSig clinSig, int clinVarStarRating, AcmgCriterion acmgCriterion, AcmgCriterion.Evidence evidence) { + ClinVarData clinVarData = ClinVarData.builder() + .primaryInterpretation(clinSig) + .variantEffect(VariantEffect.MISSENSE_VARIANT) + .reviewStatus(starRatingToReviewStatus(clinVarStarRating)) + .hgvsProtein(clinVarHgvsP) + .build(); + Variant overlapping = TestFactory.variantBuilder(10, 89624227, "A", "C").build(); + + VariantDataService variantDataService = TestVariantDataService.builder().put(overlapping, clinVarData).build(); + + TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder() + .variantEffect(variantEffect) + .hgvsProtein(hgvsP) + .build(); + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") + .geneSymbol("GENE") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .variantEffect(variantEffect) + .annotations(List.of(transcriptAnnotation)) + .build(); + + Acmg2015EvidenceAssigner instance = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), variantDataService); + + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of()); + assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(acmgCriterion, evidence).build())); + } + + private ClinVarData.ReviewStatus starRatingToReviewStatus(int clinVarStarRating) { + return switch (clinVarStarRating) { + case 4 -> ClinVarData.ReviewStatus.PRACTICE_GUIDELINE; + case 3 -> ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL; + case 2 -> ClinVarData.ReviewStatus.CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS; + case 1 -> ClinVarData.ReviewStatus.CRITERIA_PROVIDED_SINGLE_SUBMITTER; + default -> ClinVarData.ReviewStatus.NO_ASSERTION_CRITERIA_PROVIDED; + }; + } + + @ParameterizedTest + @CsvSource({ + // requires 4 or more missense P/LP variants in the local region + "1, 3, 0, 0, 0, PM1, MODERATE", + "3, 3, 0, 0, 0, PM1, MODERATE", + "0, 4, 0, 0, 0, PM1, MODERATE", + "0, 4, 3, 0, 0, PM1, MODERATE", + "4, 0, 3, 0, 0, PM1, MODERATE", + "0, 4, 4, 0, 0, PM1, SUPPORTING", + "4, 0, 4, 0, 0, PM1, SUPPORTING", + "0, 0, 0, 0, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty + "0, 0, 4, 0, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty + "0, 0, 4, 1, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty + "4, 0, 4, 1, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty + "4, 0, 0, 1, 1, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty + }) + void assignPM1(int p, int lp, int vus, int lb, int b, AcmgCriterion expectedCriteria, Evidence expectedEvidence) { + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") + .geneSymbol("GENE") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .variantEffect(VariantEffect.MISSENSE_VARIANT) + .annotations(List.of(TranscriptAnnotation.builder() + .build())) + .build(); + + TestVariantDataService.Builder variantDataServiceBuilder = TestVariantDataService.builder(); + + int[] categoryCounts = {p, lp, vus, lb, b}; + ClinVarData.ClinSig[] categories = {PATHOGENIC, LIKELY_PATHOGENIC, UNCERTAIN_SIGNIFICANCE, LIKELY_BENIGN, BENIGN}; + int currentPos = variantEvaluation.start(); + for (int i = 0; i < categoryCounts.length; i++) { + for (int j = 0; j < categoryCounts[i]; j++) { + Variant variant = TestFactory.variantBuilder(10, ++currentPos, "A", "C").build(); + ClinVarData clinVarData = ClinVarData.builder() + .primaryInterpretation(categories[i]) + .variantEffect(VariantEffect.MISSENSE_VARIANT) + .build(); + variantDataServiceBuilder.put(variant, clinVarData); + } + } + TestVariantDataService variantDataService = variantDataServiceBuilder.build(); + Acmg2015EvidenceAssigner instance = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), variantDataService); + + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of()); + AcmgEvidence expected = expectedCriteria == BP1 ? AcmgEvidence.empty() : AcmgEvidence.builder().add(expectedCriteria, expectedEvidence).build(); + assertThat(acmgEvidence, equalTo(expected)); + } + + @Test void testAssignsPS2() { Individual proband = Individual.builder().id("proband").motherId("mother").fatherId("father").sex(MALE).status(Status.AFFECTED).build(); @@ -267,7 +401,7 @@ void testAssignsPM3() { .geneSymbol("PTEN") // n.b. start loss variant - will trigger PVS1 .variantEffect(VariantEffect.START_LOST) - .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) + .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) .sampleGenotypes(SampleGenotypes.of( SampleData.of("proband", SampleGenotype.parseGenotype("1|0")) )) @@ -294,7 +428,7 @@ void testAssignsBP2_InCisWithPathAR() { .build(); VariantEvaluation pathogenic = TestFactory.variantBuilder(10, 89624227, "A", "G") - .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) + .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) .sampleGenotypes(SampleGenotypes.of( SampleData.of("proband", SampleGenotype.parseGenotype("0|1")) )) @@ -329,7 +463,7 @@ void testAssignsBP2_InTransWithPathAD() { .geneSymbol("PTEN") // n.b. start loss variant - will trigger PVS1 .variantEffect(VariantEffect.START_LOST) - .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) + .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build())) .sampleGenotypes(SampleGenotypes.of( SampleData.of("proband", SampleGenotype.parseGenotype("1|0")) )) @@ -402,11 +536,13 @@ void testAssignsPP3() { } @ParameterizedTest - @CsvSource ({ + @CsvSource({ "MVP, 1.0f, , ", - "REVEL, 1.0f, PP3, STRONG" + "REVEL, 1.0f, PP3, STRONG", + "CADD, 0.0f, , ", + "REVEL, 0.0f, BP4, SUPPORTING" }) - void testAssignsPP3_singleScoreIsInsufficientUnlessItsRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) { + void testAssignsPP3BP4_singleScoreIsInsufficientUnlessItsRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") .geneSymbol("PTEN") @@ -475,26 +611,7 @@ void testAssignsBP4() { assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.BP4).build())); } - @ParameterizedTest - @CsvSource ({ - "CADD, 0.0f, , ", - "REVEL, 0.0f, BP4, VERY_STRONG" - }) - void testAssignsBP4_singleScoreIsInsufficientIfNotRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) { - Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); - VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") - .geneSymbol("PTEN") - .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment - .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(pathogenicitySource, pathogenicityScore))) - .variantEffect(VariantEffect.MISSENSE_VARIANT) - .build(); - Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); - AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of()); - - AcmgEvidence expected = acmgCriterion == null ? AcmgEvidence.empty() : AcmgEvidence.builder().add(acmgCriterion, evidence).build(); - assertThat(acmgEvidence, equalTo(expected)); - } - + @Test void testAssignsBP4_majorityMustBeBenign() { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") @@ -513,15 +630,15 @@ void testAssignsBP4_majorityMustBeBenign() { } @ParameterizedTest - @CsvSource ({ + @CsvSource({ "0.932, PP3, STRONG", "0.773, PP3, MODERATE", "0.644f, PP3, SUPPORTING", "0.290f, BP4, SUPPORTING", - "0.183f, BP4, MODERATE", - "0.016f, BP4, STRONG", - "0.003f, BP4, VERY_STRONG", + "0.183f, BP4, SUPPORTING", + "0.016f, BP4, SUPPORTING", + "0.003f, BP4, SUPPORTING", }) public void testRevelOverridesAllOtherScores(float revelScore, AcmgCriterion acmgCriterion, Evidence evidence) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); @@ -545,8 +662,12 @@ public void testRevelOverridesAllOtherScores(float revelScore, AcmgCriterion acm } // PP4 - @Test - void testAssignsPP4() { + @ParameterizedTest + @CsvSource({ + "0.51, SUPPORTING", + "0.7, MODERATE", + }) + void testAssignsPP4(double phenotypeScore, Evidence evidence) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") .geneSymbol("PTEN") @@ -555,10 +676,11 @@ void testAssignsPP4() { .build(); Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); // High phenotype match triggers - PP4 - List> compatibleDiseaseMatches = List.of(ModelPhenotypeMatch.of(0.6, cowdenSyndrome, List.of())); + List> compatibleDiseaseMatches = List.of(ModelPhenotypeMatch.of(phenotypeScore, cowdenSyndrome, List.of())); AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), compatibleDiseaseMatches); - assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PP4).build())); + AcmgEvidence expected = evidence == null ? AcmgEvidence.empty() : AcmgEvidence.builder().add(PP4, evidence).build(); + assertThat(acmgEvidence, equalTo(expected)); } @Nested @@ -582,7 +704,7 @@ void testAssignsPP5(String reviewStatus, AcmgCriterion.Evidence evidence) { .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // n.b. missense variant - will not trigger PVS1 .variantEffect(VariantEffect.MISSENSE_VARIANT) - .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build())) + .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build())) .build(); Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); @@ -599,8 +721,8 @@ void testAssignsPP5(String reviewStatus, AcmgCriterion.Evidence evidence) { value = { "criteria provided, single submitter; SUPPORTING", "criteria provided, multiple submitters, no conflicts; STRONG", - "reviewed by expert panel; STRONG", - "practice guideline; STRONG", + "reviewed by expert panel; VERY_STRONG", + "practice guideline; VERY_STRONG", }) void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", Pedigree.empty()); @@ -612,7 +734,7 @@ void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) { .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AFRICAN_INC_AFRICAN_AMERICAN, 1.42f))) // n.b. missense variant - will not trigger PVS1 .variantEffect(VariantEffect.MISSENSE_VARIANT) - .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.BENIGN).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build())) + .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(BENIGN).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build())) .build(); Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); @@ -624,18 +746,32 @@ void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) { } } - @Test - void testAssignsBA1() { + @ParameterizedTest + @CsvSource({ + "ESP_AA, true", + "THOUSAND_GENOMES, true", + "TOPMED, true", + "UK10K, true", + "GNOMAD_E_ASJ, false", + "GNOMAD_E_FIN, false", + "GNOMAD_E_OTH, false", + "GNOMAD_G_AMI, false", + "GNOMAD_G_MID, false", + "GNOMAD_E_AFR, true", + "GNOMAD_G_AMR, true", + "GNOMAD_G_NFE, true", + }) + void testAssignsBA1(FrequencySource frequencySource, boolean expected) { Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE)); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") .geneSymbol("PTEN") - // high allele freq - triggers BA1 assignment - .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 5.0f))) + // high allele freq IN A GNOMAD NON-FOUNDER POP - triggers BA1 assignment + .frequencyData(FrequencyData.of(Frequency.of(frequencySource, 5.0f))) .variantEffect(VariantEffect.MISSENSE_VARIANT) .build(); AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(), List.of()); - assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.BA1).build())); + assertThat(acmgEvidence.hasCriterion(AcmgCriterion.BA1), equalTo(expected)); } @Test @@ -684,4 +820,182 @@ void testAssignsBS4() { assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(BS4).build())); } + + @Nested + class SplicingEvidence { + + @ParameterizedTest + @CsvSource(value = { + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, '', -", + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, SUPPORTING", + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|PATHOGENIC, -", + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE:SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + + "SPLICE_ACCEPTOR_VARIANT, STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_ACCEPTOR_VARIANT, MODERATE, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_ACCEPTOR_VARIANT, SUPPORTING, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_ACCEPTOR_VARIANT, STRONG, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_ACCEPTOR_VARIANT, MODERATE, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_ACCEPTOR_VARIANT, SUPPORTING, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, STRONG, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG", + + "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE", + "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE", + "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE", + + "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING", + + "SPLICE_DONOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -", + "SPLICE_DONOR_VARIANT, VERY_STRONG, MISSENSE_VARIANT|PATHOGENIC, -", + "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -", + "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -", + "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -", + }, nullValues = {"-"}) + void testAssignSpliceAcceptorDonorPS1(VariantEffect variantEffect, Evidence pvs1Evidence, String clinVarVariants, Evidence ps1Evidence) { + // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel + // requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1 + + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C") + .geneSymbol("PTEN") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .variantEffect(variantEffect) +// .annotations(List.of(transcriptAnnotation)) + .build(); + Map expectedClinVarData = parseExpectedClinvarData(variantEvaluation, clinVarVariants); + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + + VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build(); + AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder() + .add(PVS1, pvs1Evidence); + AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService); + + AcmgEvidence.Builder expected = AcmgEvidence.builder().add(PVS1, pvs1Evidence); + if (ps1Evidence != null) { + expected.add(PS1, ps1Evidence); + } + assertThat(acmgEvidenceBuilder.build(), equalTo(expected.build())); + } + + @ParameterizedTest + @CsvSource(value = { + // same variant location + "SPLICE_REGION_VARIANT, 0.0, '', BP4", + "SPLICE_REGION_VARIANT, 0.09, '', BP4", + "SPLICE_REGION_VARIANT, 0.1, '', ", + "SPLICE_REGION_VARIANT, 0.2, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, PP3", + "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|PATHOGENIC, PP3 PS1_Strong", + "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, PP3 PS1_Moderate", + }) + void testAssignNonDonorAcceptorSpliceRegionPS1(VariantEffect variantEffect, float spliceAiScore, String clinVarVariants, String acmgEvidence) { + // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel + // requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1 + + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C") + .geneSymbol("PTEN") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore))) + .variantEffect(variantEffect) + .build(); + Map expectedClinVarData = parseExpectedClinvarData(variantEvaluation, clinVarVariants); + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + + VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build(); + AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder(); + AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService); + + AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence); + assertThat(acmgEvidenceBuilder.build(), equalTo(expected)); + } + + @ParameterizedTest + @CsvSource(value = { + // different variant location + "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|PATHOGENIC, PP3 PS1_Moderate", + "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, PP3 PS1_Supporting", + + }) + void testAssignNonDonorAcceptorSpliceRegionPS1VuaDifferentToExistingClinVarVariant(VariantEffect variantEffect, float spliceAiScore, String clinVarVariants, String acmgEvidence) { + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C") + .geneSymbol("PTEN") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore))) + .variantEffect(variantEffect) + .build(); + + VariantEvaluation knownSpliceRegionVariant = TestFactory.variantBuilder(10, variantEvaluation.start() + 1, "G", "C") + .geneSymbol("PTEN") + .variantEffect(variantEffect) + .build(); + Map expectedClinVarData = parseExpectedClinvarData(knownSpliceRegionVariant, clinVarVariants); + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + + VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build(); + AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder(); + AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService); + + AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence); + assertThat(acmgEvidenceBuilder.build(), equalTo(expected)); + } + + @ParameterizedTest + @CsvSource(value = { + "INTRON_VARIANT, c.300+7A>C, 0.7, ", + "SYNONYMOUS_VARIANT, c.300A>C, 0.3, ", + "SPLICE_REGION_VARIANT, c.300+7A>C, 0.8, PP3", + "SPLICE_REGION_VARIANT, c.300+7A>C, 0.1, ", + "SPLICE_REGION_VARIANT, c.300+6A>C, 0.0, BP4", + "SPLICE_REGION_VARIANT, c.300+7A>C, 0.0, BP4 BP7", + "SPLICE_REGION_VARIANT, c.400-20A>C, 0.0, BP4", + "SPLICE_REGION_VARIANT, c.400-21A>C, 0.0, BP4 BP7", + "SPLICE_REGION_VARIANT, c.400-22A>C, 0.0, BP4 BP7", + }) + void testSilentAndIntronicBP7(VariantEffect variantEffect, String hgvsc, float spliceAiScore, String acmgEvidence) { + TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder() + .variantEffect(variantEffect) + .hgvsCdna(hgvsc) + .build(); + VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") + .geneSymbol("PTEN") + .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment + .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore))) + .variantEffect(variantEffect) + .annotations(List.of(transcriptAnnotation)) + .build(); + Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build(); + AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder(); + AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), TestVariantDataService.stub()); + + AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence); + assertThat(acmgEvidenceBuilder.build(), equalTo(expected)); + } + + private Map parseExpectedClinvarData(VariantEvaluation variantEvaluation, String clinVarVariants) { + // SPLICE_ACCEPTOR_VARIANT|PATHOGENIC:SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC:MISSENSE_VARIANT|UNKNOWN_SIGNIFICANCE + if (clinVarVariants.isEmpty()) { + return Map.of(); + } + Map clinVarData = new LinkedHashMap<>(); + String[] clinVars = clinVarVariants.split(":"); + for (int i = 0; i < clinVars.length; i++) { + String[] clinVarInfo = clinVars[i].split("\\|"); + Variant variant = VariantEvaluation.builder().variant(variantEvaluation.contig(), variantEvaluation.strand(), variantEvaluation.coordinates().extend(+i, -i), "A", "T").build(); + ClinVarData clinVar = ClinVarData.builder() + .variantEffect(VariantEffect.valueOf(clinVarInfo[0])) + .primaryInterpretation(ClinVarData.ClinSig.valueOf(clinVarInfo[1])) + .build(); + clinVarData.put(variant, clinVar); + } + return clinVarData; + } + + } } \ No newline at end of file diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java index 05a47eb8a..a6f813e12 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java @@ -66,6 +66,8 @@ void calculatePathAcmgAssignments() { .rankType(TranscriptAnnotation.RankType.EXON) .rank(1) .rankTotal(9) + .hgvsCdna("c.1A>C") + .hgvsProtein("p.0?") .build(); VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G") @@ -95,16 +97,16 @@ void calculatePathAcmgAssignments() { gene.addPriorityResult(omimPriorityResult); AcmgEvidence acmgEvidence = AcmgEvidence.builder() - .add(AcmgCriterion.PVS1) + .add(AcmgCriterion.PVS1, AcmgCriterion.Evidence.MODERATE) .add(AcmgCriterion.PM2, AcmgCriterion.Evidence.SUPPORTING) - .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.STRONG) - .add(AcmgCriterion.PP4) +// .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.STRONG) // not a missense variant + .add(AcmgCriterion.PP4, AcmgCriterion.Evidence.MODERATE) .add(AcmgCriterion.PP5, AcmgCriterion.Evidence.VERY_STRONG) .build(); AcmgAssignment acmgAssignment = AcmgAssignment.of(variantEvaluation, gene.getGeneIdentifier(), ModeOfInheritance.AUTOSOMAL_DOMINANT, cowdenSyndrome, acmgEvidence, AcmgClassification.PATHOGENIC); AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), TestVariantDataService.stub()); - AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acgs2020Classifier()); + AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acmg2020PointsBasedClassifier()); List acmgAssignments = instance.calculateAcmgAssignments(ModeOfInheritance.AUTOSOMAL_DOMINANT, gene, List.of(variantEvaluation), compatibleDiseaseMatches); assertThat(acmgAssignments, equalTo(List.of(acmgAssignment))); } @@ -138,7 +140,7 @@ void calculateVusAcmgAssignments() { AcmgAssignment acmgAssignment = AcmgAssignment.of(variantEvaluation, gene.getGeneIdentifier(), ModeOfInheritance.AUTOSOMAL_DOMINANT, cowdenSyndrome, acmgEvidence, AcmgClassification.UNCERTAIN_SIGNIFICANCE); AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), TestVariantDataService.stub()); - AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acgs2020Classifier()); + AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acmg2020PointsBasedClassifier()); List acmgAssignments = instance.calculateAcmgAssignments(ModeOfInheritance.AUTOSOMAL_DOMINANT, gene, List.of(variantEvaluation), compatibleDiseaseMatches); assertThat(acmgAssignments, equalTo(List.of(acmgAssignment))); } diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java index f2568b849..912c7110f 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java @@ -31,7 +31,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*; -public class AcmgEvidenceTest { +class AcmgEvidenceTest { @Test void testEmptyBuilder() { @@ -50,13 +50,13 @@ void testStaticFactoryDefaultEvidence() { } @Test - public void testToStringDefaultEvidence() { + void testToStringDefaultEvidence() { AcmgEvidence instance = AcmgEvidence.builder().add(PVS1).add(PS1).build(); assertThat(instance.toString(), equalTo("[PVS1, PS1]")); } @Test - public void testToStringModifiedEvidence() { + void testToStringModifiedEvidence() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1, Evidence.STRONG) .add(PS1, Evidence.MODERATE) @@ -65,8 +65,31 @@ public void testToStringModifiedEvidence() { assertThat(instance.toString(), equalTo("[PVS1_Strong, PS1_Moderate, PP3_VeryStrong]")); } + @ParameterizedTest + @CsvSource(value = { + "'[PVS1, PS1_Moderate, PP3_VeryStrong]'", + "'[PVS1,PS1_Moderate,PP3_VeryStrong]'", + "'PVS1, PS1_Moderate, PP3_VeryStrong'", + "'PVS1 PS1_Moderate PP3_VeryStrong'", + "'[PVS1 PS1_Moderate PP3_VeryStrong['", + // stupid shit + "'PVS1 PS1_Moderate PP3_VeryStrong'", + "'PVS1 PS1_Moderate PP3_VERYSTRONG'", + "'PVS1 PS1_Moderate PP3_verystrong'", + "'PVS1 PS1_Moderate, PP3_verystrong'", + "'PVS1 PS1_Moderate,PP3_verystrong'", + }) + void testParseEvidence(String acmgEvidence) { + AcmgEvidence expected = AcmgEvidence.builder() + .add(PVS1) + .add(PS1, Evidence.MODERATE) + .add(PP3, Evidence.VERY_STRONG) + .build(); + assertThat(AcmgEvidence.parseAcmgEvidence(acmgEvidence), equalTo(expected)); + } + @Test - public void testGetEvidenceOverwriteInputInBuilder() { + void testGetEvidenceOverwriteInputInBuilder() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1, Evidence.STRONG) .add(PVS1) @@ -75,7 +98,7 @@ public void testGetEvidenceOverwriteInputInBuilder() { } @Test - public void testGetEvidenceDefaultValue() { + void testGetEvidenceDefaultValue() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1) .build(); @@ -83,7 +106,7 @@ public void testGetEvidenceDefaultValue() { } @Test - public void testGetEvidenceModifiedValue() { + void testGetEvidenceModifiedValue() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1, Evidence.MODERATE) .build(); @@ -91,7 +114,7 @@ public void testGetEvidenceModifiedValue() { } @Test - public void testGetEvidenceNoValue() { + void testGetEvidenceNoValue() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1, Evidence.MODERATE) .build(); @@ -99,7 +122,7 @@ public void testGetEvidenceNoValue() { } @Test - public void testContainsEvidence() { + void testContainsEvidence() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1, Evidence.MODERATE) .build(); @@ -108,7 +131,7 @@ public void testContainsEvidence() { } @Test - public void testBuilderContainsEvidence() { + void testBuilderContainsEvidence() { AcmgEvidence.Builder instance = AcmgEvidence.builder() .add(PVS1, Evidence.MODERATE); assertThat(instance.contains(PVS1), equalTo(true)); @@ -116,13 +139,22 @@ public void testBuilderContainsEvidence() { } @Test - public void testSizeWhenEmpty() { + void testBuilderContainsEvidenceWithStrength() { + AcmgEvidence.Builder instance = AcmgEvidence.builder() + .add(PVS1, Evidence.MODERATE); + assertThat(instance.containsWithEvidence(PVS1, Evidence.MODERATE), equalTo(true)); + assertThat(instance.containsWithEvidence(PVS1, Evidence.VERY_STRONG), equalTo(false)); + assertThat(instance.containsWithEvidence(PM3, Evidence.MODERATE), equalTo(false)); + } + + @Test + void testSizeWhenEmpty() { AcmgEvidence instance = AcmgEvidence.builder().build(); assertThat(instance.size(), equalTo(0)); } @Test - public void testSizeWithElement() { + void testSizeWithElement() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1) .build(); @@ -130,13 +162,13 @@ public void testSizeWithElement() { } @Test - public void testIsEmpty() { + void testIsEmpty() { AcmgEvidence instance = AcmgEvidence.builder().build(); assertThat(instance.isEmpty(), equalTo(true)); } @Test - public void testNotEmpty() { + void testNotEmpty() { AcmgEvidence instance = AcmgEvidence.builder() .add(PVS1) .build(); @@ -244,7 +276,7 @@ void testEvidence() { "BA1, -8, 0.000", // BA1 is a bit of an anomaly - it's intended as a hard filter, so doesn't fit into the points system }) void testPosteriorProb(String criteria, int points, double posteriorProb) { - AcmgEvidence acmgEvidence = parseAcmgEvidence(criteria); + AcmgEvidence acmgEvidence = AcmgEvidence.parseAcmgEvidence(criteria); assertThat(acmgEvidence.points(), equalTo(points)); assertThat(acmgEvidence.postProbPath(), closeTo(posteriorProb, 0.001)); } diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java index 32ce1561b..c7967ae9d 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java @@ -25,8 +25,8 @@ */ package org.monarchinitiative.exomiser.core.genome; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; +import jakarta.annotation.Nonnull; +import org.monarchinitiative.exomiser.core.model.GeneStatistics; import org.monarchinitiative.exomiser.core.model.Variant; import org.monarchinitiative.exomiser.core.model.frequency.Frequency; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; @@ -55,11 +55,15 @@ public class TestVariantDataService implements VariantDataService { private final Set expectedWhiteList; private final Map expectedFrequencyData; private final Map expectedPathogenicityData; + private final Map expectedClinVarData; + private final Map expectedGeneStats; private TestVariantDataService(Builder builder) { - this.expectedWhiteList = ImmutableSet.copyOf(builder.expectedWhiteList); - this.expectedFrequencyData = ImmutableMap.copyOf(builder.expectedFrequencyData); - this.expectedPathogenicityData = ImmutableMap.copyOf(builder.expectedPathogenicityData); + this.expectedWhiteList = Set.copyOf(builder.expectedWhiteList); + this.expectedFrequencyData = Map.copyOf(builder.expectedFrequencyData); + this.expectedPathogenicityData = Map.copyOf(builder.expectedPathogenicityData); + this.expectedClinVarData = Map.copyOf(builder.expectedClinVarData); + this.expectedGeneStats = Map.copyOf(builder.expectedGeneStats); } /** @@ -105,7 +109,7 @@ public FrequencyData getVariantFrequencyData(Variant variant, Set wanted = allFrequencyData.frequencies() .stream() .filter(frequency -> frequencySources.contains(frequency.source())) - .collect(Collectors.toList()); + .toList(); return FrequencyData.of(allFrequencyData.getRsId(), wanted); } @@ -123,18 +127,25 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) { - return null; + public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) { + return expectedClinVarData.entrySet().stream() + .filter(entry -> entry.getKey().overlapsWith(genomicInterval)) + .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue)); + } + + @Override + public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) { + return expectedGeneStats.getOrDefault(geneSymbol, GeneStatistics.builder(geneSymbol).build()); } public static Builder builder() { @@ -145,6 +156,8 @@ public static class Builder { private Set expectedWhiteList = new HashSet<>(); private Map expectedFrequencyData = new HashMap<>(); private Map expectedPathogenicityData = new HashMap<>(); + private Map expectedClinVarData = new HashMap<>(); + private Map expectedGeneStats = new HashMap<>(); public Builder expectedWhiteList(Set expectedWhiteList) { this.expectedWhiteList = expectedWhiteList; @@ -176,6 +189,26 @@ public Builder put(Variant variant, PathogenicityData expectedPathogenicityData) return this; } + public Builder expectedClinVarData(Map expectedClinVarData) { + this.expectedClinVarData = expectedClinVarData; + return this; + } + + public Builder put(Variant variant, ClinVarData clinVarData) { + this.expectedClinVarData.put(variant, clinVarData); + return this; + } + + public Builder expectedGeneStats(Map expectedGeneStats) { + this.expectedGeneStats = expectedGeneStats; + return this; + } + + public Builder put(GeneStatistics geneStatistics) { + this.expectedGeneStats.put(geneStatistics.geneSymbol(), geneStatistics); + return this; + } + public TestVariantDataService build() { return new TestVariantDataService(this); } @@ -213,6 +246,11 @@ public ClinVarData getClinVarData(GenomicVariant genomicVariant) { public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) { return Map.of(); } + + @Override + public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) { + return GeneStatistics.builder(geneSymbol).build(); + } } } diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java index e8ff886ca..1d8febbd4 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java @@ -202,7 +202,7 @@ public void serviceQueryForSynonymousVariantReturnsEmptyPathogenicityData() { Mockito.when(defaultPathogenicityDao.getPathogenicityData(variant)).thenReturn(PathogenicityData.of(MutationTasterScore.of(1f))); Mockito.when(clinVarDao.getClinVarData(variant)).thenReturn(ClinVarData.empty()); PathogenicityData result = instance.getVariantPathogenicityData(variant, EnumSet.of(PathogenicitySource.MUTATION_TASTER)); - assertThat(result, equalTo(PathogenicityData.empty())); + assertThat(result, equalTo(PathogenicityData.of(MutationTasterScore.of(1f)))); } @Test diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java index 7e55da727..5c99c114c 100644 --- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java @@ -1,11 +1,13 @@ package org.monarchinitiative.exomiser.core.genome.dao; +import de.charite.compbio.jannovar.annotation.VariantEffect; import de.charite.compbio.jannovar.data.JannovarData; import de.charite.compbio.jannovar.mendel.ModeOfInheritance; import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraint; import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraints; import org.monarchinitiative.exomiser.core.analysis.util.acmg.*; import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; @@ -15,20 +17,24 @@ import org.monarchinitiative.exomiser.core.genome.jannovar.JannovarDataSourceLoader; import org.monarchinitiative.exomiser.core.model.*; import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData; +import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource; import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityData; -import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityScore; import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource; +import org.monarchinitiative.exomiser.core.prioritisers.dao.DefaultDiseaseDao; +import org.monarchinitiative.exomiser.core.prioritisers.dao.DiseaseDao; +import org.monarchinitiative.exomiser.core.prioritisers.model.Disease; +import org.monarchinitiative.exomiser.core.prioritisers.model.InheritanceMode; import org.monarchinitiative.exomiser.core.proto.AlleleProto; import org.monarchinitiative.svart.*; import java.nio.file.Path; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; +import java.util.stream.Collectors; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; +import static org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource.*; class ClinVarDaoMvStoreTest { @@ -52,9 +58,8 @@ private AlleleProto.AlleleKey parseAlleleKey(String broadFormatVariant) { .build(); } - private GenomicVariant variant(String broadFormatVariant) { - String[] fields = broadFormatVariant.split("-"); - return GenomicVariant.of(GenomeAssembly.HG19.getContigByName(fields[0]), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, Integer.parseInt(fields[1]), fields[2]), fields[2], fields[3]); + private GenomicVariant variant(String variant) { + return parseVariant(GenomeAssembly.HG19, variant); } @Test @@ -79,6 +84,31 @@ void getClinVarData() { } } + @Test + void getClinVarDataNotReturningConflictingSig() { + try (MVStore mvStore = new MVStore.Builder().open()) { + var clinvarMap = MvStoreUtil.openClinVarMVMap(mvStore); + AlleleProto.AlleleKey alleleKey = parseAlleleKey("1-200-A-T"); + AlleleProto.ClinVar clinVar = AlleleProto.ClinVar.newBuilder() + .setVariationId("12345") + .setPrimaryInterpretation(AlleleProto.ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .setReviewStatus(AlleleProto.ClinVar.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) + .putClinSigCounts("PATHOGENIC", 5) + .putClinSigCounts("UNCERTAIN_SIGNIFICANCE", 5) + .build(); + clinvarMap.put(alleleKey, clinVar); + + ClinVarDao instance = new ClinVarDaoMvStore(mvStore); + System.out.println("AlleleProtoAdaptor.toClinVarData: " + AlleleProtoAdaptor.toClinVarData(clinVar)); + GenomicVariant clinVarVariant = variant("1-200-A-T"); + System.out.println("ClinVarDao.getClinVarData: " + instance.getClinVarData(clinVarVariant)); + assertThat(instance.getClinVarData(clinVarVariant), equalTo(AlleleProtoAdaptor.toClinVarData(clinVar))); + + GenomicVariant nonClinVarVariant = variant("1-200-A-A"); + assertThat(instance.getClinVarData(nonClinVarVariant), equalTo(ClinVarData.empty())); + } + } + @Test void emptyStore() { ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore(); @@ -217,14 +247,39 @@ void setPositionInButLengthIncorrectTest() { @Disabled("manual testing utility") @Test void manualDataExplorer() { - MVStore mvStore = new MVStore.Builder().fileName("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_clinvar.mv.db").readOnly().open(); + // variant under assessment in 'Broad format' (i.e. hyphen seperated chr-pos-ref-alt) + String vua = "19-54123563-A-G"; // hg19-22-51019849-C-T possible cryptic splice variant? SPLICE_AI: 0.230 (PP3), ALPHA_MISSENSE: 0.125 + ModeOfInheritance modeOfInheritance = ModeOfInheritance.ANY; + // 37:10-124797364 // 38:10-123037848-G-A + 10-123051188-T-TAA + 10-123037804-C-A + // 10-89624227-A-G PTEN PATHOGENIC 3* START_LOST + // 10-89624245-G-T PTEN PATHOGENIC 2* STOP_GAINED + // 10-89624242-A-G PTEN PATHOGENIC 3* MISSENSE_VARIANT + + // 4-15589553-G-C - Hypothetical, not in ClinVar, LIKELY_PATHOGENIC [PVS1_Strong, PS1, PM2_Supporting] points=9? + // 1-63868019-G-A | 1-63877002-G-A + + // 2-232079571-G-A + // FP: + // MT-12706-T-C + // 11-6638385-C-T + // 12-103234252-T-C + + // 17-48275792-A-G SPLICE_DONOR_VARIANT (LP, ClinVar) + // 17-48275815-T-G SYNONYMOUS_VARIANT (VUS, ClinVar) + // + // 3'UTR hg38-20-58909654-A-G high spliceAI + GenomeAssembly assembly = GenomeAssembly.HG38; + + String dataDir = "/home/hhx640/Documents/exomiser-data/"; + String dataVersion = "2406_" + assembly; + + MVStore mvStore = new MVStore.Builder().fileName(dataDir + dataVersion + "/" + dataVersion + "_clinvar.mv.db").readOnly().open(); ClinVarDaoMvStore clinVarDao = new ClinVarDaoMvStore(mvStore); - MVStore alleleStore = new MVStore.Builder().fileName("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_variants.mv.db").readOnly().open(); + MVStore alleleStore = new MVStore.Builder().fileName(dataDir + dataVersion + "/" + dataVersion + "_variants.mv.db").readOnly().open(); AllelePropertiesDao allelePropertiesDao = new AllelePropertiesDaoMvStore(alleleStore); - GenomeAssembly assembly = GenomeAssembly.HG19; - JannovarData jannovarData = JannovarDataSourceLoader.loadJannovarData(Path.of("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_transcripts_ensembl.ser")); + JannovarData jannovarData = JannovarDataSourceLoader.loadJannovarData(Path.of(dataDir + dataVersion + "/" + dataVersion + "_transcripts_ensembl.ser")); VariantAnnotator variantAnnotator = new JannovarVariantAnnotator(assembly, jannovarData, ChromosomalRegionIndex.empty()); // 18-57550760-A-T NA // 18-57550753-A-C LP // 55217985 (hg19) @@ -237,8 +292,9 @@ void manualDataExplorer() { // https://mart.ensembl.org/info/genome/genebuild/canonical.html (see also vitt). Ideally the Jannovar Annotations // should be sorted before being converted to TranscriptAnnotations. This isn't an issue if MANE only // transcripts are being used as these are the only ones available to report on. - GenomicVariant genomicVariant = variant(assembly, "10-89624227-A-G"); // 10-123256215-T-G hg38:10-121496701-T-G + GenomicVariant genomicVariant = parseVariant(assembly, vua); // 10-123256215-T-G hg38:10-121496701-T-G + // hg38:4-25145092-C-T ENST00000382103.7(SEPSECS):c.846G>A (SEPSECS)p.(Leu282=) SpliceAI=0.42 // 10-89624227-A-G (PTEN) // 10-123247514-C-G : PS1, PM1, PM2_Supporting, PM5_Supporting, PP3, PP5_Strong // 10-123247517-T-G : PS1_Supporting, PM1, PM2_Supporting, PM5_Supporting, PP2, PP3 @@ -246,18 +302,14 @@ void manualDataExplorer() { AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(genomicVariant); // encode as VariantKey (https://doi.org/10.1101/473744) == 8 bytes fixed size (long); // System.out.println("AlleleKey size (bytes): " + alleleKey.getSerializedSize()); // SNP = 13 bytes, 11 bases = 23 - System.out.println(); - - System.out.println(clinVarDao.getClinVarData(genomicVariant)); - Map clinVarRecordsOverlappingInterval = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(25, 25)); - clinVarRecordsOverlappingInterval.forEach((variant, clinVarData) -> System.out.println(toBroad(variant) + " : " + clinVarData)); System.out.println(); + Set pathSources = EnumSet.of(REVEL, MVP, ALPHA_MISSENSE, SPLICE_AI, REMM); List variantAnnotations = variantAnnotator.annotate(genomicVariant); AlleleProto.AlleleProperties alleleProperties = allelePropertiesDao.getAlleleProperties(alleleKey, assembly); - FrequencyData frequencyData = AlleleProtoAdaptor.toFrequencyData(alleleProperties); + FrequencyData frequencyData = AlleleProtoAdaptor.toFrequencyData(alleleProperties).toBuilder().filterSources(FrequencySource.NON_FOUNDER_POPS).build(); PathogenicityData pathogenicityData = AlleleProtoAdaptor.toPathogenicityData(alleleProperties); - pathogenicityData = PathogenicityData.of(pathogenicityData.pathogenicityScores().stream().filter(score -> score.getSource() == PathogenicitySource.ALPHA_MISSENSE).toList()); + pathogenicityData = PathogenicityData.of(pathogenicityData.pathogenicityScores().stream().filter(score -> pathSources.contains(score.getSource())).toList()); ClinVarData clinVarData = clinVarDao.getClinVarData(genomicVariant); VariantAnnotation variantAnnotation = variantAnnotations.isEmpty() ? null : variantAnnotations.get(0); VariantEvaluation variantEvaluation = VariantEvaluation.builder() @@ -268,45 +320,84 @@ void manualDataExplorer() { .annotations(variantAnnotation.getTranscriptAnnotations()) .frequencyData(frequencyData) .pathogenicityData(PathogenicityData.of(clinVarData, pathogenicityData.pathogenicityScores())) - .compatibleInheritanceModes(Set.of(ModeOfInheritance.AUTOSOMAL_RECESSIVE)) +// .whiteListed() + .compatibleInheritanceModes(Set.of(modeOfInheritance)) .build(); TranscriptAnnotation transcriptAnnotation = variantAnnotation != null && variantAnnotation.hasTranscriptAnnotations() ? variantAnnotation.getTranscriptAnnotations().get(0) : null; - System.out.println(toBroad(genomicVariant) + (variantAnnotation == null ? "" : " " + variantAnnotation.getGeneSymbol() + (transcriptAnnotation == null ? "" : ":" + transcriptAnnotation.getHgvsCdna() + ":" + transcriptAnnotation.getHgvsProtein() ) + " " + variantAnnotation.getVariantEffect())); - System.out.println(frequencyData.getRsId() + " " + (transcriptAnnotation == null ? "" : transcriptAnnotation.getAccession())); + Set varEffects = variantAnnotation != null && variantAnnotation.hasTranscriptAnnotations() ? variantAnnotation.getTranscriptAnnotations().stream().map(TranscriptAnnotation::getVariantEffect).collect(Collectors.toUnmodifiableSet()) : Set.of(); + System.out.println(toBroad(genomicVariant) + " " + (frequencyData.getRsId().isEmpty() ? "-" : frequencyData.getRsId()) + " " + (variantAnnotation == null ? "" : variantAnnotation.getGeneSymbol() + (transcriptAnnotation == null ? "" : ":" + transcriptAnnotation.getAccession() + ":" + transcriptAnnotation.getHgvsCdna() + ":" + transcriptAnnotation.getHgvsProtein() + " " + transcriptAnnotation.getRankType() + " " + transcriptAnnotation.getRank() + "/" + transcriptAnnotation.getRankTotal()) + " " + varEffects)); String geneSymbol = variantAnnotation.getGeneSymbol(); - System.out.println(geneSymbol + " " + GeneConstraints.geneConstraint(geneSymbol)); - System.out.println(clinVarData.isEmpty() ? "" : clinVarData.getPrimaryInterpretation() + " (" + clinVarData.starRating() + "*) " + clinVarData.getVariationId() + " " + clinVarData.getGeneSymbol() + ":" + clinVarData.getHgvsCdna() + ":" + clinVarData.getHgvsProtein() + " " + clinVarData.getVariantEffect()); - System.out.println("Variant score: " + variantEvaluation.getVariantScore() + " Frequency score: " + variantEvaluation.getFrequencyScore() + " Pathogenicity score: " + variantEvaluation.getPathogenicityScore()); - System.out.println("Frequency data:"); + GeneConstraint geneConstraint = GeneConstraints.geneConstraint(geneSymbol); + if (geneConstraint != null) { + System.out.println(geneSymbol + " " + geneConstraint + (geneConstraint.isLossOfFunctionIntolerant() ? (" (LOF INTOLERANT)") : "")); + } + System.out.println("ClinVar: " + (clinVarData.isEmpty() ? "-" : formatClinVarData(clinVarData))); + System.out.println("Variant score: " + variantEvaluation.getVariantScore() + " Frequency score: " + variantEvaluation.getFrequencyScore() + " Pathogenicity score: " + variantEvaluation.getPathogenicityScore() + (variantEvaluation.isWhiteListed() ? " (WHITELIST VARIANT - all scores set to 1.0)" : "")); + System.out.println("Frequency score: " + frequencyData.frequencyScore()); if (frequencyData.isEmpty()) { - System.out.println("\n\t-"); + System.out.println("\t(no frequency data)"); } else { - System.out.println("\tfrequency score: " + frequencyData.frequencyScore()); - frequencyData.frequencies().forEach(freq -> System.out.println("\t" + freq.source() + "=" + freq.frequency() + "(" + freq.ac() + "|" + freq.an() + "|" + freq.homs() + ")")); + frequencyData.frequencies().forEach(freq -> System.out.println("\t" + freq.source() + "=" + freq.frequency() + (freq.an() == 0 ? "" : "(" + freq.ac() + "|" + freq.an() + "|" + freq.homs() + ")"))); } - System.out.println("Pathogenicity scores:"); + System.out.println("Pathogenicity score: " + pathogenicityData.pathogenicityScore()); if (pathogenicityData.isEmpty()) { - System.out.println("\t-"); + System.out.println("\t(no pathogenicity data)"); } else { - System.out.println("\tpathogenicity score: " + pathogenicityData.pathogenicityScore()); pathogenicityData.pathogenicityScores().forEach(path -> System.out.println("\t" + path)); } AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("sample", Pedigree.justProband("sample"), clinVarDao); - var acmgEvidence = acmgEvidenceAssigner.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_RECESSIVE, List.of(variantEvaluation), List.of(), List.of()); + Disease disease = Disease.builder().diseaseId("DISEASE:1").diseaseName("disease").inheritanceMode(InheritanceMode.AUTOSOMAL_RECESSIVE).build(); + AcmgEvidence acmgEvidence = acmgEvidenceAssigner.assignVariantAcmgEvidence(variantEvaluation, modeOfInheritance, List.of(variantEvaluation), List.of(disease), List.of()); AcmgEvidenceClassifier classifier = new Acmg2020PointsBasedClassifier(); // should use an AcmgAssignmentCalculator to figure out the correct disease-gene association to use - System.out.println(classifier.classify(acmgEvidence) + " " + acmgEvidence + " points=" + acmgEvidence.points()); + System.out.println(classifier.classify(acmgEvidence) + " " + acmgEvidence + " points=" + acmgEvidence.points() + " score=" + acmgEvidence.postProbPath()); + +// var manualEvidence = AcmgEvidence.builder() +// .add(AcmgCriterion.PM2, AcmgCriterion.Evidence.SUPPORTING) +// .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.SUPPORTING) +// .add(AcmgCriterion.PS1) +// .build(); +// System.out.println(classifier.classify(manualEvidence) + " " + manualEvidence + " points=" + manualEvidence.points()); + + System.out.println(); + + GenomicRegion clinVarSearchInterval = genomicVariant.withPadding(25, 25); + String interval = clinVarSearchInterval.contig().name() + ":" + clinVarSearchInterval.startOneBased() + "-" + clinVarSearchInterval.end(); + System.out.println("ClinVar neighbourhood " + interval + " (" + genomicVariant.start() + " +/-25 bases):"); + Map clinVarRecordsOverlappingInterval = clinVarDao.findClinVarRecordsOverlappingInterval(clinVarSearchInterval); + clinVarRecordsOverlappingInterval.forEach((variant, data) -> System.out.println(toBroad(variant) + " : " + formatClinVarData(data))); + //TODO add known diseases + } + + private String formatClinVarData(ClinVarData clinVarData) { + return clinVarData.getPrimaryInterpretation() + (clinVarData.getPrimaryInterpretation() == ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS ? " " + clinVarData.getConflictingInterpretationCounts() : "") + " (" + clinVarData.starRating() + "*) " + clinVarData.getVariationId() + " " + clinVarData.getGeneSymbol() + ":" + clinVarData.getHgvsCdna() + ":" + clinVarData.getHgvsProtein() + " " + clinVarData.getVariantEffect() + " (" + clinVarData.getReviewStatus() + ")"; } private String toBroad(GenomicVariant genomicVariant) { return genomicVariant.contig().name() + '-' + genomicVariant.start() + '-' + genomicVariant.ref() + '-' + genomicVariant.alt(); } - private GenomicVariant variant(GenomeAssembly genomeAssembly, String broadFormatVariant) { - String[] fields = broadFormatVariant.split("-"); + /** + * Parses small sequence variants of the form `chr-pos-ref-alt`. For example: + *
+     *     SNP  1-12345-A-T
+     *     SNP  1:12345:A:T
+     *     DEL  1-12345-AT-A
+     *     INS  1-12345-A-AT
+     *     MNV  1-12345-AT-GC
+     *     INV  1-12345-AT-TA
+     * 
+ * Separators can be either hyphens '-' or colons ':'. The genomic assembly is also required. + * + * @param genomeAssembly Genomic assembly for the variant coordinates + * @param s The string representation of the variant + * @return A {@link GenomicVariant} + */ + private GenomicVariant parseVariant(GenomeAssembly genomeAssembly, String s) { + String[] fields = s.split("[-:]"); return GenomicVariant.of(genomeAssembly.getContigByName(fields[0]), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, Integer.parseInt(fields[1]), fields[2]), fields[2], fields[3]); } } \ No newline at end of file diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java new file mode 100644 index 000000000..e631e323d --- /dev/null +++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java @@ -0,0 +1,116 @@ +package org.monarchinitiative.exomiser.core.model; + +import de.charite.compbio.jannovar.annotation.VariantEffect; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; + +import static de.charite.compbio.jannovar.annotation.VariantEffect.*; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +class GeneStatisticsTest { + + @Test + void testBuilderNoStats() { + var instance = GeneStatistics.builder("WIBBLE").build(); + assertThat(instance.geneSymbol(), equalTo("WIBBLE")); + assertThat(instance.pathCount(), equalTo(0)); + assertThat(instance.vusCount(), equalTo(0)); + assertThat(instance.benignCount(), equalTo(0)); + } + + + @Test + void testBuilderStats() { + var instance = GeneStatistics.builder("WIBBLE") + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.LIKELY_PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.BENIGN) + // LOF effects + .put(VariantEffect.STOP_GAINED, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.STOP_LOST, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.SPLICE_ACCEPTOR_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.SPLICE_DONOR_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.EXON_LOSS_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.START_LOST, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.START_LOST, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(VariantEffect.TRANSCRIPT_ABLATION, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.FRAMESHIFT_ELONGATION, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.FRAMESHIFT_TRUNCATION, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.FRAMESHIFT_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + // other effect + .put(VariantEffect.INTRON_VARIANT, ClinVarData.ClinSig.BENIGN) + .build(); + assertThat(instance.geneSymbol(), equalTo("WIBBLE")); + assertThat(instance.pathCount(), equalTo(13)); + assertThat(instance.vusCount(), equalTo(3)); + assertThat(instance.benignCount(), equalTo(2)); + + assertThat(instance.missensePathCount(), equalTo(3)); + assertThat(instance.missenseVusCount(), equalTo(2)); + assertThat(instance.missenseBenignCount(), equalTo(1)); + + assertThat(instance.lofPathCount(), equalTo(10)); + assertThat(instance.lofVusCount(), equalTo(1)); + assertThat(instance.lofBenignCount(), equalTo(0)); + } + + @Test + void testIgnoresNonPathVusBenignStatus() { + var instance = GeneStatistics.builder("WIBBLE", MISSENSE_VARIANT, ClinVarData.ClinSig.ESTABLISHED_RISK_ALLELE).build(); + assertThat(instance.pathCount(), equalTo(0)); + assertThat(instance.vusCount(), equalTo(0)); + assertThat(instance.benignCount(), equalTo(0)); + + } + + @Test + void testToString() { + var instance = GeneStatistics.builder("WIBBLE") + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.LIKELY_PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.BENIGN) + .put(VariantEffect.STOP_GAINED, ClinVarData.ClinSig.PATHOGENIC) + .put(VariantEffect.INTRON_VARIANT, ClinVarData.ClinSig.BENIGN) + .build(); + + var expected = "GeneStatistics{geneSymbol=WIBBLE, counts={STOP_GAINED=[P=1, VUS=0, B=0], MISSENSE_VARIANT=[P=3, VUS=2, B=1], INTRON_VARIANT=[P=0, VUS=0, B=1]}}"; + assertThat(instance.toString(), equalTo(expected)); + } + + @Test + void testEmptyString() { + assertThat(GeneStatistics.builder("WIBBLE").build().toString(), equalTo("GeneStatistics{geneSymbol=WIBBLE, counts={}}")); + } + + @Test + void testCanGetStatsForArbitraryVariantEffect() { + var instance = GeneStatistics.builder("WIBBLE") + .put(SYNONYMOUS_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .build(); + + assertThat(instance.vusCount(), equalTo(1)); + assertThat(instance.vusCount(SYNONYMOUS_VARIANT), equalTo(instance.vusCount())); + } + + @Test + void testCanGetStatsForArbitraryVariantEffects() { + var instance = GeneStatistics.builder("WIBBLE") + .put(FIVE_PRIME_UTR_EXON_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(INTRON_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE) + .put(MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC) + .build(); + + assertThat(instance.vusCount(), equalTo(2)); + assertThat(instance.vusCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT, MISSENSE_VARIANT), equalTo(instance.vusCount())); + assertThat(instance.vusCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT), equalTo(instance.vusCount())); + assertThat(instance.pathCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT, MISSENSE_VARIANT), equalTo(instance.pathCount())); + assertThat(instance.pathCount(MISSENSE_VARIANT), equalTo(instance.pathCount())); + } +} \ No newline at end of file diff --git a/exomiser-core/src/test/resources/jannovar/default_sources.ini b/exomiser-core/src/test/resources/jannovar/default_sources.ini index ad75402a9..efe2cc684 100644 --- a/exomiser-core/src/test/resources/jannovar/default_sources.ini +++ b/exomiser-core/src/test/resources/jannovar/default_sources.ini @@ -106,7 +106,7 @@ rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.g type=ucsc alias=MT,M,chrM chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz @@ -119,7 +119,7 @@ knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownTo type = ensembl alias = MT,M,chrM chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions gtf = https://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz cdna = https://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.cdna.all.fa.gz @@ -133,7 +133,7 @@ type = refseq alias = MT,M,chrM allowNonCodingNm = true chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz @@ -146,7 +146,7 @@ alias = MT,M,chrM onlyCurated = true allowNonCodingNm = true chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz @@ -158,7 +158,7 @@ type = refseq alias = MT,M,chrM allowNonCodingNm = true chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_top_level_2017-01-13.gff3.gz rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_rna.fa.gz @@ -171,7 +171,7 @@ alias = MT,M,chrM onlyCurated = true allowNonCodingNm = true chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_top_level_2017-01-13.gff3.gz rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_rna.fa.gz diff --git a/exomiser-data-genome/pom.xml b/exomiser-data-genome/pom.xml index 198882fc5..a5915dffd 100644 --- a/exomiser-data-genome/pom.xml +++ b/exomiser-data-genome/pom.xml @@ -31,7 +31,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA @@ -48,6 +48,12 @@ org.apache.commons commons-vfs2 2.9.0 + + + commons-logging + commons-logging + + info.picocli @@ -59,6 +65,17 @@ flyway-core ${flyway.version} + + org.duckdb + duckdb_jdbc + 1.0.0 + + + org.rocksdb + rocksdbjni + 7.6.0 + test + diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java index d52e38c8a..2bc3cd167 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java @@ -74,7 +74,7 @@ public class BuildCommand implements Callable { private Path buildClinVar; @Option(names = "--transcripts", converter = TranscriptSourceConverter.class, split = ",", arity = "0..1", fallbackValue = "ensembl,refseq,ucsc", description = "List of transcript databases to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}") private List transcriptSources; - @Option(names = "--variants", split = ",", arity = "0..1", fallbackValue = "esp,exac,uk10k,topmed,dbsnp,gnomad-exome,gnomad-genome,gnomad-mito,alfa,dbnsfp", description = "List of variant data sources to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}") + @Option(names = "--variants", split = ",", arity = "0..1", fallbackValue = "esp,exac,uk10k,topmed,dbsnp,gnomad-exome,gnomad-genome,gnomad-mito,alfa,dbnsfp,splice-ai", description = "List of variant data sources to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}") private List variantSources; @Option(names = "--genome", description = "Flag to trigger building of genome data.") private boolean buildGenome; diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java index 0acc7f170..dfb99e5fc 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java @@ -20,12 +20,8 @@ package org.monarchinitiative.exomiser.data.genome; -import org.h2.mvstore.MVMap; import org.h2.mvstore.MVStore; import org.h2.mvstore.MVStoreTool; -import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil; -import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleKey; -import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleProperties; import org.monarchinitiative.exomiser.data.genome.indexers.Indexer; import org.monarchinitiative.exomiser.data.genome.indexers.MvStoreAlleleIndexer; import org.monarchinitiative.exomiser.data.genome.model.Allele; @@ -58,7 +54,7 @@ public VariantDatabaseBuildRunner(BuildInfo buildInfo, Path buildPath, List hg19AlleleResources() { // ExAC removed as this is part of gnomad-exomes alleleResources.put("esp", espAlleleResource()); alleleResources.put("dbnsfp", dbnsfpAlleleResource()); + alleleResources.put("splice-ai", spliceAiAlleleResource()); // CLinVar removed - now handled as a separate data source return alleleResources.build(); } + public SpliceAiAlleleResource spliceAiAlleleResource() { + return alleleResource(SpliceAiAlleleResource.class, "hg19.splice-ai"); + } + public DbSnpAlleleResource dbSnpAlleleResource() { return alleleResource(DbSnpAlleleResource.class, "hg19.dbsnp"); } diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java index 0ede208a6..b78337d23 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java @@ -90,11 +90,16 @@ public Map hg38AlleleResources() { // ExAC removed as this is part of gnomad-exomes v2.1 // ESP removed as this is part of gnomad-exomes v4 alleleResources.put("dbnsfp", dbnsfpAlleleResource()); + alleleResources.put("splice-ai", spliceAiAlleleResource()); // CLinVar removed - now handled as a separate data source return alleleResources.build(); } + public SpliceAiAlleleResource spliceAiAlleleResource() { + return alleleResource(SpliceAiAlleleResource.class, "hg38.splice-ai"); + } + public DbSnpAlleleResource dbSnpAlleleResource() { return alleleResource(DbSnpAlleleResource.class, "hg38.dbsnp"); } diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java index eafdce07c..25d785932 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java @@ -172,6 +172,9 @@ private static ClinVar.ClinSig toProtoClinSig(ClinVarData.ClinSig clinSig) { case PATHOGENIC_OR_LIKELY_PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC; case PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC; case CONFLICTING_PATHOGENICITY_INTERPRETATIONS -> ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS; + case UNCERTAIN_RISK_ALLELE -> ClinVar.ClinSig.UNCERTAIN_RISK_ALLELE; + case LIKELY_RISK_ALLELE -> ClinVar.ClinSig.LIKELY_RISK_ALLELE; + case ESTABLISHED_RISK_ALLELE -> ClinVar.ClinSig.ESTABLISHED_RISK_ALLELE; case AFFECTS -> ClinVar.ClinSig.AFFECTS; case ASSOCIATION -> ClinVar.ClinSig.ASSOCIATION; case DRUG_RESPONSE -> ClinVar.ClinSig.DRUG_RESPONSE; diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java index 5126a6f61..1732c027f 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java @@ -28,6 +28,7 @@ import org.slf4j.LoggerFactory; import java.util.*; +import java.util.stream.Collectors; import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*; @@ -38,6 +39,47 @@ public class ClinVarAlleleParser extends VcfAlleleParser { private static final Logger logger = LoggerFactory.getLogger(ClinVarAlleleParser.class); + private static final Map CLINSIG_MAP; + + static { + Map temp = new HashMap<>(); + temp.put("Uncertain_significance", UNCERTAIN_SIGNIFICANCE); +// temp.put("Uncertain_significance/Uncertain_risk_allele", UNCERTAIN_SIGNIFICANCE); + temp.put("Benign", BENIGN); +// temp.put("Benign/Likely_benign", BENIGN_OR_LIKELY_BENIGN); + temp.put("Likely_benign", LIKELY_BENIGN); + temp.put("Conflicting_interpretations_of_pathogenicity", CONFLICTING_PATHOGENICITY_INTERPRETATIONS); + temp.put("Conflicting_classifications_of_pathogenicity", CONFLICTING_PATHOGENICITY_INTERPRETATIONS); + temp.put("Likely_pathogenic", LIKELY_PATHOGENIC); + temp.put("Likely_pathogenic,_low_penetrance", LIKELY_PATHOGENIC); + temp.put("Likely_pathogenic/Likely_risk_allele", LIKELY_PATHOGENIC); + temp.put("Likely_pathogenic/Established_risk_allele", LIKELY_PATHOGENIC); + temp.put("Pathogenic/Likely_pathogenic", PATHOGENIC_OR_LIKELY_PATHOGENIC); + temp.put("Pathogenic/Likely_pathogenic/Likely_risk_allele", PATHOGENIC_OR_LIKELY_PATHOGENIC); + temp.put("Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance", PATHOGENIC_OR_LIKELY_PATHOGENIC); + temp.put("Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance/Established_risk_allele" + , PATHOGENIC_OR_LIKELY_PATHOGENIC); + temp.put("Pathogenic", PATHOGENIC); + temp.put("Pathogenic,_low_penetrance", PATHOGENIC); + temp.put("Pathogenic/Likely_risk_allele", PATHOGENIC); + temp.put("Pathogenic/Pathogenic,_low_penetrance", PATHOGENIC); + temp.put("Uncertain_risk_allele", UNCERTAIN_RISK_ALLELE); + temp.put("Likely_risk_allele", LIKELY_RISK_ALLELE); + temp.put("Established_risk_allele", ESTABLISHED_RISK_ALLELE); + temp.put("not_provided", NOT_PROVIDED); + temp.put("no_classification_for_the_single_variant", NOT_PROVIDED); + temp.put("no_classifications_from_unflagged_records", NOT_PROVIDED); + temp.put("Affects", AFFECTS); + temp.put("association", ASSOCIATION); + temp.put("association_not_found", ASSOCIATION); // TODO add ASSOCIATION_NOT_FOUND + temp.put("drug_response", DRUG_RESPONSE); + temp.put("other", OTHER); + temp.put("confers_sensitivity", OTHER); + temp.put("protective", PROTECTIVE); + temp.put("risk_factor", RISK_FACTOR); + CLINSIG_MAP = Map.copyOf(temp); + } + @Override List parseInfoField(List alleles, String info) { for (Allele allele : alleles) { @@ -68,11 +110,9 @@ private ClinVarData parseClinVarData(Allele allele, String info) { String value = keyValue[1]; switch (key) { case "CLNSIG": - String[] clinsigs = value.split(",_"); - ClinSig primary = parseClinSig(clinsigs[0]); - Set secondary = parseSecondaryClinSig(clinsigs); - clinVarBuilder.primaryInterpretation(primary); - clinVarBuilder.secondaryInterpretations(secondary); + String[] clinsigs = value.split("\\|"); + clinVarBuilder.primaryInterpretation(parseClinSig(clinsigs[0])); + clinVarBuilder.secondaryInterpretations(parseSecondaryClinSig(clinsigs)); break; case "CLNREVSTAT": //CLNREVSTAT criteria_provided,_conflicting_interpretations, criteria_provided,_multiple_submitters,_no_conflicts, criteria_provided,_single_submitter, no_assertion_criteria_provided, no_assertion_provided, no_interpretation_for_the_single_variant, practice_guideline, reviewed_by_expert_panel @@ -143,23 +183,33 @@ private ClinSig parseClinSig(String clinsig) { // Likely_benign=52064, Likely_pathogenic=15127, Pathogenic=46803, Pathogenic/Likely_pathogenic=3278, // Uncertain_significance=120418, association=148, drug_response=290, not_provided=10980, other=1796, protective=30, // risk_factor=411 - return switch (clinsig) { - case "Uncertain_significance" -> UNCERTAIN_SIGNIFICANCE; - case "Benign" -> BENIGN; - case "Benign/Likely_benign" -> BENIGN_OR_LIKELY_BENIGN; - case "Likely_benign" -> LIKELY_BENIGN; - case "Conflicting_interpretations_of_pathogenicity" -> CONFLICTING_PATHOGENICITY_INTERPRETATIONS; - case "Likely_pathogenic" -> LIKELY_PATHOGENIC; - case "Pathogenic/Likely_pathogenic" -> PATHOGENIC_OR_LIKELY_PATHOGENIC; - case "Pathogenic" -> PATHOGENIC; - case "Affects" -> AFFECTS; - case "association" -> ASSOCIATION; - case "drug_response" -> DRUG_RESPONSE; - case "other" -> OTHER; - case "protective" -> PROTECTIVE; - case "risk_factor" -> RISK_FACTOR; - default -> NOT_PROVIDED; - }; + + // this is yuk! + clinsig = clinsig.replace(",_low_penetrance", ""); + Set clinSigs = Arrays.stream(clinsig.split("/")) + .map(CLINSIG_MAP::get) + .collect(Collectors.toCollection(() -> EnumSet.noneOf(ClinSig.class))); + + if (clinSigs.isEmpty()) { + throw new IllegalArgumentException("'" + clinsig + "' is not a recognised ClinVar CLNSIG value"); + } + if (clinSigs.size() == 1) { + return clinSigs.iterator().next(); + } + // remove the risk allele if present and try to figure out if it's a P/LP or B/LB + clinSigs.removeAll(EnumSet.of(UNCERTAIN_RISK_ALLELE, LIKELY_RISK_ALLELE, ESTABLISHED_RISK_ALLELE)); + + if (clinSigs.size() == 1) { + return clinSigs.iterator().next(); + } + else if (clinSigs.contains(PATHOGENIC) && clinSigs.contains(LIKELY_PATHOGENIC)) { + return PATHOGENIC_OR_LIKELY_PATHOGENIC; + } + else if (clinSigs.contains(BENIGN) && clinSigs.contains(LIKELY_BENIGN)) { + return BENIGN_OR_LIKELY_BENIGN; + } + // shouldn't get here.... + throw new IllegalArgumentException("'" + clinsig + "' is not a recognised ClinVar CLNSIG value"); } } diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java index 15a623387..95676cdba 100644 --- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java @@ -41,7 +41,6 @@ public class DbSnpAlleleParser extends VcfAlleleParser { @Override List parseInfoField(List alleles, String info) { - System.out.println(info); Map> minorAlleleFrequencies = parseMinorAlleleFrequencies(info); for (Map.Entry> entry : minorAlleleFrequencies.entrySet()) { diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java new file mode 100644 index 000000000..6f9046263 --- /dev/null +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java @@ -0,0 +1,34 @@ +package org.monarchinitiative.exomiser.data.genome.model.parsers; + +import org.monarchinitiative.exomiser.core.genome.Contigs; +import org.monarchinitiative.exomiser.core.proto.AlleleProto; +import org.monarchinitiative.exomiser.data.genome.model.Allele; + +import java.util.List; + +public class SpliceAiAlleleParser implements AlleleParser { + @Override + public List parseLine(String line) { + if (line.startsWith("#")) { + // comment line. + return List.of(); + } + var allele = parseAllele(line); + return List.of(allele); + } + + private Allele parseAllele(String line) { + String[] fields = line.split("\t"); + if (fields.length != 5) { + throw new IllegalStateException("Expected 5 fields but found " + fields.length + " in line " + line); + } + int chr = Contigs.parseId(fields[0]); // 1..22,X,Y,MT + int start = Integer.parseInt(fields[1]); + String ref = fields[2]; + String alt = fields[3]; + float score = Float.parseFloat(fields[4]); + Allele allele = new Allele(chr, start, ref, alt); + allele.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(score).build()); + return allele; + } +} diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java new file mode 100644 index 000000000..8184b85ff --- /dev/null +++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java @@ -0,0 +1,14 @@ +package org.monarchinitiative.exomiser.data.genome.model.resource; + +import org.monarchinitiative.exomiser.data.genome.model.archive.TabixArchive; +import org.monarchinitiative.exomiser.data.genome.model.parsers.SpliceAiAlleleParser; + +import java.net.URL; +import java.nio.file.Path; + +public class SpliceAiAlleleResource extends AbstractAlleleResource { + + public SpliceAiAlleleResource(String name, URL resourceUrl, Path resourcePath) { + super(name, resourceUrl, new TabixArchive(resourcePath), new SpliceAiAlleleParser()); + } +} diff --git a/exomiser-data-genome/src/main/resources/application.properties b/exomiser-data-genome/src/main/resources/application.properties index b1e64131d..7da65c540 100644 --- a/exomiser-data-genome/src/main/resources/application.properties +++ b/exomiser-data-genome/src/main/resources/application.properties @@ -19,6 +19,8 @@ # spring.flyway.enabled=false spring.h2.console.enabled=true +logging.file.path=${build-dir} + # build-dir defines the main directory which will be used to build the databases. build-dir=. build-version=1711 @@ -62,6 +64,10 @@ hg19.uk10k.file-dir=${hg19.variant-dir} hg19.dbnsfp.file-name=dbNSFP4.5a.zip hg19.dbnsfp.file-url=ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/ hg19.dbnsfp.file-dir=${hg19.variant-dir} +# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg19.vcf.gz to only include variants max(delta_score) >= 1.0 +hg19.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz +hg19.splice-ai.file-url=file://${hg19.variant-dir} +hg19.splice-ai.file-dir=${hg19.variant-dir} #hg38 hg38.variant-dir=${build-dir}/hg38/variants @@ -69,11 +75,11 @@ hg38.variant-processed-dir=${hg38.variant-dir}/processed hg38.genome-dir=${build-dir}/hg38/genome hg38.genome-processed-dir=${hg38.genome-dir}/processed hg38.ensembl-variation-url=http://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/ -hg38.gnomad-exome.file-name=gnomad-v4/exomes +hg38.gnomad-exome.file-name=gnomad-v4-1/exomes hg38.gnomad-exome.file-url=https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/liftover_grch38/vcf/exomes/ hg38.gnomad-exome.file-dir=${hg38.variant-dir} # hg38.gnomad-genome.file-name should be a directory containing the gnomad.genomes.v3.1.2.sites.chr$i.vcf.bgz files -hg38.gnomad-genome.file-name=gnomad-v4/genomes +hg38.gnomad-genome.file-name=gnomad-v4-1/genomes hg38.gnomad-genome.file-url=https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.2/vcf/genomes/gnomad.genomes.v3.1.2.sites.chr1.vcf.bgz hg38.gnomad-genome.file-dir=${hg38.variant-dir} hg38.gnomad-mito.file-name=gnomad.genomes.v3.1.sites.chrM.vcf.bgz @@ -106,3 +112,7 @@ hg38.uk10k.file-dir=${hg38.variant-dir} hg38.dbnsfp.file-name=${hg19.dbnsfp.file-name} hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url} hg38.dbnsfp.file-dir=${hg19.variant-dir} +# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg38.vcf.gz to only include variants max(delta_score) >= 1.0 +hg38.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg38.vcf.gz +hg38.splice-ai.file-url=file://${hg38.variant-dir} +hg38.splice-ai.file-dir=${hg38.variant-dir} diff --git a/exomiser-data-genome/src/main/resources/default_sources.ini b/exomiser-data-genome/src/main/resources/default_sources.ini index 4588af999..3d4310442 100644 --- a/exomiser-data-genome/src/main/resources/default_sources.ini +++ b/exomiser-data-genome/src/main/resources/default_sources.ini @@ -54,7 +54,7 @@ type=ucsc alias=MT,M,chrM chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java new file mode 100644 index 000000000..cf59518fc --- /dev/null +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java @@ -0,0 +1,48 @@ +package org.monarchinitiative.exomiser.data.genome; + + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.monarchinitiative.exomiser.core.genome.GenomeAssembly; +import org.monarchinitiative.exomiser.data.genome.model.AlleleResource; +import org.monarchinitiative.exomiser.data.genome.model.BuildInfo; +import org.monarchinitiative.exomiser.data.genome.model.parsers.DbNsfpColumnIndex; +import org.monarchinitiative.exomiser.data.genome.model.resource.DbNsfp4AlleleResource; +import org.monarchinitiative.exomiser.data.genome.model.resource.SpliceAiAlleleResource; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.*; + +class VariantDatabaseBuildRunnerTest { + + @Test + void testBuild(@TempDir Path tempDir) throws IOException { + BuildInfo buildInfo = BuildInfo.of(GenomeAssembly.HG38, "2404"); + List alleleResources = List.of(new DbNsfp4AlleleResource("hg38.dbnsfp", null, Path.of("src/test/resources/dbNSFP4.0_test.zip"), DbNsfpColumnIndex.HG38)); + VariantDatabaseBuildRunner instance = new VariantDatabaseBuildRunner(buildInfo, tempDir, alleleResources); + instance.run(); + Path variantDatabase = instance.variantDatabasePath(); + assertThat(Files.exists(variantDatabase), is(true)); + assertThat(Files.size(variantDatabase), greaterThan(0L)); + } + + @Disabled("Manual testing") + @Test + void manualTestBuild() throws IOException { + BuildInfo buildInfo = BuildInfo.of(GenomeAssembly.HG19, "2404"); + Path buildPath = Path.of("/data/exomiser-build/"); + List alleleResources = List.of(new SpliceAiAlleleResource("hg19.splice-ai", null, buildPath.resolve("spliceai_max_delta_scores_sig_only.masked.snv.hg19.vcf.gz"))); + VariantDatabaseBuildRunner instance = new VariantDatabaseBuildRunner(buildInfo, buildPath, alleleResources); + instance.run(); + Path variantDatabase = instance.variantDatabasePath(); + assertThat(Files.exists(variantDatabase), is(true)); + assertThat(Files.size(variantDatabase), greaterThan(0L)); + } + +} \ No newline at end of file diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java index 4c0137223..16a14dfa0 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java @@ -87,6 +87,7 @@ public void testResources() { // exac removed as this is part of gnomad alleleResources.put("esp", instance.espAlleleResource()); alleleResources.put("dbnsfp", instance.dbnsfpAlleleResource()); + alleleResources.put("splice-ai", instance.spliceAiAlleleResource()); Map expectedResources = alleleResources.build(); diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java index c7accf5e4..ca58e094d 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java @@ -88,6 +88,7 @@ public void testResources() { // esp removed as this is part of gnomad since v4 alleleResources.put("dbnsfp", instance.dbnsfpAlleleResource()); // clinvar removed as this is now standalone + alleleResources.put("splice-ai", instance.spliceAiAlleleResource()); Map expectedResources = alleleResources.build(); diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java index 30b9281d9..d6357748b 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java @@ -199,10 +199,14 @@ public void convertClinVar() { @Test public void convertClinVarConflictingInterpretationCounts() { ClinVarData clinVarData = ClinVarData.builder() + .primaryInterpretation(ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) .conflictingInterpretationCounts(Map.of(ClinVarData.ClinSig.PATHOGENIC, 3, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE, 2)) .build(); ClinVar expected = ClinVar.newBuilder() + .setPrimaryInterpretation(ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .setReviewStatus(ClinVar.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) .putAllClinSigCounts(Map.of("PATHOGENIC", 3, "UNCERTAIN_SIGNIFICANCE", 2)) .build(); diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java index c19fbe9cd..7c787edf1 100644 --- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java @@ -20,13 +20,25 @@ package org.monarchinitiative.exomiser.data.genome.model.parsers; +import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor; import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData; +import org.monarchinitiative.exomiser.core.proto.AlleleProto; +import org.monarchinitiative.exomiser.data.genome.indexers.AlleleConverter; import org.monarchinitiative.exomiser.data.genome.model.Allele; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; + +import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*; +import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ReviewStatus.*; /** @@ -86,8 +98,8 @@ public void testParseClnSigPathogenic() { expected.setRsId("7105"); expected.setClinVarData(ClinVarData.builder() .variationId("7105") - .primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC) - .reviewStatus(ClinVarData.ReviewStatus.PRACTICE_GUIDELINE) + .primaryInterpretation(PATHOGENIC) + .reviewStatus(PRACTICE_GUIDELINE) .build()); assertParseLineEquals(line, List.of(expected)); @@ -100,8 +112,8 @@ public void testParseClnSigLikelyPathogenic() { expected.setRsId("121918506"); expected.setClinVarData(ClinVarData.builder() .variationId("374823") - .primaryInterpretation(ClinVarData.ClinSig.LIKELY_PATHOGENIC) - .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_SINGLE_SUBMITTER) + .primaryInterpretation(LIKELY_PATHOGENIC) + .reviewStatus(CRITERIA_PROVIDED_SINGLE_SUBMITTER) .build()); assertParseLineEquals(line, List.of(expected)); @@ -112,7 +124,7 @@ public void testParseClnSig() { String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Benign/Likely_benign"; Allele expected = new Allele(1, 28590, "T", "TTGG"); expected.setClinVarData(ClinVarData.builder() - .primaryInterpretation(ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN) + .primaryInterpretation(BENIGN_OR_LIKELY_BENIGN) .build()); assertParseLineEquals(line, List.of(expected)); @@ -120,11 +132,13 @@ public void testParseClnSig() { @Test public void testParseClnSigWithSecondary() { - String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Pathogenic/Likely_pathogenic,_other,_association"; + // as of approx. Feb 2024 ClinVar changed their delimiters from ',_' to '|' so the parser will now only recognise + // files released after this date. + String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Pathogenic/Likely_pathogenic|other|association"; Allele expected = new Allele(1, 28590, "T", "TTGG"); expected.setClinVarData(ClinVarData.builder() - .primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC) - .secondaryInterpretations(EnumSet.of(ClinVarData.ClinSig.OTHER, ClinVarData.ClinSig.ASSOCIATION)) + .primaryInterpretation(PATHOGENIC_OR_LIKELY_PATHOGENIC) + .secondaryInterpretations(EnumSet.of(OTHER, ASSOCIATION)) .build()); assertParseLineEquals(line, List.of(expected)); @@ -144,7 +158,7 @@ public void testParseClnRevStat() { String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNREVSTAT=criteria_provided,_conflicting_interpretations"; Allele expected = new Allele(1, 28590, "T", "TTGG"); expected.setClinVarData(ClinVarData.builder() - .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) + .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) .build()); assertParseLineEquals(line, List.of(expected)); @@ -155,7 +169,7 @@ public void testParseClinSigIncl() { String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIGINCL=424752:Pathogenic"; Allele expected = new Allele(1, 28590, "T", "TTGG"); expected.setClinVarData(ClinVarData.builder() - .includedAlleles(Map.of("424752", ClinVarData.ClinSig.PATHOGENIC)) + .includedAlleles(Map.of("424752", PATHOGENIC)) .build()); assertParseLineEquals(line, List.of(expected)); @@ -167,8 +181,8 @@ public void testParseClinSigInclMulti() { Allele expected = new Allele(1, 28590, "T", "TTGG"); expected.setClinVarData(ClinVarData.builder() .includedAlleles(Map.of( - "424752", ClinVarData.ClinSig.PATHOGENIC, - "15612", ClinVarData.ClinSig.OTHER + "424752", PATHOGENIC, + "15612", OTHER )) .build()); @@ -182,9 +196,9 @@ public void testParseClinSigNoNumbers() { expected.setRsId("25511"); expected.setClinVarData(ClinVarData.builder() .variationId("25511") - .reviewStatus(ClinVarData.ReviewStatus.NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT) + .reviewStatus(NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT) .includedAlleles(Map.of( - "8152", ClinVarData.ClinSig.PATHOGENIC + "8152", PATHOGENIC )) .build()); @@ -198,15 +212,76 @@ void testParseClinSigConf() { expected.setRsId("369982920"); expected.setClinVarData(ClinVarData.builder() .variationId("218256") - .primaryInterpretation(ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS) - .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) + .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) + // CLNSIGCONF=Pathogenic(5)|Uncertain_significance(1) + .conflictingInterpretationCounts(Map.of(PATHOGENIC, 5, UNCERTAIN_SIGNIFICANCE, 1)) + .build()); + + assertParseLineEquals(line, List.of(expected)); + } + + /** + * Test for issue 560 + */ + @Test + void testParseClinSigConfClassification() { + String line = "11\t47352652\t506638\tA\tG\t.\t.\tALLELEID=503825;CLNDISDB=MedGen:CN230736|MedGen:CN169374;CLNDN=Cardiovascular_phenotype|not_specified;CLNHGVS=NC_000011.10:g.47352652A>G;CLNREVSTAT=criteria_provided,_conflicting_classifications;CLNSIG=Conflicting_classifications_of_pathogenicity;CLNSIGCONF=Uncertain_significance(1)|Likely_benign(1);CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA658797649;GENEINFO=MYBPC3:4607;ORIGIN=1;RS=886048383"; + Allele expected = new Allele(11, 47352652, "A", "G"); + expected.setRsId("886048383"); + expected.setClinVarData(ClinVarData.builder() + .variationId("506638") + .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) // CLNSIGCONF=Pathogenic(5)|Uncertain_significance(1) - .conflictingInterpretationCounts(Map.of(ClinVarData.ClinSig.PATHOGENIC, 5, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE, 1)) + .conflictingInterpretationCounts(Map.of(UNCERTAIN_SIGNIFICANCE, 1, LIKELY_BENIGN, 1)) + .build()); + assertParseLineEquals(line, List.of(expected)); + } + + @Test + void unrecognisedClinSigCombo() { + String line = "X\t153763492\t100055\tT\tC\t.\t.\tAF_TGP=0.09457;ALLELEID=25399;CLNDISDB=MONDO:MONDO:0005775,MedGen:C2939465|MONDO:MONDO:0010480,MedGen:C2720289,OMIM:300908,Orphanet:466026|MeSH:D030342,MedGen:C0950123|MedGen:C3661900|MedGen:CN169374|.|MONDO:MONDO:0021024,MedGen:C1970028,OMIM:611162,Orphanet:673|MedGen:C3806712,OMIM:300910,Orphanet:391330;CLNDN=G6PD_deficiency|Anemia,_nonspherocytic_hemolytic,_due_to_G6PD_deficiency|Inborn_genetic_diseases|not_provided|not_specified|G6PD_A+|Malaria,_susceptibility_to|Bone_mineral_density_quantitative_trait_locus_18;CLNHGVS=NC_000023.10:g.153763492T>C;CLNREVSTAT=criteria_provided,_conflicting_classifications;CLNSIG=Conflicting_classifications_of_pathogenicity;CLNSIGCONF=Pathogenic(10)|Likely_pathogenic(1)|Uncertain_significance(5)|Benign(1)|Likely_benign(3);CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA120939|Genetic_Testing_Registry_(GTR):GTR000167633|OMIM:305900.0001|OMIM:305900.0002|OMIM:305900.0023;GENEINFO=G6PD:2539;MC=SO:0001583|missense_variant;ORIGIN=17;RS=1050829;CLNDISDBINCL=MONDO:MONDO:0010480,MedGen:C2720289,OMIM:300908,Orphanet:466026|.|MONDO:MONDO:0005775,MedGen:C2939465;CLNDNINCL=Anemia,_nonspherocytic_hemolytic,_due_to_G6PD_deficiency|G6PD_SANTAMARIA|G6PD_deficiency;CLNSIGINCL=10361:Likely_pathogenic/Established_risk_allele|10382:Likely_pathogenic|1065168:Pathogenic|1722597:Likely_pathogenic|1722617:Likely_pathogenic|1722622:Likely_pathogenic|1722693:Uncertain_significance|1722697:Uncertain_significance|1722698:Uncertain_significance|1722726:Likely_pathogenic|1722747:Likely_pathogenic"; + Allele expected = new Allele(23, 153763492, "T", "C"); + expected.setRsId("1050829"); + Map includedAlleles = new HashMap<>(); + includedAlleles.put("1722747", LIKELY_PATHOGENIC); + includedAlleles.put("1722726", LIKELY_PATHOGENIC); + includedAlleles.put("1722617", LIKELY_PATHOGENIC); + includedAlleles.put("1722622", LIKELY_PATHOGENIC); + includedAlleles.put("10361", LIKELY_PATHOGENIC); // CLNSIGINCL=10361:Likely_pathogenic/Established_risk_allele + includedAlleles.put("10382", LIKELY_PATHOGENIC); + includedAlleles.put("1722597", LIKELY_PATHOGENIC); + includedAlleles.put("1722697", UNCERTAIN_SIGNIFICANCE); + includedAlleles.put("1722698", UNCERTAIN_SIGNIFICANCE); + includedAlleles.put("1065168", PATHOGENIC); + includedAlleles.put("1722693", UNCERTAIN_SIGNIFICANCE); + expected.setClinVarData(ClinVarData.builder() + .variationId("100055") + .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS) + .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS) + .conflictingInterpretationCounts(Map.of(PATHOGENIC, 10, LIKELY_PATHOGENIC, 1, UNCERTAIN_SIGNIFICANCE, 5, LIKELY_BENIGN, 3, BENIGN, 1)) + .includedAlleles(includedAlleles) .build()); + assertParseLineEquals(line, List.of(expected)); + } + @Test + void clnSigLowPenetrance() { + String line = "1\t94473807\t7888\tC\tT\t.\t.\tALLELEID=22927;RS=1800553;CLNREVSTAT=criteria_provided,_multiple_submitters,_no_conflicts;CLNSIG=Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance;"; + Allele expected = new Allele(1, 94473807, "C", "T"); + expected.setRsId("1800553"); + expected.setClinVarData(ClinVarData.builder() + .variationId("7888") + .primaryInterpretation(PATHOGENIC_OR_LIKELY_PATHOGENIC) + .reviewStatus(CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS) + .build()); assertParseLineEquals(line, List.of(expected)); } + // 4 6301479 215392 G C . . ALLELEID=211061;CLNDISDB=MedGen:CN517202|MONDO:MONDO:0009101,MedGen:C4551693,OMIM:222300,Orphanet:3463;CLNDN=not_provided|Wolfram_syndrome_1;CLNHGVS=NC_000004.12:g.6301479G>C;CLNREVSTAT=criteria_provided,_multiple_submitters,_no_conflicts;CLNSIG=Likely_pathogenic/Likely_risk_allele;CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA322818;GENEINFO=WFS1:7466;MC=SO:0001583|missense_variant;ORIGIN=1;RS=753237278 + + //CLNSIGINCL=424752:Pathogenic //CLNSIGINCL=15127:other|15128:other|15334:Pathogenic|15335:Pathogenic|15336:Pathogenic|15337:Pathogenic|15610:Pathogenic|15612:other @@ -217,5 +292,153 @@ void testParseClinSigConf() { //CLNVC counts: Deletion=22574, Duplication=9075, Indel=2162, Insertion=2358, Inversion=32, Microsatellite=15, Variation=9, copy_number_loss=4, single_nucleotide_variant=264009 //CLNREVSTAT counts: criteria_provided,_conflicting_interpretations=12678, criteria_provided,_multiple_submitters,_no_conflicts=34967, criteria_provided,_single_submitter=197277, no_assertion_criteria_provided=34308, no_assertion_provided=10980, no_interpretation_for_the_single_variant=500, practice_guideline=23, reviewed_by_expert_panel=8786 + + @Disabled("data exploration") + @Test + void testClnSigValues() { + Path clinVarVcfFile = Path.of("clinvar_hg38.vcf.gz"); + var infoFieldCounts = getInfoFieldCounts(clinVarVcfFile, "CLNSIG"); + for (Map.Entry entry : infoFieldCounts.entrySet()) { + System.out.println(entry); + } + //##fileDate=2024-05-09 + //##INFO= + //Affects=142 + //Affects|risk_factor=1 + //Benign=202927 + //Benign/Likely_benign=48319 + //Benign/Likely_benign|association=1 + //Benign/Likely_benign|drug_response=2 + //Benign/Likely_benign|drug_response|other=3 + //Benign/Likely_benign|other=9 + //Benign/Likely_benign|other|risk_factor=1 + //Benign/Likely_benign|risk_factor=2 + //Benign|Affects=1 + //Benign|association=5 + //Benign|confers_sensitivity=2 + //Benign|drug_response=2 + //Benign|other=9 + //Benign|risk_factor=3 + //Conflicting_classifications_of_pathogenicity=125784 + //Conflicting_classifications_of_pathogenicity|Affects=2 + //Conflicting_classifications_of_pathogenicity|association=8 + //Conflicting_classifications_of_pathogenicity|association|risk_factor=2 + //Conflicting_classifications_of_pathogenicity|drug_response=2 + //Conflicting_classifications_of_pathogenicity|drug_response|other=1 + //Conflicting_classifications_of_pathogenicity|other=25 + //Conflicting_classifications_of_pathogenicity|other|risk_factor=2 + //Conflicting_classifications_of_pathogenicity|protective=1 + //Conflicting_classifications_of_pathogenicity|risk_factor=15 + //Likely_benign=874860 + //Likely_benign|association=1 + //Likely_benign|drug_response|other=4 + //Likely_benign|other=12 + //Likely_benign|risk_factor=1 + //Likely_pathogenic=82058 + //Likely_pathogenic,_low_penetrance=6 + //Likely_pathogenic/Likely_risk_allele=17 + //Likely_pathogenic|Affects=1 + //Likely_pathogenic|association=5 + //Likely_pathogenic|drug_response=21 + //Likely_pathogenic|other=2 + //Likely_pathogenic|protective=1 + //Likely_pathogenic|risk_factor=10 + //Likely_risk_allele=97 + //Pathogenic=157595 + //Pathogenic/Likely_pathogenic=26459 + //Pathogenic/Likely_pathogenic/Likely_risk_allele=4 + //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance=6 + //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance/Established_risk_allele=1 + //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance|other=1 + //Pathogenic/Likely_pathogenic|drug_response=3 + //Pathogenic/Likely_pathogenic|other=11 + //Pathogenic/Likely_pathogenic|risk_factor=6 + //Pathogenic/Likely_risk_allele=9 + //Pathogenic/Likely_risk_allele|risk_factor=1 + //Pathogenic/Pathogenic,_low_penetrance|other=1 + //Pathogenic/Pathogenic,_low_penetrance|other|risk_factor=1 + //Pathogenic|Affects=6 + //Pathogenic|association=6 + //Pathogenic|association|protective=1 + //Pathogenic|confers_sensitivity=1 + //Pathogenic|drug_response=54 + //Pathogenic|other=71 + //Pathogenic|protective=2 + //Pathogenic|risk_factor=22 + //Uncertain_risk_allele=35 + //Uncertain_risk_allele|risk_factor=1 + //Uncertain_significance=1358995 + //Uncertain_significance/Uncertain_risk_allele=153 + //Uncertain_significance|association=8 + //Uncertain_significance|drug_response=11 + //Uncertain_significance|other=4 + //Uncertain_significance|risk_factor=7 + //association=341 + //association_not_found=4 + //association|drug_response|risk_factor=1 + //association|risk_factor=1 + //confers_sensitivity=11 + //confers_sensitivity|other=1 + //drug_response=1869 + //drug_response|other=1 + //drug_response|risk_factor=3 + //no_classification_for_the_single_variant=683 + //no_classifications_from_unflagged_records=7 + //not_provided=9808 + //other=1588 + //other|risk_factor=1 + //protective=39 + //protective|risk_factor=4 + //risk_factor=407 + } + + @Disabled("data exploration") + @Test + void testClnRevStatValues() { + Path clinVarVcfFile = Path.of("clinvar.vcf.gz"); + var infoFieldCounts = getInfoFieldCounts(clinVarVcfFile, "CLNREVSTAT"); + for (Map.Entry entry : infoFieldCounts.entrySet()) { + System.out.println(entry); + } + //##fileDate=2024-05-09 + //##INFO= + //criteria_provided,_conflicting_classifications=125583 + //criteria_provided,_multiple_submitters,_no_conflicts=425991 + //criteria_provided,_single_submitter=2266963 + //no_assertion_criteria_provided=48407 + //no_classification_for_the_single_variant=683 + //no_classification_provided=9806 + //no_classifications_from_unflagged_records=13 + //practice_guideline=51 + //reviewed_by_expert_panel=16215 + } + + private Map getInfoFieldCounts(Path clinVarVcfFile, String infoKey) { + try (Stream lines = new BufferedReader(new InputStreamReader(new BufferedInputStream(new GZIPInputStream(Files.newInputStream(clinVarVcfFile))))).lines()) { + var result = lines + .peek(line -> {if (line.startsWith("#")) {System.out.println(line);}}) + .filter(line -> !line.startsWith("#")) + .map(findInfoField(infoKey)) + .collect(Collectors.groupingBy(Function.identity(), Collectors.counting())); + return new TreeMap<>(result); + } catch (IOException e) { + System.err.println(e); + } + return Map.of(); + } + + private Function findInfoField(String infoKey) { + return line -> { + var fields = line.split("\t"); + var info= fields[7]; + var infoFields = info.split(";"); + for (String field : infoFields) { + if (field.startsWith(infoKey)) { + return field.split("=")[1]; + } + } + return ""; + }; + } } diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java new file mode 100644 index 000000000..c2baa7285 --- /dev/null +++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java @@ -0,0 +1,131 @@ +package org.monarchinitiative.exomiser.data.genome.model.parsers; + +import htsjdk.samtools.util.BlockCompressedOutputStream; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.monarchinitiative.exomiser.core.genome.Contigs; +import org.monarchinitiative.exomiser.core.proto.AlleleProto; +import org.monarchinitiative.exomiser.data.genome.model.Allele; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.zip.GZIPInputStream; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +class SpliceAiAlleleParserTest { + + @Test + void parseNonSexChrLine() { + String line = "10\t12345\tA\tC\t1.0"; + List result = new SpliceAiAlleleParser().parseLine(line); + Allele expected = new Allele(10, 12345, "A", "C"); + expected.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(1f).build()); + assertThat(result, equalTo(List.of(expected))); + } + + @Test + void parseSexChrLine() { + String line = "X\t12345\tA\tC\t1.0"; + List result = new SpliceAiAlleleParser().parseLine(line); + Allele expected = new Allele(23, 12345, "A", "C"); + expected.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(1f).build()); + assertThat(result, equalTo(List.of(expected))); + } + + /** + * This is a one-off method to prepare the SpliceAI data by selecting only the max score from the DL/DG/AL/AG scores + * >= minScore. This is recommended as >= 0.2 for high recall, increasing to 0.5 as a default and 0.8 for high-precision. + * confidence. Jaganathan et al, Cell 2019 and https://github.com/Illumina/SpliceAI + * + *
+ * Delta score of a variant, defined as the maximum of (DS_AG, DS_AL, DS_DG, DS_DL), ranges from 0 to 1 and can be interpreted as the probability of the variant being splice-altering. In the paper, a detailed characterization is provided for 0.2 (high recall), 0.5 (recommended), and 0.8 (high precision) cutoffs. Delta position conveys information about the location where splicing changes relative to the variant position (positive values are downstream of the variant, negative values are upstream). + *
+ * + * These annotations are free for academic and not-for-profit use; other use requires a commercial license from Illumina, Inc. + * + **/ + @Disabled + @Test + void prepareSpliceAiData() { + Path dataPath = Path.of("/home/hhx640/Documents/exomiser-build/"); + Path inFile = dataPath.resolve("spliceai_scores.masked.snv.hg19.vcf.gz"); + Path outFile = dataPath.resolve("spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz"); + // the ACMG guidelines for use of SpliceAI scores recommend anything under 0.1 to be scored BP4 and over 0.2 PP3 + // so, we're using that cutoff here. This means that any SNP with no score in the database can be considered as + // not impacting splicing. + float minScore = 0.1f; + SpliceAiMaxDeltaScoreParser.build(inFile, outFile, minScore); + } + + private static class SpliceAiMaxDeltaScoreParser { + + private static final Logger logger = LoggerFactory.getLogger(SpliceAiMaxDeltaScoreParser.class); + + public static void build(Path spliceAiInputPath, Path outputBgZipPath, float minScore) { + long count = 0; + logger.info("Parsing SpliceAI scores from {} with a maximum delta score >= {}", spliceAiInputPath.getFileName(), minScore); + try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new GZIPInputStream(Files.newInputStream(spliceAiInputPath)))); + BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new BlockCompressedOutputStream(outputBgZipPath.toFile(), 6))); + ) { + for (String line; (line = bufferedReader.readLine()) != null; ) { + if (line.startsWith("##")) { + bufferedWriter.write(line); + bufferedWriter.newLine(); + continue; + } + if (line.startsWith("#")) { + bufferedWriter.write("#CHROM\tPOS\tREF\tALT\tSPLICE_AI_MAX_DELTA_SCORE"); + bufferedWriter.newLine(); + continue; + } + //##INFO= + //1 69092 . T A . . SpliceAI=A|OR4F5|0.00|0.00|0.22|0.00|2|41|1|23 + String[] fields = line.split("\t"); + if (fields.length != 8) { + throw new IllegalArgumentException("Expected 8 fields but found " + fields.length + " in line " + line); + } + String contig = Contigs.toString(Contigs.parseId(fields[0])); // 1..22,X,Y,MT + int start = Integer.parseInt(fields[1]); + String ref = fields[3]; + String alt = fields[4]; + float score = parseSpliceAiScore(fields[7]); + // hg38 contains some oddly named unplaced contigs which end up with a contig of 0 and this leads to + // failed tabix indexing, so check and remove these. + if (!contig.equals("0") && score >= minScore) { + count++; + if (count % 1_000_000 == 0) { + logger.info("Current contig {} - written {} variants. Current: {}-{}-{}-{} SpliceAI={}", contig, count, contig, start, ref, alt, score); + } + String outLine = contig + "\t" + start + "\t" + ref + "\t"+ alt + "\t" + score; + bufferedWriter.write(outLine); + bufferedWriter.newLine(); + } + } + logger.info("Done - written {} variants", count); + } catch (IOException e) { + logger.error("Unable to write file " + outputBgZipPath, e); + } + } + + static float parseSpliceAiScore(String info) { + String[] fields = info.split("\\|"); + // SpliceAIv1.3 variant annotation. These include delta scores (DS) and delta positions (DP) for acceptor gain (AG), + // acceptor loss (AL), donor gain (DG), and donor loss (DL). + // Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL" + // SpliceAI=A|OR4F5|0.00|0.00|0.22|0.00|2|41|1|23 + // we want the max delta score for gain or loss + float max = 0f; + for (int i = 2; i < 6; i++) { + max = Math.max(max, Float.parseFloat(fields[i])); + } + return max; + } + + } +} \ No newline at end of file diff --git a/exomiser-data-genome/src/test/resources/application-test.properties b/exomiser-data-genome/src/test/resources/application-test.properties index f7154636f..a0ca8ee8c 100644 --- a/exomiser-data-genome/src/test/resources/application-test.properties +++ b/exomiser-data-genome/src/test/resources/application-test.properties @@ -19,6 +19,8 @@ # spring.flyway.enabled=false spring.h2.console.enabled=true +logging.file.path=${build-dir} + #build-dir defines the main directory which will be used to build the databases. build-dir=src/test/resources build-version=1711 @@ -60,6 +62,11 @@ hg19.uk10k.file-url=${hg19.ensembl-variation-url} hg19.dbnsfp.file-name=dbNSFPv3.4a.zip hg19.dbnsfp.file-dir=${hg19.variant-dir} hg19.dbnsfp.file-url=ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/ +# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg19.vcf.gz to only include variants max(delta_score) >= 1.0 +hg19.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz +hg19.splice-ai.file-url=file://${hg19.variant-dir} +hg19.splice-ai.file-dir=${hg19.variant-dir} + #hg38 hg38.variant-dir=${build-dir}/hg38/variants hg38.variant-processed-dir=${hg38.variant-dir}/processed @@ -101,4 +108,8 @@ hg38.uk10k.file-url=${hg38.ensembl-variation-url} #DbNSFP contains the hg19 and hg38 coordinates in the same file hg38.dbnsfp.file-name=${hg19.dbnsfp.file-name} hg38.dbnsfp.file-dir=${hg19.dbnsfp.file-dir} -hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url} \ No newline at end of file +hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url} +# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg38.vcf.gz to only include variants max(delta_score) >= 1.0 +hg38.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg38.vcf.gz +hg38.splice-ai.file-url=file://${hg38.variant-dir} +hg38.splice-ai.file-dir=${hg38.variant-dir} \ No newline at end of file diff --git a/exomiser-data-genome/src/test/resources/default_sources.ini b/exomiser-data-genome/src/test/resources/default_sources.ini index 7561361a9..a1e6e2fd4 100644 --- a/exomiser-data-genome/src/test/resources/default_sources.ini +++ b/exomiser-data-genome/src/test/resources/default_sources.ini @@ -54,7 +54,7 @@ type=ucsc alias=MT,M,chrM chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz -chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 +chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13 chrToAccessions.format = chr_accessions knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz diff --git a/exomiser-data-phenotype/pom.xml b/exomiser-data-phenotype/pom.xml index 79034b9f6..8d0eab613 100644 --- a/exomiser-data-phenotype/pom.xml +++ b/exomiser-data-phenotype/pom.xml @@ -31,7 +31,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA diff --git a/exomiser-rest-prioritiser/pom.xml b/exomiser-rest-prioritiser/pom.xml index 410424d28..e890622ae 100644 --- a/exomiser-rest-prioritiser/pom.xml +++ b/exomiser-rest-prioritiser/pom.xml @@ -27,7 +27,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA exomiser-rest-prioritiser diff --git a/exomiser-spring-boot-autoconfigure/pom.xml b/exomiser-spring-boot-autoconfigure/pom.xml index f7b6387dc..511a9d667 100644 --- a/exomiser-spring-boot-autoconfigure/pom.xml +++ b/exomiser-spring-boot-autoconfigure/pom.xml @@ -35,7 +35,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA diff --git a/exomiser-spring-boot-starter/pom.xml b/exomiser-spring-boot-starter/pom.xml index c7c045e11..18b31bc51 100644 --- a/exomiser-spring-boot-starter/pom.xml +++ b/exomiser-spring-boot-starter/pom.xml @@ -28,7 +28,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA exomiser-spring-boot-starter diff --git a/exomiser-spring-boot-test/pom.xml b/exomiser-spring-boot-test/pom.xml index 1b4491c94..b2bc3be56 100644 --- a/exomiser-spring-boot-test/pom.xml +++ b/exomiser-spring-boot-test/pom.xml @@ -27,7 +27,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA exomiser-spring-boot-test diff --git a/exomiser-web/pom.xml b/exomiser-web/pom.xml index 5adf01eee..f628241fc 100644 --- a/exomiser-web/pom.xml +++ b/exomiser-web/pom.xml @@ -31,7 +31,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA diff --git a/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java b/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java index c43e0ce86..b4e1912f9 100644 --- a/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java +++ b/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java @@ -79,8 +79,6 @@ public class SubmitJobController { VariantEffect.DOWNSTREAM_GENE_VARIANT, VariantEffect.CODING_TRANSCRIPT_INTRON_VARIANT, VariantEffect.NON_CODING_TRANSCRIPT_INTRON_VARIANT, - VariantEffect.SYNONYMOUS_VARIANT, - VariantEffect.SPLICE_REGION_VARIANT, VariantEffect.REGULATORY_REGION_VARIANT ); diff --git a/phenix-repository/pom.xml b/phenix-repository/pom.xml index b3bfd5633..a500b5e87 100644 --- a/phenix-repository/pom.xml +++ b/phenix-repository/pom.xml @@ -7,7 +7,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA phenix-repository diff --git a/pom.xml b/pom.xml index ad2622e3a..a8eb37c2b 100644 --- a/pom.xml +++ b/pom.xml @@ -24,7 +24,7 @@ org.monarchinitiative.exomiser exomiser - 14.0.2 + 14.1.0-ALPHA pom exomiser @@ -172,7 +172,11 @@ com.zaxxer HikariCP - + + org.duckdb + duckdb_jdbc + 1.0.0 +