pathogenicityScores = pathogenicityData.pathogenicityScores();
+ for (PathogenicityScore pathogenicityScore : pathogenicityScores) {
+ if (isPathogenic(pathogenicityScore)) {
+ numPathogenic++;
+ } else {
+ numBenign++;
+ }
+ }
+ if (pathogenicityScores.size() > 1 && numPathogenic > numBenign) {
+ acmgEvidenceBuilder.add(PP3);
+ }
+ if (pathogenicityScores.size() > 1 && numBenign > numPathogenic) {
+ acmgEvidenceBuilder.add(BP4);
+ }
+ }
+
+ private static boolean isPathogenic(PathogenicityScore pathogenicityScore) {
+ if (pathogenicityScore instanceof SiftScore score) {
+ return score.getRawScore() < SiftScore.SIFT_THRESHOLD;
+ }
+ if (pathogenicityScore instanceof MutationTasterScore score) {
+ return score.getScore() > MutationTasterScore.MTASTER_THRESHOLD;
+ }
+ if (pathogenicityScore instanceof PolyPhenScore score) {
+ return score.getScore() > PolyPhenScore.POLYPHEN_PROB_DAMAGING_THRESHOLD;
+ }
+ if (pathogenicityScore instanceof CaddScore score) {
+ // 95-99% most deleterious.
+ return score.getRawScore() >= 13.0f;
+ }
+ return pathogenicityScore.getScore() > 0.5f;
+ }
+
+}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java
new file mode 100644
index 000000000..710b9fc60
--- /dev/null
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgPVS1EvidenceAssigner.java
@@ -0,0 +1,168 @@
+package org.monarchinitiative.exomiser.core.analysis.util.acmg;
+
+import de.charite.compbio.jannovar.annotation.VariantEffect;
+import de.charite.compbio.jannovar.mendel.ModeOfInheritance;
+import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraint;
+import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraints;
+import org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex;
+import org.monarchinitiative.exomiser.core.model.TranscriptAnnotation;
+import org.monarchinitiative.exomiser.core.model.VariantEvaluation;
+import org.monarchinitiative.exomiser.core.prioritisers.model.Disease;
+
+import java.util.List;
+
+import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.PVS1;
+
+class AcmgPVS1EvidenceAssigner {
+
+ private AcmgPVS1EvidenceAssigner() {
+ }
+
+ /**
+ * Applies PVS1 according to guidelines in Recommendations for interpreting the loss of function PVS1 ACMG/AMP variant criterion.
+ *
+ * PVS1 "null variant (nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single or multiexon deletion) in a gene where LOF is a known mechanism of disease"
+ */
+ static void assignPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, Sex probandSex, ModeOfInheritance
+ modeOfInheritance, List knownDiseases) {
+ // TODO: add new method - gene.getAssociatedDiseases()
+ // also need ClinvarCache.getClinvarDataForGene(GeneIdentifier geneIdentifier)
+
+ // Certain types of variants (e.g., nonsense, frameshift, canonical ±1 or 2 splice sites, initiation codon, single exon or multiexon
+ // deletion) can often be assumed to disrupt gene function by leading to a complete absence of the gene product by lack of tran-
+ // scription or nonsense-mediated decay of an altered transcript. One must, however, exercise caution when classifying these
+ // variants as pathogenic by considering the following principles:
+ // (i) When classifying such variants as pathogenic, one must ensure that null variants are a known mechanism of
+ // pathogenicity consistent with the established inheritance pattern for the disease. For example, there are genes for
+ // which only heterozygous missense variants cause disease and null variants are benign in a heterozygous state (e.g.,
+ // many hypertrophic cardiomyopathy genes). A novel heterozygous nonsense variant in the MYH7 gene would
+ // not be considered pathogenic for dominant hypertrophic cardiomyopathy based solely on this evidence, whereas a
+ // novel heterozygous nonsense variant in the CFTR gene would likely be considered a recessive pathogenic variant.
+ // Caveats:
+ // • Beware of genes where LOF is not a known disease mechanism (e.g., GFAP, MYH7)
+ // • Use caution interpreting LOF variants at the extreme 3′ end of a gene
+ // • Use caution with splice variants that are predicted to lead to exon skipping but leave the remainder of the protein intact
+ // • Use caution in the presence of multiple transcripts
+
+ GeneConstraint geneConstraint = GeneConstraints.geneConstraint(variantEvaluation.getGeneSymbol());
+ // Should this be using the hasCompatibleDiseaseMatches variable?
+ boolean inGeneWithKnownDiseaseAssociations = !knownDiseases.isEmpty();
+ // TODO should modify the final strength according to ClinGen/GenCC known D2G validity - Table 1 of 10.1002/humu.23626
+ VariantEffect variantEffect = variantEvaluation.getVariantEffect();
+ if (inGeneWithKnownDiseaseAssociations
+ && isLossOfFunctionEffect(variantEffect)
+ && (geneConstraint != null && geneConstraint.isLossOfFunctionIntolerant())
+ ){
+ switch (variantEffect) {
+ case STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT ->
+ assignNonsenseOrFrameshiftPVS1(acmgEvidenceBuilder, variantEvaluation);
+ case SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT ->
+ assignCanonicalSpliceDonorOrAcceptorPVS1(acmgEvidenceBuilder, variantEvaluation);
+ case TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT ->
+ assignDeletionPVS1(acmgEvidenceBuilder, variantEvaluation);
+ case START_LOST -> assignInitiationCodonPVS1(acmgEvidenceBuilder, variantEvaluation);
+ default -> { // do not assign PVS1
+ }
+ }
+ }
+ }
+
+ private static void assignNonsenseOrFrameshiftPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) {
+ if (predictedToLeadToNmd(variantEvaluation)) {
+ acmgEvidenceBuilder.add(PVS1);
+ } else {
+ acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG);
+ }
+ }
+
+ private static void assignCanonicalSpliceDonorOrAcceptorPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) {
+ if (predictedToLeadToNmd(variantEvaluation)) {
+ acmgEvidenceBuilder.add(PVS1);
+ } else {
+ acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG);
+ }
+ }
+
+ private static void assignDeletionPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) {
+ // full gene deletion -> PVS1 (Full gene deletion of haploinsufficient gene should be considered P (PVS1_StandAlone?) given absence of conflicting data)
+ VariantEffect variantEffect = variantEvaluation.getVariantEffect();
+ if (variantEffect == VariantEffect.TRANSCRIPT_ABLATION || variantEffect == VariantEffect.EXON_LOSS_VARIANT && predictedToLeadToNmd(variantEvaluation)) {
+ // deletion of entire transcript
+ acmgEvidenceBuilder.add(PVS1);
+ } else {
+ acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.STRONG);
+ }
+ }
+
+ private static void assignInitiationCodonPVS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) {
+ acmgEvidenceBuilder.add(PVS1, AcmgCriterion.Evidence.MODERATE);
+ }
+
+ private static boolean isLossOfFunctionEffect(VariantEffect variantEffect) {
+ return isNonsenseOrFrameshift(variantEffect)
+ || isCanonicalSpliceDonorOrAcceptor(variantEffect)
+ || isGeneOrExonDeletion(variantEffect)
+ || isStartLost(variantEffect);
+ }
+
+ private static boolean isNonsenseOrFrameshift(VariantEffect variantEffect) {
+ return variantEffect == VariantEffect.STOP_LOST
+ || variantEffect == VariantEffect.STOP_GAINED
+ || variantEffect == VariantEffect.FRAMESHIFT_ELONGATION
+ || variantEffect == VariantEffect.FRAMESHIFT_TRUNCATION
+ || variantEffect == VariantEffect.FRAMESHIFT_VARIANT;
+ }
+
+ private static boolean isCanonicalSpliceDonorOrAcceptor(VariantEffect variantEffect) {
+ return variantEffect == VariantEffect.SPLICE_ACCEPTOR_VARIANT || variantEffect == VariantEffect.SPLICE_DONOR_VARIANT;
+ }
+
+ private static boolean isGeneOrExonDeletion(VariantEffect variantEffect) {
+ return variantEffect == VariantEffect.EXON_LOSS_VARIANT || variantEffect == VariantEffect.TRANSCRIPT_ABLATION;
+ }
+
+ private static boolean isStartLost(VariantEffect variantEffect) {
+ return variantEffect == VariantEffect.START_LOST;
+ }
+
+ private static boolean predictedToLeadToNmd(VariantEvaluation variantEvaluation) {
+ if (variantEvaluation.hasTranscriptAnnotations()) {
+ TranscriptAnnotation transcriptAnnotation = variantEvaluation.getTranscriptAnnotations().get(0);
+ return predictedToLeadToNmd(transcriptAnnotation);
+ }
+ return false;
+ }
+
+ // this is a crude method as we don't have the actual transcript coordinates here, so we'll only be able to apply the
+ // last exon and single exon gene rules
+ private static boolean predictedToLeadToNmd(TranscriptAnnotation transcriptAnnotation) {
+ // predicted to lead to NMD if not in last exon or last 50bp of penultimate exon, or is in multi-exon transcript.
+ // In other words a variant does NOT trigger NMD if it is located:
+ // - in the last exon
+ // - in the last 50 bases of the penultimate exon
+ // - in an exon larger than 400 bases
+ // - within the first 150 (CDS) bases of the transcription start site
+ // - in a single-exon gene
+ boolean notInLastExon = transcriptAnnotation.getRank() < transcriptAnnotation.getRankTotal(); // will be false for single exon genes where rank == rankTotal
+ VariantEffect variantEffect = transcriptAnnotation.getVariantEffect();
+ boolean isExonicOrCanonicalSpliceSite = (transcriptAnnotation.getRankType() == TranscriptAnnotation.RankType.EXON) || variantEffect == VariantEffect.SPLICE_ACCEPTOR_VARIANT || variantEffect == VariantEffect.SPLICE_DONOR_VARIANT;
+ return isExonicOrCanonicalSpliceSite && notInLastExon;
+ }
+
+ private static boolean compatibleWithRecessive(Sex probandSex, ModeOfInheritance modeOfInheritance) {
+ if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_RECESSIVE) {
+ return true;
+ }
+ return probandSex == Sex.FEMALE && modeOfInheritance == ModeOfInheritance.X_RECESSIVE;
+ }
+
+ private static boolean compatibleWithDominant(Sex probandSex, ModeOfInheritance modeOfInheritance) {
+ if (modeOfInheritance == ModeOfInheritance.AUTOSOMAL_DOMINANT) {
+ return true;
+ }
+ if (probandSex == Sex.MALE && (modeOfInheritance == ModeOfInheritance.X_RECESSIVE || modeOfInheritance == ModeOfInheritance.X_DOMINANT)) {
+ return true;
+ }
+ return (probandSex == Sex.FEMALE || probandSex == Sex.UNKNOWN) && modeOfInheritance == ModeOfInheritance.X_DOMINANT;
+ }
+}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java
new file mode 100644
index 000000000..9dd1a7d09
--- /dev/null
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgSpliceEvidenceAssigner.java
@@ -0,0 +1,229 @@
+package org.monarchinitiative.exomiser.core.analysis.util.acmg;
+
+import de.charite.compbio.jannovar.annotation.VariantEffect;
+import de.charite.compbio.jannovar.mendel.ModeOfInheritance;
+import org.monarchinitiative.exomiser.core.genome.dao.ClinVarDao;
+import org.monarchinitiative.exomiser.core.model.VariantEvaluation;
+import org.monarchinitiative.exomiser.core.model.pathogenicity.*;
+import org.monarchinitiative.exomiser.core.prioritisers.model.Disease;
+import org.monarchinitiative.svart.*;
+
+import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static de.charite.compbio.jannovar.annotation.VariantEffect.*;
+import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*;
+
+class AcmgSpliceEvidenceAssigner {
+
+ private AcmgSpliceEvidenceAssigner() {
+ }
+
+ /**
+ * Applies PVS1, PS1, PP3, BP4, BP7 to splice region variants according to "Using the ACMG/AMP framework to capture
+ * evidence related to predicted and observed impact on splicing: Recommendations from the ClinGen SVI Splicing Subgroup"
+ * https://doi.org/10.1016/j.ajhg.2023.06.002
+ * @param acmgEvidenceBuilder
+ * @param variantEvaluation
+ * @param modeOfInheritance
+ * @param knownDiseases
+ * @param clinVarDao
+ */
+ static void assignSpliceEvidence(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ModeOfInheritance modeOfInheritance, List knownDiseases, ClinVarDao clinVarDao) {
+ VariantEffect variantEffect = variantEvaluation.getVariantEffect();
+ PathogenicityData pathogenicityData = variantEvaluation.getPathogenicityData();
+ if (isSpliceDonorAcceptorSpliceVariant(variantEffect)) {
+ // PVS1 decision tree - this should have been independently added by the PVS1EvidenceAssigner class
+ // add PS1 modifier if PVS1 was assigned
+ assignDonorAcceptorPS1(acmgEvidenceBuilder, variantEvaluation, clinVarDao);
+ } else if (isNonDonorAcceptorSpliceRegionVariant(variantEffect)) {
+ // PP3+PS1 or BP4+BP7
+ // The database only contains SpliceAI scores > 0.1. This means that any SNP with no score in the database can be considered as
+ // not impacting splicing.
+ PathogenicityScore spliceAiScore = pathogenicityData.pathogenicityScore(PathogenicitySource.SPLICE_AI);
+ if (variantEvaluation.variantType() == VariantType.SNV) {
+ float score = spliceAiScore == null ? 0 : spliceAiScore.getScore();
+ assignSpliceAiBasedPP3Classification(acmgEvidenceBuilder, score);
+ if (acmgEvidenceBuilder.contains(PP3)) {
+ assignNonDonorAcceptorPS1(acmgEvidenceBuilder, variantEvaluation, clinVarDao);
+ } else if (acmgEvidenceBuilder.contains(BP4)) {
+ assignSilentIntronicBP7(acmgEvidenceBuilder, variantEvaluation);
+ }
+ }
+ }
+ // Note - this is independent of the Splicing guidelines BP7 criteria which is for
+ // BP7 "A synonymous (silent) variant for which splicing prediction algorithms predict no impact to the splice consensus sequence nor the creation of a new splice site AND the nucleotide is not highly conserved"
+ ClinVarData clinVarData = pathogenicityData.clinVarData();
+ assignSynonymousBP7(acmgEvidenceBuilder, variantEvaluation, pathogenicityData, clinVarData);
+ }
+
+ private static boolean assignSpliceAiBasedPP3Classification(AcmgEvidence.Builder acmgEvidenceBuilder, float spliceAiScore) {
+ // "Using the ACMG/AMP framework to capture evidence related to predicted and observed impact on splicing: Recommendations from the ClinGen SVI Splicing Subgroup"
+ // DOI: 10.1016/j.ajhg.2023.06.002
+ // variants outside of the +/-1,2 canonical splice acceptor/donor with a spliceAI:
+ // score >= 0.2 can be assigned PP3
+ // score < 0.1 can be assigned BP4
+ if (spliceAiScore >= 0.2) {
+ acmgEvidenceBuilder.add(PP3);
+ return true;
+ } else if (spliceAiScore < 0.1) {
+ acmgEvidenceBuilder.add(BP4);
+ return true;
+ }
+ return false;
+ }
+
+ private static void assignNonDonorAcceptorPS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) {
+ var localClinVarData = getClinVarDataSurroundingVariant(variantEvaluation, clinVarDao);
+ Set ps1Evidence = EnumSet.noneOf(Evidence.class);
+ for (var entry : localClinVarData.entrySet()) {
+ GenomicVariant clinVarVariant = entry.getKey();
+ ClinVarData clinVarData = entry.getValue();
+ ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation();
+ if (isNonDonorAcceptorSpliceRegionVariant(clinVarData.getVariantEffect()) && isPathOrLikelyPath(primaryInterpretation)) {
+ // same position in comparison to VUA
+ if (clinVarVariant.contains(variantEvaluation)) {
+ if (isPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.STRONG);
+ } else if (isLikelyPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.MODERATE);
+ }
+ } else {
+ // same splice region
+ if (isPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.MODERATE);
+ } else if (isLikelyPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.SUPPORTING);
+ }
+ }
+ }
+ }
+ addPs1Evidence(acmgEvidenceBuilder, ps1Evidence);
+ }
+
+ private static final Pattern CDS_INTRONIC_HGVS = Pattern.compile("c\\.\\d+(?[+-])(?\\d+)[A-Z]+>[A-Z]+");
+ private static final int DONOR_REGION_END = 6;
+ private static final int ACCEPTOR_REGION_START = 20;
+ private static void assignSilentIntronicBP7(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation) {
+ // (C) Silent (excluding last 3 nucleotides of exon and first nucleotide of exon) and intronic variants
+ // at or beyond the +7 and −21 positions (conservative designation for donor/acceptor splice region) or otherwise
+ // at or beyond the +7 and −4 positions (less conservative designation for the minimal donor/acceptor splice region).
+
+ // currently we can't use the actual transcript to calculate the variant's position in the intron/exon,
+ // so we need to use the HGVS expression instead.
+ if (variantEvaluation.hasTranscriptAnnotations()) {
+ String hgvsc = variantEvaluation.getTranscriptAnnotations().get(0).getHgvsCdna();
+ Matcher matcher = CDS_INTRONIC_HGVS.matcher(hgvsc);
+ if (matcher.matches()) {
+ String intronEnd = matcher.group("intronEnd");
+ int position = Integer.parseInt(matcher.group("intronPos"));
+ if ("+".equals(intronEnd) && position > DONOR_REGION_END || "-".equals(intronEnd) && position > ACCEPTOR_REGION_START) {
+ acmgEvidenceBuilder.add(BP7);
+ }
+ }
+ }
+ }
+
+ // BP7 "A synonymous (silent) variant for which splicing prediction algorithms predict no impact to the splice consensus sequence nor the creation of a new splice site AND the nucleotide is not highly conserved"
+ private static void assignSynonymousBP7(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, PathogenicityData pathogenicityData, ClinVarData clinVarData) {
+ boolean isReportedPorLP = clinVarData.starRating() >= 1 && isPathOrLikelyPath(clinVarData.getPrimaryInterpretation());
+ if (variantEvaluation.getVariantEffect() == SYNONYMOUS_VARIANT && !isReportedPorLP) {
+ PathogenicityScore spliceAiScore = pathogenicityData.pathogenicityScore(PathogenicitySource.SPLICE_AI);
+ if (spliceAiScore == null || spliceAiScore.getScore() < 0.1) {
+ acmgEvidenceBuilder.add(BP7);
+ }
+ }
+ }
+
+ private static void assignDonorAcceptorPS1(AcmgEvidence.Builder acmgEvidenceBuilder, VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) {
+ Evidence pvs1Evidence = acmgEvidenceBuilder.evidenceForCategory(PVS1);
+ if (pvs1Evidence == null) {
+ return;
+ }
+ // find other clinvar variants in same splice region...
+ var localClinVarData = getClinVarDataSurroundingVariant(variantEvaluation, clinVarDao);
+ for (var entry : localClinVarData.entrySet()) {
+ ClinVarData clinVarData = entry.getValue();
+ ClinVarData.ClinSig primaryInterpretation = clinVarData.getPrimaryInterpretation();
+ // Table 2. PS1 code weights for variants with same predicted splicing event as known (likely) pathogenic variant
+ VariantEffect clinVarVariantEffect = clinVarData.getVariantEffect();
+ if (isSpliceVariant(clinVarVariantEffect) && isPathOrLikelyPath(primaryInterpretation)) {
+ Set ps1Evidence = EnumSet.noneOf(Evidence.class);
+ if (pvs1Evidence == Evidence.VERY_STRONG) {
+ // same position in comparison to VUA
+ if (isSpliceDonorAcceptorSpliceVariant(clinVarVariantEffect) && isPath(primaryInterpretation) ||
+ isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isLikelyPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.SUPPORTING);
+ }
+ }
+ else if (pvs1Evidence == Evidence.STRONG || pvs1Evidence == Evidence.MODERATE || pvs1Evidence == Evidence.SUPPORTING) {
+ // same splice region as VUA
+ if (isSpliceDonorAcceptorSpliceVariant(clinVarVariantEffect) && isPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.STRONG);
+ } else if (isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.MODERATE);
+ } else if (isNonDonorAcceptorSpliceRegionVariant(clinVarVariantEffect) && isLikelyPath(primaryInterpretation)) {
+ ps1Evidence.add(Evidence.SUPPORTING );
+ }
+ }
+ addPs1Evidence(acmgEvidenceBuilder, ps1Evidence);
+ }
+ }
+ }
+
+ private static Map getClinVarDataSurroundingVariant(VariantEvaluation variantEvaluation, ClinVarDao clinVarDao) {
+ // ensure region is within contig bounds
+ Contig contig = variantEvaluation.contig();
+ var upStream = Math.max(1, variantEvaluation.start() - 25);
+ var downStream = Math.min(contig.length(), variantEvaluation.start() + 25);
+ GenomicInterval genomicInterval = GenomicInterval.of(contig, Strand.POSITIVE, Coordinates.oneBased(upStream, downStream));
+ return clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval);
+ }
+
+ private static boolean isPath(ClinVarData.ClinSig clinSig) {
+ return clinSig == ClinVarData.ClinSig.PATHOGENIC || clinSig == ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC;
+ }
+
+ private static boolean isLikelyPath(ClinVarData.ClinSig clinSig) {
+ return clinSig == ClinVarData.ClinSig.LIKELY_PATHOGENIC;
+ }
+
+ private static boolean isPathOrLikelyPath(ClinVarData.ClinSig clinSig) {
+ return clinSig == ClinVarData.ClinSig.PATHOGENIC || clinSig == ClinVarData.ClinSig.LIKELY_PATHOGENIC || clinSig == ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC;
+ }
+
+ /**
+ * Returns true if the variantEffect is a SPLICE_REGION, SPLICE_DONOR or SPLICE_ACCEPTOR type.
+ *
+ * @param variantEffect
+ * @return
+ */
+ private static boolean isSpliceVariant(VariantEffect variantEffect) {
+ return isSpliceDonorAcceptorSpliceVariant(variantEffect) || isNonDonorAcceptorSpliceRegionVariant(variantEffect);
+ }
+
+ private static boolean isNonDonorAcceptorSpliceRegionVariant(VariantEffect variantEffect) {
+ return variantEffect == SPLICE_REGION_VARIANT;
+ }
+
+ private static boolean isSpliceDonorAcceptorSpliceVariant(VariantEffect variantEffect) {
+ return variantEffect == SPLICE_DONOR_VARIANT || variantEffect == SPLICE_ACCEPTOR_VARIANT;
+ }
+
+ private static void addPs1Evidence(AcmgEvidence.Builder acmgEvidenceBuilder, Set ps1Evidence) {
+ Evidence maxPs1Strength = null;
+ if (ps1Evidence.contains(Evidence.STRONG)) {
+ maxPs1Strength = Evidence.STRONG;
+ } else if (ps1Evidence.contains(Evidence.MODERATE)) {
+ maxPs1Strength = Evidence.MODERATE;
+ } else if (ps1Evidence.contains(Evidence.SUPPORTING)) {
+ maxPs1Strength = Evidence.SUPPORTING;
+ }
+
+ if (maxPs1Strength != null) {
+ acmgEvidenceBuilder.add(PS1, maxPs1Strength);
+ }
+ }
+
+}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java
index 84743ff79..8e86c6eb3 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/GenomeAnalysisServiceImpl.java
@@ -20,6 +20,8 @@
package org.monarchinitiative.exomiser.core.genome;
+import jakarta.annotation.Nonnull;
+import org.monarchinitiative.exomiser.core.model.GeneStatistics;
import org.monarchinitiative.exomiser.core.model.*;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource;
@@ -97,17 +99,22 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
+ public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) {
return variantDataService.findClinVarRecordsOverlappingInterval(genomicInterval);
}
+
+ @Override
+ public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) {
+ return variantDataService.getGeneStatistics(geneSymbol);
+ }
}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java
index ab36e4b5a..01ae4bc2d 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/JannovarSmallVariantAnnotator.java
@@ -37,6 +37,7 @@
import java.util.*;
import java.util.stream.Stream;
+import static de.charite.compbio.jannovar.annotation.VariantEffect.MNV;
import static java.util.stream.Collectors.groupingBy;
import static java.util.stream.Collectors.toUnmodifiableList;
@@ -173,8 +174,11 @@ private List buildTranscriptAnnotations(List a
private TranscriptAnnotation toTranscriptAnnotation(Annotation annotation) {
AnnotationLocation annoLoc = annotation.getAnnoLoc();
+ // add transcript and other variant effects to TranscriptAnnotation?
+// annotation.getTranscript()
return TranscriptAnnotation.builder()
- .variantEffect(getVariantEffectOrDefault(annotation.getMostPathogenicVarType(), VariantEffect.SEQUENCE_VARIANT))
+ .variantEffect(getVariantEffectOrDefault(annotation.getEffects(), VariantEffect.SEQUENCE_VARIANT))
+// .variantEffects(annotation.getEffects())
.accession(TranscriptModelUtil.getTranscriptAccession(annotation.getTranscript()))
.geneSymbol(buildGeneSymbol(annotation))
.hgvsGenomic((annotation.getGenomicNTChange() == null) ? "" : annotation.getGenomicNTChangeStr())
@@ -199,8 +203,17 @@ private TranscriptAnnotation.RankType getRankType(RankType annoLocRankType) {
return TranscriptAnnotation.RankType.UNDEFINED;
}
- private VariantEffect getVariantEffectOrDefault(VariantEffect annotatedEffect, VariantEffect defaultEffect) {
- return annotatedEffect == null ? defaultEffect : annotatedEffect;
+ private VariantEffect getVariantEffectOrDefault(Collection annotatedEffects, VariantEffect defaultEffect) {
+ if (annotatedEffects.isEmpty()) {
+ return defaultEffect;
+ }
+ // remove MNV from annotated effects. Jannovar labels block substitutions as MNVs which will override more damaging
+ // effects such as STOP_LOSS, STOP_GAIN, SPLICE_DONOR, SPLICE_ACCEPTOR and prevent potential assignment of PVS1.
+ // This could be solved by changing TranscriptAnnotation to have a Set and methods to interrogate
+ // the state of this e.g. transcriptAnnotation.isStopLoss(), .isStopGain(). n.b. this will also affect
+ // ClinVarData and VariantEvaluation which currently only use a single VariantEffect field.
+ return annotatedEffects.size() == 1 ? annotatedEffects.iterator().next() :
+ annotatedEffects.stream().filter(effect -> effect != MNV).findFirst().orElse(defaultEffect);
}
private int getDistFromNearestGene(Annotation annotation) {
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java
index 32e925266..b4b7b91d6 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImpl.java
@@ -21,7 +21,8 @@
package org.monarchinitiative.exomiser.core.genome;
-import de.charite.compbio.jannovar.annotation.VariantEffect;
+import jakarta.annotation.Nonnull;
+import org.monarchinitiative.exomiser.core.model.GeneStatistics;
import org.monarchinitiative.exomiser.core.genome.dao.*;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData;
@@ -156,11 +157,10 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set pathogenicitySources, P
}
@Override
- public ClinVarData getClinVarData(Variant variant) {
+ public ClinVarData getClinVarData(@Nonnull Variant variant) {
return clinVarDao.getClinVarData(variant);
}
@Override
- public ClinVarData getClinVarData(GenomicVariant genomicVariant) {
+ public ClinVarData getClinVarData(@Nonnull GenomicVariant genomicVariant) {
return clinVarDao.getClinVarData(genomicVariant);
}
@Override
- public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
+ public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) {
return clinVarDao.findClinVarRecordsOverlappingInterval(genomicInterval);
}
+ @Override
+ public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) {
+ return clinVarDao.getGeneStatistics(geneSymbol);
+ }
+
public static Builder builder() {
return new Builder();
}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java
index 6ad0f2d4f..5b8d8d755 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDao.java
@@ -1,8 +1,9 @@
package org.monarchinitiative.exomiser.core.genome.dao;
+import jakarta.annotation.Nonnull;
+import org.monarchinitiative.exomiser.core.model.GeneStatistics;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
-import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.GenomicInterval;
import org.monarchinitiative.svart.GenomicVariant;
@@ -16,9 +17,11 @@
*/
public interface ClinVarDao {
- ClinVarData getClinVarData(Variant variant);
+ ClinVarData getClinVarData(@Nonnull Variant variant);
- ClinVarData getClinVarData(GenomicVariant genomicVariant);
+ ClinVarData getClinVarData(@Nonnull GenomicVariant genomicVariant);
- Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval);
+ Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval);
+
+ GeneStatistics getGeneStatistics(@Nonnull String geneSymbol);
}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java
index c897599d6..00776b74f 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStore.java
@@ -1,7 +1,9 @@
package org.monarchinitiative.exomiser.core.genome.dao;
+import de.charite.compbio.jannovar.annotation.VariantEffect;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
+import org.monarchinitiative.exomiser.core.model.GeneStatistics;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.Variant;
@@ -12,10 +14,15 @@
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
+import java.time.Duration;
+import java.time.Instant;
+import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
+import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS;
+
/**
* @since 14.0.0
*/
@@ -24,9 +31,30 @@ public class ClinVarDaoMvStore implements ClinVarDao {
private static final Logger logger = LoggerFactory.getLogger(ClinVarDaoMvStore.class);
private final MVMap clinVarMap;
+ private final Map geneStats;
public ClinVarDaoMvStore(MVStore mvStore) {
clinVarMap = MvStoreUtil.openClinVarMVMap(mvStore);
+ geneStats = calculateGeneStatistics(clinVarMap);
+ }
+
+ private Map calculateGeneStatistics(MVMap clinVarMap) {
+ Instant start = Instant.now();
+ Map geneStatisticsBuilders = new LinkedHashMap<>(1900);
+ for (Map.Entry entry : clinVarMap.entrySet()) {
+ AlleleProto.ClinVar clinVar = entry.getValue();
+ ClinVarData.ReviewStatus reviewStatus = AlleleProtoAdaptor.toReviewStatus(clinVar.getReviewStatus());
+ if (reviewStatus != CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS && reviewStatus.starRating() >= 1) {
+ VariantEffect variantEffect = AlleleProtoAdaptor.toVariantEffect(clinVar.getVariantEffect());
+ ClinVarData.ClinSig clinSig = AlleleProtoAdaptor.toClinSig(clinVar.getPrimaryInterpretation());
+ geneStatisticsBuilders.compute(clinVar.getGeneSymbol(), (k, v) -> (v == null) ? GeneStatistics.builder(k, variantEffect, clinSig) : v.put(variantEffect, clinSig));
+ }
+ }
+ final Map stats = new HashMap<>(geneStatisticsBuilders.size());
+ geneStatisticsBuilders.forEach((k, v) -> stats.put(k, v.build()));
+ Instant end = Instant.now();
+ logger.info("Created {} ClinVar gene stats in {} ms", stats.size(), Duration.between(start, end).toMillis());
+ return stats;
}
@Override
@@ -45,7 +73,7 @@ private ClinVarData getClinVarData(@Nonnull AlleleProto.AlleleKey alleleKey) {
}
@Override
- public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
+ public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) {
Contig contig = genomicInterval.contig();
int chr = genomicInterval.contigId();
@@ -80,6 +108,11 @@ public Map findClinVarRecordsOverlappingInterval(Ge
return results;
}
+ @Override
+ public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) {
+ return geneStats.getOrDefault(geneSymbol, GeneStatistics.builder(geneSymbol).build());
+ }
+
private String broadFormat(AlleleProto.AlleleKey alleleKey) {
return alleleKey.getChr() + "-" + alleleKey.getPosition() + "-" + alleleKey.getRef() + "-" + alleleKey.getAlt();
}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java
index d9e12d579..39695073f 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/AlleleProtoAdaptor.java
@@ -284,7 +284,7 @@ private static Set toClinSigSet(List proto
return converted;
}
- private static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) {
+ public static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) {
return switch (protoClinSig) {
case BENIGN -> ClinVarData.ClinSig.BENIGN;
case BENIGN_OR_LIKELY_BENIGN -> ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN;
@@ -301,11 +301,14 @@ private static ClinVarData.ClinSig toClinSig(ClinVar.ClinSig protoClinSig) {
case OTHER -> ClinVarData.ClinSig.OTHER;
case PROTECTIVE -> ClinVarData.ClinSig.PROTECTIVE;
case RISK_FACTOR -> ClinVarData.ClinSig.RISK_FACTOR;
+ case UNCERTAIN_RISK_ALLELE -> ClinVarData.ClinSig.UNCERTAIN_RISK_ALLELE;
+ case LIKELY_RISK_ALLELE -> ClinVarData.ClinSig.LIKELY_RISK_ALLELE;
+ case ESTABLISHED_RISK_ALLELE -> ClinVarData.ClinSig.ESTABLISHED_RISK_ALLELE;
case NOT_PROVIDED, UNRECOGNIZED -> ClinVarData.ClinSig.NOT_PROVIDED;
};
}
- private static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus protoReviewStatus) {
+ public static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus protoReviewStatus) {
return switch (protoReviewStatus) {
case NO_ASSERTION_PROVIDED -> ClinVarData.ReviewStatus.NO_ASSERTION_PROVIDED;
case NO_ASSERTION_CRITERIA_PROVIDED -> ClinVarData.ReviewStatus.NO_ASSERTION_CRITERIA_PROVIDED;
@@ -319,7 +322,7 @@ private static ClinVarData.ReviewStatus toReviewStatus(ClinVar.ReviewStatus prot
};
}
- private static VariantEffect toVariantEffect(AlleleProto.VariantEffect clinVarVariantEffect) {
+ public static VariantEffect toVariantEffect(AlleleProto.VariantEffect clinVarVariantEffect) {
return switch (clinVarVariantEffect) {
case SEQUENCE_VARIANT -> VariantEffect.SEQUENCE_VARIANT;
case CHROMOSOME_NUMBER_VARIATION -> VariantEffect.CHROMOSOME_NUMBER_VARIATION;
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java
new file mode 100644
index 000000000..8ffe93c6d
--- /dev/null
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/GeneStatistics.java
@@ -0,0 +1,273 @@
+package org.monarchinitiative.exomiser.core.model;
+
+import de.charite.compbio.jannovar.annotation.VariantEffect;
+import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
+
+import java.util.EnumMap;
+import java.util.Map;
+import java.util.StringJoiner;
+
+import static de.charite.compbio.jannovar.annotation.VariantEffect.*;
+
+/**
+ * Data class for collating ClinVar pathogenicity assertions against variant effect, primarily for the application of
+ * ACMG criteria BP1 and PP2.
+ *
+ * @since 14.1.0
+ */
+public class GeneStatistics {
+
+ private static final int PATH_INDEX = 0;
+ private static final int VUS_INDEX = 1;
+ private static final int BENIGN_INDEX = 2;
+
+ private static final VariantEffect[] LOF_EFFECTS = {
+ STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+ SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+ TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+ START_LOST
+ };
+
+ private final String geneSymbol;
+ private final Map clinVarCounts;
+
+ private GeneStatistics(String geneSymbol, Map clinVarCounts) {
+ this.geneSymbol = geneSymbol;
+ this.clinVarCounts = clinVarCounts;
+ }
+
+ /**
+ * @return The gene symbol for the gene related to these statistics
+ */
+ public String geneSymbol() {
+ return geneSymbol;
+ }
+
+ /**
+ * @return The sum of all pathogenic / likely pathogenic variants associated with this gene.
+ */
+ public int pathCount() {
+ return totalCountForIndex(PATH_INDEX);
+ }
+
+ /**
+ * @return The sum of all variants of uncertain significance (VUS) associated with this gene.
+ */
+ public int vusCount() {
+ return totalCountForIndex(VUS_INDEX);
+ }
+
+ /**
+ * @return The sum of all benign / likely benign variants associated with this gene.
+ */
+ public int benignCount() {
+ return totalCountForIndex(BENIGN_INDEX);
+ }
+
+ private int totalCountForIndex(int i) {
+ int count = 0;
+ for (Map.Entry variantEffectEntry : clinVarCounts.entrySet()) {
+ count += variantEffectEntry.getValue()[i];
+ }
+ return count;
+ }
+
+ /**
+ * The sum of all pathogenic / likely pathogenic Loss Of Function variants associated with this gene. These are
+ * defined here as:
+ *
+ * STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+ * SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+ * TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+ * START_LOST
+ *
+ *
+ * @return The sum of all pathogenic / likely pathogenic Loss Of Function variants associated with this gene.
+ */
+ public int lofPathCount() {
+ return pathCount(LOF_EFFECTS);
+ }
+
+ /**
+ * The sum of all variants of uncertain significance (VUS) Loss Of Function variants associated with this gene. These are
+ * defined here as:
+ *
+ * STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+ * SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+ * TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+ * START_LOST
+ *
+ *
+ * @return The sum of all variants of uncertain significance (VUS) Loss Of Function variants associated with this gene.
+ */
+ public int lofVusCount() {
+ return vusCount(LOF_EFFECTS);
+ }
+
+ /**
+ * The sum of all benign / likely benign Loss Of Function variants associated with this gene. These are defined here
+ * as:
+ *
+ * STOP_LOST, STOP_GAINED, FRAMESHIFT_ELONGATION, FRAMESHIFT_TRUNCATION, FRAMESHIFT_VARIANT,
+ * SPLICE_DONOR_VARIANT, SPLICE_ACCEPTOR_VARIANT,
+ * TRANSCRIPT_ABLATION, EXON_LOSS_VARIANT,
+ * START_LOST
+ *
+ *
+ * @return The sum of all benign / likely benign Loss Of Function variants associated with this gene.
+ */
+ public int lofBenignCount() {
+ return benignCount(LOF_EFFECTS);
+ }
+
+ /**
+ * @return The sum of all pathogenic / likely pathogenic missense variants associated with this gene.
+ */
+ public int missensePathCount() {
+ return pathCount(VariantEffect.MISSENSE_VARIANT);
+ }
+
+ /**
+ * @return The sum of all VUS missense variants associated with this gene.
+ */
+ public int missenseVusCount() {
+ return vusCount(VariantEffect.MISSENSE_VARIANT);
+ }
+
+ /**
+ * @return The sum of all benign / likely benign missense variants associated with this gene.
+ */
+ public int missenseBenignCount() {
+ return benignCount(VariantEffect.MISSENSE_VARIANT);
+ }
+
+ /**
+ * @return The sum of all pathogenic / likely pathogenic variants for the given variantEffect associated with this gene.
+ */
+ public int pathCount(VariantEffect variantEffect) {
+ return effectIndexCount(variantEffect, PATH_INDEX);
+ }
+
+ /**
+ * @return The sum of all VUS variants for the given variantEffect associated with this gene.
+ */
+ public int vusCount(VariantEffect variantEffect) {
+ return effectIndexCount(variantEffect, VUS_INDEX);
+ }
+
+ /**
+ * @return The sum of all benign / likely benign variants for the given variantEffect associated with this gene.
+ */
+ public int benignCount(VariantEffect variantEffect) {
+ return effectIndexCount(variantEffect, BENIGN_INDEX);
+ }
+
+ /**
+ * @return The sum of all pathogenic / likely pathogenic variants for the given variantEffects associated with this gene.
+ */
+ public int pathCount(VariantEffect... variantEffect) {
+ return effectIndexCounts(variantEffect, PATH_INDEX);
+ }
+
+ /**
+ * @return The sum of all VUS variants for the given variantEffects associated with this gene.
+ */
+ public int vusCount(VariantEffect... variantEffect) {
+ return effectIndexCounts(variantEffect, VUS_INDEX);
+ }
+
+ /**
+ * @return The sum of all benign / likely benign variants for the given variantEffects associated with this gene.
+ */
+ public int benignCount(VariantEffect... variantEffect) {
+ return effectIndexCounts(variantEffect, BENIGN_INDEX);
+ }
+
+ private int effectIndexCounts(VariantEffect[] variantEffect, int index) {
+ int count = 0;
+ for (int i = 0; i < variantEffect.length; i++) {
+ count += effectIndexCount(variantEffect[i], index);
+ }
+ return count;
+ }
+
+ private int effectIndexCount(VariantEffect variantEffect, int i) {
+ return clinVarCounts.getOrDefault(variantEffect, new int[3])[i];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder statsBuilder = new StringBuilder("{");
+ clinVarCounts.forEach((key, value) -> statsBuilder.append(key).append("=").append("[").append(prettyStats(value)).append("], "));
+ if (statsBuilder.length() > 1) {
+ statsBuilder.delete(statsBuilder.length() - 2, statsBuilder.length());
+ }
+ statsBuilder.append("}");
+ return "GeneStatistics{" +
+ "geneSymbol=" + geneSymbol + ',' +
+ " counts=" + statsBuilder +
+ '}';
+ }
+
+ private static final String[] categories = {"P", "VUS", "B"};
+
+ private String prettyStats(int[] counts) {
+ StringJoiner countStr = new StringJoiner(", ");
+ for (int i = 0; i < 3; i++) {
+ countStr.add(categories[i] + "=" + counts[i]);
+ }
+ return countStr.toString();
+ }
+
+ public static Builder builder(String geneSymbol) {
+ return new Builder(geneSymbol);
+ }
+
+ public static Builder builder(String geneSymbol, VariantEffect variantEffect, ClinVarData.ClinSig clinSig) {
+ return new Builder(geneSymbol, variantEffect, clinSig);
+ }
+
+ public static class Builder {
+
+ private final String geneSymbol;
+ private final Map geneStats = new EnumMap<>(VariantEffect.class);
+
+ public Builder(String geneSymbol) {
+ this.geneSymbol = geneSymbol;
+ }
+
+ public Builder(String geneSymbol, VariantEffect variantEffect, ClinVarData.ClinSig clinSig) {
+ this.geneSymbol = geneSymbol;
+ put(variantEffect, clinSig);
+ }
+
+ public Builder put(VariantEffect variantEffect, ClinVarData.ClinSig clinSig) {
+ int countIndex = getCountIndex(clinSig);
+ if (countIndex != -1) {
+ int[] counts = geneStats.get(variantEffect);
+ if (counts == null) {
+ counts = new int[3];
+ counts[countIndex]++;
+ geneStats.put(variantEffect, counts);
+ } else {
+ counts[countIndex]++;
+ }
+ }
+ return this;
+ }
+
+ private static int getCountIndex(ClinVarData.ClinSig clinSig) {
+ return switch (clinSig) {
+ case PATHOGENIC, PATHOGENIC_OR_LIKELY_PATHOGENIC, LIKELY_PATHOGENIC -> PATH_INDEX;
+ case UNCERTAIN_SIGNIFICANCE -> VUS_INDEX;
+ case LIKELY_BENIGN, BENIGN_OR_LIKELY_BENIGN, BENIGN -> BENIGN_INDEX;
+ default -> -1;
+ };
+ }
+
+ public GeneStatistics build() {
+ return new GeneStatistics(geneSymbol, geneStats);
+ }
+ }
+
+}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java
index 7ba62d7af..372668572 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/TranscriptAnnotation.java
@@ -38,6 +38,9 @@ public enum RankType {
private final VariantEffect variantEffect;
+// private final Set variantEffects;
+
+
private final String geneSymbol;
private final String accession;
@@ -75,6 +78,10 @@ public VariantEffect getVariantEffect() {
return variantEffect;
}
+// public Set getVariantEffects() {
+// return variantEffects;
+// }
+
public String getGeneSymbol() {
return geneSymbol;
}
@@ -148,6 +155,8 @@ public static class Builder {
private VariantEffect variantEffect = VariantEffect.SEQUENCE_VARIANT;
+// private Set variantEffects = EnumSet.noneOf(VariantEffect.class);
+
private String geneSymbol = "";
private String accession = "";
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java
index d597fb17d..4719c550f 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/ClinVarData.java
@@ -37,6 +37,10 @@ public class ClinVarData {
private static final ClinVarData EMPTY = new Builder().build();
+ /**
+ * Based on the categories from the ClinVar docs and
+ * what actually shows up in the VCF file.
+ */
public enum ClinSig {
// ACMG/AMP-based
PATHOGENIC,
@@ -47,6 +51,11 @@ public enum ClinSig {
BENIGN_OR_LIKELY_BENIGN,
BENIGN,
CONFLICTING_PATHOGENICITY_INTERPRETATIONS,
+ // As recommended by ClinGen (https://pubmed.ncbi.nlm.nih.gov/38054408/) for variants with decreased penetrance
+ // for Mendelian diseases.
+ UNCERTAIN_RISK_ALLELE,
+ LIKELY_RISK_ALLELE,
+ ESTABLISHED_RISK_ALLELE,
//Non-ACMG-based
AFFECTS,
ASSOCIATION,
@@ -83,11 +92,16 @@ public int starRating() {
public static ReviewStatus parseReviewStatus(String reviewStatus) {
return switch (reviewStatus.replace("_", " ")) {
- case "no assertion criteria provided" -> NO_ASSERTION_CRITERIA_PROVIDED;
- case "no interpretation for the single variant", "no interpretation for the individual variant" ->
- NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT;
+ case "no assertion provided",
+ "no assertion criteria provided",
+ "no classification provided",
+ "no classifications from unflagged records" -> NO_ASSERTION_CRITERIA_PROVIDED;
+ case "no interpretation for the single variant",
+ "no interpretation for the individual variant",
+ "no classification for the single variant"-> NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT;
case "criteria provided, single submitter" -> CRITERIA_PROVIDED_SINGLE_SUBMITTER;
- case "criteria provided, conflicting interpretations" -> CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS;
+ case "criteria provided, conflicting interpretations",
+ "criteria provided, conflicting classifications" -> CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS;
case "criteria provided, multiple submitters, no conflicts" ->
CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS;
case "reviewed by expert panel" -> REVIEWED_BY_EXPERT_PANEL;
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java
index 6c42cd088..335886ac5 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/PathogenicityData.java
@@ -139,6 +139,8 @@ public boolean hasPredictedScore(PathogenicitySource pathogenicitySource) {
* @param pathogenicitySource
* @return
*/
+
+ @Nullable
public PathogenicityScore pathogenicityScore(PathogenicitySource pathogenicitySource) {
return pathogenicityScores.get(pathogenicitySource);
}
diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java
index c03557550..7fa2a99d1 100644
--- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java
+++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/model/pathogenicity/SpliceAiScore.java
@@ -1,7 +1,24 @@
package org.monarchinitiative.exomiser.core.model.pathogenicity;
+/**
+ * The SpliceAI (Jaganathan et al, Cell 2019) max delta score.
+ * This is the max score from the DL/DG/AL/AG scores >= minScore. The min score for this was set to 0.2, so scores lower
+ * than this will likely not be present in the database, unless guidance changes later.
+ *
+ * Delta score of a variant, defined as the maximum of (DS_AG, DS_AL, DS_DG, DS_DL), ranges from 0 to 1 and can be interpreted as the probability of the variant being splice-altering. In the paper, a detailed characterization is provided for 0.2 (high recall), 0.5 (recommended), and 0.8 (high precision) cutoffs. Delta position conveys information about the location where splicing changes relative to the variant position (positive values are downstream of the variant, negative values are upstream).
+ *
+ *
+ * These annotations are free for academic and not-for-profit use; other use requires a commercial license from Illumina, Inc.
+ *
+ * https://github.com/Illumina/SpliceAI
+ **/
public class SpliceAiScore extends BasePathogenicityScore {
+ public static float NON_SPLICEOGENIC_SCORE = 0.1f;
+ public static float PERMISSIVE_SPLICEOGENIC_SCORE = 0.2f;
+ public static float DEFAULT_SPLICEOGENIC_SCORE = 0.5f;
+ public static float HIGH_CONFIDENCE_SPLICEOGENIC_SCORE = 0.8f;
+
public static SpliceAiScore of(float score) {
return new SpliceAiScore(score);
}
diff --git a/exomiser-core/src/main/proto/allele.proto b/exomiser-core/src/main/proto/allele.proto
index 9a06eabf8..b60781860 100644
--- a/exomiser-core/src/main/proto/allele.proto
+++ b/exomiser-core/src/main/proto/allele.proto
@@ -161,6 +161,11 @@ message ClinVar {
OTHER = 12;
PROTECTIVE = 13;
RISK_FACTOR = 14;
+ // As recommended by ClinGen (https://pubmed.ncbi.nlm.nih.gov/38054408/) for variants with decreased penetrance
+ // for Mendelian diseases.
+ UNCERTAIN_RISK_ALLELE = 15;
+ LIKELY_RISK_ALLELE = 16;
+ ESTABLISHED_RISK_ALLELE = 17;
}
enum ReviewStatus {
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java
index a60df8c2e..5866028fd 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/Acmg2015EvidenceAssignerTest.java
@@ -27,8 +27,8 @@
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;
import org.monarchinitiative.exomiser.core.genome.TestFactory;
-import org.monarchinitiative.exomiser.core.genome.TestGenomeDataService;
import org.monarchinitiative.exomiser.core.genome.TestVariantDataService;
+import org.monarchinitiative.exomiser.core.genome.VariantDataService;
import org.monarchinitiative.exomiser.core.model.*;
import org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Status;
import org.monarchinitiative.exomiser.core.model.frequency.Frequency;
@@ -42,7 +42,9 @@
import org.monarchinitiative.exomiser.core.prioritisers.model.Disease;
import org.monarchinitiative.exomiser.core.prioritisers.model.InheritanceMode;
+import java.util.LinkedHashMap;
import java.util.List;
+import java.util.Map;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
@@ -52,6 +54,7 @@
import static org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex.FEMALE;
import static org.monarchinitiative.exomiser.core.model.Pedigree.Individual.Sex.MALE;
import static org.monarchinitiative.exomiser.core.model.Pedigree.justProband;
+import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*;
class Acmg2015EvidenceAssignerTest {
@@ -60,31 +63,61 @@ private Acmg2015EvidenceAssigner acmgEvidenceAssigner(String probandId, Pedigree
return new Acmg2015EvidenceAssigner(probandId, pedigree, TestVariantDataService.stub());
}
+ private Acmg2015EvidenceAssigner acmgEvidenceAssigner(String probandId, Pedigree pedigree, Map expectedClinVarData) {
+ return new Acmg2015EvidenceAssigner(probandId, pedigree, TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build());
+ }
+
@Test
void throwsExceptionWithMismatchedIds() {
assertThrows(IllegalArgumentException.class, () -> acmgEvidenceAssigner("Zaphod", justProband("Ford", MALE)));
}
-
- @Test
- void testAssignsPVS1() {
+
+ @ParameterizedTest
+ @CsvSource({
+ // nonsense, middle-exon NMD predicted
+ "STOP_LOST, 2, VERY_STRONG",
+ "STOP_GAINED, 2, VERY_STRONG",
+ "FRAMESHIFT_ELONGATION, 2, VERY_STRONG",
+ "FRAMESHIFT_TRUNCATION, 2, VERY_STRONG",
+ "FRAMESHIFT_VARIANT, 2, VERY_STRONG",
+ // nonsense, last exon - NMD escape
+ "STOP_LOST, 5, STRONG",
+ "STOP_GAINED, 5, STRONG",
+ "FRAMESHIFT_ELONGATION, 5, STRONG",
+ "FRAMESHIFT_TRUNCATION, 5, STRONG",
+ "FRAMESHIFT_VARIANT, 5, STRONG",
+ // splice donor/acceptor site, middle-exon NMD predicted
+ "SPLICE_DONOR_VARIANT, 2, VERY_STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, 2, VERY_STRONG",
+ // splice donor/acceptor site, last exon - NMD escape
+ "SPLICE_DONOR_VARIANT, 5, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, 5, STRONG",
+ // transcript ablation / exon loss
+ "TRANSCRIPT_ABLATION, 2, VERY_STRONG", // sufficient as a STAND_ALONE in the case of HI genes and no conflicting evidence
+ "EXON_LOSS_VARIANT, 2, VERY_STRONG", // NMD
+ "EXON_LOSS_VARIANT, 5, STRONG", // NMD escape
+ // start lost
+ "START_LOST, 1, MODERATE",
+ })
+ void testAssignsPVS1(VariantEffect variantEffect, int exonNumber, AcmgCriterion.Evidence evidence) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
// https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel
// requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1
TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder()
- .variantEffect(VariantEffect.START_LOST)
+ .variantEffect(variantEffect)
.rankType(TranscriptAnnotation.RankType.EXON)
- .rank(1)
+ .rank(exonNumber)
.rankTotal(5)
.build();
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
.geneSymbol("PTEN")
.frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
- .variantEffect(VariantEffect.START_LOST)
+ .variantEffect(variantEffect)
.annotations(List.of(transcriptAnnotation))
.build();
Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of());
- assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PVS1).build()));
+ assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PVS1, evidence).build()));
}
@ParameterizedTest
@@ -113,7 +146,7 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", probandSex));
// https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel
TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder()
- .variantEffect(VariantEffect.START_LOST)
+ .variantEffect(VariantEffect.SPLICE_DONOR_VARIANT)
.rankType(TranscriptAnnotation.RankType.EXON)
.rank(1)
.rankTotal(5)
@@ -121,7 +154,7 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
.geneSymbol("PTEN")
.frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
- .variantEffect(VariantEffect.START_LOST)
+ .variantEffect(VariantEffect.SPLICE_DONOR_VARIANT)
.annotations(List.of(transcriptAnnotation))
.build();
Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(diseaseInheritanceMode).diseaseType(Disease.DiseaseType.DISEASE).build();
@@ -131,6 +164,107 @@ void testAssignsPVS1(Pedigree.Individual.Sex probandSex, InheritanceMode disease
assertThat(acmgEvidence, equalTo(expected));
}
+ @ParameterizedTest
+ @CsvSource({
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 3, PS1, STRONG",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 2, PS1, STRONG",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 1, PS1, MODERATE",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Thr), PATHOGENIC, 0, PS1, SUPPORTING",
+
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 3, PM5, MODERATE",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 2, PM5, MODERATE",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 1, PM5, SUPPORTING",
+ "MISSENSE_VARIANT, p.(Ala42Thr), p.(Ala42Cys), PATHOGENIC, 0, PM5, SUPPORTING",
+ })
+ void testAssignsPS1orPM5(VariantEffect variantEffect, String hgvsP, String clinVarHgvsP, ClinVarData.ClinSig clinSig, int clinVarStarRating, AcmgCriterion acmgCriterion, AcmgCriterion.Evidence evidence) {
+ ClinVarData clinVarData = ClinVarData.builder()
+ .primaryInterpretation(clinSig)
+ .variantEffect(VariantEffect.MISSENSE_VARIANT)
+ .reviewStatus(starRatingToReviewStatus(clinVarStarRating))
+ .hgvsProtein(clinVarHgvsP)
+ .build();
+ Variant overlapping = TestFactory.variantBuilder(10, 89624227, "A", "C").build();
+
+ VariantDataService variantDataService = TestVariantDataService.builder().put(overlapping, clinVarData).build();
+
+ TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder()
+ .variantEffect(variantEffect)
+ .hgvsProtein(hgvsP)
+ .build();
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
+ .geneSymbol("GENE")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .variantEffect(variantEffect)
+ .annotations(List.of(transcriptAnnotation))
+ .build();
+
+ Acmg2015EvidenceAssigner instance = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), variantDataService);
+
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+ AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of());
+ assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(acmgCriterion, evidence).build()));
+ }
+
+ private ClinVarData.ReviewStatus starRatingToReviewStatus(int clinVarStarRating) {
+ return switch (clinVarStarRating) {
+ case 4 -> ClinVarData.ReviewStatus.PRACTICE_GUIDELINE;
+ case 3 -> ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL;
+ case 2 -> ClinVarData.ReviewStatus.CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS;
+ case 1 -> ClinVarData.ReviewStatus.CRITERIA_PROVIDED_SINGLE_SUBMITTER;
+ default -> ClinVarData.ReviewStatus.NO_ASSERTION_CRITERIA_PROVIDED;
+ };
+ }
+
+ @ParameterizedTest
+ @CsvSource({
+ // requires 4 or more missense P/LP variants in the local region
+ "1, 3, 0, 0, 0, PM1, MODERATE",
+ "3, 3, 0, 0, 0, PM1, MODERATE",
+ "0, 4, 0, 0, 0, PM1, MODERATE",
+ "0, 4, 3, 0, 0, PM1, MODERATE",
+ "4, 0, 3, 0, 0, PM1, MODERATE",
+ "0, 4, 4, 0, 0, PM1, SUPPORTING",
+ "4, 0, 4, 0, 0, PM1, SUPPORTING",
+ "0, 0, 0, 0, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty
+ "0, 0, 4, 0, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty
+ "0, 0, 4, 1, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty
+ "4, 0, 4, 1, 0, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty
+ "4, 0, 0, 1, 1, BP1, SUPPORTING", // Here BP1 is acting as a null marker, results should be empty
+ })
+ void assignPM1(int p, int lp, int vus, int lb, int b, AcmgCriterion expectedCriteria, Evidence expectedEvidence) {
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
+ .geneSymbol("GENE")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .variantEffect(VariantEffect.MISSENSE_VARIANT)
+ .annotations(List.of(TranscriptAnnotation.builder()
+ .build()))
+ .build();
+
+ TestVariantDataService.Builder variantDataServiceBuilder = TestVariantDataService.builder();
+
+ int[] categoryCounts = {p, lp, vus, lb, b};
+ ClinVarData.ClinSig[] categories = {PATHOGENIC, LIKELY_PATHOGENIC, UNCERTAIN_SIGNIFICANCE, LIKELY_BENIGN, BENIGN};
+ int currentPos = variantEvaluation.start();
+ for (int i = 0; i < categoryCounts.length; i++) {
+ for (int j = 0; j < categoryCounts[i]; j++) {
+ Variant variant = TestFactory.variantBuilder(10, ++currentPos, "A", "C").build();
+ ClinVarData clinVarData = ClinVarData.builder()
+ .primaryInterpretation(categories[i])
+ .variantEffect(VariantEffect.MISSENSE_VARIANT)
+ .build();
+ variantDataServiceBuilder.put(variant, clinVarData);
+ }
+ }
+ TestVariantDataService variantDataService = variantDataServiceBuilder.build();
+ Acmg2015EvidenceAssigner instance = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), variantDataService);
+
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+ AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of());
+ AcmgEvidence expected = expectedCriteria == BP1 ? AcmgEvidence.empty() : AcmgEvidence.builder().add(expectedCriteria, expectedEvidence).build();
+ assertThat(acmgEvidence, equalTo(expected));
+ }
+
+
@Test
void testAssignsPS2() {
Individual proband = Individual.builder().id("proband").motherId("mother").fatherId("father").sex(MALE).status(Status.AFFECTED).build();
@@ -267,7 +401,7 @@ void testAssignsPM3() {
.geneSymbol("PTEN")
// n.b. start loss variant - will trigger PVS1
.variantEffect(VariantEffect.START_LOST)
- .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
+ .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
.sampleGenotypes(SampleGenotypes.of(
SampleData.of("proband", SampleGenotype.parseGenotype("1|0"))
))
@@ -294,7 +428,7 @@ void testAssignsBP2_InCisWithPathAR() {
.build();
VariantEvaluation pathogenic = TestFactory.variantBuilder(10, 89624227, "A", "G")
- .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
+ .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
.sampleGenotypes(SampleGenotypes.of(
SampleData.of("proband", SampleGenotype.parseGenotype("0|1"))
))
@@ -329,7 +463,7 @@ void testAssignsBP2_InTransWithPathAD() {
.geneSymbol("PTEN")
// n.b. start loss variant - will trigger PVS1
.variantEffect(VariantEffect.START_LOST)
- .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
+ .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.REVIEWED_BY_EXPERT_PANEL).build()))
.sampleGenotypes(SampleGenotypes.of(
SampleData.of("proband", SampleGenotype.parseGenotype("1|0"))
))
@@ -402,11 +536,13 @@ void testAssignsPP3() {
}
@ParameterizedTest
- @CsvSource ({
+ @CsvSource({
"MVP, 1.0f, , ",
- "REVEL, 1.0f, PP3, STRONG"
+ "REVEL, 1.0f, PP3, STRONG",
+ "CADD, 0.0f, , ",
+ "REVEL, 0.0f, BP4, SUPPORTING"
})
- void testAssignsPP3_singleScoreIsInsufficientUnlessItsRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) {
+ void testAssignsPP3BP4_singleScoreIsInsufficientUnlessItsRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
.geneSymbol("PTEN")
@@ -475,26 +611,7 @@ void testAssignsBP4() {
assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.BP4).build()));
}
- @ParameterizedTest
- @CsvSource ({
- "CADD, 0.0f, , ",
- "REVEL, 0.0f, BP4, VERY_STRONG"
- })
- void testAssignsBP4_singleScoreIsInsufficientIfNotRevel(PathogenicitySource pathogenicitySource, float pathogenicityScore, AcmgCriterion acmgCriterion, Evidence evidence) {
- Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
- VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
- .geneSymbol("PTEN")
- .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
- .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(pathogenicitySource, pathogenicityScore)))
- .variantEffect(VariantEffect.MISSENSE_VARIANT)
- .build();
- Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
- AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), List.of());
-
- AcmgEvidence expected = acmgCriterion == null ? AcmgEvidence.empty() : AcmgEvidence.builder().add(acmgCriterion, evidence).build();
- assertThat(acmgEvidence, equalTo(expected));
- }
-
+ @Test
void testAssignsBP4_majorityMustBeBenign() {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
@@ -513,15 +630,15 @@ void testAssignsBP4_majorityMustBeBenign() {
}
@ParameterizedTest
- @CsvSource ({
+ @CsvSource({
"0.932, PP3, STRONG",
"0.773, PP3, MODERATE",
"0.644f, PP3, SUPPORTING",
"0.290f, BP4, SUPPORTING",
- "0.183f, BP4, MODERATE",
- "0.016f, BP4, STRONG",
- "0.003f, BP4, VERY_STRONG",
+ "0.183f, BP4, SUPPORTING",
+ "0.016f, BP4, SUPPORTING",
+ "0.003f, BP4, SUPPORTING",
})
public void testRevelOverridesAllOtherScores(float revelScore, AcmgCriterion acmgCriterion, Evidence evidence) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
@@ -545,8 +662,12 @@ public void testRevelOverridesAllOtherScores(float revelScore, AcmgCriterion acm
}
// PP4
- @Test
- void testAssignsPP4() {
+ @ParameterizedTest
+ @CsvSource({
+ "0.51, SUPPORTING",
+ "0.7, MODERATE",
+ })
+ void testAssignsPP4(double phenotypeScore, Evidence evidence) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
.geneSymbol("PTEN")
@@ -555,10 +676,11 @@ void testAssignsPP4() {
.build();
Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
// High phenotype match triggers - PP4
- List> compatibleDiseaseMatches = List.of(ModelPhenotypeMatch.of(0.6, cowdenSyndrome, List.of()));
+ List> compatibleDiseaseMatches = List.of(ModelPhenotypeMatch.of(phenotypeScore, cowdenSyndrome, List.of()));
AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(cowdenSyndrome), compatibleDiseaseMatches);
- assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.PP4).build()));
+ AcmgEvidence expected = evidence == null ? AcmgEvidence.empty() : AcmgEvidence.builder().add(PP4, evidence).build();
+ assertThat(acmgEvidence, equalTo(expected));
}
@Nested
@@ -582,7 +704,7 @@ void testAssignsPP5(String reviewStatus, AcmgCriterion.Evidence evidence) {
.frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f)))
// n.b. missense variant - will not trigger PVS1
.variantEffect(VariantEffect.MISSENSE_VARIANT)
- .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build()))
+ .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(PATHOGENIC).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build()))
.build();
Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
@@ -599,8 +721,8 @@ void testAssignsPP5(String reviewStatus, AcmgCriterion.Evidence evidence) {
value = {
"criteria provided, single submitter; SUPPORTING",
"criteria provided, multiple submitters, no conflicts; STRONG",
- "reviewed by expert panel; STRONG",
- "practice guideline; STRONG",
+ "reviewed by expert panel; VERY_STRONG",
+ "practice guideline; VERY_STRONG",
})
void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", Pedigree.empty());
@@ -612,7 +734,7 @@ void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) {
.frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AFRICAN_INC_AFRICAN_AMERICAN, 1.42f)))
// n.b. missense variant - will not trigger PVS1
.variantEffect(VariantEffect.MISSENSE_VARIANT)
- .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(ClinVarData.ClinSig.BENIGN).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build()))
+ .pathogenicityData(PathogenicityData.of(ClinVarData.builder().primaryInterpretation(BENIGN).reviewStatus(ClinVarData.ReviewStatus.parseReviewStatus(reviewStatus)).build()))
.build();
Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
@@ -624,18 +746,32 @@ void testAssignsBP6(String reviewStatus, AcmgCriterion.Evidence evidence) {
}
}
- @Test
- void testAssignsBA1() {
+ @ParameterizedTest
+ @CsvSource({
+ "ESP_AA, true",
+ "THOUSAND_GENOMES, true",
+ "TOPMED, true",
+ "UK10K, true",
+ "GNOMAD_E_ASJ, false",
+ "GNOMAD_E_FIN, false",
+ "GNOMAD_E_OTH, false",
+ "GNOMAD_G_AMI, false",
+ "GNOMAD_G_MID, false",
+ "GNOMAD_E_AFR, true",
+ "GNOMAD_G_AMR, true",
+ "GNOMAD_G_NFE, true",
+ })
+ void testAssignsBA1(FrequencySource frequencySource, boolean expected) {
Acmg2015EvidenceAssigner instance = acmgEvidenceAssigner("proband", justProband("proband", MALE));
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
.geneSymbol("PTEN")
- // high allele freq - triggers BA1 assignment
- .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 5.0f)))
+ // high allele freq IN A GNOMAD NON-FOUNDER POP - triggers BA1 assignment
+ .frequencyData(FrequencyData.of(Frequency.of(frequencySource, 5.0f)))
.variantEffect(VariantEffect.MISSENSE_VARIANT)
.build();
AcmgEvidence acmgEvidence = instance.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(variantEvaluation), List.of(), List.of());
- assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(AcmgCriterion.BA1).build()));
+ assertThat(acmgEvidence.hasCriterion(AcmgCriterion.BA1), equalTo(expected));
}
@Test
@@ -684,4 +820,182 @@ void testAssignsBS4() {
assertThat(acmgEvidence, equalTo(AcmgEvidence.builder().add(BS4).build()));
}
+
+ @Nested
+ class SplicingEvidence {
+
+ @ParameterizedTest
+ @CsvSource(value = {
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, '', -",
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, SUPPORTING",
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|PATHOGENIC, -",
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+ "SPLICE_ACCEPTOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE:SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+
+ "SPLICE_ACCEPTOR_VARIANT, STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, MODERATE, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, SUPPORTING, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, STRONG, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, STRONG, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, MODERATE, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_ACCEPTOR_VARIANT, SUPPORTING, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, STRONG, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+ "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_DONOR_VARIANT|PATHOGENIC, STRONG",
+
+ "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE",
+ "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE",
+ "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|PATHOGENIC, MODERATE",
+
+ "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+ "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+ "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, SUPPORTING",
+
+ "SPLICE_DONOR_VARIANT, VERY_STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -",
+ "SPLICE_DONOR_VARIANT, VERY_STRONG, MISSENSE_VARIANT|PATHOGENIC, -",
+ "SPLICE_DONOR_VARIANT, STRONG, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -",
+ "SPLICE_DONOR_VARIANT, MODERATE, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -",
+ "SPLICE_DONOR_VARIANT, SUPPORTING, SPLICE_REGION_VARIANT|UNCERTAIN_SIGNIFICANCE, -",
+ }, nullValues = {"-"})
+ void testAssignSpliceAcceptorDonorPS1(VariantEffect variantEffect, Evidence pvs1Evidence, String clinVarVariants, Evidence ps1Evidence) {
+ // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel
+ // requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1
+
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C")
+ .geneSymbol("PTEN")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .variantEffect(variantEffect)
+// .annotations(List.of(transcriptAnnotation))
+ .build();
+ Map expectedClinVarData = parseExpectedClinvarData(variantEvaluation, clinVarVariants);
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+
+ VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build();
+ AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder()
+ .add(PVS1, pvs1Evidence);
+ AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService);
+
+ AcmgEvidence.Builder expected = AcmgEvidence.builder().add(PVS1, pvs1Evidence);
+ if (ps1Evidence != null) {
+ expected.add(PS1, ps1Evidence);
+ }
+ assertThat(acmgEvidenceBuilder.build(), equalTo(expected.build()));
+ }
+
+ @ParameterizedTest
+ @CsvSource(value = {
+ // same variant location
+ "SPLICE_REGION_VARIANT, 0.0, '', BP4",
+ "SPLICE_REGION_VARIANT, 0.09, '', BP4",
+ "SPLICE_REGION_VARIANT, 0.1, '', ",
+ "SPLICE_REGION_VARIANT, 0.2, SPLICE_ACCEPTOR_VARIANT|PATHOGENIC, PP3",
+ "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|PATHOGENIC, PP3 PS1_Strong",
+ "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, PP3 PS1_Moderate",
+ })
+ void testAssignNonDonorAcceptorSpliceRegionPS1(VariantEffect variantEffect, float spliceAiScore, String clinVarVariants, String acmgEvidence) {
+ // https://www.ncbi.nlm.nih.gov/clinvar/variation/484600/ 3* PATHOGENIC variant - reviewed by expert panel
+ // requires variant to be on a transcript predicted to undergo NMD in a LoF-intolerant gene for full PVS1
+
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C")
+ .geneSymbol("PTEN")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore)))
+ .variantEffect(variantEffect)
+ .build();
+ Map expectedClinVarData = parseExpectedClinvarData(variantEvaluation, clinVarVariants);
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+
+ VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build();
+ AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder();
+ AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService);
+
+ AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence);
+ assertThat(acmgEvidenceBuilder.build(), equalTo(expected));
+ }
+
+ @ParameterizedTest
+ @CsvSource(value = {
+ // different variant location
+ "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|PATHOGENIC, PP3 PS1_Moderate",
+ "SPLICE_REGION_VARIANT, 0.2, SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC, PP3 PS1_Supporting",
+
+ })
+ void testAssignNonDonorAcceptorSpliceRegionPS1VuaDifferentToExistingClinVarVariant(VariantEffect variantEffect, float spliceAiScore, String clinVarVariants, String acmgEvidence) {
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89693009, "G", "C")
+ .geneSymbol("PTEN")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore)))
+ .variantEffect(variantEffect)
+ .build();
+
+ VariantEvaluation knownSpliceRegionVariant = TestFactory.variantBuilder(10, variantEvaluation.start() + 1, "G", "C")
+ .geneSymbol("PTEN")
+ .variantEffect(variantEffect)
+ .build();
+ Map expectedClinVarData = parseExpectedClinvarData(knownSpliceRegionVariant, clinVarVariants);
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+
+ VariantDataService testVariantDataService = TestVariantDataService.builder().expectedClinVarData(expectedClinVarData).build();
+ AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder();
+ AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), testVariantDataService);
+
+ AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence);
+ assertThat(acmgEvidenceBuilder.build(), equalTo(expected));
+ }
+
+ @ParameterizedTest
+ @CsvSource(value = {
+ "INTRON_VARIANT, c.300+7A>C, 0.7, ",
+ "SYNONYMOUS_VARIANT, c.300A>C, 0.3, ",
+ "SPLICE_REGION_VARIANT, c.300+7A>C, 0.8, PP3",
+ "SPLICE_REGION_VARIANT, c.300+7A>C, 0.1, ",
+ "SPLICE_REGION_VARIANT, c.300+6A>C, 0.0, BP4",
+ "SPLICE_REGION_VARIANT, c.300+7A>C, 0.0, BP4 BP7",
+ "SPLICE_REGION_VARIANT, c.400-20A>C, 0.0, BP4",
+ "SPLICE_REGION_VARIANT, c.400-21A>C, 0.0, BP4 BP7",
+ "SPLICE_REGION_VARIANT, c.400-22A>C, 0.0, BP4 BP7",
+ })
+ void testSilentAndIntronicBP7(VariantEffect variantEffect, String hgvsc, float spliceAiScore, String acmgEvidence) {
+ TranscriptAnnotation transcriptAnnotation = TranscriptAnnotation.builder()
+ .variantEffect(variantEffect)
+ .hgvsCdna(hgvsc)
+ .build();
+ VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
+ .geneSymbol("PTEN")
+ .frequencyData(FrequencyData.of(Frequency.of(FrequencySource.EXAC_AMERICAN, 0.1f))) // prevent PM2 assignment
+ .pathogenicityData(PathogenicityData.of(PathogenicityScore.of(PathogenicitySource.SPLICE_AI, spliceAiScore)))
+ .variantEffect(variantEffect)
+ .annotations(List.of(transcriptAnnotation))
+ .build();
+ Disease cowdenSyndrome = Disease.builder().diseaseId("OMIM:158350").diseaseName("COWDEN SYNDROME 1; CWS1").inheritanceMode(InheritanceMode.AUTOSOMAL_DOMINANT).diseaseType(Disease.DiseaseType.DISEASE).build();
+ AcmgEvidence.Builder acmgEvidenceBuilder = AcmgEvidence.builder();
+ AcmgSpliceEvidenceAssigner.assignSpliceEvidence(acmgEvidenceBuilder, variantEvaluation, ModeOfInheritance.AUTOSOMAL_DOMINANT, List.of(cowdenSyndrome), TestVariantDataService.stub());
+
+ AcmgEvidence expected = AcmgEvidence.parseAcmgEvidence(acmgEvidence);
+ assertThat(acmgEvidenceBuilder.build(), equalTo(expected));
+ }
+
+ private Map parseExpectedClinvarData(VariantEvaluation variantEvaluation, String clinVarVariants) {
+ // SPLICE_ACCEPTOR_VARIANT|PATHOGENIC:SPLICE_REGION_VARIANT|LIKELY_PATHOGENIC:MISSENSE_VARIANT|UNKNOWN_SIGNIFICANCE
+ if (clinVarVariants.isEmpty()) {
+ return Map.of();
+ }
+ Map clinVarData = new LinkedHashMap<>();
+ String[] clinVars = clinVarVariants.split(":");
+ for (int i = 0; i < clinVars.length; i++) {
+ String[] clinVarInfo = clinVars[i].split("\\|");
+ Variant variant = VariantEvaluation.builder().variant(variantEvaluation.contig(), variantEvaluation.strand(), variantEvaluation.coordinates().extend(+i, -i), "A", "T").build();
+ ClinVarData clinVar = ClinVarData.builder()
+ .variantEffect(VariantEffect.valueOf(clinVarInfo[0]))
+ .primaryInterpretation(ClinVarData.ClinSig.valueOf(clinVarInfo[1]))
+ .build();
+ clinVarData.put(variant, clinVar);
+ }
+ return clinVarData;
+ }
+
+ }
}
\ No newline at end of file
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java
index 05a47eb8a..a6f813e12 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgAssignmentCalculatorTest.java
@@ -66,6 +66,8 @@ void calculatePathAcmgAssignments() {
.rankType(TranscriptAnnotation.RankType.EXON)
.rank(1)
.rankTotal(9)
+ .hgvsCdna("c.1A>C")
+ .hgvsProtein("p.0?")
.build();
VariantEvaluation variantEvaluation = TestFactory.variantBuilder(10, 89624227, "A", "G")
@@ -95,16 +97,16 @@ void calculatePathAcmgAssignments() {
gene.addPriorityResult(omimPriorityResult);
AcmgEvidence acmgEvidence = AcmgEvidence.builder()
- .add(AcmgCriterion.PVS1)
+ .add(AcmgCriterion.PVS1, AcmgCriterion.Evidence.MODERATE)
.add(AcmgCriterion.PM2, AcmgCriterion.Evidence.SUPPORTING)
- .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.STRONG)
- .add(AcmgCriterion.PP4)
+// .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.STRONG) // not a missense variant
+ .add(AcmgCriterion.PP4, AcmgCriterion.Evidence.MODERATE)
.add(AcmgCriterion.PP5, AcmgCriterion.Evidence.VERY_STRONG)
.build();
AcmgAssignment acmgAssignment = AcmgAssignment.of(variantEvaluation, gene.getGeneIdentifier(), ModeOfInheritance.AUTOSOMAL_DOMINANT, cowdenSyndrome, acmgEvidence, AcmgClassification.PATHOGENIC);
AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), TestVariantDataService.stub());
- AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acgs2020Classifier());
+ AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acmg2020PointsBasedClassifier());
List acmgAssignments = instance.calculateAcmgAssignments(ModeOfInheritance.AUTOSOMAL_DOMINANT, gene, List.of(variantEvaluation), compatibleDiseaseMatches);
assertThat(acmgAssignments, equalTo(List.of(acmgAssignment)));
}
@@ -138,7 +140,7 @@ void calculateVusAcmgAssignments() {
AcmgAssignment acmgAssignment = AcmgAssignment.of(variantEvaluation, gene.getGeneIdentifier(), ModeOfInheritance.AUTOSOMAL_DOMINANT, cowdenSyndrome, acmgEvidence, AcmgClassification.UNCERTAIN_SIGNIFICANCE);
AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("proband", justProband("proband", MALE), TestVariantDataService.stub());
- AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acgs2020Classifier());
+ AcmgAssignmentCalculator instance = new AcmgAssignmentCalculator(acmgEvidenceAssigner, new Acmg2020PointsBasedClassifier());
List acmgAssignments = instance.calculateAcmgAssignments(ModeOfInheritance.AUTOSOMAL_DOMINANT, gene, List.of(variantEvaluation), compatibleDiseaseMatches);
assertThat(acmgAssignments, equalTo(List.of(acmgAssignment)));
}
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java
index f2568b849..912c7110f 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/analysis/util/acmg/AcmgEvidenceTest.java
@@ -31,7 +31,7 @@
import static org.hamcrest.Matchers.equalTo;
import static org.monarchinitiative.exomiser.core.analysis.util.acmg.AcmgCriterion.*;
-public class AcmgEvidenceTest {
+class AcmgEvidenceTest {
@Test
void testEmptyBuilder() {
@@ -50,13 +50,13 @@ void testStaticFactoryDefaultEvidence() {
}
@Test
- public void testToStringDefaultEvidence() {
+ void testToStringDefaultEvidence() {
AcmgEvidence instance = AcmgEvidence.builder().add(PVS1).add(PS1).build();
assertThat(instance.toString(), equalTo("[PVS1, PS1]"));
}
@Test
- public void testToStringModifiedEvidence() {
+ void testToStringModifiedEvidence() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1, Evidence.STRONG)
.add(PS1, Evidence.MODERATE)
@@ -65,8 +65,31 @@ public void testToStringModifiedEvidence() {
assertThat(instance.toString(), equalTo("[PVS1_Strong, PS1_Moderate, PP3_VeryStrong]"));
}
+ @ParameterizedTest
+ @CsvSource(value = {
+ "'[PVS1, PS1_Moderate, PP3_VeryStrong]'",
+ "'[PVS1,PS1_Moderate,PP3_VeryStrong]'",
+ "'PVS1, PS1_Moderate, PP3_VeryStrong'",
+ "'PVS1 PS1_Moderate PP3_VeryStrong'",
+ "'[PVS1 PS1_Moderate PP3_VeryStrong['",
+ // stupid shit
+ "'PVS1 PS1_Moderate PP3_VeryStrong'",
+ "'PVS1 PS1_Moderate PP3_VERYSTRONG'",
+ "'PVS1 PS1_Moderate PP3_verystrong'",
+ "'PVS1 PS1_Moderate, PP3_verystrong'",
+ "'PVS1 PS1_Moderate,PP3_verystrong'",
+ })
+ void testParseEvidence(String acmgEvidence) {
+ AcmgEvidence expected = AcmgEvidence.builder()
+ .add(PVS1)
+ .add(PS1, Evidence.MODERATE)
+ .add(PP3, Evidence.VERY_STRONG)
+ .build();
+ assertThat(AcmgEvidence.parseAcmgEvidence(acmgEvidence), equalTo(expected));
+ }
+
@Test
- public void testGetEvidenceOverwriteInputInBuilder() {
+ void testGetEvidenceOverwriteInputInBuilder() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1, Evidence.STRONG)
.add(PVS1)
@@ -75,7 +98,7 @@ public void testGetEvidenceOverwriteInputInBuilder() {
}
@Test
- public void testGetEvidenceDefaultValue() {
+ void testGetEvidenceDefaultValue() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1)
.build();
@@ -83,7 +106,7 @@ public void testGetEvidenceDefaultValue() {
}
@Test
- public void testGetEvidenceModifiedValue() {
+ void testGetEvidenceModifiedValue() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1, Evidence.MODERATE)
.build();
@@ -91,7 +114,7 @@ public void testGetEvidenceModifiedValue() {
}
@Test
- public void testGetEvidenceNoValue() {
+ void testGetEvidenceNoValue() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1, Evidence.MODERATE)
.build();
@@ -99,7 +122,7 @@ public void testGetEvidenceNoValue() {
}
@Test
- public void testContainsEvidence() {
+ void testContainsEvidence() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1, Evidence.MODERATE)
.build();
@@ -108,7 +131,7 @@ public void testContainsEvidence() {
}
@Test
- public void testBuilderContainsEvidence() {
+ void testBuilderContainsEvidence() {
AcmgEvidence.Builder instance = AcmgEvidence.builder()
.add(PVS1, Evidence.MODERATE);
assertThat(instance.contains(PVS1), equalTo(true));
@@ -116,13 +139,22 @@ public void testBuilderContainsEvidence() {
}
@Test
- public void testSizeWhenEmpty() {
+ void testBuilderContainsEvidenceWithStrength() {
+ AcmgEvidence.Builder instance = AcmgEvidence.builder()
+ .add(PVS1, Evidence.MODERATE);
+ assertThat(instance.containsWithEvidence(PVS1, Evidence.MODERATE), equalTo(true));
+ assertThat(instance.containsWithEvidence(PVS1, Evidence.VERY_STRONG), equalTo(false));
+ assertThat(instance.containsWithEvidence(PM3, Evidence.MODERATE), equalTo(false));
+ }
+
+ @Test
+ void testSizeWhenEmpty() {
AcmgEvidence instance = AcmgEvidence.builder().build();
assertThat(instance.size(), equalTo(0));
}
@Test
- public void testSizeWithElement() {
+ void testSizeWithElement() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1)
.build();
@@ -130,13 +162,13 @@ public void testSizeWithElement() {
}
@Test
- public void testIsEmpty() {
+ void testIsEmpty() {
AcmgEvidence instance = AcmgEvidence.builder().build();
assertThat(instance.isEmpty(), equalTo(true));
}
@Test
- public void testNotEmpty() {
+ void testNotEmpty() {
AcmgEvidence instance = AcmgEvidence.builder()
.add(PVS1)
.build();
@@ -244,7 +276,7 @@ void testEvidence() {
"BA1, -8, 0.000", // BA1 is a bit of an anomaly - it's intended as a hard filter, so doesn't fit into the points system
})
void testPosteriorProb(String criteria, int points, double posteriorProb) {
- AcmgEvidence acmgEvidence = parseAcmgEvidence(criteria);
+ AcmgEvidence acmgEvidence = AcmgEvidence.parseAcmgEvidence(criteria);
assertThat(acmgEvidence.points(), equalTo(points));
assertThat(acmgEvidence.postProbPath(), closeTo(posteriorProb, 0.001));
}
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java
index 32ce1561b..c7967ae9d 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/TestVariantDataService.java
@@ -25,8 +25,8 @@
*/
package org.monarchinitiative.exomiser.core.genome;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableSet;
+import jakarta.annotation.Nonnull;
+import org.monarchinitiative.exomiser.core.model.GeneStatistics;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.frequency.Frequency;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData;
@@ -55,11 +55,15 @@ public class TestVariantDataService implements VariantDataService {
private final Set expectedWhiteList;
private final Map expectedFrequencyData;
private final Map expectedPathogenicityData;
+ private final Map expectedClinVarData;
+ private final Map expectedGeneStats;
private TestVariantDataService(Builder builder) {
- this.expectedWhiteList = ImmutableSet.copyOf(builder.expectedWhiteList);
- this.expectedFrequencyData = ImmutableMap.copyOf(builder.expectedFrequencyData);
- this.expectedPathogenicityData = ImmutableMap.copyOf(builder.expectedPathogenicityData);
+ this.expectedWhiteList = Set.copyOf(builder.expectedWhiteList);
+ this.expectedFrequencyData = Map.copyOf(builder.expectedFrequencyData);
+ this.expectedPathogenicityData = Map.copyOf(builder.expectedPathogenicityData);
+ this.expectedClinVarData = Map.copyOf(builder.expectedClinVarData);
+ this.expectedGeneStats = Map.copyOf(builder.expectedGeneStats);
}
/**
@@ -105,7 +109,7 @@ public FrequencyData getVariantFrequencyData(Variant variant, Set wanted = allFrequencyData.frequencies()
.stream()
.filter(frequency -> frequencySources.contains(frequency.source()))
- .collect(Collectors.toList());
+ .toList();
return FrequencyData.of(allFrequencyData.getRsId(), wanted);
}
@@ -123,18 +127,25 @@ public PathogenicityData getVariantPathogenicityData(Variant variant, Set findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
- return null;
+ public Map findClinVarRecordsOverlappingInterval(@Nonnull GenomicInterval genomicInterval) {
+ return expectedClinVarData.entrySet().stream()
+ .filter(entry -> entry.getKey().overlapsWith(genomicInterval))
+ .collect(Collectors.toUnmodifiableMap(Map.Entry::getKey, Map.Entry::getValue));
+ }
+
+ @Override
+ public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) {
+ return expectedGeneStats.getOrDefault(geneSymbol, GeneStatistics.builder(geneSymbol).build());
}
public static Builder builder() {
@@ -145,6 +156,8 @@ public static class Builder {
private Set expectedWhiteList = new HashSet<>();
private Map expectedFrequencyData = new HashMap<>();
private Map expectedPathogenicityData = new HashMap<>();
+ private Map expectedClinVarData = new HashMap<>();
+ private Map expectedGeneStats = new HashMap<>();
public Builder expectedWhiteList(Set expectedWhiteList) {
this.expectedWhiteList = expectedWhiteList;
@@ -176,6 +189,26 @@ public Builder put(Variant variant, PathogenicityData expectedPathogenicityData)
return this;
}
+ public Builder expectedClinVarData(Map expectedClinVarData) {
+ this.expectedClinVarData = expectedClinVarData;
+ return this;
+ }
+
+ public Builder put(Variant variant, ClinVarData clinVarData) {
+ this.expectedClinVarData.put(variant, clinVarData);
+ return this;
+ }
+
+ public Builder expectedGeneStats(Map expectedGeneStats) {
+ this.expectedGeneStats = expectedGeneStats;
+ return this;
+ }
+
+ public Builder put(GeneStatistics geneStatistics) {
+ this.expectedGeneStats.put(geneStatistics.geneSymbol(), geneStatistics);
+ return this;
+ }
+
public TestVariantDataService build() {
return new TestVariantDataService(this);
}
@@ -213,6 +246,11 @@ public ClinVarData getClinVarData(GenomicVariant genomicVariant) {
public Map findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
return Map.of();
}
+
+ @Override
+ public GeneStatistics getGeneStatistics(@Nonnull String geneSymbol) {
+ return GeneStatistics.builder(geneSymbol).build();
+ }
}
}
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java
index e8ff886ca..1d8febbd4 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/VariantDataServiceImplTest.java
@@ -202,7 +202,7 @@ public void serviceQueryForSynonymousVariantReturnsEmptyPathogenicityData() {
Mockito.when(defaultPathogenicityDao.getPathogenicityData(variant)).thenReturn(PathogenicityData.of(MutationTasterScore.of(1f)));
Mockito.when(clinVarDao.getClinVarData(variant)).thenReturn(ClinVarData.empty());
PathogenicityData result = instance.getVariantPathogenicityData(variant, EnumSet.of(PathogenicitySource.MUTATION_TASTER));
- assertThat(result, equalTo(PathogenicityData.empty()));
+ assertThat(result, equalTo(PathogenicityData.of(MutationTasterScore.of(1f))));
}
@Test
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java
index 7e55da727..5c99c114c 100644
--- a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/genome/dao/ClinVarDaoMvStoreTest.java
@@ -1,11 +1,13 @@
package org.monarchinitiative.exomiser.core.genome.dao;
+import de.charite.compbio.jannovar.annotation.VariantEffect;
import de.charite.compbio.jannovar.data.JannovarData;
import de.charite.compbio.jannovar.mendel.ModeOfInheritance;
import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraint;
import org.monarchinitiative.exomiser.core.analysis.util.GeneConstraints;
import org.monarchinitiative.exomiser.core.analysis.util.acmg.*;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
@@ -15,20 +17,24 @@
import org.monarchinitiative.exomiser.core.genome.jannovar.JannovarDataSourceLoader;
import org.monarchinitiative.exomiser.core.model.*;
import org.monarchinitiative.exomiser.core.model.frequency.FrequencyData;
+import org.monarchinitiative.exomiser.core.model.frequency.FrequencySource;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityData;
-import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicityScore;
import org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource;
+import org.monarchinitiative.exomiser.core.prioritisers.dao.DefaultDiseaseDao;
+import org.monarchinitiative.exomiser.core.prioritisers.dao.DiseaseDao;
+import org.monarchinitiative.exomiser.core.prioritisers.model.Disease;
+import org.monarchinitiative.exomiser.core.prioritisers.model.InheritanceMode;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.*;
import java.nio.file.Path;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
+import java.util.stream.Collectors;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
+import static org.monarchinitiative.exomiser.core.model.pathogenicity.PathogenicitySource.*;
class ClinVarDaoMvStoreTest {
@@ -52,9 +58,8 @@ private AlleleProto.AlleleKey parseAlleleKey(String broadFormatVariant) {
.build();
}
- private GenomicVariant variant(String broadFormatVariant) {
- String[] fields = broadFormatVariant.split("-");
- return GenomicVariant.of(GenomeAssembly.HG19.getContigByName(fields[0]), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, Integer.parseInt(fields[1]), fields[2]), fields[2], fields[3]);
+ private GenomicVariant variant(String variant) {
+ return parseVariant(GenomeAssembly.HG19, variant);
}
@Test
@@ -79,6 +84,31 @@ void getClinVarData() {
}
}
+ @Test
+ void getClinVarDataNotReturningConflictingSig() {
+ try (MVStore mvStore = new MVStore.Builder().open()) {
+ var clinvarMap = MvStoreUtil.openClinVarMVMap(mvStore);
+ AlleleProto.AlleleKey alleleKey = parseAlleleKey("1-200-A-T");
+ AlleleProto.ClinVar clinVar = AlleleProto.ClinVar.newBuilder()
+ .setVariationId("12345")
+ .setPrimaryInterpretation(AlleleProto.ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .setReviewStatus(AlleleProto.ClinVar.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
+ .putClinSigCounts("PATHOGENIC", 5)
+ .putClinSigCounts("UNCERTAIN_SIGNIFICANCE", 5)
+ .build();
+ clinvarMap.put(alleleKey, clinVar);
+
+ ClinVarDao instance = new ClinVarDaoMvStore(mvStore);
+ System.out.println("AlleleProtoAdaptor.toClinVarData: " + AlleleProtoAdaptor.toClinVarData(clinVar));
+ GenomicVariant clinVarVariant = variant("1-200-A-T");
+ System.out.println("ClinVarDao.getClinVarData: " + instance.getClinVarData(clinVarVariant));
+ assertThat(instance.getClinVarData(clinVarVariant), equalTo(AlleleProtoAdaptor.toClinVarData(clinVar)));
+
+ GenomicVariant nonClinVarVariant = variant("1-200-A-A");
+ assertThat(instance.getClinVarData(nonClinVarVariant), equalTo(ClinVarData.empty()));
+ }
+ }
+
@Test
void emptyStore() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore();
@@ -217,14 +247,39 @@ void setPositionInButLengthIncorrectTest() {
@Disabled("manual testing utility")
@Test
void manualDataExplorer() {
- MVStore mvStore = new MVStore.Builder().fileName("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_clinvar.mv.db").readOnly().open();
+ // variant under assessment in 'Broad format' (i.e. hyphen seperated chr-pos-ref-alt)
+ String vua = "19-54123563-A-G"; // hg19-22-51019849-C-T possible cryptic splice variant? SPLICE_AI: 0.230 (PP3), ALPHA_MISSENSE: 0.125
+ ModeOfInheritance modeOfInheritance = ModeOfInheritance.ANY;
+ // 37:10-124797364 // 38:10-123037848-G-A + 10-123051188-T-TAA + 10-123037804-C-A
+ // 10-89624227-A-G PTEN PATHOGENIC 3* START_LOST
+ // 10-89624245-G-T PTEN PATHOGENIC 2* STOP_GAINED
+ // 10-89624242-A-G PTEN PATHOGENIC 3* MISSENSE_VARIANT
+
+ // 4-15589553-G-C - Hypothetical, not in ClinVar, LIKELY_PATHOGENIC [PVS1_Strong, PS1, PM2_Supporting] points=9?
+ // 1-63868019-G-A | 1-63877002-G-A
+
+ // 2-232079571-G-A
+ // FP:
+ // MT-12706-T-C
+ // 11-6638385-C-T
+ // 12-103234252-T-C
+
+ // 17-48275792-A-G SPLICE_DONOR_VARIANT (LP, ClinVar)
+ // 17-48275815-T-G SYNONYMOUS_VARIANT (VUS, ClinVar)
+ //
+ // 3'UTR hg38-20-58909654-A-G high spliceAI
+ GenomeAssembly assembly = GenomeAssembly.HG38;
+
+ String dataDir = "/home/hhx640/Documents/exomiser-data/";
+ String dataVersion = "2406_" + assembly;
+
+ MVStore mvStore = new MVStore.Builder().fileName(dataDir + dataVersion + "/" + dataVersion + "_clinvar.mv.db").readOnly().open();
ClinVarDaoMvStore clinVarDao = new ClinVarDaoMvStore(mvStore);
- MVStore alleleStore = new MVStore.Builder().fileName("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_variants.mv.db").readOnly().open();
+ MVStore alleleStore = new MVStore.Builder().fileName(dataDir + dataVersion + "/" + dataVersion + "_variants.mv.db").readOnly().open();
AllelePropertiesDao allelePropertiesDao = new AllelePropertiesDaoMvStore(alleleStore);
- GenomeAssembly assembly = GenomeAssembly.HG19;
- JannovarData jannovarData = JannovarDataSourceLoader.loadJannovarData(Path.of("/home/hhx640/Documents/exomiser-data/2402_hg19/2402_hg19_transcripts_ensembl.ser"));
+ JannovarData jannovarData = JannovarDataSourceLoader.loadJannovarData(Path.of(dataDir + dataVersion + "/" + dataVersion + "_transcripts_ensembl.ser"));
VariantAnnotator variantAnnotator = new JannovarVariantAnnotator(assembly, jannovarData, ChromosomalRegionIndex.empty());
// 18-57550760-A-T NA
// 18-57550753-A-C LP // 55217985 (hg19)
@@ -237,8 +292,9 @@ void manualDataExplorer() {
// https://mart.ensembl.org/info/genome/genebuild/canonical.html (see also vitt). Ideally the Jannovar Annotations
// should be sorted before being converted to TranscriptAnnotations. This isn't an issue if MANE only
// transcripts are being used as these are the only ones available to report on.
- GenomicVariant genomicVariant = variant(assembly, "10-89624227-A-G"); // 10-123256215-T-G hg38:10-121496701-T-G
+ GenomicVariant genomicVariant = parseVariant(assembly, vua); // 10-123256215-T-G hg38:10-121496701-T-G
+ // hg38:4-25145092-C-T ENST00000382103.7(SEPSECS):c.846G>A (SEPSECS)p.(Leu282=) SpliceAI=0.42
// 10-89624227-A-G (PTEN)
// 10-123247514-C-G : PS1, PM1, PM2_Supporting, PM5_Supporting, PP3, PP5_Strong
// 10-123247517-T-G : PS1_Supporting, PM1, PM2_Supporting, PM5_Supporting, PP2, PP3
@@ -246,18 +302,14 @@ void manualDataExplorer() {
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(genomicVariant);
// encode as VariantKey (https://doi.org/10.1101/473744) == 8 bytes fixed size (long);
// System.out.println("AlleleKey size (bytes): " + alleleKey.getSerializedSize()); // SNP = 13 bytes, 11 bases = 23
- System.out.println();
-
- System.out.println(clinVarDao.getClinVarData(genomicVariant));
- Map clinVarRecordsOverlappingInterval = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(25, 25));
- clinVarRecordsOverlappingInterval.forEach((variant, clinVarData) -> System.out.println(toBroad(variant) + " : " + clinVarData));
System.out.println();
+ Set pathSources = EnumSet.of(REVEL, MVP, ALPHA_MISSENSE, SPLICE_AI, REMM);
List variantAnnotations = variantAnnotator.annotate(genomicVariant);
AlleleProto.AlleleProperties alleleProperties = allelePropertiesDao.getAlleleProperties(alleleKey, assembly);
- FrequencyData frequencyData = AlleleProtoAdaptor.toFrequencyData(alleleProperties);
+ FrequencyData frequencyData = AlleleProtoAdaptor.toFrequencyData(alleleProperties).toBuilder().filterSources(FrequencySource.NON_FOUNDER_POPS).build();
PathogenicityData pathogenicityData = AlleleProtoAdaptor.toPathogenicityData(alleleProperties);
- pathogenicityData = PathogenicityData.of(pathogenicityData.pathogenicityScores().stream().filter(score -> score.getSource() == PathogenicitySource.ALPHA_MISSENSE).toList());
+ pathogenicityData = PathogenicityData.of(pathogenicityData.pathogenicityScores().stream().filter(score -> pathSources.contains(score.getSource())).toList());
ClinVarData clinVarData = clinVarDao.getClinVarData(genomicVariant);
VariantAnnotation variantAnnotation = variantAnnotations.isEmpty() ? null : variantAnnotations.get(0);
VariantEvaluation variantEvaluation = VariantEvaluation.builder()
@@ -268,45 +320,84 @@ void manualDataExplorer() {
.annotations(variantAnnotation.getTranscriptAnnotations())
.frequencyData(frequencyData)
.pathogenicityData(PathogenicityData.of(clinVarData, pathogenicityData.pathogenicityScores()))
- .compatibleInheritanceModes(Set.of(ModeOfInheritance.AUTOSOMAL_RECESSIVE))
+// .whiteListed()
+ .compatibleInheritanceModes(Set.of(modeOfInheritance))
.build();
TranscriptAnnotation transcriptAnnotation = variantAnnotation != null && variantAnnotation.hasTranscriptAnnotations() ? variantAnnotation.getTranscriptAnnotations().get(0) : null;
- System.out.println(toBroad(genomicVariant) + (variantAnnotation == null ? "" : " " + variantAnnotation.getGeneSymbol() + (transcriptAnnotation == null ? "" : ":" + transcriptAnnotation.getHgvsCdna() + ":" + transcriptAnnotation.getHgvsProtein() ) + " " + variantAnnotation.getVariantEffect()));
- System.out.println(frequencyData.getRsId() + " " + (transcriptAnnotation == null ? "" : transcriptAnnotation.getAccession()));
+ Set varEffects = variantAnnotation != null && variantAnnotation.hasTranscriptAnnotations() ? variantAnnotation.getTranscriptAnnotations().stream().map(TranscriptAnnotation::getVariantEffect).collect(Collectors.toUnmodifiableSet()) : Set.of();
+ System.out.println(toBroad(genomicVariant) + " " + (frequencyData.getRsId().isEmpty() ? "-" : frequencyData.getRsId()) + " " + (variantAnnotation == null ? "" : variantAnnotation.getGeneSymbol() + (transcriptAnnotation == null ? "" : ":" + transcriptAnnotation.getAccession() + ":" + transcriptAnnotation.getHgvsCdna() + ":" + transcriptAnnotation.getHgvsProtein() + " " + transcriptAnnotation.getRankType() + " " + transcriptAnnotation.getRank() + "/" + transcriptAnnotation.getRankTotal()) + " " + varEffects));
String geneSymbol = variantAnnotation.getGeneSymbol();
- System.out.println(geneSymbol + " " + GeneConstraints.geneConstraint(geneSymbol));
- System.out.println(clinVarData.isEmpty() ? "" : clinVarData.getPrimaryInterpretation() + " (" + clinVarData.starRating() + "*) " + clinVarData.getVariationId() + " " + clinVarData.getGeneSymbol() + ":" + clinVarData.getHgvsCdna() + ":" + clinVarData.getHgvsProtein() + " " + clinVarData.getVariantEffect());
- System.out.println("Variant score: " + variantEvaluation.getVariantScore() + " Frequency score: " + variantEvaluation.getFrequencyScore() + " Pathogenicity score: " + variantEvaluation.getPathogenicityScore());
- System.out.println("Frequency data:");
+ GeneConstraint geneConstraint = GeneConstraints.geneConstraint(geneSymbol);
+ if (geneConstraint != null) {
+ System.out.println(geneSymbol + " " + geneConstraint + (geneConstraint.isLossOfFunctionIntolerant() ? (" (LOF INTOLERANT)") : ""));
+ }
+ System.out.println("ClinVar: " + (clinVarData.isEmpty() ? "-" : formatClinVarData(clinVarData)));
+ System.out.println("Variant score: " + variantEvaluation.getVariantScore() + " Frequency score: " + variantEvaluation.getFrequencyScore() + " Pathogenicity score: " + variantEvaluation.getPathogenicityScore() + (variantEvaluation.isWhiteListed() ? " (WHITELIST VARIANT - all scores set to 1.0)" : ""));
+ System.out.println("Frequency score: " + frequencyData.frequencyScore());
if (frequencyData.isEmpty()) {
- System.out.println("\n\t-");
+ System.out.println("\t(no frequency data)");
} else {
- System.out.println("\tfrequency score: " + frequencyData.frequencyScore());
- frequencyData.frequencies().forEach(freq -> System.out.println("\t" + freq.source() + "=" + freq.frequency() + "(" + freq.ac() + "|" + freq.an() + "|" + freq.homs() + ")"));
+ frequencyData.frequencies().forEach(freq -> System.out.println("\t" + freq.source() + "=" + freq.frequency() + (freq.an() == 0 ? "" : "(" + freq.ac() + "|" + freq.an() + "|" + freq.homs() + ")")));
}
- System.out.println("Pathogenicity scores:");
+ System.out.println("Pathogenicity score: " + pathogenicityData.pathogenicityScore());
if (pathogenicityData.isEmpty()) {
- System.out.println("\t-");
+ System.out.println("\t(no pathogenicity data)");
} else {
- System.out.println("\tpathogenicity score: " + pathogenicityData.pathogenicityScore());
pathogenicityData.pathogenicityScores().forEach(path -> System.out.println("\t" + path));
}
AcmgEvidenceAssigner acmgEvidenceAssigner = new Acmg2015EvidenceAssigner("sample", Pedigree.justProband("sample"), clinVarDao);
- var acmgEvidence = acmgEvidenceAssigner.assignVariantAcmgEvidence(variantEvaluation, ModeOfInheritance.AUTOSOMAL_RECESSIVE, List.of(variantEvaluation), List.of(), List.of());
+ Disease disease = Disease.builder().diseaseId("DISEASE:1").diseaseName("disease").inheritanceMode(InheritanceMode.AUTOSOMAL_RECESSIVE).build();
+ AcmgEvidence acmgEvidence = acmgEvidenceAssigner.assignVariantAcmgEvidence(variantEvaluation, modeOfInheritance, List.of(variantEvaluation), List.of(disease), List.of());
AcmgEvidenceClassifier classifier = new Acmg2020PointsBasedClassifier();
// should use an AcmgAssignmentCalculator to figure out the correct disease-gene association to use
- System.out.println(classifier.classify(acmgEvidence) + " " + acmgEvidence + " points=" + acmgEvidence.points());
+ System.out.println(classifier.classify(acmgEvidence) + " " + acmgEvidence + " points=" + acmgEvidence.points() + " score=" + acmgEvidence.postProbPath());
+
+// var manualEvidence = AcmgEvidence.builder()
+// .add(AcmgCriterion.PM2, AcmgCriterion.Evidence.SUPPORTING)
+// .add(AcmgCriterion.PP3, AcmgCriterion.Evidence.SUPPORTING)
+// .add(AcmgCriterion.PS1)
+// .build();
+// System.out.println(classifier.classify(manualEvidence) + " " + manualEvidence + " points=" + manualEvidence.points());
+
+ System.out.println();
+
+ GenomicRegion clinVarSearchInterval = genomicVariant.withPadding(25, 25);
+ String interval = clinVarSearchInterval.contig().name() + ":" + clinVarSearchInterval.startOneBased() + "-" + clinVarSearchInterval.end();
+ System.out.println("ClinVar neighbourhood " + interval + " (" + genomicVariant.start() + " +/-25 bases):");
+ Map clinVarRecordsOverlappingInterval = clinVarDao.findClinVarRecordsOverlappingInterval(clinVarSearchInterval);
+ clinVarRecordsOverlappingInterval.forEach((variant, data) -> System.out.println(toBroad(variant) + " : " + formatClinVarData(data)));
+ //TODO add known diseases
+ }
+
+ private String formatClinVarData(ClinVarData clinVarData) {
+ return clinVarData.getPrimaryInterpretation() + (clinVarData.getPrimaryInterpretation() == ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS ? " " + clinVarData.getConflictingInterpretationCounts() : "") + " (" + clinVarData.starRating() + "*) " + clinVarData.getVariationId() + " " + clinVarData.getGeneSymbol() + ":" + clinVarData.getHgvsCdna() + ":" + clinVarData.getHgvsProtein() + " " + clinVarData.getVariantEffect() + " (" + clinVarData.getReviewStatus() + ")";
}
private String toBroad(GenomicVariant genomicVariant) {
return genomicVariant.contig().name() + '-' + genomicVariant.start() + '-' + genomicVariant.ref() + '-' + genomicVariant.alt();
}
- private GenomicVariant variant(GenomeAssembly genomeAssembly, String broadFormatVariant) {
- String[] fields = broadFormatVariant.split("-");
+ /**
+ * Parses small sequence variants of the form `chr-pos-ref-alt`. For example:
+ *
+ * SNP 1-12345-A-T
+ * SNP 1:12345:A:T
+ * DEL 1-12345-AT-A
+ * INS 1-12345-A-AT
+ * MNV 1-12345-AT-GC
+ * INV 1-12345-AT-TA
+ *
+ * Separators can be either hyphens '-' or colons ':'. The genomic assembly is also required.
+ *
+ * @param genomeAssembly Genomic assembly for the variant coordinates
+ * @param s The string representation of the variant
+ * @return A {@link GenomicVariant}
+ */
+ private GenomicVariant parseVariant(GenomeAssembly genomeAssembly, String s) {
+ String[] fields = s.split("[-:]");
return GenomicVariant.of(genomeAssembly.getContigByName(fields[0]), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, Integer.parseInt(fields[1]), fields[2]), fields[2], fields[3]);
}
}
\ No newline at end of file
diff --git a/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java
new file mode 100644
index 000000000..e631e323d
--- /dev/null
+++ b/exomiser-core/src/test/java/org/monarchinitiative/exomiser/core/model/GeneStatisticsTest.java
@@ -0,0 +1,116 @@
+package org.monarchinitiative.exomiser.core.model;
+
+import de.charite.compbio.jannovar.annotation.VariantEffect;
+import org.junit.jupiter.api.Test;
+import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
+
+import static de.charite.compbio.jannovar.annotation.VariantEffect.*;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+class GeneStatisticsTest {
+
+ @Test
+ void testBuilderNoStats() {
+ var instance = GeneStatistics.builder("WIBBLE").build();
+ assertThat(instance.geneSymbol(), equalTo("WIBBLE"));
+ assertThat(instance.pathCount(), equalTo(0));
+ assertThat(instance.vusCount(), equalTo(0));
+ assertThat(instance.benignCount(), equalTo(0));
+ }
+
+
+ @Test
+ void testBuilderStats() {
+ var instance = GeneStatistics.builder("WIBBLE")
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.LIKELY_PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.BENIGN)
+ // LOF effects
+ .put(VariantEffect.STOP_GAINED, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.STOP_LOST, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.SPLICE_ACCEPTOR_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.SPLICE_DONOR_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.EXON_LOSS_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.START_LOST, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.START_LOST, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(VariantEffect.TRANSCRIPT_ABLATION, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.FRAMESHIFT_ELONGATION, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.FRAMESHIFT_TRUNCATION, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.FRAMESHIFT_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ // other effect
+ .put(VariantEffect.INTRON_VARIANT, ClinVarData.ClinSig.BENIGN)
+ .build();
+ assertThat(instance.geneSymbol(), equalTo("WIBBLE"));
+ assertThat(instance.pathCount(), equalTo(13));
+ assertThat(instance.vusCount(), equalTo(3));
+ assertThat(instance.benignCount(), equalTo(2));
+
+ assertThat(instance.missensePathCount(), equalTo(3));
+ assertThat(instance.missenseVusCount(), equalTo(2));
+ assertThat(instance.missenseBenignCount(), equalTo(1));
+
+ assertThat(instance.lofPathCount(), equalTo(10));
+ assertThat(instance.lofVusCount(), equalTo(1));
+ assertThat(instance.lofBenignCount(), equalTo(0));
+ }
+
+ @Test
+ void testIgnoresNonPathVusBenignStatus() {
+ var instance = GeneStatistics.builder("WIBBLE", MISSENSE_VARIANT, ClinVarData.ClinSig.ESTABLISHED_RISK_ALLELE).build();
+ assertThat(instance.pathCount(), equalTo(0));
+ assertThat(instance.vusCount(), equalTo(0));
+ assertThat(instance.benignCount(), equalTo(0));
+
+ }
+
+ @Test
+ void testToString() {
+ var instance = GeneStatistics.builder("WIBBLE")
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.LIKELY_PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(VariantEffect.MISSENSE_VARIANT, ClinVarData.ClinSig.BENIGN)
+ .put(VariantEffect.STOP_GAINED, ClinVarData.ClinSig.PATHOGENIC)
+ .put(VariantEffect.INTRON_VARIANT, ClinVarData.ClinSig.BENIGN)
+ .build();
+
+ var expected = "GeneStatistics{geneSymbol=WIBBLE, counts={STOP_GAINED=[P=1, VUS=0, B=0], MISSENSE_VARIANT=[P=3, VUS=2, B=1], INTRON_VARIANT=[P=0, VUS=0, B=1]}}";
+ assertThat(instance.toString(), equalTo(expected));
+ }
+
+ @Test
+ void testEmptyString() {
+ assertThat(GeneStatistics.builder("WIBBLE").build().toString(), equalTo("GeneStatistics{geneSymbol=WIBBLE, counts={}}"));
+ }
+
+ @Test
+ void testCanGetStatsForArbitraryVariantEffect() {
+ var instance = GeneStatistics.builder("WIBBLE")
+ .put(SYNONYMOUS_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .build();
+
+ assertThat(instance.vusCount(), equalTo(1));
+ assertThat(instance.vusCount(SYNONYMOUS_VARIANT), equalTo(instance.vusCount()));
+ }
+
+ @Test
+ void testCanGetStatsForArbitraryVariantEffects() {
+ var instance = GeneStatistics.builder("WIBBLE")
+ .put(FIVE_PRIME_UTR_EXON_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(INTRON_VARIANT, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE)
+ .put(MISSENSE_VARIANT, ClinVarData.ClinSig.PATHOGENIC)
+ .build();
+
+ assertThat(instance.vusCount(), equalTo(2));
+ assertThat(instance.vusCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT, MISSENSE_VARIANT), equalTo(instance.vusCount()));
+ assertThat(instance.vusCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT), equalTo(instance.vusCount()));
+ assertThat(instance.pathCount(FIVE_PRIME_UTR_EXON_VARIANT, INTRON_VARIANT, MISSENSE_VARIANT), equalTo(instance.pathCount()));
+ assertThat(instance.pathCount(MISSENSE_VARIANT), equalTo(instance.pathCount()));
+ }
+}
\ No newline at end of file
diff --git a/exomiser-core/src/test/resources/jannovar/default_sources.ini b/exomiser-core/src/test/resources/jannovar/default_sources.ini
index ad75402a9..efe2cc684 100644
--- a/exomiser-core/src/test/resources/jannovar/default_sources.ini
+++ b/exomiser-core/src/test/resources/jannovar/default_sources.ini
@@ -106,7 +106,7 @@ rna=ftp://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/BUILD.36.3/RNA/rna.fa.g
type=ucsc
alias=MT,M,chrM
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
@@ -119,7 +119,7 @@ knownToLocusLink=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownTo
type = ensembl
alias = MT,M,chrM
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
gtf = https://ftp.ensembl.org/pub/grch37/current/gtf/homo_sapiens/Homo_sapiens.GRCh37.87.gtf.gz
cdna = https://ftp.ensembl.org/pub/grch37/current/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.cdna.all.fa.gz
@@ -133,7 +133,7 @@ type = refseq
alias = MT,M,chrM
allowNonCodingNm = true
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
@@ -146,7 +146,7 @@ alias = MT,M,chrM
onlyCurated = true
allowNonCodingNm = true
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/GFF/ref_GRCh37.p13_top_level.gff3.gz
rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/RNA/rna.fa.gz
@@ -158,7 +158,7 @@ type = refseq
alias = MT,M,chrM
allowNonCodingNm = true
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_top_level_2017-01-13.gff3.gz
rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_rna.fa.gz
@@ -171,7 +171,7 @@ alias = MT,M,chrM
onlyCurated = true
allowNonCodingNm = true
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
gff = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_top_level_2017-01-13.gff3.gz
rna = https://ftp.ncbi.nlm.nih.gov/genomes/H_sapiens/GRCh37.p13_interim_annotation/interim_GRCh37.p13_rna.fa.gz
diff --git a/exomiser-data-genome/pom.xml b/exomiser-data-genome/pom.xml
index 198882fc5..a5915dffd 100644
--- a/exomiser-data-genome/pom.xml
+++ b/exomiser-data-genome/pom.xml
@@ -31,7 +31,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
@@ -48,6 +48,12 @@
org.apache.commons
commons-vfs2
2.9.0
+
+
+ commons-logging
+ commons-logging
+
+
info.picocli
@@ -59,6 +65,17 @@
flyway-core
${flyway.version}
+
+ org.duckdb
+ duckdb_jdbc
+ 1.0.0
+
+
+ org.rocksdb
+ rocksdbjni
+ 7.6.0
+ test
+
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java
index d52e38c8a..2bc3cd167 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/BuildCommand.java
@@ -74,7 +74,7 @@ public class BuildCommand implements Callable {
private Path buildClinVar;
@Option(names = "--transcripts", converter = TranscriptSourceConverter.class, split = ",", arity = "0..1", fallbackValue = "ensembl,refseq,ucsc", description = "List of transcript databases to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}")
private List transcriptSources;
- @Option(names = "--variants", split = ",", arity = "0..1", fallbackValue = "esp,exac,uk10k,topmed,dbsnp,gnomad-exome,gnomad-genome,gnomad-mito,alfa,dbnsfp", description = "List of variant data sources to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}")
+ @Option(names = "--variants", split = ",", arity = "0..1", fallbackValue = "esp,exac,uk10k,topmed,dbsnp,gnomad-exome,gnomad-genome,gnomad-mito,alfa,dbnsfp,splice-ai", description = "List of variant data sources to build. If specified without parameter, will build all sources: ${FALLBACK-VALUE}")
private List variantSources;
@Option(names = "--genome", description = "Flag to trigger building of genome data.")
private boolean buildGenome;
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java
index 0acc7f170..dfb99e5fc 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunner.java
@@ -20,12 +20,8 @@
package org.monarchinitiative.exomiser.data.genome;
-import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.h2.mvstore.MVStoreTool;
-import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
-import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleKey;
-import org.monarchinitiative.exomiser.core.proto.AlleleProto.AlleleProperties;
import org.monarchinitiative.exomiser.data.genome.indexers.Indexer;
import org.monarchinitiative.exomiser.data.genome.indexers.MvStoreAlleleIndexer;
import org.monarchinitiative.exomiser.data.genome.model.Allele;
@@ -58,7 +54,7 @@ public VariantDatabaseBuildRunner(BuildInfo buildInfo, Path buildPath, List hg19AlleleResources() {
// ExAC removed as this is part of gnomad-exomes
alleleResources.put("esp", espAlleleResource());
alleleResources.put("dbnsfp", dbnsfpAlleleResource());
+ alleleResources.put("splice-ai", spliceAiAlleleResource());
// CLinVar removed - now handled as a separate data source
return alleleResources.build();
}
+ public SpliceAiAlleleResource spliceAiAlleleResource() {
+ return alleleResource(SpliceAiAlleleResource.class, "hg19.splice-ai");
+ }
+
public DbSnpAlleleResource dbSnpAlleleResource() {
return alleleResource(DbSnpAlleleResource.class, "hg19.dbsnp");
}
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java
index 0ede208a6..b78337d23 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/config/Hg38Config.java
@@ -90,11 +90,16 @@ public Map hg38AlleleResources() {
// ExAC removed as this is part of gnomad-exomes v2.1
// ESP removed as this is part of gnomad-exomes v4
alleleResources.put("dbnsfp", dbnsfpAlleleResource());
+ alleleResources.put("splice-ai", spliceAiAlleleResource());
// CLinVar removed - now handled as a separate data source
return alleleResources.build();
}
+ public SpliceAiAlleleResource spliceAiAlleleResource() {
+ return alleleResource(SpliceAiAlleleResource.class, "hg38.splice-ai");
+ }
+
public DbSnpAlleleResource dbSnpAlleleResource() {
return alleleResource(DbSnpAlleleResource.class, "hg38.dbsnp");
}
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java
index eafdce07c..25d785932 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverter.java
@@ -172,6 +172,9 @@ private static ClinVar.ClinSig toProtoClinSig(ClinVarData.ClinSig clinSig) {
case PATHOGENIC_OR_LIKELY_PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC;
case PATHOGENIC -> ClinVar.ClinSig.PATHOGENIC;
case CONFLICTING_PATHOGENICITY_INTERPRETATIONS -> ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS;
+ case UNCERTAIN_RISK_ALLELE -> ClinVar.ClinSig.UNCERTAIN_RISK_ALLELE;
+ case LIKELY_RISK_ALLELE -> ClinVar.ClinSig.LIKELY_RISK_ALLELE;
+ case ESTABLISHED_RISK_ALLELE -> ClinVar.ClinSig.ESTABLISHED_RISK_ALLELE;
case AFFECTS -> ClinVar.ClinSig.AFFECTS;
case ASSOCIATION -> ClinVar.ClinSig.ASSOCIATION;
case DRUG_RESPONSE -> ClinVar.ClinSig.DRUG_RESPONSE;
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java
index 5126a6f61..1732c027f 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParser.java
@@ -28,6 +28,7 @@
import org.slf4j.LoggerFactory;
import java.util.*;
+import java.util.stream.Collectors;
import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*;
@@ -38,6 +39,47 @@ public class ClinVarAlleleParser extends VcfAlleleParser {
private static final Logger logger = LoggerFactory.getLogger(ClinVarAlleleParser.class);
+ private static final Map CLINSIG_MAP;
+
+ static {
+ Map temp = new HashMap<>();
+ temp.put("Uncertain_significance", UNCERTAIN_SIGNIFICANCE);
+// temp.put("Uncertain_significance/Uncertain_risk_allele", UNCERTAIN_SIGNIFICANCE);
+ temp.put("Benign", BENIGN);
+// temp.put("Benign/Likely_benign", BENIGN_OR_LIKELY_BENIGN);
+ temp.put("Likely_benign", LIKELY_BENIGN);
+ temp.put("Conflicting_interpretations_of_pathogenicity", CONFLICTING_PATHOGENICITY_INTERPRETATIONS);
+ temp.put("Conflicting_classifications_of_pathogenicity", CONFLICTING_PATHOGENICITY_INTERPRETATIONS);
+ temp.put("Likely_pathogenic", LIKELY_PATHOGENIC);
+ temp.put("Likely_pathogenic,_low_penetrance", LIKELY_PATHOGENIC);
+ temp.put("Likely_pathogenic/Likely_risk_allele", LIKELY_PATHOGENIC);
+ temp.put("Likely_pathogenic/Established_risk_allele", LIKELY_PATHOGENIC);
+ temp.put("Pathogenic/Likely_pathogenic", PATHOGENIC_OR_LIKELY_PATHOGENIC);
+ temp.put("Pathogenic/Likely_pathogenic/Likely_risk_allele", PATHOGENIC_OR_LIKELY_PATHOGENIC);
+ temp.put("Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance", PATHOGENIC_OR_LIKELY_PATHOGENIC);
+ temp.put("Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance/Established_risk_allele"
+ , PATHOGENIC_OR_LIKELY_PATHOGENIC);
+ temp.put("Pathogenic", PATHOGENIC);
+ temp.put("Pathogenic,_low_penetrance", PATHOGENIC);
+ temp.put("Pathogenic/Likely_risk_allele", PATHOGENIC);
+ temp.put("Pathogenic/Pathogenic,_low_penetrance", PATHOGENIC);
+ temp.put("Uncertain_risk_allele", UNCERTAIN_RISK_ALLELE);
+ temp.put("Likely_risk_allele", LIKELY_RISK_ALLELE);
+ temp.put("Established_risk_allele", ESTABLISHED_RISK_ALLELE);
+ temp.put("not_provided", NOT_PROVIDED);
+ temp.put("no_classification_for_the_single_variant", NOT_PROVIDED);
+ temp.put("no_classifications_from_unflagged_records", NOT_PROVIDED);
+ temp.put("Affects", AFFECTS);
+ temp.put("association", ASSOCIATION);
+ temp.put("association_not_found", ASSOCIATION); // TODO add ASSOCIATION_NOT_FOUND
+ temp.put("drug_response", DRUG_RESPONSE);
+ temp.put("other", OTHER);
+ temp.put("confers_sensitivity", OTHER);
+ temp.put("protective", PROTECTIVE);
+ temp.put("risk_factor", RISK_FACTOR);
+ CLINSIG_MAP = Map.copyOf(temp);
+ }
+
@Override
List parseInfoField(List alleles, String info) {
for (Allele allele : alleles) {
@@ -68,11 +110,9 @@ private ClinVarData parseClinVarData(Allele allele, String info) {
String value = keyValue[1];
switch (key) {
case "CLNSIG":
- String[] clinsigs = value.split(",_");
- ClinSig primary = parseClinSig(clinsigs[0]);
- Set secondary = parseSecondaryClinSig(clinsigs);
- clinVarBuilder.primaryInterpretation(primary);
- clinVarBuilder.secondaryInterpretations(secondary);
+ String[] clinsigs = value.split("\\|");
+ clinVarBuilder.primaryInterpretation(parseClinSig(clinsigs[0]));
+ clinVarBuilder.secondaryInterpretations(parseSecondaryClinSig(clinsigs));
break;
case "CLNREVSTAT":
//CLNREVSTAT criteria_provided,_conflicting_interpretations, criteria_provided,_multiple_submitters,_no_conflicts, criteria_provided,_single_submitter, no_assertion_criteria_provided, no_assertion_provided, no_interpretation_for_the_single_variant, practice_guideline, reviewed_by_expert_panel
@@ -143,23 +183,33 @@ private ClinSig parseClinSig(String clinsig) {
// Likely_benign=52064, Likely_pathogenic=15127, Pathogenic=46803, Pathogenic/Likely_pathogenic=3278,
// Uncertain_significance=120418, association=148, drug_response=290, not_provided=10980, other=1796, protective=30,
// risk_factor=411
- return switch (clinsig) {
- case "Uncertain_significance" -> UNCERTAIN_SIGNIFICANCE;
- case "Benign" -> BENIGN;
- case "Benign/Likely_benign" -> BENIGN_OR_LIKELY_BENIGN;
- case "Likely_benign" -> LIKELY_BENIGN;
- case "Conflicting_interpretations_of_pathogenicity" -> CONFLICTING_PATHOGENICITY_INTERPRETATIONS;
- case "Likely_pathogenic" -> LIKELY_PATHOGENIC;
- case "Pathogenic/Likely_pathogenic" -> PATHOGENIC_OR_LIKELY_PATHOGENIC;
- case "Pathogenic" -> PATHOGENIC;
- case "Affects" -> AFFECTS;
- case "association" -> ASSOCIATION;
- case "drug_response" -> DRUG_RESPONSE;
- case "other" -> OTHER;
- case "protective" -> PROTECTIVE;
- case "risk_factor" -> RISK_FACTOR;
- default -> NOT_PROVIDED;
- };
+
+ // this is yuk!
+ clinsig = clinsig.replace(",_low_penetrance", "");
+ Set clinSigs = Arrays.stream(clinsig.split("/"))
+ .map(CLINSIG_MAP::get)
+ .collect(Collectors.toCollection(() -> EnumSet.noneOf(ClinSig.class)));
+
+ if (clinSigs.isEmpty()) {
+ throw new IllegalArgumentException("'" + clinsig + "' is not a recognised ClinVar CLNSIG value");
+ }
+ if (clinSigs.size() == 1) {
+ return clinSigs.iterator().next();
+ }
+ // remove the risk allele if present and try to figure out if it's a P/LP or B/LB
+ clinSigs.removeAll(EnumSet.of(UNCERTAIN_RISK_ALLELE, LIKELY_RISK_ALLELE, ESTABLISHED_RISK_ALLELE));
+
+ if (clinSigs.size() == 1) {
+ return clinSigs.iterator().next();
+ }
+ else if (clinSigs.contains(PATHOGENIC) && clinSigs.contains(LIKELY_PATHOGENIC)) {
+ return PATHOGENIC_OR_LIKELY_PATHOGENIC;
+ }
+ else if (clinSigs.contains(BENIGN) && clinSigs.contains(LIKELY_BENIGN)) {
+ return BENIGN_OR_LIKELY_BENIGN;
+ }
+ // shouldn't get here....
+ throw new IllegalArgumentException("'" + clinsig + "' is not a recognised ClinVar CLNSIG value");
}
}
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java
index 15a623387..95676cdba 100644
--- a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/DbSnpAlleleParser.java
@@ -41,7 +41,6 @@ public class DbSnpAlleleParser extends VcfAlleleParser {
@Override
List parseInfoField(List alleles, String info) {
- System.out.println(info);
Map> minorAlleleFrequencies = parseMinorAlleleFrequencies(info);
for (Map.Entry> entry : minorAlleleFrequencies.entrySet()) {
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java
new file mode 100644
index 000000000..6f9046263
--- /dev/null
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParser.java
@@ -0,0 +1,34 @@
+package org.monarchinitiative.exomiser.data.genome.model.parsers;
+
+import org.monarchinitiative.exomiser.core.genome.Contigs;
+import org.monarchinitiative.exomiser.core.proto.AlleleProto;
+import org.monarchinitiative.exomiser.data.genome.model.Allele;
+
+import java.util.List;
+
+public class SpliceAiAlleleParser implements AlleleParser {
+ @Override
+ public List parseLine(String line) {
+ if (line.startsWith("#")) {
+ // comment line.
+ return List.of();
+ }
+ var allele = parseAllele(line);
+ return List.of(allele);
+ }
+
+ private Allele parseAllele(String line) {
+ String[] fields = line.split("\t");
+ if (fields.length != 5) {
+ throw new IllegalStateException("Expected 5 fields but found " + fields.length + " in line " + line);
+ }
+ int chr = Contigs.parseId(fields[0]); // 1..22,X,Y,MT
+ int start = Integer.parseInt(fields[1]);
+ String ref = fields[2];
+ String alt = fields[3];
+ float score = Float.parseFloat(fields[4]);
+ Allele allele = new Allele(chr, start, ref, alt);
+ allele.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(score).build());
+ return allele;
+ }
+}
diff --git a/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java
new file mode 100644
index 000000000..8184b85ff
--- /dev/null
+++ b/exomiser-data-genome/src/main/java/org/monarchinitiative/exomiser/data/genome/model/resource/SpliceAiAlleleResource.java
@@ -0,0 +1,14 @@
+package org.monarchinitiative.exomiser.data.genome.model.resource;
+
+import org.monarchinitiative.exomiser.data.genome.model.archive.TabixArchive;
+import org.monarchinitiative.exomiser.data.genome.model.parsers.SpliceAiAlleleParser;
+
+import java.net.URL;
+import java.nio.file.Path;
+
+public class SpliceAiAlleleResource extends AbstractAlleleResource {
+
+ public SpliceAiAlleleResource(String name, URL resourceUrl, Path resourcePath) {
+ super(name, resourceUrl, new TabixArchive(resourcePath), new SpliceAiAlleleParser());
+ }
+}
diff --git a/exomiser-data-genome/src/main/resources/application.properties b/exomiser-data-genome/src/main/resources/application.properties
index b1e64131d..7da65c540 100644
--- a/exomiser-data-genome/src/main/resources/application.properties
+++ b/exomiser-data-genome/src/main/resources/application.properties
@@ -19,6 +19,8 @@
#
spring.flyway.enabled=false
spring.h2.console.enabled=true
+logging.file.path=${build-dir}
+
# build-dir defines the main directory which will be used to build the databases.
build-dir=.
build-version=1711
@@ -62,6 +64,10 @@ hg19.uk10k.file-dir=${hg19.variant-dir}
hg19.dbnsfp.file-name=dbNSFP4.5a.zip
hg19.dbnsfp.file-url=ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/
hg19.dbnsfp.file-dir=${hg19.variant-dir}
+# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg19.vcf.gz to only include variants max(delta_score) >= 1.0
+hg19.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz
+hg19.splice-ai.file-url=file://${hg19.variant-dir}
+hg19.splice-ai.file-dir=${hg19.variant-dir}
#hg38
hg38.variant-dir=${build-dir}/hg38/variants
@@ -69,11 +75,11 @@ hg38.variant-processed-dir=${hg38.variant-dir}/processed
hg38.genome-dir=${build-dir}/hg38/genome
hg38.genome-processed-dir=${hg38.genome-dir}/processed
hg38.ensembl-variation-url=http://ftp.ensembl.org/pub/data_files/homo_sapiens/GRCh38/variation_genotype/
-hg38.gnomad-exome.file-name=gnomad-v4/exomes
+hg38.gnomad-exome.file-name=gnomad-v4-1/exomes
hg38.gnomad-exome.file-url=https://storage.googleapis.com/gcp-public-data--gnomad/release/2.1.1/liftover_grch38/vcf/exomes/
hg38.gnomad-exome.file-dir=${hg38.variant-dir}
# hg38.gnomad-genome.file-name should be a directory containing the gnomad.genomes.v3.1.2.sites.chr$i.vcf.bgz files
-hg38.gnomad-genome.file-name=gnomad-v4/genomes
+hg38.gnomad-genome.file-name=gnomad-v4-1/genomes
hg38.gnomad-genome.file-url=https://storage.googleapis.com/gcp-public-data--gnomad/release/3.1.2/vcf/genomes/gnomad.genomes.v3.1.2.sites.chr1.vcf.bgz
hg38.gnomad-genome.file-dir=${hg38.variant-dir}
hg38.gnomad-mito.file-name=gnomad.genomes.v3.1.sites.chrM.vcf.bgz
@@ -106,3 +112,7 @@ hg38.uk10k.file-dir=${hg38.variant-dir}
hg38.dbnsfp.file-name=${hg19.dbnsfp.file-name}
hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url}
hg38.dbnsfp.file-dir=${hg19.variant-dir}
+# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg38.vcf.gz to only include variants max(delta_score) >= 1.0
+hg38.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg38.vcf.gz
+hg38.splice-ai.file-url=file://${hg38.variant-dir}
+hg38.splice-ai.file-dir=${hg38.variant-dir}
diff --git a/exomiser-data-genome/src/main/resources/default_sources.ini b/exomiser-data-genome/src/main/resources/default_sources.ini
index 4588af999..3d4310442 100644
--- a/exomiser-data-genome/src/main/resources/default_sources.ini
+++ b/exomiser-data-genome/src/main/resources/default_sources.ini
@@ -54,7 +54,7 @@
type=ucsc
alias=MT,M,chrM
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java
new file mode 100644
index 000000000..cf59518fc
--- /dev/null
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/VariantDatabaseBuildRunnerTest.java
@@ -0,0 +1,48 @@
+package org.monarchinitiative.exomiser.data.genome;
+
+
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
+import org.monarchinitiative.exomiser.data.genome.model.AlleleResource;
+import org.monarchinitiative.exomiser.data.genome.model.BuildInfo;
+import org.monarchinitiative.exomiser.data.genome.model.parsers.DbNsfpColumnIndex;
+import org.monarchinitiative.exomiser.data.genome.model.resource.DbNsfp4AlleleResource;
+import org.monarchinitiative.exomiser.data.genome.model.resource.SpliceAiAlleleResource;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.*;
+
+class VariantDatabaseBuildRunnerTest {
+
+ @Test
+ void testBuild(@TempDir Path tempDir) throws IOException {
+ BuildInfo buildInfo = BuildInfo.of(GenomeAssembly.HG38, "2404");
+ List alleleResources = List.of(new DbNsfp4AlleleResource("hg38.dbnsfp", null, Path.of("src/test/resources/dbNSFP4.0_test.zip"), DbNsfpColumnIndex.HG38));
+ VariantDatabaseBuildRunner instance = new VariantDatabaseBuildRunner(buildInfo, tempDir, alleleResources);
+ instance.run();
+ Path variantDatabase = instance.variantDatabasePath();
+ assertThat(Files.exists(variantDatabase), is(true));
+ assertThat(Files.size(variantDatabase), greaterThan(0L));
+ }
+
+ @Disabled("Manual testing")
+ @Test
+ void manualTestBuild() throws IOException {
+ BuildInfo buildInfo = BuildInfo.of(GenomeAssembly.HG19, "2404");
+ Path buildPath = Path.of("/data/exomiser-build/");
+ List alleleResources = List.of(new SpliceAiAlleleResource("hg19.splice-ai", null, buildPath.resolve("spliceai_max_delta_scores_sig_only.masked.snv.hg19.vcf.gz")));
+ VariantDatabaseBuildRunner instance = new VariantDatabaseBuildRunner(buildInfo, buildPath, alleleResources);
+ instance.run();
+ Path variantDatabase = instance.variantDatabasePath();
+ assertThat(Files.exists(variantDatabase), is(true));
+ assertThat(Files.size(variantDatabase), greaterThan(0L));
+ }
+
+}
\ No newline at end of file
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java
index 4c0137223..16a14dfa0 100644
--- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg19ConfigTest.java
@@ -87,6 +87,7 @@ public void testResources() {
// exac removed as this is part of gnomad
alleleResources.put("esp", instance.espAlleleResource());
alleleResources.put("dbnsfp", instance.dbnsfpAlleleResource());
+ alleleResources.put("splice-ai", instance.spliceAiAlleleResource());
Map expectedResources = alleleResources.build();
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java
index c7accf5e4..ca58e094d 100644
--- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/config/Hg38ConfigTest.java
@@ -88,6 +88,7 @@ public void testResources() {
// esp removed as this is part of gnomad since v4
alleleResources.put("dbnsfp", instance.dbnsfpAlleleResource());
// clinvar removed as this is now standalone
+ alleleResources.put("splice-ai", instance.spliceAiAlleleResource());
Map expectedResources = alleleResources.build();
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java
index 30b9281d9..d6357748b 100644
--- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/indexers/AlleleConverterTest.java
@@ -199,10 +199,14 @@ public void convertClinVar() {
@Test
public void convertClinVarConflictingInterpretationCounts() {
ClinVarData clinVarData = ClinVarData.builder()
+ .primaryInterpretation(ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
.conflictingInterpretationCounts(Map.of(ClinVarData.ClinSig.PATHOGENIC, 3, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE, 2))
.build();
ClinVar expected = ClinVar.newBuilder()
+ .setPrimaryInterpretation(ClinVar.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .setReviewStatus(ClinVar.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
.putAllClinSigCounts(Map.of("PATHOGENIC", 3, "UNCERTAIN_SIGNIFICANCE", 2))
.build();
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java
index c19fbe9cd..7c787edf1 100644
--- a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/ClinVarAlleleParserTest.java
@@ -20,13 +20,25 @@
package org.monarchinitiative.exomiser.data.genome.model.parsers;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
+import org.monarchinitiative.exomiser.core.proto.AlleleProto;
+import org.monarchinitiative.exomiser.data.genome.indexers.AlleleConverter;
import org.monarchinitiative.exomiser.data.genome.model.Allele;
-import java.util.EnumSet;
-import java.util.List;
-import java.util.Map;
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.zip.GZIPInputStream;
+
+import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ClinSig.*;
+import static org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData.ReviewStatus.*;
/**
@@ -86,8 +98,8 @@ public void testParseClnSigPathogenic() {
expected.setRsId("7105");
expected.setClinVarData(ClinVarData.builder()
.variationId("7105")
- .primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC)
- .reviewStatus(ClinVarData.ReviewStatus.PRACTICE_GUIDELINE)
+ .primaryInterpretation(PATHOGENIC)
+ .reviewStatus(PRACTICE_GUIDELINE)
.build());
assertParseLineEquals(line, List.of(expected));
@@ -100,8 +112,8 @@ public void testParseClnSigLikelyPathogenic() {
expected.setRsId("121918506");
expected.setClinVarData(ClinVarData.builder()
.variationId("374823")
- .primaryInterpretation(ClinVarData.ClinSig.LIKELY_PATHOGENIC)
- .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_SINGLE_SUBMITTER)
+ .primaryInterpretation(LIKELY_PATHOGENIC)
+ .reviewStatus(CRITERIA_PROVIDED_SINGLE_SUBMITTER)
.build());
assertParseLineEquals(line, List.of(expected));
@@ -112,7 +124,7 @@ public void testParseClnSig() {
String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Benign/Likely_benign";
Allele expected = new Allele(1, 28590, "T", "TTGG");
expected.setClinVarData(ClinVarData.builder()
- .primaryInterpretation(ClinVarData.ClinSig.BENIGN_OR_LIKELY_BENIGN)
+ .primaryInterpretation(BENIGN_OR_LIKELY_BENIGN)
.build());
assertParseLineEquals(line, List.of(expected));
@@ -120,11 +132,13 @@ public void testParseClnSig() {
@Test
public void testParseClnSigWithSecondary() {
- String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Pathogenic/Likely_pathogenic,_other,_association";
+ // as of approx. Feb 2024 ClinVar changed their delimiters from ',_' to '|' so the parser will now only recognise
+ // files released after this date.
+ String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIG=Pathogenic/Likely_pathogenic|other|association";
Allele expected = new Allele(1, 28590, "T", "TTGG");
expected.setClinVarData(ClinVarData.builder()
- .primaryInterpretation(ClinVarData.ClinSig.PATHOGENIC_OR_LIKELY_PATHOGENIC)
- .secondaryInterpretations(EnumSet.of(ClinVarData.ClinSig.OTHER, ClinVarData.ClinSig.ASSOCIATION))
+ .primaryInterpretation(PATHOGENIC_OR_LIKELY_PATHOGENIC)
+ .secondaryInterpretations(EnumSet.of(OTHER, ASSOCIATION))
.build());
assertParseLineEquals(line, List.of(expected));
@@ -144,7 +158,7 @@ public void testParseClnRevStat() {
String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNREVSTAT=criteria_provided,_conflicting_interpretations";
Allele expected = new Allele(1, 28590, "T", "TTGG");
expected.setClinVarData(ClinVarData.builder()
- .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
+ .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
.build());
assertParseLineEquals(line, List.of(expected));
@@ -155,7 +169,7 @@ public void testParseClinSigIncl() {
String line = "1 28590 . T TTGG 999 PASS ALLELEID=12345;CLNSIGINCL=424752:Pathogenic";
Allele expected = new Allele(1, 28590, "T", "TTGG");
expected.setClinVarData(ClinVarData.builder()
- .includedAlleles(Map.of("424752", ClinVarData.ClinSig.PATHOGENIC))
+ .includedAlleles(Map.of("424752", PATHOGENIC))
.build());
assertParseLineEquals(line, List.of(expected));
@@ -167,8 +181,8 @@ public void testParseClinSigInclMulti() {
Allele expected = new Allele(1, 28590, "T", "TTGG");
expected.setClinVarData(ClinVarData.builder()
.includedAlleles(Map.of(
- "424752", ClinVarData.ClinSig.PATHOGENIC,
- "15612", ClinVarData.ClinSig.OTHER
+ "424752", PATHOGENIC,
+ "15612", OTHER
))
.build());
@@ -182,9 +196,9 @@ public void testParseClinSigNoNumbers() {
expected.setRsId("25511");
expected.setClinVarData(ClinVarData.builder()
.variationId("25511")
- .reviewStatus(ClinVarData.ReviewStatus.NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT)
+ .reviewStatus(NO_INTERPRETATION_FOR_THE_SINGLE_VARIANT)
.includedAlleles(Map.of(
- "8152", ClinVarData.ClinSig.PATHOGENIC
+ "8152", PATHOGENIC
))
.build());
@@ -198,15 +212,76 @@ void testParseClinSigConf() {
expected.setRsId("369982920");
expected.setClinVarData(ClinVarData.builder()
.variationId("218256")
- .primaryInterpretation(ClinVarData.ClinSig.CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
- .reviewStatus(ClinVarData.ReviewStatus.CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
+ .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
+ // CLNSIGCONF=Pathogenic(5)|Uncertain_significance(1)
+ .conflictingInterpretationCounts(Map.of(PATHOGENIC, 5, UNCERTAIN_SIGNIFICANCE, 1))
+ .build());
+
+ assertParseLineEquals(line, List.of(expected));
+ }
+
+ /**
+ * Test for issue 560
+ */
+ @Test
+ void testParseClinSigConfClassification() {
+ String line = "11\t47352652\t506638\tA\tG\t.\t.\tALLELEID=503825;CLNDISDB=MedGen:CN230736|MedGen:CN169374;CLNDN=Cardiovascular_phenotype|not_specified;CLNHGVS=NC_000011.10:g.47352652A>G;CLNREVSTAT=criteria_provided,_conflicting_classifications;CLNSIG=Conflicting_classifications_of_pathogenicity;CLNSIGCONF=Uncertain_significance(1)|Likely_benign(1);CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA658797649;GENEINFO=MYBPC3:4607;ORIGIN=1;RS=886048383";
+ Allele expected = new Allele(11, 47352652, "A", "G");
+ expected.setRsId("886048383");
+ expected.setClinVarData(ClinVarData.builder()
+ .variationId("506638")
+ .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
// CLNSIGCONF=Pathogenic(5)|Uncertain_significance(1)
- .conflictingInterpretationCounts(Map.of(ClinVarData.ClinSig.PATHOGENIC, 5, ClinVarData.ClinSig.UNCERTAIN_SIGNIFICANCE, 1))
+ .conflictingInterpretationCounts(Map.of(UNCERTAIN_SIGNIFICANCE, 1, LIKELY_BENIGN, 1))
+ .build());
+ assertParseLineEquals(line, List.of(expected));
+ }
+
+ @Test
+ void unrecognisedClinSigCombo() {
+ String line = "X\t153763492\t100055\tT\tC\t.\t.\tAF_TGP=0.09457;ALLELEID=25399;CLNDISDB=MONDO:MONDO:0005775,MedGen:C2939465|MONDO:MONDO:0010480,MedGen:C2720289,OMIM:300908,Orphanet:466026|MeSH:D030342,MedGen:C0950123|MedGen:C3661900|MedGen:CN169374|.|MONDO:MONDO:0021024,MedGen:C1970028,OMIM:611162,Orphanet:673|MedGen:C3806712,OMIM:300910,Orphanet:391330;CLNDN=G6PD_deficiency|Anemia,_nonspherocytic_hemolytic,_due_to_G6PD_deficiency|Inborn_genetic_diseases|not_provided|not_specified|G6PD_A+|Malaria,_susceptibility_to|Bone_mineral_density_quantitative_trait_locus_18;CLNHGVS=NC_000023.10:g.153763492T>C;CLNREVSTAT=criteria_provided,_conflicting_classifications;CLNSIG=Conflicting_classifications_of_pathogenicity;CLNSIGCONF=Pathogenic(10)|Likely_pathogenic(1)|Uncertain_significance(5)|Benign(1)|Likely_benign(3);CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA120939|Genetic_Testing_Registry_(GTR):GTR000167633|OMIM:305900.0001|OMIM:305900.0002|OMIM:305900.0023;GENEINFO=G6PD:2539;MC=SO:0001583|missense_variant;ORIGIN=17;RS=1050829;CLNDISDBINCL=MONDO:MONDO:0010480,MedGen:C2720289,OMIM:300908,Orphanet:466026|.|MONDO:MONDO:0005775,MedGen:C2939465;CLNDNINCL=Anemia,_nonspherocytic_hemolytic,_due_to_G6PD_deficiency|G6PD_SANTAMARIA|G6PD_deficiency;CLNSIGINCL=10361:Likely_pathogenic/Established_risk_allele|10382:Likely_pathogenic|1065168:Pathogenic|1722597:Likely_pathogenic|1722617:Likely_pathogenic|1722622:Likely_pathogenic|1722693:Uncertain_significance|1722697:Uncertain_significance|1722698:Uncertain_significance|1722726:Likely_pathogenic|1722747:Likely_pathogenic";
+ Allele expected = new Allele(23, 153763492, "T", "C");
+ expected.setRsId("1050829");
+ Map includedAlleles = new HashMap<>();
+ includedAlleles.put("1722747", LIKELY_PATHOGENIC);
+ includedAlleles.put("1722726", LIKELY_PATHOGENIC);
+ includedAlleles.put("1722617", LIKELY_PATHOGENIC);
+ includedAlleles.put("1722622", LIKELY_PATHOGENIC);
+ includedAlleles.put("10361", LIKELY_PATHOGENIC); // CLNSIGINCL=10361:Likely_pathogenic/Established_risk_allele
+ includedAlleles.put("10382", LIKELY_PATHOGENIC);
+ includedAlleles.put("1722597", LIKELY_PATHOGENIC);
+ includedAlleles.put("1722697", UNCERTAIN_SIGNIFICANCE);
+ includedAlleles.put("1722698", UNCERTAIN_SIGNIFICANCE);
+ includedAlleles.put("1065168", PATHOGENIC);
+ includedAlleles.put("1722693", UNCERTAIN_SIGNIFICANCE);
+ expected.setClinVarData(ClinVarData.builder()
+ .variationId("100055")
+ .primaryInterpretation(CONFLICTING_PATHOGENICITY_INTERPRETATIONS)
+ .reviewStatus(CRITERIA_PROVIDED_CONFLICTING_INTERPRETATIONS)
+ .conflictingInterpretationCounts(Map.of(PATHOGENIC, 10, LIKELY_PATHOGENIC, 1, UNCERTAIN_SIGNIFICANCE, 5, LIKELY_BENIGN, 3, BENIGN, 1))
+ .includedAlleles(includedAlleles)
.build());
+ assertParseLineEquals(line, List.of(expected));
+ }
+ @Test
+ void clnSigLowPenetrance() {
+ String line = "1\t94473807\t7888\tC\tT\t.\t.\tALLELEID=22927;RS=1800553;CLNREVSTAT=criteria_provided,_multiple_submitters,_no_conflicts;CLNSIG=Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance;";
+ Allele expected = new Allele(1, 94473807, "C", "T");
+ expected.setRsId("1800553");
+ expected.setClinVarData(ClinVarData.builder()
+ .variationId("7888")
+ .primaryInterpretation(PATHOGENIC_OR_LIKELY_PATHOGENIC)
+ .reviewStatus(CRITERIA_PROVIDED_MULTIPLE_SUBMITTERS_NO_CONFLICTS)
+ .build());
assertParseLineEquals(line, List.of(expected));
}
+ // 4 6301479 215392 G C . . ALLELEID=211061;CLNDISDB=MedGen:CN517202|MONDO:MONDO:0009101,MedGen:C4551693,OMIM:222300,Orphanet:3463;CLNDN=not_provided|Wolfram_syndrome_1;CLNHGVS=NC_000004.12:g.6301479G>C;CLNREVSTAT=criteria_provided,_multiple_submitters,_no_conflicts;CLNSIG=Likely_pathogenic/Likely_risk_allele;CLNVC=single_nucleotide_variant;CLNVCSO=SO:0001483;CLNVI=ClinGen:CA322818;GENEINFO=WFS1:7466;MC=SO:0001583|missense_variant;ORIGIN=1;RS=753237278
+
+
//CLNSIGINCL=424752:Pathogenic
//CLNSIGINCL=15127:other|15128:other|15334:Pathogenic|15335:Pathogenic|15336:Pathogenic|15337:Pathogenic|15610:Pathogenic|15612:other
@@ -217,5 +292,153 @@ void testParseClinSigConf() {
//CLNVC counts: Deletion=22574, Duplication=9075, Indel=2162, Insertion=2358, Inversion=32, Microsatellite=15, Variation=9, copy_number_loss=4, single_nucleotide_variant=264009
//CLNREVSTAT counts: criteria_provided,_conflicting_interpretations=12678, criteria_provided,_multiple_submitters,_no_conflicts=34967, criteria_provided,_single_submitter=197277, no_assertion_criteria_provided=34308, no_assertion_provided=10980, no_interpretation_for_the_single_variant=500, practice_guideline=23, reviewed_by_expert_panel=8786
+
+ @Disabled("data exploration")
+ @Test
+ void testClnSigValues() {
+ Path clinVarVcfFile = Path.of("clinvar_hg38.vcf.gz");
+ var infoFieldCounts = getInfoFieldCounts(clinVarVcfFile, "CLNSIG");
+ for (Map.Entry entry : infoFieldCounts.entrySet()) {
+ System.out.println(entry);
+ }
+ //##fileDate=2024-05-09
+ //##INFO=
+ //Affects=142
+ //Affects|risk_factor=1
+ //Benign=202927
+ //Benign/Likely_benign=48319
+ //Benign/Likely_benign|association=1
+ //Benign/Likely_benign|drug_response=2
+ //Benign/Likely_benign|drug_response|other=3
+ //Benign/Likely_benign|other=9
+ //Benign/Likely_benign|other|risk_factor=1
+ //Benign/Likely_benign|risk_factor=2
+ //Benign|Affects=1
+ //Benign|association=5
+ //Benign|confers_sensitivity=2
+ //Benign|drug_response=2
+ //Benign|other=9
+ //Benign|risk_factor=3
+ //Conflicting_classifications_of_pathogenicity=125784
+ //Conflicting_classifications_of_pathogenicity|Affects=2
+ //Conflicting_classifications_of_pathogenicity|association=8
+ //Conflicting_classifications_of_pathogenicity|association|risk_factor=2
+ //Conflicting_classifications_of_pathogenicity|drug_response=2
+ //Conflicting_classifications_of_pathogenicity|drug_response|other=1
+ //Conflicting_classifications_of_pathogenicity|other=25
+ //Conflicting_classifications_of_pathogenicity|other|risk_factor=2
+ //Conflicting_classifications_of_pathogenicity|protective=1
+ //Conflicting_classifications_of_pathogenicity|risk_factor=15
+ //Likely_benign=874860
+ //Likely_benign|association=1
+ //Likely_benign|drug_response|other=4
+ //Likely_benign|other=12
+ //Likely_benign|risk_factor=1
+ //Likely_pathogenic=82058
+ //Likely_pathogenic,_low_penetrance=6
+ //Likely_pathogenic/Likely_risk_allele=17
+ //Likely_pathogenic|Affects=1
+ //Likely_pathogenic|association=5
+ //Likely_pathogenic|drug_response=21
+ //Likely_pathogenic|other=2
+ //Likely_pathogenic|protective=1
+ //Likely_pathogenic|risk_factor=10
+ //Likely_risk_allele=97
+ //Pathogenic=157595
+ //Pathogenic/Likely_pathogenic=26459
+ //Pathogenic/Likely_pathogenic/Likely_risk_allele=4
+ //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance=6
+ //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance/Established_risk_allele=1
+ //Pathogenic/Likely_pathogenic/Pathogenic,_low_penetrance|other=1
+ //Pathogenic/Likely_pathogenic|drug_response=3
+ //Pathogenic/Likely_pathogenic|other=11
+ //Pathogenic/Likely_pathogenic|risk_factor=6
+ //Pathogenic/Likely_risk_allele=9
+ //Pathogenic/Likely_risk_allele|risk_factor=1
+ //Pathogenic/Pathogenic,_low_penetrance|other=1
+ //Pathogenic/Pathogenic,_low_penetrance|other|risk_factor=1
+ //Pathogenic|Affects=6
+ //Pathogenic|association=6
+ //Pathogenic|association|protective=1
+ //Pathogenic|confers_sensitivity=1
+ //Pathogenic|drug_response=54
+ //Pathogenic|other=71
+ //Pathogenic|protective=2
+ //Pathogenic|risk_factor=22
+ //Uncertain_risk_allele=35
+ //Uncertain_risk_allele|risk_factor=1
+ //Uncertain_significance=1358995
+ //Uncertain_significance/Uncertain_risk_allele=153
+ //Uncertain_significance|association=8
+ //Uncertain_significance|drug_response=11
+ //Uncertain_significance|other=4
+ //Uncertain_significance|risk_factor=7
+ //association=341
+ //association_not_found=4
+ //association|drug_response|risk_factor=1
+ //association|risk_factor=1
+ //confers_sensitivity=11
+ //confers_sensitivity|other=1
+ //drug_response=1869
+ //drug_response|other=1
+ //drug_response|risk_factor=3
+ //no_classification_for_the_single_variant=683
+ //no_classifications_from_unflagged_records=7
+ //not_provided=9808
+ //other=1588
+ //other|risk_factor=1
+ //protective=39
+ //protective|risk_factor=4
+ //risk_factor=407
+ }
+
+ @Disabled("data exploration")
+ @Test
+ void testClnRevStatValues() {
+ Path clinVarVcfFile = Path.of("clinvar.vcf.gz");
+ var infoFieldCounts = getInfoFieldCounts(clinVarVcfFile, "CLNREVSTAT");
+ for (Map.Entry entry : infoFieldCounts.entrySet()) {
+ System.out.println(entry);
+ }
+ //##fileDate=2024-05-09
+ //##INFO=
+ //criteria_provided,_conflicting_classifications=125583
+ //criteria_provided,_multiple_submitters,_no_conflicts=425991
+ //criteria_provided,_single_submitter=2266963
+ //no_assertion_criteria_provided=48407
+ //no_classification_for_the_single_variant=683
+ //no_classification_provided=9806
+ //no_classifications_from_unflagged_records=13
+ //practice_guideline=51
+ //reviewed_by_expert_panel=16215
+ }
+
+ private Map getInfoFieldCounts(Path clinVarVcfFile, String infoKey) {
+ try (Stream lines = new BufferedReader(new InputStreamReader(new BufferedInputStream(new GZIPInputStream(Files.newInputStream(clinVarVcfFile))))).lines()) {
+ var result = lines
+ .peek(line -> {if (line.startsWith("#")) {System.out.println(line);}})
+ .filter(line -> !line.startsWith("#"))
+ .map(findInfoField(infoKey))
+ .collect(Collectors.groupingBy(Function.identity(), Collectors.counting()));
+ return new TreeMap<>(result);
+ } catch (IOException e) {
+ System.err.println(e);
+ }
+ return Map.of();
+ }
+
+ private Function findInfoField(String infoKey) {
+ return line -> {
+ var fields = line.split("\t");
+ var info= fields[7];
+ var infoFields = info.split(";");
+ for (String field : infoFields) {
+ if (field.startsWith(infoKey)) {
+ return field.split("=")[1];
+ }
+ }
+ return "";
+ };
+ }
}
diff --git a/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java
new file mode 100644
index 000000000..c2baa7285
--- /dev/null
+++ b/exomiser-data-genome/src/test/java/org/monarchinitiative/exomiser/data/genome/model/parsers/SpliceAiAlleleParserTest.java
@@ -0,0 +1,131 @@
+package org.monarchinitiative.exomiser.data.genome.model.parsers;
+
+import htsjdk.samtools.util.BlockCompressedOutputStream;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+import org.monarchinitiative.exomiser.core.genome.Contigs;
+import org.monarchinitiative.exomiser.core.proto.AlleleProto;
+import org.monarchinitiative.exomiser.data.genome.model.Allele;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.equalTo;
+
+class SpliceAiAlleleParserTest {
+
+ @Test
+ void parseNonSexChrLine() {
+ String line = "10\t12345\tA\tC\t1.0";
+ List result = new SpliceAiAlleleParser().parseLine(line);
+ Allele expected = new Allele(10, 12345, "A", "C");
+ expected.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(1f).build());
+ assertThat(result, equalTo(List.of(expected)));
+ }
+
+ @Test
+ void parseSexChrLine() {
+ String line = "X\t12345\tA\tC\t1.0";
+ List result = new SpliceAiAlleleParser().parseLine(line);
+ Allele expected = new Allele(23, 12345, "A", "C");
+ expected.addPathogenicityScore(AlleleProto.PathogenicityScore.newBuilder().setPathogenicitySource(AlleleProto.PathogenicitySource.SPLICE_AI).setScore(1f).build());
+ assertThat(result, equalTo(List.of(expected)));
+ }
+
+ /**
+ * This is a one-off method to prepare the SpliceAI data by selecting only the max score from the DL/DG/AL/AG scores
+ * >= minScore. This is recommended as >= 0.2 for high recall, increasing to 0.5 as a default and 0.8 for high-precision.
+ * confidence. Jaganathan et al, Cell 2019 and https://github.com/Illumina/SpliceAI
+ *
+ *
+ * Delta score of a variant, defined as the maximum of (DS_AG, DS_AL, DS_DG, DS_DL), ranges from 0 to 1 and can be interpreted as the probability of the variant being splice-altering. In the paper, a detailed characterization is provided for 0.2 (high recall), 0.5 (recommended), and 0.8 (high precision) cutoffs. Delta position conveys information about the location where splicing changes relative to the variant position (positive values are downstream of the variant, negative values are upstream).
+ *
+ *
+ * These annotations are free for academic and not-for-profit use; other use requires a commercial license from Illumina, Inc.
+ *
+ **/
+ @Disabled
+ @Test
+ void prepareSpliceAiData() {
+ Path dataPath = Path.of("/home/hhx640/Documents/exomiser-build/");
+ Path inFile = dataPath.resolve("spliceai_scores.masked.snv.hg19.vcf.gz");
+ Path outFile = dataPath.resolve("spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz");
+ // the ACMG guidelines for use of SpliceAI scores recommend anything under 0.1 to be scored BP4 and over 0.2 PP3
+ // so, we're using that cutoff here. This means that any SNP with no score in the database can be considered as
+ // not impacting splicing.
+ float minScore = 0.1f;
+ SpliceAiMaxDeltaScoreParser.build(inFile, outFile, minScore);
+ }
+
+ private static class SpliceAiMaxDeltaScoreParser {
+
+ private static final Logger logger = LoggerFactory.getLogger(SpliceAiMaxDeltaScoreParser.class);
+
+ public static void build(Path spliceAiInputPath, Path outputBgZipPath, float minScore) {
+ long count = 0;
+ logger.info("Parsing SpliceAI scores from {} with a maximum delta score >= {}", spliceAiInputPath.getFileName(), minScore);
+ try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new GZIPInputStream(Files.newInputStream(spliceAiInputPath))));
+ BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(new BlockCompressedOutputStream(outputBgZipPath.toFile(), 6)));
+ ) {
+ for (String line; (line = bufferedReader.readLine()) != null; ) {
+ if (line.startsWith("##")) {
+ bufferedWriter.write(line);
+ bufferedWriter.newLine();
+ continue;
+ }
+ if (line.startsWith("#")) {
+ bufferedWriter.write("#CHROM\tPOS\tREF\tALT\tSPLICE_AI_MAX_DELTA_SCORE");
+ bufferedWriter.newLine();
+ continue;
+ }
+ //##INFO=
+ //1 69092 . T A . . SpliceAI=A|OR4F5|0.00|0.00|0.22|0.00|2|41|1|23
+ String[] fields = line.split("\t");
+ if (fields.length != 8) {
+ throw new IllegalArgumentException("Expected 8 fields but found " + fields.length + " in line " + line);
+ }
+ String contig = Contigs.toString(Contigs.parseId(fields[0])); // 1..22,X,Y,MT
+ int start = Integer.parseInt(fields[1]);
+ String ref = fields[3];
+ String alt = fields[4];
+ float score = parseSpliceAiScore(fields[7]);
+ // hg38 contains some oddly named unplaced contigs which end up with a contig of 0 and this leads to
+ // failed tabix indexing, so check and remove these.
+ if (!contig.equals("0") && score >= minScore) {
+ count++;
+ if (count % 1_000_000 == 0) {
+ logger.info("Current contig {} - written {} variants. Current: {}-{}-{}-{} SpliceAI={}", contig, count, contig, start, ref, alt, score);
+ }
+ String outLine = contig + "\t" + start + "\t" + ref + "\t"+ alt + "\t" + score;
+ bufferedWriter.write(outLine);
+ bufferedWriter.newLine();
+ }
+ }
+ logger.info("Done - written {} variants", count);
+ } catch (IOException e) {
+ logger.error("Unable to write file " + outputBgZipPath, e);
+ }
+ }
+
+ static float parseSpliceAiScore(String info) {
+ String[] fields = info.split("\\|");
+ // SpliceAIv1.3 variant annotation. These include delta scores (DS) and delta positions (DP) for acceptor gain (AG),
+ // acceptor loss (AL), donor gain (DG), and donor loss (DL).
+ // Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL"
+ // SpliceAI=A|OR4F5|0.00|0.00|0.22|0.00|2|41|1|23
+ // we want the max delta score for gain or loss
+ float max = 0f;
+ for (int i = 2; i < 6; i++) {
+ max = Math.max(max, Float.parseFloat(fields[i]));
+ }
+ return max;
+ }
+
+ }
+}
\ No newline at end of file
diff --git a/exomiser-data-genome/src/test/resources/application-test.properties b/exomiser-data-genome/src/test/resources/application-test.properties
index f7154636f..a0ca8ee8c 100644
--- a/exomiser-data-genome/src/test/resources/application-test.properties
+++ b/exomiser-data-genome/src/test/resources/application-test.properties
@@ -19,6 +19,8 @@
#
spring.flyway.enabled=false
spring.h2.console.enabled=true
+logging.file.path=${build-dir}
+
#build-dir defines the main directory which will be used to build the databases.
build-dir=src/test/resources
build-version=1711
@@ -60,6 +62,11 @@ hg19.uk10k.file-url=${hg19.ensembl-variation-url}
hg19.dbnsfp.file-name=dbNSFPv3.4a.zip
hg19.dbnsfp.file-dir=${hg19.variant-dir}
hg19.dbnsfp.file-url=ftp://dbnsfp:dbnsfp@dbnsfp.softgenetics.com/
+# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg19.vcf.gz to only include variants max(delta_score) >= 1.0
+hg19.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg19.vcf.gz
+hg19.splice-ai.file-url=file://${hg19.variant-dir}
+hg19.splice-ai.file-dir=${hg19.variant-dir}
+
#hg38
hg38.variant-dir=${build-dir}/hg38/variants
hg38.variant-processed-dir=${hg38.variant-dir}/processed
@@ -101,4 +108,8 @@ hg38.uk10k.file-url=${hg38.ensembl-variation-url}
#DbNSFP contains the hg19 and hg38 coordinates in the same file
hg38.dbnsfp.file-name=${hg19.dbnsfp.file-name}
hg38.dbnsfp.file-dir=${hg19.dbnsfp.file-dir}
-hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url}
\ No newline at end of file
+hg38.dbnsfp.file-url=${hg19.dbnsfp.file-url}
+# The spliceAI file has been processed from the original spliceai_scores.masked.snv.hg38.vcf.gz to only include variants max(delta_score) >= 1.0
+hg38.splice-ai.file-name=spliceai_max_delta_scores_non_zero.masked.snv.hg38.vcf.gz
+hg38.splice-ai.file-url=file://${hg38.variant-dir}
+hg38.splice-ai.file-dir=${hg38.variant-dir}
\ No newline at end of file
diff --git a/exomiser-data-genome/src/test/resources/default_sources.ini b/exomiser-data-genome/src/test/resources/default_sources.ini
index 7561361a9..a1e6e2fd4 100644
--- a/exomiser-data-genome/src/test/resources/default_sources.ini
+++ b/exomiser-data-genome/src/test/resources/default_sources.ini
@@ -54,7 +54,7 @@
type=ucsc
alias=MT,M,chrM
chromInfo = http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/chromInfo.txt.gz
-chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/Homo_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
+chrToAccessions = https://ftp.ncbi.nlm.nih.gov/genomes/archive/old_refseq/H_sapiens/ARCHIVE/ANNOTATION_RELEASE.105/Assembled_chromosomes/chr_accessions_GRCh37.p13
chrToAccessions.format = chr_accessions
knownCanonical=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownCanonical.txt.gz
knownGene=http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/knownGene.txt.gz
diff --git a/exomiser-data-phenotype/pom.xml b/exomiser-data-phenotype/pom.xml
index 79034b9f6..8d0eab613 100644
--- a/exomiser-data-phenotype/pom.xml
+++ b/exomiser-data-phenotype/pom.xml
@@ -31,7 +31,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
diff --git a/exomiser-rest-prioritiser/pom.xml b/exomiser-rest-prioritiser/pom.xml
index 410424d28..e890622ae 100644
--- a/exomiser-rest-prioritiser/pom.xml
+++ b/exomiser-rest-prioritiser/pom.xml
@@ -27,7 +27,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
exomiser-rest-prioritiser
diff --git a/exomiser-spring-boot-autoconfigure/pom.xml b/exomiser-spring-boot-autoconfigure/pom.xml
index f7b6387dc..511a9d667 100644
--- a/exomiser-spring-boot-autoconfigure/pom.xml
+++ b/exomiser-spring-boot-autoconfigure/pom.xml
@@ -35,7 +35,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
diff --git a/exomiser-spring-boot-starter/pom.xml b/exomiser-spring-boot-starter/pom.xml
index c7c045e11..18b31bc51 100644
--- a/exomiser-spring-boot-starter/pom.xml
+++ b/exomiser-spring-boot-starter/pom.xml
@@ -28,7 +28,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
exomiser-spring-boot-starter
diff --git a/exomiser-spring-boot-test/pom.xml b/exomiser-spring-boot-test/pom.xml
index 1b4491c94..b2bc3be56 100644
--- a/exomiser-spring-boot-test/pom.xml
+++ b/exomiser-spring-boot-test/pom.xml
@@ -27,7 +27,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
exomiser-spring-boot-test
diff --git a/exomiser-web/pom.xml b/exomiser-web/pom.xml
index 5adf01eee..f628241fc 100644
--- a/exomiser-web/pom.xml
+++ b/exomiser-web/pom.xml
@@ -31,7 +31,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
diff --git a/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java b/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java
index c43e0ce86..b4e1912f9 100644
--- a/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java
+++ b/exomiser-web/src/main/java/org/monarchinitiative/exomiser/web/controller/SubmitJobController.java
@@ -79,8 +79,6 @@ public class SubmitJobController {
VariantEffect.DOWNSTREAM_GENE_VARIANT,
VariantEffect.CODING_TRANSCRIPT_INTRON_VARIANT,
VariantEffect.NON_CODING_TRANSCRIPT_INTRON_VARIANT,
- VariantEffect.SYNONYMOUS_VARIANT,
- VariantEffect.SPLICE_REGION_VARIANT,
VariantEffect.REGULATORY_REGION_VARIANT
);
diff --git a/phenix-repository/pom.xml b/phenix-repository/pom.xml
index b3bfd5633..a500b5e87 100644
--- a/phenix-repository/pom.xml
+++ b/phenix-repository/pom.xml
@@ -7,7 +7,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
phenix-repository
diff --git a/pom.xml b/pom.xml
index ad2622e3a..a8eb37c2b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,7 +24,7 @@
org.monarchinitiative.exomiser
exomiser
- 14.0.2
+ 14.1.0-ALPHA
pom
exomiser
@@ -172,7 +172,11 @@
com.zaxxer
HikariCP
-
+
+ org.duckdb
+ duckdb_jdbc
+ 1.0.0
+