diff --git a/mzLib/MzLibUtil/MzLibException.cs b/mzLib/MzLibUtil/MzLibException.cs
index 61ecc8d6b..885081433 100644
--- a/mzLib/MzLibUtil/MzLibException.cs
+++ b/mzLib/MzLibUtil/MzLibException.cs
@@ -1,8 +1,9 @@
-using System;
+#nullable enable
+using System;
namespace MzLibUtil
{
[Serializable]
- public class MzLibException(string message, Exception innerException = null)
+ public class MzLibException(string message, Exception? innerException = null) // primary constructor: message and the optional (nullable) inner exception are forwarded to the System.Exception base
: Exception(message, innerException);
}
\ No newline at end of file
diff --git a/mzLib/Omics/IBioPolymerWithSetMods.cs b/mzLib/Omics/IBioPolymerWithSetMods.cs
index 0b9926a01..12989b1f3 100644
--- a/mzLib/Omics/IBioPolymerWithSetMods.cs
+++ b/mzLib/Omics/IBioPolymerWithSetMods.cs
@@ -82,5 +82,86 @@ public static string GetBaseSequenceFromFullSequence(string fullSequence)
}
return sb.ToString();
}
+
+        /// <summary>
+        /// Parses a full sequence and returns every modification keyed by its one-based location.
+        /// Location 1 is the N-terminus; a modification written before any residue gets location 1,
+        /// and each character read outside of brackets advances the location by one.
+        /// A modification whose LocationRestriction contains "C-terminal." and whose closing bracket is
+        /// the last character of the sequence is placed at base sequence length + 2.
+        /// </summary>
+        /// <param name="fullSequence">Full sequence; each modification is written inline as <c>[Type:Id]</c></param>
+        /// <param name="allModsKnown">All known modifications, keyed by the text that follows the first ':' inside the brackets</param>
+        /// <returns>The modifications found in <paramref name="fullSequence"/>, keyed by one-based location</returns>
+        /// <exception cref="MzLibUtil.MzLibException">When a full sequence is not in the correct format or a mod is not found in the allModsKnown dictionary</exception>
+        public static Dictionary<int, Modification> GetModificationDictionaryFromFullSequence(string fullSequence,
+            Dictionary<string, Modification> allModsKnown)
+        {
+            const string parseErrorPrefix = "Error while trying to parse string into peptide: ";
+
+            var allModsOneIsNterminus = new Dictionary<int, Modification>();
+            var baseSequence = GetBaseSequenceFromFullSequence(fullSequence);
+            int currentModStart = 0;
+            int currentModificationLocation = 1;
+            // Nesting depth of '[' ... ']'. The parser is inside a modification while this is above zero,
+            // which lets modification names contain their own balanced brackets.
+            int bracketCount = 0;
+
+            for (int r = 0; r < fullSequence.Length; r++)
+            {
+                char c = fullSequence[r];
+                if (c == '[')
+                {
+                    if (bracketCount == 0)
+                    {
+                        // outermost opening bracket: the modification text starts right after it
+                        currentModStart = r + 1;
+                    }
+                    bracketCount++;
+                }
+                else if (c == ']')
+                {
+                    bracketCount--;
+                    if (bracketCount < 0)
+                    {
+                        // A stray ']' would leave the depth negative, so no later modification
+                        // could ever close at depth zero and all of them would be dropped silently.
+                        throw new MzLibUtil.MzLibException(
+                            parseErrorPrefix + "unmatched ']' at index " + r + " in " + fullSequence);
+                    }
+                    if (bracketCount > 0)
+                    {
+                        // closing a bracket nested inside the modification text; keep reading
+                        continue;
+                    }
+
+                    //remove the beginning section (e.g. "Fixed", "Variable", "Uniprot")
+                    string modString = fullSequence.Substring(currentModStart, r - currentModStart);
+                    int splitIndex = modString.IndexOf(':');
+                    if (splitIndex < 0)
+                    {
+                        throw new MzLibUtil.MzLibException(
+                            parseErrorPrefix + "modification '" + modString + "' has no ':' separating its type from its id");
+                    }
+                    string modId = modString.Substring(splitIndex + 1);
+
+                    if (!allModsKnown.TryGetValue(modId, out var mod))
+                    {
+                        throw new MzLibUtil.MzLibException(
+                            "Could not find modification while reading string: " + fullSequence);
+                    }
+                    if (mod.LocationRestriction.Contains("C-terminal.") && r == fullSequence.Length - 1)
+                    {
+                        currentModificationLocation = baseSequence.Length + 2;
+                    }
+                    if (!allModsOneIsNterminus.TryAdd(currentModificationLocation, mod))
+                    {
+                        // two annotations resolved to the same location (e.g. "[a:x][b:y]")
+                        throw new MzLibUtil.MzLibException(
+                            parseErrorPrefix + "more than one modification at location "
+                            + currentModificationLocation + " in " + fullSequence);
+                    }
+                }
+                else if (bracketCount == 0)
+                {
+                    // a character outside of any modification annotation advances the location
+                    currentModificationLocation++;
+                }
+                //else do nothing: the character is part of a modification's text
+            }
+
+            if (bracketCount != 0)
+            {
+                throw new MzLibUtil.MzLibException(
+                    parseErrorPrefix + "unmatched '[' in " + fullSequence);
+            }
+
+            return allModsOneIsNterminus;
+        }
+
+        /// <summary>
+        /// Returns a list of modifications from a full sequence
+        /// </summary>
+        /// <param name="fullSequence">Full sequence</param>
+        /// <param name="allModsKnown">All known modifications</param>
+        /// <returns>The values of the dictionary produced by <see cref="GetModificationDictionaryFromFullSequence"/>, without their locations</returns>
+        /// <exception cref="MzLibUtil.MzLibException">Propagated from <see cref="GetModificationDictionaryFromFullSequence"/> when parsing fails or a mod is not found</exception>
+        public static List<Modification> GetModificationsFromFullSequence(string fullSequence,
+            Dictionary<string, Modification> allModsKnown) => [.. GetModificationDictionaryFromFullSequence(fullSequence, allModsKnown).Values];
}
}
diff --git a/mzLib/Proteomics/AminoAcidPolymer/AminoAcidPolymer.cs b/mzLib/Proteomics/AminoAcidPolymer/AminoAcidPolymer.cs
index 1d7f1b231..1abb40e99 100644
--- a/mzLib/Proteomics/AminoAcidPolymer/AminoAcidPolymer.cs
+++ b/mzLib/Proteomics/AminoAcidPolymer/AminoAcidPolymer.cs
@@ -1103,7 +1103,7 @@ private void ParseSequence(string sequence)
{
modification = new OldSchoolChemicalFormulaModification(ChemicalFormula.ParseFormula(modString));
}
- catch (MzLibException)
+ catch (MzLibException e)
{
if (double.TryParse(modString, out double mass))
{
@@ -1111,7 +1111,7 @@ private void ParseSequence(string sequence)
}
else
{
- throw new MzLibException("Unable to correctly parse the following modification: " + modString);
+ throw new MzLibException("Unable to correctly parse the following modification: " + modString, e);
}
}
diff --git a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
index 8eb6e6bdf..aafec0a5e 100644
--- a/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
+++ b/mzLib/Proteomics/ProteolyticDigestion/PeptideWithSetModifications.cs
@@ -69,7 +69,7 @@ public PeptideWithSetModifications(string sequence, Dictionary
public void SetNonSerializedPeptideInfo(Dictionary idToMod, Dictionary accessionToProtein, DigestionParams dp)
{
- GetModsAfterDeserialization(idToMod);
+ _allModsOneIsNterminus = IBioPolymerWithSetMods.GetModificationDictionaryFromFullSequence(FullSequence, idToMod); // re-parse FullSequence against idToMod to rebuild the location -> modification map
GetProteinAfterDeserialization(accessionToProtein);
_digestionParams = dp;
}
@@ -919,66 +919,6 @@ public void SetNonSerializedPeptideInfo(Dictionary idToMod
Dictionary accessionToProtein, IDigestionParams dp) =>
SetNonSerializedPeptideInfo(idToMod, accessionToProtein, (DigestionParams)dp);
- private void GetModsAfterDeserialization(Dictionary idToMod)
- {
- _allModsOneIsNterminus = new Dictionary();
- int currentModStart = 0;
- int currentModificationLocation = 1;
- bool currentlyReadingMod = false;
- int bracketCount = 0;
-
- for (int r = 0; r < FullSequence.Length; r++)
- {
- char c = FullSequence[r];
- if (c == '[')
- {
- currentlyReadingMod = true;
- if (bracketCount == 0)
- {
- currentModStart = r + 1;
- }
- bracketCount++;
- }
- else if (c == ']')
- {
- string modId = null;
- bracketCount--;
- if (bracketCount == 0)
- {
- try
- {
- //remove the beginning section (e.g. "Fixed", "Variable", "Uniprot")
- string modString = FullSequence.Substring(currentModStart, r - currentModStart);
- int splitIndex = modString.IndexOf(':');
- string modType = modString.Substring(0, splitIndex);
- modId = modString.Substring(splitIndex + 1, modString.Length - splitIndex - 1);
- }
- catch (Exception e)
- {
- throw new MzLibUtil.MzLibException(
- "Error while trying to parse string into peptide: " + e.Message);
- }
- if (!idToMod.TryGetValue(modId, out Modification mod))
- {
- throw new MzLibUtil.MzLibException(
- "Could not find modification while reading string: " + FullSequence);
- }
- if (mod.LocationRestriction.Contains("C-terminal.") && r == FullSequence.Length - 1)
- {
- currentModificationLocation = BaseSequence.Length + 2;
- }
- _allModsOneIsNterminus.Add(currentModificationLocation, mod);
- currentlyReadingMod = false;
- }
- }
- else if (!currentlyReadingMod)
- {
- currentModificationLocation++;
- }
- //else do nothing
- }
- }
-
private void GetProteinAfterDeserialization(Dictionary idToProtein)
{
Protein protein = null;
diff --git a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs
index 62a720c63..709b391ba 100644
--- a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs
+++ b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs
@@ -28,7 +28,7 @@ public static List ReadTsv(string filePath, out List un = new Dictionary();
+ var psiModDeserialized = Loaders.LoadPsiMod(Path.Combine(TestContext.CurrentContext.TestDirectory, "PSI-MOD.obo2.xml"));
+ Dictionary formalChargesDictionary = Loaders.GetFormalChargesDictionary(psiModDeserialized);
+ List UniProtPtms = Loaders.LoadUniprot(Path.Combine(TestContext.CurrentContext.TestDirectory, "ptmlist2.txt"),
+ formalChargesDictionary).ToList();
+ List proteins = ProteinDbLoader.LoadProteinXML(Path.Combine(TestContext.CurrentContext.TestDirectory, "DatabaseTests", "cRAP_databaseGPTMD.xml"),
+ true, DecoyType.None, UniProtPtms, false, new string[] { "exclude_me" }, out un);
+ var allKnownModDict = UniProtPtms.ToDictionary(p => p.IdWithMotif, p => p);
+ var digestionParameters = new DigestionParams(maxModsForPeptides: 3);
+
+ foreach (Protein p in proteins)
+ {
+ List digestedPeptides =
+ p.Digest(digestionParameters, [], [], null, null).ToList();
+ // take the most modified peptide by base sequence and ensure all methods function properly
+ foreach (var targetPeptide in digestedPeptides
+ .Where(pep => pep.FullSequence.Contains('['))
+ .GroupBy(pep => pep.BaseSequence)
+ .Select(pepGroup => pepGroup.MaxBy(pep => pep.AllModsOneIsNterminus.Count)))
+ {
+ var startResidue = targetPeptide.OneBasedStartResidue;
+ var endResidue = targetPeptide.OneBasedEndResidue;
+
+ // Pull our expected modifications based upon parent protein object with a maximum value of DigestionParameters.MaxMods
+ // A bunch of logic to count the number of expected modifications based upon the xml database entries
+ int expectedModCount = 0;
+ foreach (var modDictEntry in p.OneBasedPossibleLocalizedModifications
+ .Where(mod => mod.Key >= startResidue && mod.Key <= endResidue))
+ {
+ if (modDictEntry.Value.Count > 1)
+ {
+ var locRestrictions = modDictEntry.Value.Select(mod => mod.LocationRestriction).ToList();
+
+ if (locRestrictions.AllSame())
+ {
+ if (locRestrictions.First() == "Anywhere.")
+ expectedModCount++;
+ else if (locRestrictions.First() == "N-terminal." && modDictEntry.Key == startResidue)
+ expectedModCount++;
+ }
+ else if (modDictEntry.Value.Select(mod => mod.LocationRestriction).Contains("Anywhere.")
+ && modDictEntry.Value.Select(mod => mod.LocationRestriction)
+ .Contains("N-terminal."))
+ {
+ expectedModCount++;
+ if (modDictEntry.Key == startResidue)
+ expectedModCount++;
+ }
+ }
+ else
+ {
+ switch (modDictEntry.Value.First().LocationRestriction)
+ {
+ case "Anywhere.":
+ case "N-terminal." when modDictEntry.Key == startResidue:
+ expectedModCount++;
+ break;
+ }
+ }
+ }
+
+ expectedModCount = Math.Min(expectedModCount, digestionParameters.MaxMods);
+
+ var expectedModifications = p.OneBasedPossibleLocalizedModifications.Where(mod =>
+ mod.Key >= startResidue &&
+ mod.Key <= endResidue).SelectMany(mod => mod.Value).ToList();
+
+ // Parse modifications from PWSM and two IBioPolymerWithSetMods methods
+ var pwsmModDict = targetPeptide.AllModsOneIsNterminus;
+ var bpwsmModDict = IBioPolymerWithSetMods.GetModificationDictionaryFromFullSequence(targetPeptide.FullSequence, allKnownModDict);
+ var bpwsmModList = IBioPolymerWithSetMods.GetModificationsFromFullSequence(targetPeptide.FullSequence, allKnownModDict);
+
+ // Ensure all methods are in agreement by modification count
+ Assert.AreEqual(pwsmModDict.Count, expectedModCount);
+ Assert.AreEqual(bpwsmModDict.Count, expectedModCount);
+ Assert.AreEqual(bpwsmModList.Count, expectedModCount);
+
+ // Ensure all methods are in agreement by modification identity
+ foreach (var pwsmModification in pwsmModDict.Values)
+ Assert.Contains(pwsmModification, expectedModifications);
+ foreach (var pwsmModification in bpwsmModDict.Values)
+ Assert.Contains(pwsmModification, expectedModifications);
+ foreach (var pwsmModification in bpwsmModList)
+ Assert.Contains(pwsmModification, expectedModifications);
+ }
+ }
+ }
}
}
\ No newline at end of file