From 0e42411746fb3b94fe5558af9fd01256d09e44ab Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 14:34:44 -0500 Subject: [PATCH 01/10] added glycan localization level numberes --- mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs b/mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs index d12e50543..d2415cc9d 100644 --- a/mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs +++ b/mzLib/Proteomics/PSM/PsmTsvHeader_Glyco.cs @@ -2,10 +2,10 @@ { public enum LocalizationLevel { - Level1, - Level1b, - Level2, - Level3 + Level1 = 0, + Level1b = 1, + Level2 = 2, + Level3 = 3 } public class PsmTsvHeader_Glyco { From ed3f30d30a968025b15d0483aa8bf01fe50ab609 Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 14:39:22 -0500 Subject: [PATCH 02/10] added virtual to library spectrum --- mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs | 2 +- mzLib/mzLib.nuspec | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs b/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs index f68f4ef4c..1c4458cbd 100644 --- a/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs +++ b/mzLib/Omics/SpectrumMatch/LibrarySpectrum.cs @@ -15,7 +15,7 @@ public class LibrarySpectrum : MzSpectrum public List MatchedFragmentIons { get; set; } public bool IsDecoy { get; set; } - public string Name + public virtual string Name { get { return Sequence + "/" + ChargeState; } } diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index 626ecf12f..42685799d 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 1.0.547 + 1.0.548 mzLib Stef S. Stef S. 
@@ -81,4 +81,4 @@ - + \ No newline at end of file From f24ddef436e0f02c61c0b829320b7d2f6faf1377 Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 14:46:03 -0500 Subject: [PATCH 03/10] removed localization level in spectrumamtchfromtsv header --- mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs | 8 -------- mzLib/mzLib.nuspec | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs index fefd5d06a..136a2916d 100644 --- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs +++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs @@ -1,13 +1,5 @@ namespace Omics.SpectrumMatch { - //for glcyo - public enum LocalizationLevel - { - Level1, - Level1b, - Level2, - Level3 - } public class SpectrumMatchFromTsvHeader { // File and scan information diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index 42685799d..d3fa750cb 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 1.0.548 + 5.0.735 mzLib Stef S. Stef S. 
From 351e9f03e718227af6861c5fbd31de951b477d9e Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 17:44:43 -0500 Subject: [PATCH 04/10] adder intralink support --- mzLib/Readers/Readers.csproj | 1 + mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs | 6 ++++-- mzLib/Readers/Util/SupportedFileTypes.cs | 6 +++++- .../Test/FileReadingTests/SearchResults/XL_Intralinks.tsv | 7 +++++++ mzLib/Test/FileReadingTests/TestPsmFromTsv.cs | 2 ++ mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs | 2 ++ mzLib/Test/Test.csproj | 3 +++ 7 files changed, 24 insertions(+), 3 deletions(-) create mode 100644 mzLib/Test/FileReadingTests/SearchResults/XL_Intralinks.tsv diff --git a/mzLib/Readers/Readers.csproj b/mzLib/Readers/Readers.csproj index 076ed9e40..b81a8e2b0 100644 --- a/mzLib/Readers/Readers.csproj +++ b/mzLib/Readers/Readers.csproj @@ -17,6 +17,7 @@ + diff --git a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs index 62a720c63..659fe413a 100644 --- a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs +++ b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs @@ -54,10 +54,12 @@ public static List ReadTsv(string filePath, out List ".mgf", SupportedFileType.BrukerD => ".d", SupportedFileType.psmtsv => ".psmtsv", + SupportedFileType.IntralinkResults => "Intralinks.tsv", //SupportedFileType.osmtsv => ".osmtsv", SupportedFileType.ToppicPrsm => "_prsm.tsv", SupportedFileType.ToppicPrsmSingle => "_prsm_single.tsv", @@ -116,6 +118,8 @@ public static SupportedFileType ParseFileType(this string filePath) return SupportedFileType.MsPathFinderTDecoys; if (filePath.EndsWith(SupportedFileType.MsPathFinderTAllResults.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) return SupportedFileType.MsPathFinderTAllResults; + if (filePath.EndsWith(SupportedFileType.IntralinkResults.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) + return SupportedFileType.IntralinkResults; // these 
tsv cases are just .tsv and need an extra step to determine the type // currently need to distinguish between FlashDeconvTsv and MsFraggerPsm diff --git a/mzLib/Test/FileReadingTests/SearchResults/XL_Intralinks.tsv b/mzLib/Test/FileReadingTests/SearchResults/XL_Intralinks.tsv new file mode 100644 index 000000000..2adc7e829 --- /dev/null +++ b/mzLib/Test/FileReadingTests/SearchResults/XL_Intralinks.tsv @@ -0,0 +1,7 @@ +File Name Scan Number Precursor Scan Number Precursor MZ Precursor Charge Precursor Mass Cross Type Link Residues Peptide Info --> Protein Accession Protein Link Site Base Sequence Full Sequence Peptide Monoisotopic Mass Score Matched Ion Series Matched Ion Mass-To-Charge Ratios Matched Ion Mass Diff (Da) Matched Ion Mass Diff (Ppm) Matched Ion Intensities Matched Ion Counts Beta Peptide Info --> Beta Peptide Protein Accession Beta Peptide Protein LinkSite Beta Peptide Base Sequence Beta Peptide Full Sequence Beta Peptide Theoretical Mass Beta Peptide Score Beta Peptide Matched Ions Beta Peptide Matched Ion Mass-To-Charge Ratios Beta Peptide Matched Ion Mass Diff (Da) Beta Peptide Matched Ion Mass Diff (Ppm) Beta Peptide Matched Ion Intensities Beta Peptide Matched Ion Counts Summary Info --> XL Total Score Mass Diff (Da) AlphaIndexingRank Parent Ions ParentIonsNum AlphaParentIonMaxIntensityRank BetaParentIonMaxIntensityRank Decoy/Contaminant/Target QValue PEP PEP_QValue +C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 13 12 814.2442253405868 5 4066.184744368539 Intra K;K P62399 69 LLDNAAADLAAISGQKPLITKAR LLDNAAADLAAISGQKPLITKAR(21) 2349.337993761 29.169323073176578 [y1+1, y4+1, y7+1, y7+1, y8+1, y8+1, y10+1, y10+1, y11+1, y11+1, y12+1, y13+1, y13+1, y14+1, y14+2, y15+2, y16+2, y17+2, y17+2, y18+2, y18+2, y19+2, y21+2, y21+2];[b2+1, b3+1, b4+1, b5+1, b6+1] [y1+1:175.11894, y4+1:529.31299, y7+1:852.53026, y7+1:884.49713, y8+1:980.62671, y8+1:1012.60724, 
y10+1:1165.69849, y10+1:1197.67297, y11+1:1252.73770, y11+1:1284.71973, y12+1:1365.81321, y13+1:1436.86389, y13+1:1468.81409, y14+1:1507.86951, y14+2:770.43798, y15+2:810.99864, y16+2:868.50718, y17+2:904.02582, y17+2:920.01357, y18+2:939.54368, y18+2:955.52997, y19+2:975.06372, y21+2:1089.59561, y21+2:1105.58327];[b2+1:227.17535, b3+1:342.20227, b4+1:456.24362, b5+1:527.28223, b6+1:598.31659] [y1+1:-0.00001, y4+1:0.00372, y7+1:0.00010, y7+1:-0.00510, y8+1:0.00159, y8+1:0.01004, y10+1:-0.00668, y10+1:-0.00426, y11+1:0.00051, y11+1:0.01046, y12+1:-0.00805, y13+1:0.00552, y13+1:-0.01636, y14+1:-0.02598, y14+2:0.00112, y15+2:0.01045, y16+2:0.00059, y17+2:0.00076, y17+2:0.00418, y18+2:-0.00064, y18+2:-0.00013, y19+2:0.00233, y21+2:-0.00375, y21+2:-0.00052];[b2+1:-0.00006, b3+1:-0.00008, b4+1:-0.00165, b5+1:-0.00016, b6+1:-0.00291] [y1+1:-0.06, y4+1:7.04, y7+1:0.12, y7+1:-5.78, y8+1:1.62, y8+1:9.93, y10+1:-5.73, y10+1:-3.56, y11+1:0.41, y11+1:8.15, y12+1:-5.89, y13+1:3.85, y13+1:-11.14, y14+1:-17.24, y14+2:0.72, y15+2:6.45, y16+2:0.34, y17+2:0.42, y17+2:2.27, y18+2:-0.34, y18+2:-0.07, y19+2:1.20, y21+2:-1.72, y21+2:-0.23];[b2+1:-0.26, b3+1:-0.23, b4+1:-3.63, b5+1:-0.31, b6+1:-4.88] [y1+1:33225, y4+1:38394, y7+1:166030, y7+1:131941, y8+1:36154, y8+1:27795, y10+1:47857, y10+1:33075, y11+1:106378, y11+1:71289, y12+1:41232, y13+1:39860, y13+1:21336, y14+1:23503, y14+2:41531, y15+2:69178, y16+2:123224, y17+2:193930, y17+2:82412, y18+2:244088, y18+2:120438, y19+2:72031, y21+2:171274, y21+2:76124];[b2+1:167093, b3+1:80372, b4+1:25267, b5+1:86956, b6+1:23956] 29 P62399 47 ITLNMGVGEAIADKK ITLNMGVGEAIADKK(14) 1558.838878688 29.126828994478604 [y1+1, y2+1, y2+1, y3+1, y3+1, y4+1, y4+1, y5+1, y5+1, y6+1, y6+1, y8+1, y8+1, y9+1, y9+1, y10+1, y10+1, y11+1, y11+1, y12+1, y12+1, y13+1, y13+1, y14+1, y14+1];[b2+1, b5+1];[M15+1, M15+2] [y1+1:147.11284, y2+1:329.21930, y2+1:361.18985, y3+1:444.24649, y3+1:476.21674, y4+1:515.28143, y4+1:547.25348, y5+1:628.36328, y5+1:660.33710, 
y6+1:699.40881, y6+1:731.37738, y8+1:885.47717, y8+1:917.43610, y9+1:984.53998, y9+1:1016.50183, y10+1:1041.55862, y10+1:1073.52808, y11+1:1172.59192, y11+1:1204.56875, y12+1:1286.64578, y12+1:1318.61387, y13+1:1399.73218, y13+1:1431.69682, y14+1:1500.77225, y14+1:1532.75964];[b2+1:215.13914, b5+1:573.30548];[M15+1:1613.87341, M15+2:823.41936] [y1+1:0.00003, y2+1:0.00097, y2+1:-0.00055, y3+1:0.00122, y3+1:-0.00061, y4+1:-0.00095, y4+1:-0.00098, y5+1:-0.00317, y5+1:-0.00143, y6+1:0.00525, y6+1:0.00174, y8+1:0.00955, y8+1:-0.00360, y9+1:0.00395, y9+1:-0.00628, y10+1:0.00112, y10+1:-0.00150, y11+1:-0.00606, y11+1:-0.00130, y12+1:0.00487, y12+1:0.00089, y13+1:0.00721, y13+1:-0.00022, y14+1:-0.00040, y14+1:0.01492];[b2+1:0.00012, b5+1:-0.00101];[M15+1:0.01670, M15+2:0.00266] [y1+1:0.24, y2+1:2.96, y2+1:-1.53, y3+1:2.75, y3+1:-1.28, y4+1:-1.85, y4+1:-1.79, y5+1:-5.05, y5+1:-2.16, y6+1:7.52, y6+1:2.39, y8+1:10.80, y8+1:-3.93, y9+1:4.01, y9+1:-6.18, y10+1:1.08, y10+1:-1.39, y11+1:-5.17, y11+1:-1.08, y12+1:3.79, y12+1:0.67, y13+1:5.15, y13+1:-0.16, y14+1:-0.27, y14+1:9.74];[b2+1:0.55, b5+1:-1.77];[M15+1:10.35, M15+2:1.62] [y1+1:21722, y2+1:38487, y2+1:64403, y3+1:22145, y3+1:25565, y4+1:46929, y4+1:56360, y5+1:22780, y5+1:28692, y6+1:40853, y6+1:35980, y8+1:142248, y8+1:108807, y9+1:25118, y9+1:25294, y10+1:123048, y10+1:145438, y11+1:58818, y11+1:66025, y12+1:98161, y12+1:83413, y13+1:80308, y13+1:78009, y14+1:33858, y14+1:29881];[b2+1:206883, b5+1:21241];[M15+1:26033, M15+2:38150] 29 58.296152067655186 158.0078719195394 27 0;2 2 - 39 T 0 NaN NaN +C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 28 23 1084.1672036127347 5 5415.799635729279 Intra K;K P0AG55 134 GNVINLSLGFSHPVDHQLPAGITAECPTQTEIVLKGADK GNVINLSLGFSHPVDHQLPAGITAEC[Common Fixed:Carbamidomethyl on C]PTQTEIVLKGADK(35) 4126.115728742 33.15043652341575 [y1+1, y2+1, y3+1, y4+1, y5+1, y5+1, y6+1, y6+1, y7+1, y7+1, y8+1, 
y8+1, y9+1, y9+1, y10+1, y10+1, y11+1, y11+1, y13+1, y13+1, y14+1, y14+1];[b2+1, b3+1, b4+1, b5+1, b6+1, b12+1, b14+1, b17+2, b18+2, b21+2, b25+2] [y1+1:147.11241, y2+1:262.13977, y3+1:333.17648, y4+1:390.19775, y5+1:572.30359, y5+1:604.27533, y6+1:685.38922, y6+1:717.36584, y7+1:784.45807, y7+1:816.43065, y8+1:897.54584, y8+1:929.51123, y9+1:1026.58035, y9+1:1058.55078, y10+1:1127.62377, y10+1:1159.60073, y11+1:1255.69104, y11+1:1287.65793, y13+1:1453.78977, y13+1:1485.76018, y14+1:1613.82542, y14+1:1645.79053];[b2+1:172.07158, b3+1:271.13966, b4+1:384.22473, b5+1:498.26654, b6+1:611.35284, b12+1:1239.66736, b14+1:1435.76085, b17+2:908.46129, b18+2:965.00287, b21+2:1077.55606, b25+2:1284.66319] [y1+1:-0.00039, y2+1:0.00002, y3+1:-0.00038, y4+1:-0.00058, y5+1:-0.00026, y5+1:-0.00059, y6+1:0.00131, y6+1:0.00586, y7+1:0.00174, y7+1:0.00225, y8+1:0.00545, y8+1:-0.00123, y9+1:-0.00264, y9+1:-0.00428, y10+1:-0.00689, y10+1:-0.00201, y11+1:0.00180, y11+1:-0.00339, y13+1:0.00009, y13+1:-0.00158, y14+1:0.00509, y14+1:-0.00188];[b2+1:-0.00009, b3+1:-0.00042, b4+1:0.00059, b5+1:-0.00053, b6+1:0.00171, b12+1:0.01931, b14+1:-0.00838, b17+2:0.00165, b18+2:0.00074, b21+2:-0.00422, b25+2:-0.00142] [y1+1:-2.69, y2+1:0.09, y3+1:-1.14, y4+1:-1.48, y5+1:-0.46, y5+1:-0.98, y6+1:1.91, y6+1:8.18, y7+1:2.23, y7+1:2.76, y8+1:6.08, y8+1:-1.33, y9+1:-2.57, y9+1:-4.04, y10+1:-6.11, y10+1:-1.73, y11+1:1.44, y11+1:-2.63, y13+1:0.06, y13+1:-1.06, y14+1:3.16, y14+1:-1.14];[b2+1:-0.52, b3+1:-1.54, b4+1:1.53, b5+1:-1.07, b6+1:2.80, b12+1:15.59, b14+1:-5.84, b17+2:0.91, b18+2:0.38, b21+2:-1.96, b25+2:-0.55] [y1+1:18356, y2+1:21066, y3+1:14448, y4+1:126868, y5+1:40753, y5+1:37258, y6+1:41476, y6+1:42567, y7+1:35688, y7+1:30814, y8+1:31066, y8+1:27161, y9+1:30150, y9+1:18243, y10+1:54474, y10+1:40244, y11+1:15273, y11+1:12609, y13+1:55269, y13+1:45169, y14+1:14049, y14+1:11369];[b2+1:193557, b3+1:211700, b4+1:36208, b5+1:23370, b6+1:32397, b12+1:50968, b14+1:13321, b17+2:20184, b18+2:44882, 
b21+2:21506, b25+2:39454] 33 P0AG55 86 KLQLVGVGYR KLQLVGVGYR(1) 1131.676427989 16.121691012123826 [y1+1, y3+1, y4+1, y5+1, y6+1, y7+1, y8+1, y9+1];[b2+1, b2+1, b3+1, b3+1, b4+1, b5+1];[M10+1, M10+1] [y1+1:175.11893, y3+1:395.20447, y4+1:494.27682, y5+1:551.29364, y6+1:650.36253, y7+1:763.44794, y8+1:891.50470, y9+1:1004.59125];[b2+1:296.19656, b2+1:328.16858, b3+1:424.25467, b3+1:456.22659, b4+1:537.34149, b5+1:636.41693];[M10+1:1186.69480, M10+1:1218.66543] [y1+1:-0.00003, y3+1:0.00072, y4+1:0.00467, y5+1:0.00002, y6+1:0.00050, y7+1:0.00184, y8+1:0.00002, y9+1:0.00251];[b2+1:-0.00030, b2+1:-0.00036, b3+1:-0.00077, b3+1:-0.00092, b4+1:0.00199, b5+1:0.00901];[M10+1:0.00053, M10+1:-0.00091] [y1+1:-0.14, y3+1:1.83, y4+1:9.46, y5+1:0.03, y6+1:0.77, y7+1:2.41, y8+1:0.02, y9+1:2.50];[b2+1:-1.02, b2+1:-1.10, b3+1:-1.82, b3+1:-2.03, b4+1:3.70, b5+1:14.18];[M10+1:0.45, M10+1:-0.75] [y1+1:20203, y3+1:45618, y4+1:13754, y5+1:147463, y6+1:85195, y7+1:79095, y8+1:133920, y9+1:20558];[b2+1:64963, b2+1:20745, b3+1:47077, b3+1:23057, b4+1:18911, b5+1:13653];[M10+1:354422, M10+1:85849] 16 49.272127535539575 158.0074789982782 1 0;2 2 - 1 T 0 NaN NaN +C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 43 34 1354.956639502793 4 5415.797452143656 Intra K;K P0AG55 134 GNVINLSLGFSHPVDHQLPAGITAECPTQTEIVLKGADK GNVINLSLGFSHPVDHQLPAGITAEC[Common Fixed:Carbamidomethyl on C]PTQTEIVLKGADK(35) 4126.115728742 33.15257139640505 [y1+1, y2+1, y4+1, y5+1, y5+1, y6+1, y6+1, y7+1, y7+1, y8+1, y10+1, y11+1, y12+1, y13+1, y13+1, y14+1, y14+1, y17+1, y17+1, y31+2, y33+2];[b2+1, b3+1, b4+1, b5+1, b6+1, b7+1, b8+1, b12+1, b14+1, b15+1, b17+2, b18+2] [y1+1:147.11313, y2+1:262.13937, y4+1:390.19834, y5+1:572.30579, y5+1:604.27258, y6+1:685.39026, y6+1:717.36273, y7+1:784.46301, y7+1:816.43219, y8+1:897.53223, y10+1:1159.59424, y11+1:1287.64099, y12+1:1388.70512, y13+1:1453.78772, y13+1:1485.74756, y14+1:1613.82678, 
y14+1:1645.80371, y17+1:1914.92432, y17+1:1946.89417, y31+2:1685.82558, y33+2:1785.88229];[b2+1:172.07152, b3+1:271.13977, b4+1:384.22446, b5+1:498.26614, b6+1:611.35291, b7+1:698.37854, b8+1:811.45807, b12+1:1239.67212, b14+1:1435.75635, b15+1:1550.77061, b17+2:908.45964, b18+2:965.00365] [y1+1:0.00032, y2+1:-0.00037, y4+1:0.00002, y5+1:0.00194, y5+1:-0.00334, y6+1:0.00235, y6+1:0.00275, y7+1:0.00669, y7+1:0.00379, y8+1:-0.00816, y10+1:-0.00850, y11+1:-0.02032, y12+1:-0.00387, y13+1:-0.00196, y13+1:-0.01420, y14+1:0.00645, y14+1:0.01131, y17+1:-0.02340, y17+1:-0.02562, y31+2:-0.02972, y33+2:-0.03240];[b2+1:-0.00015, b3+1:-0.00031, b4+1:0.00031, b5+1:-0.00093, b6+1:0.00177, b7+1:-0.00463, b8+1:-0.00916, b12+1:0.02407, b14+1:-0.01288, b15+1:-0.02556, b17+2:-0.00165, b18+2:0.00231] [y1+1:2.22, y2+1:-1.43, y4+1:0.05, y5+1:3.39, y5+1:-5.54, y6+1:3.43, y6+1:3.83, y7+1:8.54, y7+1:4.65, y8+1:-9.11, y10+1:-7.33, y11+1:-15.79, y12+1:-2.79, y13+1:-1.35, y13+1:-9.56, y14+1:4.00, y14+1:6.87, y17+1:-12.23, y17+1:-13.17, y31+2:-8.82, y33+2:-9.08];[b2+1:-0.88, b3+1:-1.15, b4+1:0.81, b5+1:-1.87, b6+1:2.90, b7+1:-6.63, b8+1:-11.30, b12+1:19.43, b14+1:-8.98, b15+1:-16.49, b17+2:-0.91, b18+2:1.20] [y1+1:2173, y2+1:2972, y4+1:15793, y5+1:6849, y5+1:4887, y6+1:6797, y6+1:5165, y7+1:3977, y7+1:3092, y8+1:2123, y10+1:2168, y11+1:1787, y12+1:1574, y13+1:8405, y13+1:5930, y14+1:4425, y14+1:3775, y17+1:2756, y17+1:1708, y31+2:3596, y33+2:3140];[b2+1:20613, b3+1:28129, b4+1:6155, b5+1:5077, b6+1:5181, b7+1:1944, b8+1:1572, b12+1:11832, b14+1:1516, b15+1:2291, b17+2:5168, b18+2:9011] 33 P0AG55 86 KLQLVGVGYR KLQLVGVGYR(1) 1131.676427989 7.136792208777973 [y1+1, y3+1, y5+1, y8+1];[b2+1];[M10+1, M10+1] [y1+1:175.11925, y3+1:395.20401, y5+1:551.29877, y8+1:891.49396];[b2+1:296.19684];[M10+1:1186.69402, M10+1:1218.66714] [y1+1:0.00030, y3+1:0.00027, y5+1:0.00515, y8+1:-0.01072];[b2+1:-0.00003];[M10+1:-0.00024, M10+1:0.00080] [y1+1:1.70, y3+1:0.67, y5+1:9.35, y8+1:-12.04];[b2+1:-0.08];[M10+1:-0.20, 
M10+1:0.66] [y1+1:2475, y3+1:1832, y5+1:3073, y8+1:1734];[b2+1:1682];[M10+1:121491, M10+1:39484] 7 40.28936360518303 158.00529541265587 1 0;2 2 - 3 T 0 NaN NaN +C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 27 23 903.6405490884255 6 5415.799635729279 Intra K;K P0AG55 134 GNVINLSLGFSHPVDHQLPAGITAECPTQTEIVLKGADK GNVINLSLGFSHPVDHQLPAGITAEC[Common Fixed:Carbamidomethyl on C]PTQTEIVLKGADK(35) 4126.115728742 24.126683253963822 [y1+1, y2+1, y3+1, y4+1, y5+1, y5+1, y6+1, y7+1, y7+1, y8+1, y8+1, y9+1, y9+1, y10+1, y10+1, y11+1, y11+1, y13+1, y13+1, y14+2];[b2+1, b3+1, b4+1, b6+1] [y1+1:147.11278, y2+1:262.14023, y3+1:333.17590, y4+1:390.19732, y5+1:572.30322, y5+1:604.27552, y6+1:685.38910, y7+1:784.46252, y7+1:816.43109, y8+1:897.54047, y8+1:929.51474, y9+1:1026.58535, y9+1:1058.55688, y10+1:1127.62061, y10+1:1159.60266, y11+1:1255.68005, y11+1:1287.65918, y13+1:1453.78932, y13+1:1485.76611, y14+2:807.41543];[b2+1:172.07167, b3+1:271.13977, b4+1:384.22406, b6+1:611.34302] [y1+1:-0.00003, y2+1:0.00048, y3+1:-0.00096, y4+1:-0.00100, y5+1:-0.00063, y5+1:-0.00040, y6+1:0.00119, y7+1:0.00620, y7+1:0.00269, y8+1:0.00008, y8+1:0.00228, y9+1:0.00237, y9+1:0.00183, y10+1:-0.01006, y10+1:-0.00008, y11+1:-0.00918, y11+1:-0.00213, y13+1:-0.00036, y13+1:0.00436, y14+2:0.00325];[b2+1:0.00000, b3+1:-0.00031, b4+1:-0.00009, b6+1:-0.00812] [y1+1:-0.18, y2+1:1.84, y3+1:-2.88, y4+1:-2.58, y5+1:-1.09, y5+1:-0.66, y6+1:1.74, y7+1:7.91, y7+1:3.30, y8+1:0.09, y8+1:2.45, y9+1:2.31, y9+1:1.73, y10+1:-8.93, y10+1:-0.06, y11+1:-7.32, y11+1:-1.66, y13+1:-0.25, y13+1:2.93, y14+2:2.01];[b2+1:0.02, b3+1:-1.15, b4+1:-0.22, b6+1:-13.30] [y1+1:15375, y2+1:19547, y3+1:11896, y4+1:94285, y5+1:30408, y5+1:33765, y6+1:34318, y7+1:32010, y7+1:35654, y8+1:25188, y8+1:26826, y9+1:20595, y9+1:20744, y10+1:45958, y10+1:37701, y11+1:17083, y11+1:14519, y13+1:34308, y13+1:36039, y14+2:21331];[b2+1:297776, b3+1:184639, 
b4+1:26539, b6+1:11787] 24 P0AG55 86 KLQLVGVGYR KLQLVGVGYR(1) 1131.676427989 15.112740044577905 [y1+1, y3+1, y4+1, y5+1, y6+1, y7+1, y8+1, y9+1];[b2+1, b2+1, b3+1, b3+1, b4+1];[M10+1, M10+1] [y1+1:175.11873, y3+1:395.20248, y4+1:494.27310, y5+1:551.29425, y6+1:650.36171, y7+1:763.44788, y8+1:891.50525, y9+1:1004.58844];[b2+1:296.19714, b2+1:328.16907, b3+1:424.25446, b3+1:456.22751, b4+1:537.34222];[M10+1:1186.69434, M10+1:1218.66241] [y1+1:-0.00022, y3+1:-0.00126, y4+1:0.00094, y5+1:0.00063, y6+1:-0.00033, y7+1:0.00178, y8+1:0.00057, y9+1:-0.00030];[b2+1:0.00028, b2+1:0.00013, b3+1:-0.00099, b3+1:-0.00001, b4+1:0.00272];[M10+1:0.00007, M10+1:-0.00393] [y1+1:-1.28, y3+1:-3.20, y4+1:1.91, y5+1:1.14, y6+1:-0.50, y7+1:2.33, y8+1:0.64, y9+1:-0.30];[b2+1:0.95, b2+1:0.39, b3+1:-2.33, b3+1:-0.02, b4+1:5.07];[M10+1:0.06, M10+1:-3.23] [y1+1:21593, y3+1:38231, y4+1:17572, y5+1:152087, y6+1:89434, y7+1:78355, y8+1:114958, y9+1:22446];[b2+1:55329, b2+1:22709, b3+1:57425, b3+1:28988, b4+1:14214];[M10+1:220667, M10+1:70101] 15 39.23942329854172 158.0074789982782 5 0;2 2 - 1 T 0 NaN NaN +C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 51 45 678.704623045646 6 4066.184079472602 Intra K;K P62399 47 ITLNMGVGEAIADKK ITLNMGVGEAIADKK(14) 1558.838878688 20.089596895792027 [y1+1, y2+1, y2+1, y3+1, y3+1, y4+1, y4+1, y5+1, y5+1, y6+1, y6+1, y8+1, y8+1, y10+1, y10+1, y11+1, y12+1, y12+1, y13+1];[b2+1] [y1+1:147.11273, y2+1:329.22083, y2+1:361.19110, y3+1:444.24945, y3+1:476.22574, y4+1:515.28436, y4+1:547.25384, y5+1:628.36487, y5+1:660.33893, y6+1:699.40552, y6+1:731.38312, y8+1:885.47797, y8+1:917.44286, y10+1:1041.56860, y10+1:1073.53394, y11+1:1204.58508, y12+1:1286.66272, y12+1:1318.62988, y13+1:1431.71167];[b2+1:215.13919] [y1+1:-0.00007, y2+1:0.00250, y2+1:0.00070, y3+1:0.00418, y3+1:0.00839, y4+1:0.00198, y4+1:-0.00062, y5+1:-0.00158, y5+1:0.00041, y6+1:0.00196, y6+1:0.00748, 
y8+1:0.01035, y8+1:0.00317, y10+1:0.01111, y10+1:0.00436, y11+1:0.01503, y12+1:0.02181, y12+1:0.01690, y13+1:0.01462];[b2+1:0.00017] [y1+1:-0.49, y2+1:7.61, y2+1:1.94, y3+1:9.43, y3+1:17.66, y4+1:3.85, y4+1:-1.13, y5+1:-2.52, y5+1:0.61, y6+1:2.80, y6+1:10.24, y8+1:11.70, y8+1:3.46, y10+1:10.67, y10+1:4.07, y11+1:12.49, y12+1:16.97, y12+1:12.83, y13+1:10.22];[b2+1:0.80] [y1+1:74724, y2+1:16137, y2+1:18663, y3+1:8680, y3+1:8874, y4+1:19015, y4+1:18702, y5+1:10478, y5+1:13384, y6+1:26267, y6+1:11389, y8+1:59509, y8+1:31219, y10+1:22269, y10+1:26060, y11+1:10336, y12+1:9065, y12+1:12012, y13+1:8226];[b2+1:33318] 20 P62399 69 LLDNAAADLAAISGQKPLITKAR LLDNAAADLAAISGQKPLITKAR(21) 2349.337993761 18.08918756365762 [y1+1, y4+1, y4+1, y6+1, y7+1, y7+1, y8+1, y11+2, y11+1, y12+2, y13+2, y14+2];[b2+1, b3+1, b5+1, b6+1, b8+1, b9+1] [y1+1:175.11913, y4+1:529.30933, y4+1:561.28784, y6+1:755.46906, y7+1:852.53164, y7+1:884.50635, y8+1:1012.59882, y11+2:626.87416, y11+1:1284.71545, y12+2:683.41356, y13+2:718.93366, y14+2:770.44121];[b2+1:227.17451, b3+1:342.20462, b5+1:527.28415, b6+1:598.31836, b8+1:784.38336, b9+1:897.47479] [y1+1:0.00017, y4+1:0.00006, y4+1:0.00650, y6+1:-0.00834, y7+1:0.00148, y7+1:0.00411, y8+1:0.00162, y11+2:0.00385, y11+1:0.00619, y12+2:-0.00141, y13+2:0.00167, y14+2:0.00759];[b2+1:-0.00089, b3+1:0.00227, b5+1:0.00176, b6+1:-0.00114, b8+1:-0.00020, b9+1:0.00717] [y1+1:0.99, y4+1:0.11, y4+1:11.60, y6+1:-11.05, y7+1:1.73, y7+1:4.66, y8+1:1.60, y11+2:3.08, y11+1:4.82, y12+2:-1.03, y13+2:1.17, y14+2:4.93];[b2+1:-3.93, b3+1:6.66, b5+1:3.34, b6+1:-1.91, b8+1:-0.25, b9+1:8.00] [y1+1:16344, y4+1:10137, y4+1:7731, y6+1:18453, y7+1:77950, y7+1:42608, y8+1:8493, y11+2:41998, y11+1:8207, y12+2:31722, y13+2:51822, y14+2:15292];[b2+1:15084, b3+1:9008, b5+1:23851, b6+1:11368, b8+1:35923, b9+1:10333] 18 38.178784459449645 158.0072070236024 16 0;0 0 - - T 0 NaN NaN 
+C:\Users\Alex\Source\Repos\MetaMorpheus\MetaMorpheus\Test\bin\Debug\net6.0-windows\XlTestData\2017-11-21_XL_DSSO_Ribosome_RT60min_28800-28898.mzML 38 34 1017.5533194495598 4 4066.184171930723 Intra K;K P62399 69 LLDNAAADLAAISGQKPLITKAR LLDNAAADLAAISGQKPLITKAR(21) 2349.337993761 19.066530501486493 [y1+1, y2+1, y4+1, y7+1, y7+1, y10+1, y10+1, y11+1, y11+1, y12+1, y13+1, y13+1, y14+1, y17+2, y18+2];[b2+1, b3+1, b5+1, b8+1] [y1+1:175.11909, y2+1:246.15567, y4+1:529.30151, y7+1:852.53503, y7+1:884.50214, y10+1:1165.70642, y10+1:1197.65442, y11+1:1252.74023, y11+1:1284.70789, y12+1:1397.77319, y13+1:1436.86865, y13+1:1468.80823, y14+1:1507.88306, y17+2:904.03063, y18+2:939.54384];[b2+1:227.17517, b3+1:342.20706, b5+1:527.27515, b8+1:784.39410] [y1+1:0.00014, y2+1:-0.00040, y4+1:-0.00775, y7+1:0.00487, y7+1:-0.00010, y10+1:0.00126, y10+1:-0.02282, y11+1:0.00304, y11+1:-0.00138, y12+1:-0.02014, y13+1:0.01028, y13+1:-0.02222, y14+1:-0.01243, y17+2:0.01038, y18+2:-0.00031];[b2+1:-0.00023, b3+1:0.00471, b5+1:-0.00724, b8+1:0.01054] [y1+1:0.82, y2+1:-1.61, y4+1:-14.68, y7+1:5.72, y7+1:-0.11, y10+1:1.08, y10+1:-19.07, y11+1:2.43, y11+1:-1.08, y12+1:-14.42, y13+1:7.16, y13+1:-15.14, y14+1:-8.25, y17+2:5.75, y18+2:-0.17];[b2+1:-1.03, b3+1:13.82, b5+1:-13.76, b8+1:13.46] [y1+1:19152, y2+1:17940, y4+1:5648, y7+1:27755, y7+1:16131, y10+1:9283, y10+1:5413, y11+1:21020, y11+1:11989, y12+1:5051, y13+1:6971, y13+1:5121, y14+1:5244, y17+2:13005, y18+2:13447];[b2+1:16642, b3+1:8156, b5+1:27418, b8+1:5894] 19 P62399 47 ITLNMGVGEAIADKK ITLNMGVGEAIADKK(14) 1558.838878688 17.041200589470144 [y1+1, y2+1, y2+1, y4+1, y4+1, y5+1, y6+1, y6+1, y8+1, y8+1, y10+1, y10+1, y11+1, y12+1];[b2+1];[M15+1, M15+1] [y1+1:147.11266, y2+1:329.21808, y2+1:361.18799, y4+1:515.27832, y4+1:547.25623, y5+1:660.33276, y6+1:699.40576, y6+1:731.37079, y8+1:885.47741, y8+1:917.44482, y10+1:1041.55688, y10+1:1073.51660, y11+1:1172.58313, y12+1:1286.64441];[b2+1:215.13843];[M15+1:1613.85203, M15+1:1645.83004] 
[y1+1:-0.00015, y2+1:-0.00025, y2+1:-0.00241, y4+1:-0.00406, y4+1:0.00177, y5+1:-0.00576, y6+1:0.00220, y6+1:-0.00485, y8+1:0.00979, y8+1:0.00513, y10+1:-0.00061, y10+1:-0.01297, y11+1:-0.01485, y12+1:0.00350];[b2+1:-0.00059];[M15+1:-0.00468, M15+1:0.00125] [y1+1:-1.02, y2+1:-0.76, y2+1:-6.70, y4+1:-7.90, y4+1:3.23, y5+1:-8.74, y6+1:3.15, y6+1:-6.64, y8+1:11.07, y8+1:5.60, y10+1:-0.59, y10+1:-12.09, y11+1:-12.68, y12+1:2.72];[b2+1:-2.76];[M15+1:-2.90, M15+1:0.76] [y1+1:7699, y2+1:7426, y2+1:5976, y4+1:5789, y4+1:6294, y5+1:5404, y6+1:7919, y6+1:5598, y8+1:19976, y8+1:9969, y10+1:9908, y10+1:6715, y11+1:5183, y12+1:5561];[b2+1:5749];[M15+1:17383, M15+1:16870] 17 36.107731090956634 158.0072994817233 7 0;2 2 - 13 T 0 NaN NaN diff --git a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs index 2018158b1..68d0f5481 100644 --- a/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs +++ b/mzLib/Test/FileReadingTests/TestPsmFromTsv.cs @@ -22,6 +22,8 @@ public static class TestPsmFromTsv [TestCase("oglycoSinglePsms.psmtsv", 2)] // oglyco [TestCase("nglyco_f5.psmtsv", 5)] // nglyco [TestCase("VariantCrossTest.psmtsv", 15)] // variant crossing + [TestCase("XL_Intralinks.tsv", 6)] // variant crossing + [TestCase("XLink.psmtsv", 19)] // variant crossing public static void TestPsmReaderWithMultipleEntryPoints(string path, int expected) { string psmFilePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"FileReadingTests\SearchResults", diff --git a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs index ef7a238b0..1f2d91ed8 100644 --- a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs +++ b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs @@ -37,6 +37,8 @@ internal class TestSupportedFileExtensions [TestCase(@"FileReadingTests\ExternalFileTypes\MsPathFinderT_DecoyResults_IcDecoy.tsv", SupportedFileType.MsPathFinderTDecoys)] 
[TestCase(@"FileReadingTests\ExternalFileTypes\MsPathFinderT_AllResults_IcTda.tsv", SupportedFileType.MsPathFinderTAllResults)] [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", SupportedFileType.CruxResult)] + [TestCase(@"FileReadingTests\ExternalFileTypes\XL_Intralinks.tsv", SupportedFileType.IntralinkResults)] + [TestCase(@"FileReadingTests\ExternalFileTypes\XLink.psmtsv", SupportedFileType.psmtsv)] public static void TestSupportedFileTypeExtensions(string filePath, SupportedFileType expectedType) { var supportedType = filePath.ParseFileType(); diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj index b58d87522..797157c8b 100644 --- a/mzLib/Test/Test.csproj +++ b/mzLib/Test/Test.csproj @@ -447,6 +447,9 @@ Always + + Always + Always From f91db8adf37640db163f084284eab7389d088b5c Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 18:09:02 -0500 Subject: [PATCH 05/10] Added unique sequence to PsmFromTsv --- .../SpectrumMatchFromTsvHeader.cs | 1 + mzLib/Proteomics/PSM/PsmFromTsv.cs | 42 ++++++++++++++----- .../SearchResults/SpectrumMatchTsvReader.cs | 9 ++-- 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs index 136a2916d..27f83f756 100644 --- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs +++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsvHeader.cs @@ -20,6 +20,7 @@ public class SpectrumMatchFromTsvHeader public const string BaseSequence = "Base Sequence"; public const string FullSequence = "Full Sequence"; public const string EssentialSequence = "Essential Sequence"; + public const string UniqueSequence = "Unique Sequence"; //Used for crosslinked peptides public const string AmbiguityLevel = "Ambiguity Level"; public const string SpectrumMatchCount = "Spectrum Match Count"; public const string Mods = "Mods"; diff --git a/mzLib/Proteomics/PSM/PsmFromTsv.cs b/mzLib/Proteomics/PSM/PsmFromTsv.cs index 
95605ab49..412d66c49 100644 --- a/mzLib/Proteomics/PSM/PsmFromTsv.cs +++ b/mzLib/Proteomics/PSM/PsmFromTsv.cs @@ -34,6 +34,10 @@ public class PsmFromTsv : SpectrumMatchFromTsv public int? BetaPeptideRank { get; } public List BetaPeptideMatchedIons { get; } public Dictionary> BetaPeptideChildScanMatchedIons { get; } + /// + /// If Crosslink, this contains the alpha and beta sequences. Otherwise, it contains the full sequence + /// + public string UniqueSequence { get; } public double? XLTotalScore { get; } public string ParentIons { get; } @@ -135,6 +139,12 @@ public PsmFromTsv(string line, char[] split, Dictionary parsedHeade ((spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].StartsWith("{")) ? ReadChildScanMatchedIons(spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonIntensitiesLabel]].Trim(), BetaPeptideBaseSequence).First().Value : ReadFragmentIonsFromString(spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonsLabel]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.BetaPeptideMatchedIonIntensitiesLabel]].Trim(), BetaPeptideBaseSequence)); XLTotalScore = (parsedHeader[SpectrumMatchFromTsvHeader.XLTotalScoreLabel] < 0) ? null : (double?)double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.XLTotalScoreLabel]].Trim(), CultureInfo.InvariantCulture); ParentIons = (parsedHeader[SpectrumMatchFromTsvHeader.ParentIonsLabel] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.ParentIonsLabel]].Trim(); + // This ensures backwards compatibility with old Crosslink Search Results + // This works because the alpha and beta peptide full sequences are written to tsv with their crosslink site included (e.g., PEPTIDEK(4)) + if (UniqueSequence == null && BetaPeptideFullSequence != null) + { + UniqueSequence = FullSequence + BetaPeptideFullSequence; + } // child scan matched ions for xlink and glyco. 
we are getting them all above and then deleting primary scan ions here. ChildScanMatchedIons = (!spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonMzRatios]].StartsWith("{")) ? null : ReadChildScanMatchedIons(spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonMzRatios]].Trim(), spl[parsedHeader[SpectrumMatchFromTsvHeader.MatchedIonIntensities]].Trim(), BaseSeq); @@ -152,18 +162,30 @@ public PsmFromTsv(string line, char[] split, Dictionary parsedHeade } //For Glyco - GlycanMass = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanMass]], CultureInfo.InvariantCulture); - GlycanComposition = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanComposition] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanComposition]]; - GlycanStructure = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanStructure] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanStructure]]; - var localizationLevel = (parsedHeader[SpectrumMatchFromTsvHeader.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.GlycanLocalizationLevel]]; - if (localizationLevel != null) + try // Try is so that glyco and non-glyco psms can be read from the same file { - if (localizationLevel.Equals("NA")) - GlycanLocalizationLevel = null; - else - GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture); + GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]]; + GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? 
null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]]; + var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]]; + if (localizationLevel != null) + { + if (localizationLevel.Equals("NA")) + GlycanLocalizationLevel = null; + else + GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + } + LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]]; + + } + catch + { + GlycanMass = null; + GlycanComposition = null; + GlycanStructure = null; + GlycanLocalizationLevel = null; + LocalizedGlycan = null; } - LocalizedGlycan = (parsedHeader[SpectrumMatchFromTsvHeader.LocalizedGlycan] < 0) ? null : spl[parsedHeader[SpectrumMatchFromTsvHeader.LocalizedGlycan]]; } /// diff --git a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs index 659fe413a..a69e2ed93 100644 --- a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs +++ b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs @@ -53,16 +53,15 @@ public static List ReadTsv(string filePath, out List Date: Wed, 23 Oct 2024 18:10:57 -0500 Subject: [PATCH 06/10] Removed unique sequence from PsmTsv --- mzLib/Proteomics/PSM/PsmFromTsv.cs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/mzLib/Proteomics/PSM/PsmFromTsv.cs b/mzLib/Proteomics/PSM/PsmFromTsv.cs index 412d66c49..ad2c75292 100644 --- a/mzLib/Proteomics/PSM/PsmFromTsv.cs +++ b/mzLib/Proteomics/PSM/PsmFromTsv.cs @@ -34,10 +34,6 @@ public class PsmFromTsv : SpectrumMatchFromTsv public int? BetaPeptideRank { get; } public List BetaPeptideMatchedIons { get; } public Dictionary> BetaPeptideChildScanMatchedIons { get; } - /// - /// If Crosslink, this contains the alpha and beta sequences. 
Otherwise, it contains the full sequence - /// - public string UniqueSequence { get; } public double? XLTotalScore { get; } public string ParentIons { get; } From ff9a1f02ad43041a2971f5ae26569dc30de6624d Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 18:13:07 -0500 Subject: [PATCH 07/10] undid last commit :( --- mzLib/Proteomics/PSM/PsmFromTsv.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mzLib/Proteomics/PSM/PsmFromTsv.cs b/mzLib/Proteomics/PSM/PsmFromTsv.cs index ad2c75292..412d66c49 100644 --- a/mzLib/Proteomics/PSM/PsmFromTsv.cs +++ b/mzLib/Proteomics/PSM/PsmFromTsv.cs @@ -34,6 +34,10 @@ public class PsmFromTsv : SpectrumMatchFromTsv public int? BetaPeptideRank { get; } public List BetaPeptideMatchedIons { get; } public Dictionary> BetaPeptideChildScanMatchedIons { get; } + /// + /// If Crosslink, this contains the alpha and beta sequences. Otherwise, it contains the full sequence + /// + public string UniqueSequence { get; } public double? XLTotalScore { get; } public string ParentIons { get; } From 015e9c2296e88d5b3a7ebf478b61517b32b01abf Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 18:40:13 -0500 Subject: [PATCH 08/10] Added Crosslink Library Spectrum --- .../SpectrumMatch/CrosslinkLibrarySpectrum.cs | 124 ++++++++++++++++++ .../SpectrumMatch/SpectrumMatchFromTsv.cs | 2 +- mzLib/Proteomics/PSM/PsmFromTsv.cs | 35 ++++- mzLib/Test/TestLibrarySpectrum.cs | 31 +++++ 4 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs diff --git a/mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs b/mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs new file mode 100644 index 000000000..44e88b3ac --- /dev/null +++ b/mzLib/Omics/SpectrumMatch/CrosslinkLibrarySpectrum.cs @@ -0,0 +1,124 @@ +using Omics.Fragmentation; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.RegularExpressions; +using 
System.Threading.Tasks; + +namespace Omics.SpectrumMatch +{ + public class CrosslinkLibrarySpectrum : LibrarySpectrum + { + public CrosslinkLibrarySpectrum BetaPeptideSpectrum { get; } + public string AlphaPeptideSequence { get; private set; } + public string BetaPeptideSequence { get; private set; } + public string UniqueSequence { get; private set; } + public bool IsBetaPeptide { get; } + public static Regex CrosslinkRegex = new Regex(@"\(\d+\)"); + public new string Name => UniqueSequence + "/" + ChargeState; + + public CrosslinkLibrarySpectrum( + string uniqueSequence, + double precursorMz, + int precursorCharge, + List peaks, + double rt, + List betaPeaks, + bool isDecoy = false) : this( + uniqueSequence, + precursorMz, + precursorCharge, + peaks, + rt, + new CrosslinkLibrarySpectrum(uniqueSequence, precursorMz, precursorCharge, betaPeaks, rt), + isDecoy) + { } + + public CrosslinkLibrarySpectrum( + string uniqueSequence, + double precursorMz, + int precursorCharge, + List peaks, + double rt, + CrosslinkLibrarySpectrum betaSpectrum = null, + bool isDecoy = false) : base(uniqueSequence, precursorMz, precursorCharge, peaks, rt, isDecoy) + { + UniqueSequence = uniqueSequence; + if (betaSpectrum == null) + { + IsBetaPeptide = true; + } + else + { + BetaPeptideSpectrum = betaSpectrum; + } + SetAlphaBetaSequence(); + } + + private void SetAlphaBetaSequence() + { + string[] uniqueSequenceSplit = CrosslinkRegex.Split(UniqueSequence); + if (uniqueSequenceSplit.Length >= 2) + { + AlphaPeptideSequence = uniqueSequenceSplit[0]; + BetaPeptideSequence = uniqueSequenceSplit[1]; + } + else + { + AlphaPeptideSequence = null; + BetaPeptideSequence = null; + } + } + + public override string ToString() + { + StringBuilder spectrum = new(); + spectrum.AppendLine("Name: " + Name); + spectrum.AppendLine("MW: " + PrecursorMz); + spectrum.Append("Comment: "); + spectrum.Append("Parent=" + PrecursorMz); + spectrum.AppendLine(" RT=" + RetentionTime); + spectrum.Append("Num alpha peaks: 
" + MatchedFragmentIons.Count); + spectrum.AppendLine(", Num beta peaks: " + BetaPeptideSpectrum.MatchedFragmentIons.Count); + + double maxIntensity = Math.Max(MatchedFragmentIons.Max(b => b.Intensity), + BetaPeptideSpectrum.MatchedFragmentIons.Max(s => s.Intensity)); + + foreach (MatchedFragmentIon matchedIon in MatchedFragmentIons) + { + double intensityFraction = matchedIon.Intensity / maxIntensity; + + string neutralLoss = null; + if (matchedIon.NeutralTheoreticalProduct.NeutralLoss != 0) + { + neutralLoss = "-" + matchedIon.NeutralTheoreticalProduct.NeutralLoss; + } + + spectrum.AppendLine(matchedIon.Mz + "\t" + intensityFraction + "\t" + "\"" + + matchedIon.NeutralTheoreticalProduct.ProductType.ToString() + + matchedIon.NeutralTheoreticalProduct.FragmentNumber.ToString() + "^" + + matchedIon.Charge + neutralLoss + "/" + 0 + "ppm" + "\""); + } + + foreach (MatchedFragmentIon matchedIon in BetaPeptideSpectrum.MatchedFragmentIons) + { + double intensityFraction = matchedIon.Intensity / maxIntensity; + + string neutralLoss = null; + if (matchedIon.NeutralTheoreticalProduct.NeutralLoss != 0) + { + neutralLoss = "-" + matchedIon.NeutralTheoreticalProduct.NeutralLoss; + } + + spectrum.AppendLine(matchedIon.Mz + "\t" + intensityFraction + "\t" + "\"" + + matchedIon.NeutralTheoreticalProduct.ProductType.ToString() + + matchedIon.NeutralTheoreticalProduct.FragmentNumber.ToString() + "^" + + matchedIon.Charge + neutralLoss + "/" + 0 + "ppm" + "\"" + + "\t" + "BetaPeptideIon"); + } + + return spectrum.ToString().Trim(); + } + } + } diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs index a96be9e0c..69498cd0d 100644 --- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs +++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs @@ -365,7 +365,7 @@ public override string ToString() { return FullSequence; } - public LibrarySpectrum ToLibrarySpectrum() + public virtual LibrarySpectrum ToLibrarySpectrum() { bool 
isDecoy = this.DecoyContamTarget == "D"; diff --git a/mzLib/Proteomics/PSM/PsmFromTsv.cs b/mzLib/Proteomics/PSM/PsmFromTsv.cs index 412d66c49..5bc064f90 100644 --- a/mzLib/Proteomics/PSM/PsmFromTsv.cs +++ b/mzLib/Proteomics/PSM/PsmFromTsv.cs @@ -3,6 +3,7 @@ using System.Globalization; using System.IO; using System.Linq; +using Easy.Common.Extensions; using Omics.Fragmentation; using Omics.SpectrumMatch; @@ -281,6 +282,38 @@ public PsmFromTsv(PsmFromTsv psm, string fullSequence, int index = 0, string bas LocalizedGlycan = psm.LocalizedGlycan; } - + /// + /// Override library spectrum for cross link library spectrum implicit conversion + /// + /// + public override LibrarySpectrum ToLibrarySpectrum() + { + bool isDecoy = this.DecoyContamTarget == "D"; + + List fragments = new List(); + + double matchedIonIntensitySum = Math.Max(1.0, this.MatchedIons.Select(i => i.Intensity).Sum()); + + foreach (MatchedFragmentIon ion in this.MatchedIons) + { + Product product = new Product(ion.NeutralTheoreticalProduct.ProductType, ion.NeutralTheoreticalProduct.Terminus, ion.NeutralTheoreticalProduct.NeutralMass, ion.NeutralTheoreticalProduct.FragmentNumber, ion.NeutralTheoreticalProduct.AminoAcidPosition, ion.NeutralTheoreticalProduct.NeutralLoss); + fragments.Add(new MatchedFragmentIon(product, ion.Mz, ion.Intensity / matchedIonIntensitySum, ion.Charge)); + } + double retentionTime = RetentionTime ??
-1; + + if (BetaPeptideMatchedIons.IsNotNullOrEmpty()) + { + List betaFragments = new(); + foreach (var ion in BetaPeptideMatchedIons) + { + Product product = new Product(ion.NeutralTheoreticalProduct.ProductType, ion.NeutralTheoreticalProduct.Terminus, ion.NeutralTheoreticalProduct.NeutralMass, ion.NeutralTheoreticalProduct.FragmentNumber, ion.NeutralTheoreticalProduct.AminoAcidPosition, ion.NeutralTheoreticalProduct.NeutralLoss); + betaFragments.Add(new MatchedFragmentIon(product, ion.Mz, ion.Intensity / matchedIonIntensitySum, ion.Charge)); + } + string uniqueSequence = UniqueSequence ?? FullSequence + BetaPeptideFullSequence; + return new CrosslinkLibrarySpectrum(uniqueSequence, PrecursorMz, PrecursorCharge, fragments, retentionTime, betaFragments); + } + + return (new(this.FullSequence, this.PrecursorMz, this.PrecursorCharge, fragments, retentionTime, isDecoy)); + } } } diff --git a/mzLib/Test/TestLibrarySpectrum.cs b/mzLib/Test/TestLibrarySpectrum.cs index ba0817f3c..4e5d98480 100644 --- a/mzLib/Test/TestLibrarySpectrum.cs +++ b/mzLib/Test/TestLibrarySpectrum.cs @@ -1,8 +1,13 @@ using NUnit.Framework; using Assert = NUnit.Framework.Legacy.ClassicAssert; using System.Collections.Generic; +using System.IO; +using System.Linq; using Omics.Fragmentation; using Omics.SpectrumMatch; +using NUnit.Framework.Legacy; +using Proteomics.PSM; +using Readers; namespace Test { @@ -34,5 +39,31 @@ public static void TestDecoyLibrarySpectraGenerationFunction() string spectralAngleOnTheFly = "N/A"; Assert.AreEqual(spectralAngleOnTheFly,librarySpectrum.CalculateSpectralAngleOnTheFly(peaks)); } + + [Test] + public static void CrosslinkPsmFromTsvTest() + { + string psmFile = @"FileReadingTests\SearchResults\XL_Intralinks.tsv"; + List parsedPsms = SpectrumMatchTsvReader.ReadPsmTsv(psmFile, out var warnings); + Assert.AreEqual(6, parsedPsms.Count); + Assert.That(parsedPsms[0].UniqueSequence, Is.EqualTo("LLDNAAADLAAISGQKPLITKAR(21)ITLNMGVGEAIADKK(14)")); + } + + [Test] + public 
static void CrosslinkPsmFromTsvToLibrarySpectrumTest() + { + string psmTsvPath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"FileReadingTests\SearchResults\XL_Intralinks.tsv"); + List warnings = new(); + List psms = SpectrumMatchTsvReader.ReadPsmTsv(psmTsvPath, out warnings).ToList(); + Assert.That(warnings.Count == 0); + + CrosslinkLibrarySpectrum librarySpectrum = psms[0].ToLibrarySpectrum() as CrosslinkLibrarySpectrum; + Assert.IsNotNull(librarySpectrum); + Assert.AreEqual("Name: LLDNAAADLAAISGQKPLITKAR(21)ITLNMGVGEAIADKK(14)/5", librarySpectrum.ToString().Split('\n')[0].Trim()); + + // This test would be better if MatchedIon.equals method worked, but it breaks because the mz comparison is implemented incorrectly. + CollectionAssert.AreEquivalent(librarySpectrum.MatchedFragmentIons.Select(ion => ion.Annotation), psms[0].MatchedIons.Select(ion => ion.Annotation)); + CollectionAssert.AreEquivalent(librarySpectrum.BetaPeptideSpectrum.MatchedFragmentIons.Select(ion => ion.Annotation), psms[0].BetaPeptideMatchedIons.Select(ion => ion.Annotation)); + } } } From c85760878811e307c506f1d2313f750cc0774bef Mon Sep 17 00:00:00 2001 From: nbollis Date: Wed, 23 Oct 2024 18:56:08 -0500 Subject: [PATCH 09/10] Fix header mapping in SpectrumMatchTsvReader.cs Corrected the mapping of `NextResidue` and `PreviousResidue` headers in the `parsedHeader` dictionary. The headers are now correctly mapped to `SpectrumMatchFromTsvHeader.NextResidue` and `SpectrumMatchFromTsvHeader.PreviousResidue` in both conditional branches. 
--- mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs index a69e2ed93..65f378b1c 100644 --- a/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs +++ b/mzLib/Readers/SearchResults/SpectrumMatchTsvReader.cs @@ -124,8 +124,8 @@ public static Dictionary ParseHeader(string header) parsedHeader.Add(SpectrumMatchFromTsvHeader.Name, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Name)); parsedHeader.Add(SpectrumMatchFromTsvHeader.Description, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.Description)); parsedHeader.Add(SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence)); - parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousResidue)); - parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NumExperimentalPeaks)); + parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextResidue)); + parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousResidue)); } else { @@ -135,8 +135,8 @@ public static Dictionary ParseHeader(string header) parsedHeader.Add(SpectrumMatchFromTsvHeader.Name, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.ProteinName)); parsedHeader.Add(SpectrumMatchFromTsvHeader.Description, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PeptideDescription)); parsedHeader.Add(SpectrumMatchFromTsvHeader.StartAndEndResiduesInFullSequence, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.StartAndEndResiduesInProtein)); - parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousAminoAcid)); - 
parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextAminoAcid)); + parsedHeader.Add(SpectrumMatchFromTsvHeader.NextResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.NextAminoAcid)); + parsedHeader.Add(SpectrumMatchFromTsvHeader.PreviousResidue, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.PreviousAminoAcid)); } parsedHeader.Add(SpectrumMatchFromTsvHeader.GeneName, Array.IndexOf(spl, SpectrumMatchFromTsvHeader.GeneName)); From 6ecd93d706d0709424867bca247500f9b04aee80 Mon Sep 17 00:00:00 2001 From: nbollis Date: Fri, 25 Oct 2024 17:07:47 -0500 Subject: [PATCH 10/10] Ensure terminus type is correct when read in from file --- mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs index 69498cd0d..fadad6697 100644 --- a/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs +++ b/mzLib/Omics/SpectrumMatch/SpectrumMatchFromTsv.cs @@ -156,7 +156,7 @@ public static void RemoveSpecialCharacters(ref string fullSeq, string replacemen } - protected static List ReadFragmentIonsFromString(string matchedMzString, string matchedIntensityString, string peptideBaseSequence, string matchedMassErrorDaString = null) + protected static List ReadFragmentIonsFromString(string matchedMzString, string matchedIntensityString, string peptideBaseSequence, string matchedMassErrorDaString = null, bool isProtein = true) { List matchedIons = new List(); @@ -225,11 +225,14 @@ protected static List ReadFragmentIonsFromString(string matc } //get terminus - if (TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus.TryGetValue(productType, - out terminus)); + + if (isProtein) + TerminusSpecificProductTypes.ProductTypeToFragmentationTerminus.TryGetValue(productType, out terminus); + else + terminus = 
Omics.Fragmentation.Oligo.TerminusSpecificProductTypes.GetRnaTerminusType(productType); //get amino acid position - aminoAcidPosition = terminus == FragmentationTerminus.C ? + aminoAcidPosition = terminus is FragmentationTerminus.C or FragmentationTerminus.ThreePrime ? peptideBaseSequence.Split('|')[0].Length - fragmentNumber : fragmentNumber; }