From 353ae254de704550bab9269678f2abbbdacab91c Mon Sep 17 00:00:00 2001 From: Nic Bollis Date: Mon, 27 Nov 2023 11:55:46 -0600 Subject: [PATCH] Toppic search result reading (#735) * Added in ability to parse TopPIC search result files * Added ability to write entire header * Added comments * Excluded Test class from code coverage * Adjusted some fields to match ours better * Handled a few edge cases * Added support for TopPIC version 1.5.3 * Added support for TopPIC version 1.5.3 * Fixed up operators --------- Co-authored-by: Nic Bollis Co-authored-by: trishorts --- .../IndividualResultRecords/ToppicPrsm.cs | 208 +++++++++ .../ResultFiles/TopPICSearchResultFile.cs | 353 +++++++++++++++ .../ExternalResults/SupportedVersions.txt | 3 +- mzLib/Readers/Util/Converters.cs | 13 + mzLib/Readers/Util/Software.cs | 1 + mzLib/Readers/Util/SupportedFileTypes.cs | 19 +- ...mSingle_TopPICv1.5.3_proteoform_single.tsv | 34 ++ ...mSingle_TopPICv1.6.2_proteoform_single.tsv | 37 ++ ...ppicProteofrom_TopPICv1.6.2_proteoform.tsv | 70 +++ ...picPrsmSingle_TopPICv1.6.2_prsm_single.tsv | 40 ++ .../ToppicPrsm_TopPICv1.6.2_prsm.tsv | 38 ++ .../TestSupportedFileExtensions.cs | 4 + .../FileReadingTests/TestToppicResultFiles.cs | 412 ++++++++++++++++++ mzLib/Test/Test.csproj | 18 + mzLib/mzLib.nuspec | 2 +- mzLib/mzLib.sln.DotSettings | 4 +- 16 files changed, 1252 insertions(+), 4 deletions(-) create mode 100644 mzLib/Readers/ExternalResults/IndividualResultRecords/ToppicPrsm.cs create mode 100644 mzLib/Readers/ExternalResults/ResultFiles/TopPICSearchResultFile.cs create mode 100644 mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv create mode 100644 mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv create mode 100644 mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofrom_TopPICv1.6.2_proteoform.tsv create mode 100644 
mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv create mode 100644 mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsm_TopPICv1.6.2_prsm.tsv create mode 100644 mzLib/Test/FileReadingTests/TestToppicResultFiles.cs diff --git a/mzLib/Readers/ExternalResults/IndividualResultRecords/ToppicPrsm.cs b/mzLib/Readers/ExternalResults/IndividualResultRecords/ToppicPrsm.cs new file mode 100644 index 000000000..e2dcd260f --- /dev/null +++ b/mzLib/Readers/ExternalResults/IndividualResultRecords/ToppicPrsm.cs @@ -0,0 +1,208 @@ +using System.Globalization; +using System.Text; +using System.Text.RegularExpressions; +using CsvHelper.Configuration; +using CsvHelper.Configuration.Attributes; +using MassSpectrometry; +using static System.Net.Mime.MediaTypeNames; + +namespace Readers; + +/// +/// Class Representing a TopPIC prsm or proteoform +/// For supported versions and software this file type can come from see +/// Readers.ExternalResources.SupportedVersions.txt +/// +/// +/// Things that could be done to improve compatibility: +/// Convert Variable Modifications to a list of Modification objects +/// Convert NTerminalForm to a Modification object +/// +public class ToppicPrsm +{ + [Ignore] + public static CsvConfiguration CsvConfiguration => new CsvConfiguration(CultureInfo.InvariantCulture) + { + Encoding = Encoding.UTF8, + HasHeaderRecord = true, + Delimiter = "\t", + }; + + public ToppicPrsm() + { + AlternativeIdentifications = new List(); + } + + private string? 
_fileNameWithoutExtension; + [Ignore] + public string FileNameWithoutExtension => _fileNameWithoutExtension ??= Path.GetFileNameWithoutExtension(FilePath); + + [Name("Data file name")] + public string FilePath { get; set; } + + [Name("Prsm ID")] + public int PrsmID { get; set; } + + [Name("Spectrum ID")] + public int SpectrumId { get; set; } + + [Name("Fragmentation")] + public DissociationType DissociationType { get; set; } + + [Name("Scan(s)")] + public int OneBasedScanNumber { get; set; } + + [Name("Retention time")] + public double RetentionTime { get; set; } + + [Name("#peaks")] + public int PeakCount { get; set; } + + [Name("Charge")] + public int PrecursorCharge { get; set; } + + [Name("Precursor mass")] + public double PrecursorMass { get; set; } + + [Name("Adjusted precursor mass")] + public double AdjustedPrecursorMass { get; set; } + + [Name("Proteoform ID")] + public double ProteoformId { get; set; } + + [Name("Feature intensity")] + [Format("#.00#E+00")] + public double FeatureIntensity { get; set; } + + [Name("Feature score")] + public double FeatureScore { get; set; } + + [Name("Feature apex time")] + public double FeatureApexTime { get; set; } + + [Name("#Protein hits")] + public int ProteinHitsCount { get; set; } + + [Name("Protein accession")] + public string ProteinAccession { get; set; } + + [Name("Protein description")] + public string ProteinDescription { get; set; } + + [Name("First residue")] + public int FirstResidue { get; set; } + + [Name("Last residue")] + public int LastResidue { get; set; } + + [Name("Special amino acids")] + public string? SpecialAminoAcids { get; set; } + + [Ignore] + private string? 
_baseSequence; + + [Optional] + [Name("Database protein sequence")] + public string BaseSequence + { + get => _baseSequence ??= GetBaseSequenceFromFullSequence(); + set => _baseSequence = value; + } + + [Name("Proteoform")] + public string FullSequence { get; set; } + + [Name("Proteoform mass")] + public double FullSequenceMass { get; set; } + + [Name("Protein N-terminal form")] + public string ProteinNTerminalForm { get; set; } + + [Optional] + [Name("Fixed PTMs")] + public string? FixedPTMs { get; set; } + + [Name("#unexpected modifications")] + public int UnexpectedModificationsCount { get; set; } + + /// + /// The mass shift of the mod and its semi-localization + /// -47:[10-14] means a mass shift of -47 Da, and the semi-localization is between the 10th and 14th amino acids + /// + [Optional] + [Name("unexpected modifications")] + public string UnexpectedModifications { get; set; } + + [Name("#variable PTMs")] + public int VariableModificationsCount { get; set; } + + [Optional] + [Name("variable PTMs")] + public string VariableModifications { get; set; } + + [Name("MIScore")] + [TypeConverter(typeof(DashToNullOrDoubleConverter))] + public double? MIScore { get; set; } + + [Name("#matched peaks")] + public int MatchedPeaksCount { get; set; } + + [Name("#matched fragment ions")] + public int MatchedFragmentIonsCount { get; set; } + + [Name("E-value")] + [Format("0.00E+00")] + public double EValue { get; set; } + + [Name("Spectrum-level Q-value")] + [TypeConverter(typeof(DashToNullOrDoubleConverter))] + public double? QValueSpectrumLevel { get; set; } + + [Name("Proteoform-level Q-value")] + [TypeConverter(typeof(DashToNullOrDoubleConverter))] + public double? 
QValueProteoformLevel { get; set; } + + [Ignore] + public List AlternativeIdentifications { get; set; } + + public string GetBaseSequenceFromFullSequence() + { + // Remove text within square brackets + var text = Regex.Replace(FullSequence, @"\[[^\]]*\]", ""); + + // Remove parentheses + text = Regex.Replace(text, @"[()]", ""); + + // Remove periods + text = Regex.Replace(text, @"(^[^.]+)|(\.[^.]+$)", "") + .Replace(".",""); + return text; + } +} + +/// +/// Class representing an alternative Identification from the tsv file. +/// +public class AlternativeToppicId +{ + public int PrsmId { get; set; } + public string Accession { get; set; } + public string ProteinDescription { get; set; } + public int FirstResidue { get; set; } + public int LastResidue { get; set; } + + public AlternativeToppicId(int prsmId, string accession, string proteinDescription, int firstResidue, + int lastResidue) + { + PrsmId = prsmId; + Accession = accession; + ProteinDescription = proteinDescription; + FirstResidue = firstResidue; + LastResidue = lastResidue; + } + + public override string ToString() + { + return $"{PrsmId}\t\t\t\t\t\t\t\t\t\t\t\t\t\t{Accession}\t{ProteinDescription}\t{FirstResidue}\t{LastResidue}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"; + } +} \ No newline at end of file diff --git a/mzLib/Readers/ExternalResults/ResultFiles/TopPICSearchResultFile.cs b/mzLib/Readers/ExternalResults/ResultFiles/TopPICSearchResultFile.cs new file mode 100644 index 000000000..337c96762 --- /dev/null +++ b/mzLib/Readers/ExternalResults/ResultFiles/TopPICSearchResultFile.cs @@ -0,0 +1,353 @@ +using CsvHelper.Configuration; +using System.Globalization; +using System.Text; +using Easy.Common.Extensions; +using MassSpectrometry; +using CsvHelper; +using CsvHelper.TypeConversion; +using MzLibUtil; + +namespace Readers +{ + /// + /// Concrete Product for reading and representing a proteoform or psm search results file from TopPIC + /// For supported versions and software this file type can come from see + 
/// Readers.ExternalResources.SupportedVersions.txt + /// + public class ToppicSearchResultFile : ResultFile + { + private SupportedFileType _fileType; + public override SupportedFileType FileType + { + get + { + if (!_fileType.IsDefault()) return _fileType; + + if (FilePath.EndsWith(SupportedFileType.ToppicProteoform.GetFileExtension())) + _fileType = SupportedFileType.ToppicProteoform; + else if (FilePath.EndsWith(SupportedFileType.ToppicPrsm.GetFileExtension())) + _fileType = SupportedFileType.ToppicPrsm; + else if (FilePath.EndsWith(SupportedFileType.ToppicProteoformSingle.GetFileExtension())) + _fileType = SupportedFileType.ToppicProteoformSingle; + else if (FilePath.EndsWith(SupportedFileType.ToppicPrsmSingle.GetFileExtension())) + _fileType = SupportedFileType.ToppicPrsmSingle; + else throw new MzLibException("Cannot parse result file type from file path"); + + return _fileType; + } + } + public override Software Software { get; set; } + + #region Search Summary Parameters + + public string ProteinDatabasePath { get; private set; } + public string SpectrumFilePath { get; private set; } + public int NumberOfCombinedSpectra { get; private set; } + public DissociationType FragmentationMethod { get; private set; } + public string SearchType { get; private set; } + public List FixedModifications { get; private set; } + public List AllowedNTerminalForms { get; private set; } + public int NumberOfMaxUnexpectedModifications { get; private set; } + public double MaximumMassShift { get; private set; } + public double MinimumMassShift { get; private set; } + public string SpectrumLevelCutOffType { get; private set; } + public double SpectrumLevelCutOffValue { get; private set; } + public string ProteoformLevelCutOffType { get; private set; } + public double ProteoformLevelCutOffValue { get; private set; } + public double PrecursorErrorTolerance { get; private set; } + public double PrsmClusterErrorTolerance { get; private set; } + public bool UseToppicFeatureFile { 
get; private set; } + public string EValueComputation { get; private set; } + public bool LocalizationWithMIScore { get; private set; } + public int ThreadNumber { get; private set; } + public string ExecutableFileDirectory { get; private set; } + public DateTime StartTime { get; private set; } + public DateTime EndTime { get; private set; } + public string Version { get; private set; } + + #endregion + + public ToppicSearchResultFile(string filePath) : base(filePath, Software.Toppic) + { + FixedModifications = new List(); + AllowedNTerminalForms = new List(); + } + + /// + /// Constructor used to initialize from the factory method + /// + public ToppicSearchResultFile() : base() + { + FixedModifications = new List(); + AllowedNTerminalForms = new List(); + } + + + public override void LoadResults() + { + using var reader = new StreamReader(FilePath); + + // Pull out parameter values + bool isInsideParametersSection = false; + bool isInsideFixedModificationsSection = false; + StringBuilder dataBetweenParameters = new StringBuilder(); + + // Read the file line by line + while (reader.ReadLine() is { } line) + { + if (line.Contains("******* Parameters *******")) + { + // Found the start or end of the Parameters section + if (isInsideParametersSection) + { + // We are done, with parameters + isInsideParametersSection = false; + continue; + } + else + { + // We are entering the Parameters section + isInsideParametersSection = true; + continue; // Skip this line + } + } + if (line.Trim().Equals("")) + { + // everything below will be the actual Toppic results + // read them in with CsvHelper + var alternativeIDs = new List(); + bool isAlternativeId = false; + var toppicDefaultConfig = ToppicPrsm.CsvConfiguration; + var csvConfig = new CsvConfiguration(CultureInfo.InvariantCulture) + { + Delimiter = toppicDefaultConfig.Delimiter, + Encoding = toppicDefaultConfig.Encoding, + HasHeaderRecord = toppicDefaultConfig.HasHeaderRecord, + ReadingExceptionOccurred = context => + { 
+ if (context.Exception is not TypeConverterException) throw new IOException("Error reading Toppic results file", context.Exception); + if (context.Exception.Context.Parser.RawRecord.Trim().IsNullOrEmpty()) return false; + + isAlternativeId = true; + alternativeIDs.Add(context.Exception.Context.Parser.RawRecord); + return false; + }, + MissingFieldFound = null, + }; + + var results = new List(); + using var csv = new CsvReader(reader, csvConfig); + + csv.Read(); + csv.ReadHeader(); + while (csv.Read()) + { + var record = csv.GetRecord(); + if (isAlternativeId) + { + results.Last().AlternativeIdentifications.AddRange(alternativeIDs + .Select(p => p.Split('\t') + .Where(str => !string.IsNullOrEmpty(str)) + .ToArray()) + .Select(p => new AlternativeToppicId(int.Parse(p[1]), p[2], p[3], int.Parse(p[4]), int.Parse(p[5]))) + .ToList()); + isAlternativeId = false; + alternativeIDs.Clear(); + } + else + { + if (record != null) + results.Add(record); + } + } + + Results = results; + + // dispose of used readers and exit the loop + csv.Dispose(); + reader.Dispose(); + break; + } + if (isInsideParametersSection) + { + if (line.Contains("Fixed modifications BEGIN")) + { + isInsideFixedModificationsSection = true; + continue; // Skip this line + } + + if (isInsideFixedModificationsSection) + { + if (line.Contains("Fixed modifications END")) + { + isInsideFixedModificationsSection = false; + continue; // Skip this line + } + else + { + FixedModifications.Add(string.Join(" ", line.SplitAndTrim('\t', ' ').Where(p => p.IsNotNullOrEmptyOrWhiteSpace()))); + continue; // Skip this line + } + } + + // Append the line to the data between the Parameters sections + dataBetweenParameters.AppendLine(line); + } + } + + // Display or process the data between the Parameters sections + var parameterResults = dataBetweenParameters.ToString(); + + // Parse the parameter values + var parameterLines = parameterResults.Split('\n') + .Where(p => !string.IsNullOrWhiteSpace(p) && !p.Contains("****")) + 
.Select(p => p.Split('\t')) + .ToDictionary(p => p[0].Trim().Replace(":",""), p => p[1].Replace("\r","")); + + string dateTimeFormat = "ddd MMM dd HH:mm:ss yyyy"; + foreach (var parameter in parameterLines) + { + switch (parameter.Key) + { + case "Protein database file": + ProteinDatabasePath = parameter.Value; + break; + case "Spectrum file": + SpectrumFilePath = parameter.Value; + break; + case "Number of combined spectra": + NumberOfCombinedSpectra = int.Parse(parameter.Value); + break; + case "Fragmentation method": + FragmentationMethod = parameter.Value.Equals("FILE", StringComparison.InvariantCultureIgnoreCase) + ? DissociationType.Autodetect + : Enum.Parse(parameter.Value); + break; + case "Search type": + SearchType = parameter.Value; + break; + case "Allowed N-terminal forms": + AllowedNTerminalForms = parameter.Value.Split(',').ToList(); + break; + case "Maximum number of unexpected modifications": + NumberOfMaxUnexpectedModifications = int.Parse(parameter.Value); + break; + case "Maximum mass shift of modifications": + MaximumMassShift = double.Parse(parameter.Value.Split(" ")[0]); + break; + case "Minimum mass shift of modifications": + MinimumMassShift = double.Parse(parameter.Value.Split(" ")[0]); + break; + case "Spectrum-level cutoff type": + SpectrumLevelCutOffType = parameter.Value; + break; + case "Spectrum-level cutoff value": + SpectrumLevelCutOffValue = double.Parse(parameter.Value); + break; + case "Proteoform-level cutoff type": + ProteoformLevelCutOffType = parameter.Value; + break; + case "Proteoform-level cutoff value": + ProteoformLevelCutOffValue = double.Parse(parameter.Value); + break; + case "Error tolerance for matching masses": + PrecursorErrorTolerance = double.Parse(parameter.Value.Split(" ")[0]); + break; + case "Error tolerance for identifying PrSM clusters": + PrsmClusterErrorTolerance = double.Parse(parameter.Value.Split(" ")[0]); + break; + case "Use TopFD feature file": + UseToppicFeatureFile = bool.Parse(parameter.Value); 
+ break; + case "E-value computation": + EValueComputation = parameter.Value; + break; + case "Localization with MIScore": + LocalizationWithMIScore = bool.Parse(parameter.Value); + break; + case "Thread number": + ThreadNumber = int.Parse(parameter.Value); + break; + case "Executable file directory": + ExecutableFileDirectory = parameter.Value; + break; + case "Start time": + if (DateTime.TryParseExact(parameter.Value, dateTimeFormat, + System.Globalization.CultureInfo.InvariantCulture, + System.Globalization.DateTimeStyles.None, out DateTime result)) + { + StartTime = result; + } + break; + case "End time": + if (DateTime.TryParseExact(parameter.Value, dateTimeFormat, + System.Globalization.CultureInfo.InvariantCulture, + System.Globalization.DateTimeStyles.None, out result)) + { + EndTime = result; + } + break; + case "Version": + Version = parameter.Value; + break; + } + } + } + + public override void WriteResults(string outputPath) + { + if (!CanRead(outputPath)) + outputPath += FileType.GetFileExtension(); + + using var sw = new StreamWriter(File.Create(outputPath)); + using var csv = new CsvWriter(sw, ToppicPrsm.CsvConfiguration); + + // write header + sw.WriteLine("********************** Parameters **********************"); + sw.WriteLine($"{"Protein database file:",-46}\t{ProteinDatabasePath}"); + sw.WriteLine($"{"Spectrum file:",-46}\t{SpectrumFilePath}"); + sw.WriteLine($"{"Number of combined spectra:",-46}\t{NumberOfCombinedSpectra}"); + sw.WriteLine($"{"Fragmentation method:",-46}\t{FragmentationMethod}"); + sw.WriteLine($"{"Search type:",-46}\t{SearchType}"); + sw.WriteLine($"{"Fixed modifications BEGIN",-46}"); + foreach (var fixedMod in FixedModifications) + { + var splits = fixedMod.Split(' '); + sw.WriteLine($"{splits[0],-46}\t{splits[1]}\t{splits[2]}"); + } + sw.WriteLine($"{"Fixed modifications END",-46}"); + sw.WriteLine($"{"Allowed N-terminal forms:",-46}\t{string.Join(",", AllowedNTerminalForms)}"); + sw.WriteLine($"{"Maximum number of 
unexpected modifications:",-46}\t{NumberOfMaxUnexpectedModifications}"); + sw.WriteLine($"{"Maximum mass shift of modifications:",-46}\t{MaximumMassShift} Da"); + sw.WriteLine($"{"Minimum mass shift of modifications:",-46}\t{MinimumMassShift} Da"); + sw.WriteLine($"{"Spectrum-level cutoff type:",-46}\t{SpectrumLevelCutOffType}"); + sw.WriteLine($"{"Spectrum-level cutoff value:",-46}\t{SpectrumLevelCutOffValue}"); + sw.WriteLine($"{"Proteoform-level cutoff type:",-46}\t{ProteoformLevelCutOffType}"); + sw.WriteLine($"{"Proteoform-level cutoff value:",-46}\t{ProteoformLevelCutOffValue}"); + sw.WriteLine($"{"Error tolerance for matching masses:",-46}\t{PrecursorErrorTolerance} ppm"); + sw.WriteLine($"{"Error tolerance for identifying PrSM clusters:",-46}\t{PrsmClusterErrorTolerance} Da"); + sw.WriteLine($"{"Use TopFD feature file:",-46}\t{UseToppicFeatureFile}"); + sw.WriteLine($"{"E-value computation:",-46}\t{EValueComputation}"); + sw.WriteLine($"{"Localization with MIScore:",-46}\t{LocalizationWithMIScore}"); + sw.WriteLine($"{"Thread number:",-46}\t{ThreadNumber}"); + sw.WriteLine($"{"Executable file directory:",-46}\t{ExecutableFileDirectory}"); + sw.WriteLine($"{"Start time:",-46}\t{StartTime:ddd MMM dd HH:mm:ss yyyy}"); + sw.WriteLine($"{"End time:",-46}\t{EndTime:ddd MMM dd HH:mm:ss yyyy}"); + sw.WriteLine($"{"Version:",-46}\t{Version}"); + sw.WriteLine("********************** Parameters **********************"); + sw.WriteLine(""); + + csv.WriteHeader(); + foreach (var result in Results) + { + csv.NextRecord(); + csv.WriteRecord(result); + foreach (var alternativeId in result.AlternativeIdentifications) + { + csv.NextRecord(); + sw.Write($"{result.FilePath}\t{alternativeId}"); + } + } + } + } +} diff --git a/mzLib/Readers/ExternalResults/SupportedVersions.txt b/mzLib/Readers/ExternalResults/SupportedVersions.txt index 4cd8d5859..968bcd936 100644 --- a/mzLib/Readers/ExternalResults/SupportedVersions.txt +++ b/mzLib/Readers/ExternalResults/SupportedVersions.txt 
@@ -1,3 +1,4 @@ Software VersionTested FileTypes FLASHDeconv OpenMS-3.0.0 Ms1Feature, Ms2Feature, FlashDeconvMs1Tsv, FlashDeconvTsv -TopFD 1.6.2 Ms1Feature, Ms2Feature, mzrt.csv \ No newline at end of file +TopFD 1.6.2 Ms1Feature, Ms2Feature, mzrt.csv +TopPIC 1.6.2 Prsms, Proteoforms, Prsms Single, Proteoforms Single \ No newline at end of file diff --git a/mzLib/Readers/Util/Converters.cs b/mzLib/Readers/Util/Converters.cs index 84b1a9881..f8beffe50 100644 --- a/mzLib/Readers/Util/Converters.cs +++ b/mzLib/Readers/Util/Converters.cs @@ -23,4 +23,17 @@ public override string ConvertToString(object value, IWriterRow row, MemberMapDa return string.Join(';', list); } } + + public class DashToNullOrDoubleConverter : DefaultTypeConverter + { + public override object ConvertFromString(string text, IReaderRow row, MemberMapData memberMapData) + { + return text == "-" ? null : double.Parse(text); + } + + public override string ConvertToString(object value, IWriterRow row, MemberMapData memberMapData) + { + return value as double? == null ? 
"-" : value.ToString(); + } + } } diff --git a/mzLib/Readers/Util/Software.cs b/mzLib/Readers/Util/Software.cs index 339a6100e..d8a99af6a 100644 --- a/mzLib/Readers/Util/Software.cs +++ b/mzLib/Readers/Util/Software.cs @@ -9,5 +9,6 @@ public enum Software TopFD, // files tested were outputted from v1.6.2 MetaMorpheus, MaxQuant, + Toppic, } } diff --git a/mzLib/Readers/Util/SupportedFileTypes.cs b/mzLib/Readers/Util/SupportedFileTypes.cs index 64d7abf54..ef7bcb08e 100644 --- a/mzLib/Readers/Util/SupportedFileTypes.cs +++ b/mzLib/Readers/Util/SupportedFileTypes.cs @@ -12,7 +12,11 @@ public enum SupportedFileType ThermoRaw, MzML, Mgf, - BrukerD + BrukerD, + ToppicPrsm, + ToppicPrsmSingle, + ToppicProteoform, + ToppicProteoformSingle, } public static class SupportedFileTypeExtensions @@ -36,6 +40,10 @@ public static string GetFileExtension(this SupportedFileType type) SupportedFileType.MzML => ".mzML", SupportedFileType.Mgf => ".mgf", SupportedFileType.BrukerD => ".d", + SupportedFileType.ToppicPrsm => "_prsm.tsv", + SupportedFileType.ToppicPrsmSingle => "_prsm_single.tsv", + SupportedFileType.ToppicProteoform => "_proteoform.tsv", + SupportedFileType.ToppicProteoformSingle => "_proteoform_single.tsv", _ => throw new MzLibException("File type not supported") }; } @@ -63,6 +71,15 @@ public static SupportedFileType ParseFileType(this string filePath) case ".tsv": if (filePath.EndsWith(SupportedFileType.Ms1Tsv_FlashDeconv.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) return SupportedFileType.Ms1Tsv_FlashDeconv; + if (filePath.EndsWith(SupportedFileType.ToppicPrsm.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) + return SupportedFileType.ToppicPrsm; + if (filePath.EndsWith(SupportedFileType.ToppicProteoform.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) + return SupportedFileType.ToppicProteoform; + if (filePath.EndsWith(SupportedFileType.ToppicPrsmSingle.GetFileExtension(), 
StringComparison.InvariantCultureIgnoreCase)) + return SupportedFileType.ToppicPrsmSingle; + if (filePath.EndsWith(SupportedFileType.ToppicProteoformSingle.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) + return SupportedFileType.ToppicProteoformSingle; + // catchall for other tsv types, one one implemented right now if (filePath.EndsWith(SupportedFileType.Tsv_FlashDeconv.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase) && !filePath.EndsWith(SupportedFileType.Ms1Tsv_FlashDeconv.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase)) diff --git a/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv new file mode 100644 index 000000000..fbc86d083 --- /dev/null +++ b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv @@ -0,0 +1,34 @@ +********************** Parameters ********************** +Protein database file: C:/Users/Claire/Downloads/HIV_Human.fasta +Spectrum file: C:/Users/Claire/Desktop/3bioreps_HIV_TopPic\05-02-23_HIV_HUMAN_search_3bioreps_ms2.msalign +Number of combined spectra: 1 +Fragmentation method: HCD +Search type: TARGET +Use TopFD feature file: TRUE +Maximum number of unexpected modifications: 1 +Error tolerance for matching masses: 15 ppm +Error tolerance for identifying PrSM clusters: 1.2 Da +Spectrum-level cutoff type: EVALUE +Spectrum-level cutoff value: 0.01 +Proteoform-level cutoff type: EVALUE +Proteoform-level cutoff value: 0.01 +Allowed N-terminal forms: "NONE,NME,NME_ACETYLATION,M_ACETYLATION" +Maximum mass shift of modifications: 600 Da +Minimum mass shift of modifications: -100 Da +Thread number: 1 +E-value computation: Generating function +Executable file directory: C:\Users\Claire\Desktop\toppic-windows-1.5.3 +Start time: Tue May 02 12:43:07 2023 +End time: Tue May 02 16:06:37 2023 
+Version: 1.5.3 +********************** Parameters ********************** + +Data file name Prsm ID Spectrum ID Fragmentation Scan(s) Retention time #peaks Charge Precursor mass Adjusted precursor mass Proteoform ID Feature intensity Feature score Feature apex time #Protein hits Protein accession Protein description First residue Last residue Special amino acids Proteoform Proteoform mass Protein N-terminal form #unexpected modifications #variable PTMs MIScore #matched peaks #matched fragment ions E-value Spectrum-level Q-value Proteoform-level Q-value +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 0 161 HCD 981 1528.65 43 4 2623.5996 2623.586103 86 6.95E+07 54.017889 1527.13 2 sp|P68104|EF1A1_HUMAN Elongation factor 1-alpha 1 OS=Homo sapiens OX=9606 GN=EEF1A1 PE=1 SV=1 437 462 G.VIKAVDKKAAGAGKVTKSAQKAQKAK. 2623.586103 NONE 0 0 - 28 27 1.15E-14 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 2 163 HCD 983 1533.12 28 4 2679.5122 2679.5922 394 5.16E+06 33.033832 1527.13 2 sp|P68104|EF1A1_HUMAN Elongation factor 1-alpha 1 OS=Homo sapiens OX=9606 GN=EEF1A1 PE=1 SV=1 436 462 V.(GVIK)[-1.0154]AVDKKAAGAGKVTKSAQKAQKAK. 2679.5922 NONE 1 0 - 7 7 0.000621324 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 3 196 HCD 1038 1635.34 46 4 3335.64584 3335.634188 140 4.52E+05 -1000 1628.64 3 sp|P04591_MA|GAG_HV1H2 Gag polyprotein OS=Human immunodeficiency virus type 1 group M subtype B (isolate HXB2) OX=11706 GN=gag PE=1 SV=3 103 132 D.KIEEEQNKSKKKAQQAAADTGNNSQVSQNY. 3335.634188 NONE 0 0 - 18 16 7.42E-12 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 4 218 HCD 1071 1698.58 46 4 2779.68675 2779.675981 69 1.87E+07 54.055328 1699.85 2 sp|P68104|EF1A1_HUMAN Elongation factor 1-alpha 1 OS=Homo sapiens OX=9606 GN=EEF1A1 PE=1 SV=1 435 462 A.VGVIKAVDKKAAGAGKVTKSAQKAQKAK. 
2779.675981 NONE 0 0 - 32 28 1.78E-15 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 6 220 HCD 1074 1704.24 47 4 2835.60521 2835.60021 257 1.81E+06 -1000 1699.85 2 sp|P68104|EF1A1_HUMAN Elongation factor 1-alpha 1 OS=Homo sapiens OX=9606 GN=EEF1A1 PE=1 SV=1 435 462 A.VGVIKA(VD)[55.9242]KKAAGAGKVTKSAQKAQKAK. 2835.60021 NONE 1 0 - 21 20 2.36E-07 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 8 253 HCD 1125 1801 33 13 9256.05822 9256.01695 182 7.52E+06 -1000 1809.32 1 sp|P05204|HMGN2_HUMAN Non-histone chromosomal protein HMG-17 OS=Homo sapiens OX=9606 GN=HMGN2 PE=1 SV=3 2 90 M.PKRKAEGDAKGDKAKVKDEPQRRSARLSAKPAPPKPEPKPKKAPAKKGEKVPKGKKGKADAGKEGNNPAENGDAKTDQAQKAEGAGDAK. 9256.01695 NME 0 0 - 12 9 4.33E-10 - - +C:/Users/Claire/Desktop/3bioreps_HIV_TopPic/03_21_23_HIV_freezethaw_TD_ms2.msalign 11 269 HCD 1154 1853.49 47 4 2850.72435 2850.713095 92 1.34E+06 -1000 1858.37 2 sp|P68104|EF1A1_HUMAN Elongation factor 1-alpha 1 OS=Homo sapiens OX=9606 GN=EEF1A1 PE=1 SV=1 434 462 V.AVGVIKAVDKKAAGAGKVTKSAQKAQKAK. 
2850.713095 NONE 0 0 - 27 25 1.88E-14 - - + \ No newline at end of file diff --git a/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv new file mode 100644 index 000000000..3e15af9de --- /dev/null +++ b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv @@ -0,0 +1,37 @@ +********************** Parameters ********************** +Protein database file: D:/Databases/Human_uniprotkb_proteome_UP000005640_AND_revi_2023_09_29.fasta +Spectrum file: B:/Users/Nic/SharedWithMe/targetBias/Centroided\HumanFasta_TopPicDecoys_Small_ms2.msalign +Number of combined spectra: 1 +Fragmentation method: HCD +Search type: TARGET+DECOY +Fixed modifications BEGIN +Carbamidomethylation 57.0215 C +Fixed modifications END +Allowed N-terminal forms: NONE,NME,NME_ACETYLATION,M_ACETYLATION +Maximum number of unexpected modifications: 1 +Maximum mass shift of modifications: 500 Da +Minimum mass shift of modifications: -500 Da +Spectrum-level cutoff type: FDR +Spectrum-level cutoff value: 100 +Proteoform-level cutoff type: FDR +Proteoform-level cutoff value: 100 +Error tolerance for matching masses: 10 ppm +Error tolerance for identifying PrSM clusters: 1.2 Da +Use TopFD feature file: True +E-value computation: Generating function +Localization with MIScore: False +Thread number: 4 +Executable file directory: C:\Users\Nic\Downloads\toppic-windows-1.6.2\toppic-windows-1.6.2 +Start time: Fri Sep 29 10:14:12 2023 +End time: Sat Sep 30 03:00:08 2023 +Version: 1.6.2 +********************** Parameters ********************** + +Data file name Prsm ID Spectrum ID Fragmentation Scan(s) Retention time #peaks Charge Precursor mass Adjusted precursor mass Proteoform ID Feature intensity Feature score Feature apex time #Protein hits Protein accession Protein description First residue Last residue 
Special amino acids Database protein sequence Proteoform Proteoform mass Protein N-terminal form Fixed PTMs #unexpected modifications unexpected modifications #variable PTMs variable PTMs MIScore #matched peaks #matched fragment ions E-value Spectrum-level Q-value Proteoform-level Q-value +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 2 10 HCD 354 465.09 16 1 535.1598 534.16745 1346 1.038e+07 -1000 548.52 1 DECOY_sp|Q9NRR8|C42S1_HUMAN CDC42 small effector protein 1 OS=Homo sapiens OX=9606 GN=CDC42SE1 PE=1 SV=1 2 9 SAECTKRF M.SAE(C)[Carbamidomethylation](TKRF)[-463.2977].D 534.16745 NME Carbamidomethylation:[4] 1 -463.2977:[5-8] 0 - 1 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 3 11 HCD 361 476.49 13 1 1134.42247 1133.44012 1345 9.911e+02 -1000 472.29 1 DECOY_sp|P20929|NEBU_HUMAN Nebulin OS=Homo sapiens OX=9606 GN=NEB PE=1 SV=5 977 984 THASEKGY Q.TH(ASEK)[+242.0315]GY.L 1133.44012 NONE 1 +242.0315:[3-6] 0 - 1 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 4 12 HCD 367 486.58 17 1 637.21066 636.21831 1344 4.688e+05 -1000 460.9 1 DECOY_sp|O60313|OPA1_HUMAN Dynamin-like 120 kDa protein, mitochondrial OS=Homo sapiens OX=9606 GN=OPA1 PE=1 SV=3 810 815 MEEDNL D.ME(E)[-113.0719]DNL.G 636.21831 NONE 1 -113.0719:[3] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 5 13 HCD 368 490.86 18 4 5201.89612 5202.88347 1343 1.969e+03 -1000 482.47 1 DECOY_sp|Q9BYE3|LCE3D_HUMAN Late cornified envelope protein 3D OS=Homo sapiens OX=9606 GN=LCE3D PE=1 SV=1 2 45 SKGSPGRSSGGQPQPGRLPCSRGPGCRHCPQHGPGQECSPFGRR M.SKGS(P)[+435.6726]GRSSGGQPQPGRLP(C)[Carbamidomethylation]SRGPG(C)[Carbamidomethylation]RH(C)[Carbamidomethylation]PQHGPGQE(C)[Carbamidomethylation]SPFGRR.D 5202.88347 NME 
Carbamidomethylation:[20];Carbamidomethylation:[26];Carbamidomethylation:[29];Carbamidomethylation:[38] 1 +435.6726:[5] 0 - 3 3 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 6 14 HCD 370 496.14 19 3 3831.76876 3831.73376 1342 1.205e+03 -1000 491.93 1 DECOY_sp|P23511|NFYA_HUMAN Nuclear transcription factor Y subunit alpha OS=Homo sapiens OX=9606 GN=NFYA PE=1 SV=2 1 30 MESPVQVRAQYEVVPSMKQQREQLTVQQQL .MESPVQVRAQYEVVPSMKQQREQLTVQQQ(L)[+274.9207].P 3831.73376 NONE 1 +274.9207:[30] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 7 15 HCD 373 502.56 21 3 3527.02448 3527.09448 1341 1.541e+03 -1000 498.43 1 sp|Q9Y421|FA32A_HUMAN Protein FAM32A OS=Homo sapiens OX=9606 GN=FAM32A PE=1 SV=2 1 35 MEAYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA .ME(AYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA)[-442.1980].K 3527.09448 NONE 1 -442.1980:[3-35] 0 - 1 1 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 8 16 HCD 374 506.83 16 1 1242.22294 1241.24059 1340 1.149e+03 -1000 498.43 1 DECOY_sp|Q9H871|RMD5A_HUMAN E3 ubiquitin-protein transferase RMND5A OS=Homo sapiens OX=9606 GN=RMND5A PE=1 SV=1 250 259 HCGCHTGVSQ A.H(C)[Carbamidomethylation]G(C)[Carbamidomethylation]HT(GVSQ)[+99.8013].V 1241.24059 NONE Carbamidomethylation:[2];Carbamidomethylation:[4] 1 +99.8013:[7-10] 0 - 3 3 1e+300 1 1 diff --git a/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofrom_TopPICv1.6.2_proteoform.tsv b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofrom_TopPICv1.6.2_proteoform.tsv new file mode 100644 index 000000000..579a5af52 --- /dev/null +++ b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicProteofrom_TopPICv1.6.2_proteoform.tsv @@ -0,0 +1,70 @@ +********************** Parameters ********************** +Protein database file: D:/Databases/Human_uniprotkb_proteome_UP000005640_AND_revi_2023_09_29.fasta +Spectrum file: 
B:/Users/Nic/SharedWithMe/targetBias/Centroided\HumanFasta_TopPicDecoys_Small_ms2.msalign +Number of combined spectra: 1 +Fragmentation method: HCD +Search type: TARGET+DECOY +Fixed modifications BEGIN +Carbamidomethylation 57.0215 C +Fixed modifications END +Allowed N-terminal forms: NONE,NME,NME_ACETYLATION,M_ACETYLATION +Maximum number of unexpected modifications: 1 +Maximum mass shift of modifications: 500 Da +Minimum mass shift of modifications: -500 Da +Spectrum-level cutoff type: FDR +Spectrum-level cutoff value: 100 +Proteoform-level cutoff type: FDR +Proteoform-level cutoff value: 100 +Error tolerance for matching masses: 10 ppm +Error tolerance for identifying PrSM clusters: 1.2 Da +Use TopFD feature file: True +E-value computation: Generating function +Localization with MIScore: False +Thread number: 4 +Executable file directory: C:\Users\Nic\Downloads\toppic-windows-1.6.2\toppic-windows-1.6.2 +Start time: Fri Sep 29 10:14:12 2023 +End time: Sat Sep 30 03:00:08 2023 +Version: 1.6.2 +********************** Parameters ********************** + +Data file name Prsm ID Spectrum ID Fragmentation Scan(s) Retention time #peaks Charge Precursor mass Adjusted precursor mass Proteoform ID Feature intensity Feature score Feature apex time #Protein hits Protein accession Protein description First residue Last residue Special amino acids Database protein sequence Proteoform Proteoform mass Protein N-terminal form Fixed PTMs #unexpected modifications unexpected modifications #variable PTMs variable PTMs MIScore #matched peaks #matched fragment ions E-value Spectrum-level Q-value Proteoform-level Q-value +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 2 10 HCD 354 465.09 16 1 535.1598 534.16745 1346 1.038E+07 -1000 548.52 1 DECOY_sp|Q9NRR8|C42S1_HUMAN CDC42 small effector protein 1 OS=Homo sapiens OX=9606 GN=CDC42SE1 PE=1 SV=1 2 9 SAECTKRF M.SAE(C)[Carbamidomethylation](TKRF)[-463.2977].D 534.16745 NME 
Carbamidomethylation:[4] 1 -463.2977:[5-8] 0 - 1 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 3 11 HCD 361 476.49 13 1 1134.42247 1133.44012 1345 9.91E+02 -1000 472.29 1 DECOY_sp|P20929|NEBU_HUMAN Nebulin OS=Homo sapiens OX=9606 GN=NEB PE=1 SV=5 977 984 THASEKGY Q.TH(ASEK)[+242.0315]GY.L 1133.44012 NONE 1 +242.0315:[3-6] 0 - 1 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 4 12 HCD 367 486.58 17 1 637.21066 636.21831 1344 4.69E+05 -1000 460.9 1 DECOY_sp|O60313|OPA1_HUMAN Dynamin-like 120 kDa protein, mitochondrial OS=Homo sapiens OX=9606 GN=OPA1 PE=1 SV=3 810 815 MEEDNL D.ME(E)[-113.0719]DNL.G 636.21831 NONE 1 -113.0719:[3] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 5 13 HCD 368 490.86 18 4 5201.89612 5202.88347 1343 1.97E+03 -1000 482.47 1 DECOY_sp|Q9BYE3|LCE3D_HUMAN Late cornified envelope protein 3D OS=Homo sapiens OX=9606 GN=LCE3D PE=1 SV=1 2 45 SKGSPGRSSGGQPQPGRLPCSRGPGCRHCPQHGPGQECSPFGRR M.SKGS(P)[+435.6726]GRSSGGQPQPGRLP(C)[Carbamidomethylation]SRGPG(C)[Carbamidomethylation]RH(C)[Carbamidomethylation]PQHGPGQE(C)[Carbamidomethylation]SPFGRR.D 5202.88347 NME Carbamidomethylation:[20];Carbamidomethylation:[26];Carbamidomethylation:[29];Carbamidomethylation:[38] 1 +435.6726:[5] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 6 14 HCD 370 496.14 19 3 3831.76876 3831.73376 1342 1.21E+03 -1000 491.93 1 DECOY_sp|P23511|NFYA_HUMAN Nuclear transcription factor Y subunit alpha OS=Homo sapiens OX=9606 GN=NFYA PE=1 SV=2 1 30 MESPVQVRAQYEVVPSMKQQREQLTVQQQL .MESPVQVRAQYEVVPSMKQQREQLTVQQQ(L)[+274.9207].P 3831.73376 NONE 1 +274.9207:[30] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 7 15 HCD 373 502.56 21 3 3527.02448 3527.09448 1341 
1.54E+03 -1000 498.43 1 sp|Q9Y421|FA32A_HUMAN Protein FAM32A OS=Homo sapiens OX=9606 GN=FAM32A PE=1 SV=2 1 35 MEAYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA .ME(AYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA)[-442.1980].K 3527.09448 NONE 1 -442.1980:[3-35] 0 - 1 1 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 8 16 HCD 374 506.83 16 1 1242.22294 1241.24059 1340 1.15E+03 -1000 498.43 1 DECOY_sp|Q9H871|RMD5A_HUMAN E3 ubiquitin-protein transferase RMND5A OS=Homo sapiens OX=9606 GN=RMND5A PE=1 SV=1 250 259 HCGCHTGVSQ A.H(C)[Carbamidomethylation]G(C)[Carbamidomethylation]HT(GVSQ)[+99.8013].V 1241.24059 NONE Carbamidomethylation:[2];Carbamidomethylation:[4] 1 +99.8013:[7-10] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 9 17 HCD 379 515.78 25 1 1158.69624 1157.70389 1339 8.71E+02 -1000 511.58 1 sp|Q7Z569|BRAP_HUMAN BRCA1-associated protein OS=Homo sapiens OX=9606 GN=BRAP PE=1 SV=2 1 10 MSVSLVVIRL .(MSV)[+42.0302]SLVVIRL.E 1157.70389 NONE 1 +42.0302:[1-3] 0 - 1 1 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 10 18 HCD 384 524.74 25 1 1012.32364 1012.32364 1327 3.74E+03 -1000 509.13 1 DECOY_sp|O75132|ZBED4_HUMAN Zinc finger BED domain-containing protein 4 OS=Homo sapiens OX=9606 GN=ZBED4 PE=1 SV=2 1 8 MESKMEYG .ME(SK)[+38.9351]MEYG.G 1012.32364 NONE 1 +38.9351:[3-4] 0 - 4 4 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 11 19 HCD 387 531.16 23 1 1457.91305 1457.93805 1337 1.24E+03 -1000 527.04 1 DECOY_sp|Q9NYB5|SO1C1_HUMAN Solute carrier organic anion transporter family member 1C1 OS=Homo sapiens OX=9606 GN=SLCO1C1 PE=1 SV=1 284 297 KGSSLLTIRGSKVL R.KGSS(LLTIRGSKVL)[+0.0451].F 1457.93805 NONE 1 +0.0451:[5-14] 0 - 1 1 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 12 20 HCD 388 
535.44 26 3 3769.05417 3769.12917 1336 1.46E+03 -1000 527.04 1 DECOY_sp|Q8NGY9|OR2L8_HUMAN Olfactory receptor 2L8 OS=Homo sapiens OX=9606 GN=OR2L8 PE=2 SV=1 1 33 MEHTIKMLSANLVFSIPICLSSVHRIDIVYLGL .ME(HTIKMLSANLVFSIPI(C)[Carbamidomethylation]LSSVHRIDIVYLGL)[+0.1159].L 3769.12917 NONE Carbamidomethylation:[19] 1 +0.1159:[3-33] 0 - 1 1 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 13 21 HCD 390 540.72 26 3 4042.88072 4042.90572 1335 1.06E+03 -1000 536.51 1 DECOY_sp|P42830|CXCL5_HUMAN C-X-C motif chemokine 5 OS=Homo sapiens OX=9606 GN=CXCL5 PE=1 SV=1 77 114 SAAVKASRPTKGVPEHPAPPQQALIMGPLREAPSGNGI V.SAAVKASRPTKGVPEHPAPPQQALIMGPLREAP(S)[+213.8636]GNGI. 4042.90572 NONE 1 +213.8636:[34] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 14 22 HCD 391 544.99 25 1 990.34541 989.35806 1334 3.44E+04 -1000 536.51 1 DECOY_sp|O75132|ZBED4_HUMAN Zinc finger BED domain-containing protein 4 OS=Homo sapiens OX=9606 GN=ZBED4 PE=1 SV=2 1 8 MESKMEYG .MESK(ME)[+15.9696]YG.G 989.35806 NONE 1 +15.9696:[5-6] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 15 23 HCD 395 552.64 25 1 1466.72486 1467.72721 1333 1.19E+03 -1000 548.52 1 sp|P62745|RHOB_HUMAN Rho-related GTP-binding protein RhoB OS=Homo sapiens OX=9606 GN=RHOB PE=1 SV=1 1 18 MAAIRKKLVVVGDGACGK .[Acetyl]-MAAIRKKLVV(VGDGA(C)[Carbamidomethylation]GK)[-446.3271].T 1467.72721 M_ACETYLATION Carbamidomethylation:[16] 1 -446.3271:[11-18] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 16 24 HCD 396 556.92 25 1 1356.41359 1356.40859 1332 1.00E+03 -1000 548.52 1 DECOY_sp|O95925|EPPI_HUMAN Eppin OS=Homo sapiens OX=9606 GN=EPPIN PE=1 SV=1 2 12 GTALCLCDRKC M.[Acetyl]-GTAL(C)[Carbamidomethylation](L(C)[Carbamidomethylation]DR)[-38.2019]K(C)[Carbamidomethylation].Y 1356.40859 
NME_ACETYLATION Carbamidomethylation:[5];Carbamidomethylation:[7];Carbamidomethylation:[11] 1 -38.2019:[6-9] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 17 25 HCD 398 562.2 25 1 1310.35585 1310.35585 1331 3.01E+03 -1000 557.99 1 DECOY_sp|O94915|FRYL_HUMAN Protein furry homolog-like OS=Homo sapiens OX=9606 GN=FRYL PE=1 SV=2 382 394 LMYHEDEDTVAGN K.LMYHE(DED)[-182.2582]TVAGN.K 1310.35585 NONE 1 -182.2582:[6-8] 0 - 4 4 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 18 26 HCD 399 566.48 25 1 1372.325 1373.33235 1330 2.15E+04 -1000 557.99 1 DECOY_sp|Q8WVC0|LEO1_HUMAN RNA polymerase-associated protein LEO1 OS=Homo sapiens OX=9606 GN=LEO1 PE=1 SV=1 1 18 MASSGENRGPDDDMSDSS .(MASSGENRG)[-483.3465]PDDDMSDSS.D 1373.33235 NONE 1 -483.3465:[1-9] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 19 27 HCD 401 571.75 25 1 1170.20284 1169.22049 1329 9.62E+03 -1000 567.55 1 DECOY_sp|O43525|KCNQ3_HUMAN Potassium voltage-gated channel subfamily KQT member 3 OS=Homo sapiens OX=9606 GN=KCNQ3 PE=1 SV=2 541 549 YDYGSAAPD T.YDYG(SAAPD)[+211.8489].R 1169.22049 NONE 1 +211.8489:[5-9] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 20 28 HCD 404 578.16 19 1 858.18013 859.17248 1328 2.61E+07 -339.9535696 836.01 1 DECOY_sp|Q06732|ZN33B_HUMAN Zinc finger protein 33B OS=Homo sapiens OX=9606 GN=ZNF33B PE=1 SV=2 130 135 ECHQCE V.E(C)[Carbamidomethylation](HQ)[-2.1021](C)[Carbamidomethylation]E.S 859.17248 NONE Carbamidomethylation:[2];Carbamidomethylation:[5] 1 -2.1021:[3-4] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 21 29 HCD 405 582.44 25 1 1005.12813 1006.15048 1338 4.64E+05 -1000 610.96 1 DECOY_sp|Q9NVU0|RPC5_HUMAN DNA-directed RNA polymerase III subunit 
RPC5 OS=Homo sapiens OX=9606 GN=POLR3E PE=1 SV=1 594 602 MEVSSEMYG S.ME(VSSEM)[-25.2435]YG.Q 1006.15048 NONE 1 -25.2435:[3-7] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 22 30 HCD 408 588.85 25 1 978.10984 979.12719 1364 1.56E+04 -1000 584.74 1 sp|P22033|MUTA_HUMAN Methylmalonyl-CoA mutase, mitochondrial OS=Homo sapiens OX=9606 GN=MMUT PE=1 SV=4 142 149 THRGYDSD A.TH(RG)[+29.7382]YDSD.N 979.12719 NONE 1 +29.7382:[3-4] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 23 32 HCD 428 618.69 18 1 858.17983 859.19218 1363 2.61E+07 -339.9535696 836.01 1 DECOY_sp|C9J069|AJM1_HUMAN Apical junction component 1 homolog OS=Homo sapiens OX=9606 GN=AJM1 PE=3 SV=1 351 361 GPGAGPRYPPE F.GPGA(GPRYP)[-237.3380]PE.G 859.19218 NONE 1 -237.3380:[5-9] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 24 33 HCD 457 655.58 20 1 858.17676 859.17911 1362 2.61E+07 -339.9535696 836.01 1 DECOY_sp|P51814|ZNF41_HUMAN Zinc finger protein 41 OS=Homo sapiens OX=9606 GN=ZNF41 PE=1 SV=2 369 375 NPAGHYT F.NP(A)[+100.8444]GHYT.E 859.17911 NONE 1 +100.8444:[3] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 25 34 HCD 489 695.42 17 1 858.17999 857.17764 1361 2.61E+07 -339.9535696 836.01 1 DECOY_sp|A6NFN3|RFOX3_HUMAN RNA binding protein fox-1 homolog 3 OS=Homo sapiens OX=9606 GN=RBFOX3 PE=2 SV=4 57 63 DTPYFAF P.DTPY(FAF)[-2.1976].F 857.17764 NONE 1 -2.1976:[5-7] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 26 36 HCD 524 741.52 18 1 858.17977 857.19242 1360 2.61E+07 -339.9535696 836.01 1 DECOY_sp|O75409|HYPM_HUMAN Huntingtin-interacting protein M OS=Homo sapiens OX=9606 GN=H2AP PE=1 SV=2 1 7 MSEQDSQ .(MSE)[+33.8906]QDSQ.N 857.19242 NONE 1 +33.8906:[1-3] 0 - 2 2 
1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 27 37 HCD 542 765.24 17 1 683.19703 682.19968 1359 4.07E+07 -341.1765846 642.14 1 DECOY_sp|P00746|CFAD_HUMAN Complement factor D OS=Homo sapiens OX=9606 GN=CFD PE=1 SV=5 1 7 MHAQVPQ .(MHAQ)[-127.1857]VPQ.A 682.19968 NONE 1 -127.1857:[1-4] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 28 38 HCD 547 773.91 21 1 729.05927 729.06927 1358 9.46E+04 -345.3656508 769.7 1 DECOY_sp|O00182|LEG9_HUMAN Galectin-9 OS=Homo sapiens OX=9606 GN=LGALS9 PE=1 SV=2 83 89 MEACFGY Q.ME(A(C)[Carbamidomethylation]F)[-147.2453]GY.P 729.06927 NONE Carbamidomethylation:[4] 1 -147.2453:[3-5] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 29 39 HCD 557 788.19 20 1 858.18021 857.19286 1357 2.61E+07 -339.9535696 836.01 1 DECOY_sp|Q07954|LRP1_HUMAN Prolow-density lipoprotein receptor-related protein 1 OS=Homo sapiens OX=9606 GN=LRP1 PE=1 SV=2 4074 4082 CGCHWGACP D.(C)[Carbamidomethylation]G(C)[Carbamidomethylation]H(WGA(C)[Carbamidomethylation]P)[-246.1807].M 857.19286 NONE Carbamidomethylation:[1];Carbamidomethylation:[3];Carbamidomethylation:[8] 1 -246.1807:[5-9] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 30 40 HCD 580 817.22 25 1 1310.35418 1310.34918 1356 2.91E+04 -1000 813.1 1 DECOY_sp|P0DKB5|TPBGL_HUMAN Trophoblast glycoprotein-like OS=Homo sapiens OX=9606 GN=TPBGL PE=3 SV=1 1 13 MALDRHLRGRRPP .[Acetyl]-MALDR(HLRGR)[-305.5350]RPP.D 1310.34918 M_ACETYLATION 1 -305.5350:[6-10] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 31 41 HCD 583 823.25 25 2 1342.28424 1342.28424 1347 2.74E+06 -340.2686216 825.36 1 DECOY_sp|O95965|ITGBL_HUMAN Integrin beta-like protein 1 OS=Homo sapiens OX=9606 GN=ITGBL1 PE=2 SV=1 128 
139 HCGCTRVQHPCC N.H(C)[Carbamidomethylation]G(C)[Carbamidomethylation]TRVQ(HP(C)[Carbamidomethylation](C)[Carbamidomethylation])[-228.3168].E 1342.28424 NONE Carbamidomethylation:[2];Carbamidomethylation:[4];Carbamidomethylation:[11];Carbamidomethylation:[12] 1 -228.3168:[9-12] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 32 42 HCD 588 829.3 25 2 1264.18393 1265.19128 1355 2.79E+08 -344.1550626 854.22 1 sp|A2A2V5|SRTM1_HUMAN Serine-rich and transmembrane domain-containing protein 1 OS=Homo sapiens OX=9606 GN=SERTM1 PE=1 SV=1 1 13 MSEPDTSSGFSGS .(MSEPDTSS)[-22.3012]GFSGS.V 1265.19128 NONE 1 -22.3012:[1-8] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 33 43 HCD 591 832.77 16 1 570.10282 571.11017 1354 3.34E+08 -339.1457476 928.92 1 DECOY_sp|O95336|6PGL_HUMAN 6-phosphogluconolactonase OS=Homo sapiens OX=9606 GN=PGLS PE=1 SV=2 1 8 MAGLGLEP .([Acetyl]-MAGLG)[-257.2950]LEP.S 571.11017 M_ACETYLATION 1 -257.2950:[1-5] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 34 44 HCD 601 847.02 21 1 858.18124 859.17859 1353 2.61E+07 -339.9535696 836.01 1 DECOY_sp|H0YKK7|GG6LS_HUMAN Putative golgin subfamily A member 6-like protein 19 OS=Homo sapiens OX=9606 GN=GOLGA6L19 PE=5 SV=1 523 529 CEQEAEC N.(C)[Carbamidomethylation]EQ(EA)[-65.1167]E(C)[Carbamidomethylation].L 859.17859 NONE Carbamidomethylation:[1];Carbamidomethylation:[7] 1 -65.1167:[4-5] 0 - 3 3 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 35 45 HCD 604 853.24 25 1 836.3739 836.3889 1352 3.25E+05 -348.6847345 849.12 1 DECOY_sp|Q9H714|PACER_HUMAN Protein associated with UVRAG as autophagy enhancer OS=Homo sapiens OX=9606 GN=RUBCNL PE=1 SV=3 1 8 MVSTAGTE .(MVS)[+42.0409]TAGTE.S 836.3889 NONE 1 +42.0409:[1-3] 0 - 2 2 1.00E+300 1 1 
+B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 36 46 HCD 629 884.36 22 1 858.18185 857.1795 1351 5.57E+05 -339.8075356 880.24 1 DECOY_sp|A6PVC2|TTLL8_HUMAN Protein monoglycylase TTLL8 OS=Homo sapiens OX=9606 GN=TTLL8 PE=2 SV=4 451 456 DDGNPD T.DDGNP(D)[+225.9710].N 857.1795 NONE 1 +225.9710:[6] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 37 47 HCD 655 916.61 22 1 858.18168 859.17403 1350 4.67E+05 -345.4975912 912.5 4 DECOY_sp|P00740|FA9_HUMAN Coagulation factor IX OS=Homo sapiens OX=9606 GN=F9 PE=1 SV=2 207 211 CESCE S.(C)[Carbamidomethylation]E(S)[+175.9850](C)[Carbamidomethylation]E.K 859.17403 NONE Carbamidomethylation:[1];Carbamidomethylation:[4] 1 +175.9850:[3] 0 - 2 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 37 sp|Q14162|SREC_HUMAN Scavenger receptor class F member 1 OS=Homo sapiens OX=9606 GN=SCARF1 PE=1 SV=3 281 285 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 37 DECOY_sp|Q2MKA7|RSPO1_HUMAN R-spondin-1 OS=Homo sapiens OX=9606 GN=RSPO1 PE=1 SV=1 175 179 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 37 DECOY_sp|Q58EX7|PKHG4_HUMAN Puratrophin-1 OS=Homo sapiens OX=9606 GN=PLEKHG4 PE=1 SV=1 1174 1178 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 38 48 HCD 681 948.91 25 2 1542.44258 1542.47258 1349 7.85E+07 -343.0067316 909.11 1 DECOY_sp|O00481|BT3A1_HUMAN Butyrophilin subfamily 3 member A1 OS=Homo sapiens OX=9606 GN=BTN3A1 PE=1 SV=3 54 70 AQTAGQSAVLRIMHQGY K.(AQTAGQSAVL)[-287.4481]RIMHQGY.Y 1542.47258 NONE 1 -287.4481:[1-10] 0 - 2 2 1.00E+300 1 1 diff --git a/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv 
b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv new file mode 100644 index 000000000..f22fce3a7 --- /dev/null +++ b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv @@ -0,0 +1,40 @@ +********************** Parameters ********************** +Protein database file: D:/Databases/Human_uniprotkb_proteome_UP000005640_AND_revi_2023_09_29.fasta +Spectrum file: B:/Users/Nic/SharedWithMe/targetBias/Centroided\HumanFasta_TopPicDecoys_Small_ms2.msalign +Number of combined spectra: 1 +Fragmentation method: HCD +Search type: TARGET+DECOY +Fixed modifications BEGIN +Carbamidomethylation 57.0215 C +Fixed modifications END +Allowed N-terminal forms: NONE,NME,NME_ACETYLATION,M_ACETYLATION +Maximum number of unexpected modifications: 1 +Maximum mass shift of modifications: 500 Da +Minimum mass shift of modifications: -500 Da +Spectrum-level cutoff type: FDR +Spectrum-level cutoff value: 100 +Proteoform-level cutoff type: FDR +Proteoform-level cutoff value: 100 +Error tolerance for matching masses: 10 ppm +Error tolerance for identifying PrSM clusters: 1.2 Da +Use TopFD feature file: True +E-value computation: Generating function +Localization with MIScore: False +Thread number: 4 +Executable file directory: C:\Users\Nic\Downloads\toppic-windows-1.6.2\toppic-windows-1.6.2 +Start time: Fri Sep 29 10:14:12 2023 +End time: Sat Sep 30 03:00:08 2023 +Version: 1.6.2 +********************** Parameters ********************** + +Data file name Prsm ID Spectrum ID Fragmentation Scan(s) Retention time #peaks Charge Precursor mass Adjusted precursor mass Proteoform ID Feature intensity Feature score Feature apex time #Protein hits Protein accession Protein description First residue Last residue Special amino acids Database protein sequence Proteoform Proteoform mass Protein N-terminal form Fixed PTMs #unexpected modifications unexpected modifications #variable PTMs variable PTMs MIScore #matched peaks 
#matched fragment ions E-value Spectrum-level Q-value Proteoform-level Q-value +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 0 7 HCD 259 340.12 14 1 576.12185 575.1295 1029 3.604e+06 -1000 536.51 1 DECOY_sp|Q8N5H7|SH2D3_HUMAN SH2 domain-containing protein 3C OS=Homo sapiens OX=9606 GN=SH2D3C PE=1 SV=1 719 724 GYASDS R.GYA(S)[-23.0940]DS.P 575.1295 NONE 1 -23.0940:[4] 0 - 2 3 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 8 HCD 270 356.42 12 1 535.15972 536.16207 917 1.038e+07 -1000 548.52 5 DECOY_sp|O60494|CUBN_HUMAN Cubilin OS=Homo sapiens OX=9606 GN=CUBN PE=1 SV=5 1363 1367 THIGY P.TH(I)[-53.1239]GY.S 536.16207 NONE 1 -53.1239:[3] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 2 10 HCD 354 465.09 16 1 535.1598 534.16745 1346 1.038e+07 -1000 548.52 1 DECOY_sp|Q9NRR8|C42S1_HUMAN CDC42 small effector protein 1 OS=Homo sapiens OX=9606 GN=CDC42SE1 PE=1 SV=1 2 9 SAECTKRF M.SAE(C)[Carbamidomethylation](TKRF)[-463.2977].D 534.16745 NME Carbamidomethylation:[4] 1 -463.2977:[5-8] 0 - 1 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 3 11 HCD 361 476.49 13 1 1134.42247 1133.44012 1345 9.911e+02 -1000 472.29 1 DECOY_sp|P20929|NEBU_HUMAN Nebulin OS=Homo sapiens OX=9606 GN=NEB PE=1 SV=5 977 984 THASEKGY Q.TH(ASEK)[+242.0315]GY.L 1133.44012 NONE 1 +242.0315:[3-6] 0 - 1 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 4 12 HCD 367 486.58 17 1 637.21066 636.21831 1344 4.688e+05 -1000 460.9 1 DECOY_sp|O60313|OPA1_HUMAN Dynamin-like 120 kDa protein, mitochondrial OS=Homo sapiens OX=9606 GN=OPA1 PE=1 SV=3 810 815 MEEDNL D.ME(E)[-113.0719]DNL.G 636.21831 NONE 1 -113.0719:[3] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 5 13 
HCD 368 490.86 18 4 5201.89612 5202.88347 1343 1.969e+03 -1000 482.47 1 DECOY_sp|Q9BYE3|LCE3D_HUMAN Late cornified envelope protein 3D OS=Homo sapiens OX=9606 GN=LCE3D PE=1 SV=1 2 45 SKGSPGRSSGGQPQPGRLPCSRGPGCRHCPQHGPGQECSPFGRR M.SKGS(P)[+435.6726]GRSSGGQPQPGRLP(C)[Carbamidomethylation]SRGPG(C)[Carbamidomethylation]RH(C)[Carbamidomethylation]PQHGPGQE(C)[Carbamidomethylation]SPFGRR.D 5202.88347 NME Carbamidomethylation:[20];Carbamidomethylation:[26];Carbamidomethylation:[29];Carbamidomethylation:[38] 1 +435.6726:[5] 0 - 3 3 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 6 14 HCD 370 496.14 19 3 3831.76876 3831.73376 1342 1.205e+03 -1000 491.93 1 DECOY_sp|P23511|NFYA_HUMAN Nuclear transcription factor Y subunit alpha OS=Homo sapiens OX=9606 GN=NFYA PE=1 SV=2 1 30 MESPVQVRAQYEVVPSMKQQREQLTVQQQL .MESPVQVRAQYEVVPSMKQQREQLTVQQQ(L)[+274.9207].P 3831.73376 NONE 1 +274.9207:[30] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 7 15 HCD 373 502.56 21 3 3527.02448 3527.09448 1341 1.541e+03 -1000 498.43 1 sp|Q9Y421|FA32A_HUMAN Protein FAM32A OS=Homo sapiens OX=9606 GN=FAM32A PE=1 SV=2 1 35 MEAYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA .ME(AYEQVQKGPLKLKGVAELGVTKRKKKKKDKDKA)[-442.1980].K 3527.09448 NONE 1 -442.1980:[3-35] 0 - 1 1 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 8 16 HCD 374 506.83 16 1 1242.22294 1241.24059 1340 1.149e+03 -1000 498.43 1 DECOY_sp|Q9H871|RMD5A_HUMAN E3 ubiquitin-protein transferase RMND5A OS=Homo sapiens OX=9606 GN=RMND5A PE=1 SV=1 250 259 HCGCHTGVSQ A.H(C)[Carbamidomethylation]G(C)[Carbamidomethylation]HT(GVSQ)[+99.8013].V 1241.24059 NONE Carbamidomethylation:[2];Carbamidomethylation:[4] 1 +99.8013:[7-10] 0 - 3 3 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 9 17 HCD 379 515.78 25 1 1158.69624 1157.70389 1339 8.711e+02 
-1000 511.58 1 sp|Q7Z569|BRAP_HUMAN BRCA1-associated protein OS=Homo sapiens OX=9606 GN=BRAP PE=1 SV=2 1 10 MSVSLVVIRL .(MSV)[+42.0302]SLVVIRL.E 1157.70389 NONE 1 +42.0302:[1-3] 0 - 1 1 1e+300 1 1 diff --git a/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsm_TopPICv1.6.2_prsm.tsv b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsm_TopPICv1.6.2_prsm.tsv new file mode 100644 index 000000000..909ce2b55 --- /dev/null +++ b/mzLib/Test/FileReadingTests/ExternalFileTypes/ToppicPrsm_TopPICv1.6.2_prsm.tsv @@ -0,0 +1,38 @@ +********************** Parameters ********************** +Protein database file: D:/Databases/Human_uniprotkb_proteome_UP000005640_AND_revi_2023_09_29.fasta +Spectrum file: B:/Users/Nic/SharedWithMe/targetBias/Centroided\HumanFasta_TopPicDecoys_Small_ms2.msalign +Number of combined spectra: 1 +Fragmentation method: HCD +Search type: TARGET+DECOY +Fixed modifications BEGIN +Carbamidomethylation 57.0215 C +Fixed modifications END +Allowed N-terminal forms: NONE,NME,NME_ACETYLATION,M_ACETYLATION +Maximum number of unexpected modifications: 1 +Maximum mass shift of modifications: 500 Da +Minimum mass shift of modifications: -500 Da +Spectrum-level cutoff type: FDR +Spectrum-level cutoff value: 100 +Proteoform-level cutoff type: FDR +Proteoform-level cutoff value: 100 +Error tolerance for matching masses: 10 ppm +Error tolerance for identifying PrSM clusters: 1.2 Da +Use TopFD feature file: True +E-value computation: Generating function +Localization with MIScore: False +Thread number: 4 +Executable file directory: C:\Users\Nic\Downloads\toppic-windows-1.6.2\toppic-windows-1.6.2 +Start time: Fri Sep 29 10:14:12 2023 +End time: Sat Sep 30 03:00:08 2023 +Version: 1.6.2 +********************** Parameters ********************** + +Data file name Prsm ID Spectrum ID Fragmentation Scan(s) Retention time #peaks Charge Precursor mass Adjusted precursor mass Proteoform ID Feature intensity Feature score Feature apex time #Protein hits Protein accession 
Protein description First residue Last residue Special amino acids Database protein sequence Proteoform Proteoform mass Protein N-terminal form Fixed PTMs #unexpected modifications unexpected modifications #variable PTMs variable PTMs MIScore #matched peaks #matched fragment ions E-value Spectrum-level Q-value Proteoform-level Q-value +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 0 7 HCD 259 340.12 14 1 576.12185 575.1295 1029 3.604e+06 -1000 536.51 1 DECOY_sp|Q8N5H7|SH2D3_HUMAN SH2 domain-containing protein 3C OS=Homo sapiens OX=9606 GN=SH2D3C PE=1 SV=1 719 724 GYASDS R.GYA(S)[-23.0940]DS.P 575.1295 NONE 1 -23.0940:[4] 0 - 2 3 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 8 HCD 270 356.42 12 1 535.15972 536.16207 917 1.038e+07 -1000 548.52 5 DECOY_sp|O60494|CUBN_HUMAN Cubilin OS=Homo sapiens OX=9606 GN=CUBN PE=1 SV=5 1363 1367 THIGY P.TH(I)[-53.1239]GY.S 536.16207 NONE 1 -53.1239:[3] 0 - 2 2 1e+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 DECOY_sp|P21781|FGF7_HUMAN Fibroblast growth factor 7 OS=Homo sapiens OX=9606 GN=FGF7 PE=1 SV=1 101 105 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 sp|Q13423|NNTM_HUMAN NAD(P) transhydrogenase, mitochondrial OS=Homo sapiens OX=9606 GN=NNT PE=1 SV=3 369 373 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 DECOY_sp|Q6ZT12|UBR3_HUMAN E3 ubiquitin-protein ligase UBR3 OS=Homo sapiens OX=9606 GN=UBR3 PE=2 SV=2 1805 1809 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 1 sp|Q96RW7|HMCN1_HUMAN Hemicentin-1 OS=Homo sapiens OX=9606 GN=HMCN1 PE=1 SV=2 179 183 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 2 10 HCD 354 465.09 16 1 535.1598 534.16745 1346 1.04E+07 -1000 548.52 1 
DECOY_sp|Q9NRR8|C42S1_HUMAN CDC42 small effector protein 1 OS=Homo sapiens OX=9606 GN=CDC42SE1 PE=1 SV=1 2 9 SAECTKRF M.SAE(C)[Carbamidomethylation](TKRF)[-463.2977].D 534.16745 NME Carbamidomethylation:[4] 1 -463.2977:[5-8] 0 - 1 2 1.00E+300 1 1 +B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign 3 11 HCD 361 476.49 13 1 1134.42247 1133.44012 1345 9.91E+02 -1000 472.29 1 DECOY_sp|P20929|NEBU_HUMAN Nebulin OS=Homo sapiens OX=9606 GN=NEB PE=1 SV=5 977 984 THASEKGY Q.TH(ASEK)[+242.0315]GY.L 1133.44012 NONE 1 +242.0315:[3-6] 0 - 1 2 1.00E+300 1 1 diff --git a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs index f26817926..c57045e0c 100644 --- a/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs +++ b/mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs @@ -24,6 +24,10 @@ internal class TestSupportedFileExtensions [TestCase(@"FileReadingTests\ExternalFileTypes\TopFDmzrt_jurkat_td_rep1_fract2_frac.mzrt.csv", SupportedFileType.Mzrt_TopFd)] [TestCase(@"FileReadingTests\ExternalFileTypes\Ms1Tsv_FlashDeconvjurkat_td_rep1_fract2_ms1.tsv", SupportedFileType.Ms1Tsv_FlashDeconv)] [TestCase(@"FileReadingTests\ExternalFileTypes\Tsv_FlashDeconvjurkat_td_rep1_fract2.tsv", SupportedFileType.Tsv_FlashDeconv)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv", SupportedFileType.ToppicPrsm)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv", SupportedFileType.ToppicProteoform)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv", SupportedFileType.ToppicProteoformSingle)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv", SupportedFileType.ToppicPrsmSingle)] public static void TestSupportedFileTypeExtensions(string filePath, SupportedFileType expectedType) { var supportedType = 
filePath.ParseFileType(); diff --git a/mzLib/Test/FileReadingTests/TestToppicResultFiles.cs b/mzLib/Test/FileReadingTests/TestToppicResultFiles.cs new file mode 100644 index 000000000..aa1642078 --- /dev/null +++ b/mzLib/Test/FileReadingTests/TestToppicResultFiles.cs @@ -0,0 +1,412 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.IO; +using System.Linq; +using MassSpectrometry; +using Newtonsoft.Json; +using NUnit.Framework; +using Readers; + +namespace Test.FileReadingTests +{ + /* NUnit fixture for ToppicSearchResultFile. The tests in this part of the fixture read the TopPIC .tsv fixtures under FileReadingTests\ExternalFileTypes and check base-sequence extraction, result counts and header parsing. */ [TestFixture] + [ExcludeFromCodeCoverage] + public class TestToppicResultFiles + { + /* Scratch directory path; assigned in SetUp and deleted in TearDown. */ private static string directoryPath; + + /* Runs once before the tests of this fixture: builds directoryPath as FileReadingTests\ReadingWritingTests under the NUnit test directory and creates that directory. */ [OneTimeSetUp] + public void SetUp() + { + directoryPath = Path.Combine(TestContext.CurrentContext.TestDirectory, + @"FileReadingTests\ReadingWritingTests"); + Directory.CreateDirectory(directoryPath); + } + + /* Runs once after the tests of this fixture: deletes directoryPath recursively (second argument is true). */ [OneTimeTearDown] + public void TearDown() + { + Directory.Delete(directoryPath, true); + } + + + /* Part 1: for every result of the v1.6.2 proteoform_single fixture, asserts that BaseSequence equals GetBaseSequenceFromFullSequence(). Part 2: for the v1.5.3 fixture, asserts both values against a hard-coded list, index by index. */ [Test] + public static void TestFullToBaseSequence() + { + // test output where the base sequence and full sequence are present + + var path = @"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv"; + string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + var file = new ToppicSearchResultFile(filePath); + file.LoadResults(); + var results = file.Results; + + foreach (var result in results) + { + /* NOTE(review): baseSeq is assigned but never read in this loop. */ string baseSeq = result.BaseSequence; + var convertedSequence = result.GetBaseSequenceFromFullSequence(); + Assert.That(result.BaseSequence, Is.EqualTo(convertedSequence)); + } + + // test output where the base sequence is not present + path = @"FileReadingTests\ExternalFileTypes\ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv"; + filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + file = new ToppicSearchResultFile(filePath); + file.LoadResults(); + results = file.Results; + + string[] expectedBaseSequences = new[] + 
 { + "VIKAVDKKAAGAGKVTKSAQKAQKAK", + "GVIKAVDKKAAGAGKVTKSAQKAQKAK", + "KIEEEQNKSKKKAQQAAADTGNNSQVSQNY", + "VGVIKAVDKKAAGAGKVTKSAQKAQKAK", + "VGVIKAVDKKAAGAGKVTKSAQKAQKAK", + "PKRKAEGDAKGDKAKVKDEPQRRSARLSAKPAPPKPEPKPKKAPAKKGEKVPKGKKGKADAGKEGNNPAENGDAKTDQAQKAEGAGDAK", + "AVGVIKAVDKKAAGAGKVTKSAQKAQKAK", + }; + + /* Indexes results and expectedBaseSequences in parallel, so the expected list must follow the order of file.Results. */ for (int i = 0; i < results.Count; i++) + { + Assert.That(results[i].GetBaseSequenceFromFullSequence(), Is.EqualTo(expectedBaseSequences[i])); + Assert.That(results[i].BaseSequence, Is.EqualTo(expectedBaseSequences[i])); + } + } + + + /* For each (path, count) case: asserts that file.Count() equals count and that CanRead(path) is true, first for a ToppicSearchResultFile constructed directly from the full path, then for the object returned by FileReader.ReadFile. */ [Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv", 4)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv", 37)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv", 7)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv", 10)] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteoformSingle_TopPICv1.5.3_proteoform_single.tsv", 7)] + public void TestToppicProteoformAndPrsmLoadAndCountCorrect(string path, int count) + { + string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + Assert.That(file.Count(), Is.EqualTo(count)); + Assert.That(file.CanRead(path)); + + /* NOTE(review): ReadFile presumably had a generic type argument that was lost in this copy, and it is given the relative path rather than filePath; confirm both against the repository. */ file = FileReader.ReadFile(path); + Assert.That(file.Count(), Is.EqualTo(count)); + Assert.That(file.CanRead(path)); + } + + /* Loads each TopPIC v1.6.2 fixture and asserts every parsed header property against an expected literal value. */ [Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv")] + public void TestToppicHeaderParsing(string path) + { + string filePath = 
Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + file.LoadResults(); + + Assert.That(file.ProteinDatabasePath, Is.EqualTo("D:/Databases/Human_uniprotkb_proteome_UP000005640_AND_revi_2023_09_29.fasta")); + Assert.That(file.SpectrumFilePath, Is.EqualTo("B:/Users/Nic/SharedWithMe/targetBias/Centroided\\HumanFasta_TopPicDecoys_Small_ms2.msalign")); + Assert.That(file.NumberOfCombinedSpectra, Is.EqualTo(1)); + Assert.That(file.FragmentationMethod, Is.EqualTo(DissociationType.HCD)); + Assert.That(file.SearchType, Is.EqualTo("TARGET+DECOY")); + + /* NOTE(review): the two new List initializers below appear to have lost their generic type argument in this copy (presumably List<string>); confirm against the repository. */ CollectionAssert.AreEquivalent(file.FixedModifications, + new List { "Carbamidomethylation 57.0215 C" }); + CollectionAssert.AreEquivalent(file.AllowedNTerminalForms, + new List { "NONE", "NME", "NME_ACETYLATION", "M_ACETYLATION" }); + + Assert.That(file.NumberOfMaxUnexpectedModifications, Is.EqualTo(1)); + Assert.That(file.MaximumMassShift, Is.EqualTo(500.0)); + Assert.That(file.MinimumMassShift, Is.EqualTo(-500.0)); + Assert.That(file.SpectrumLevelCutOffType, Is.EqualTo("FDR")); + Assert.That(file.SpectrumLevelCutOffValue, Is.EqualTo(100.0)); + Assert.That(file.ProteoformLevelCutOffType, Is.EqualTo("FDR")); + Assert.That(file.ProteoformLevelCutOffValue, Is.EqualTo(100.0)); + Assert.That(file.PrecursorErrorTolerance, Is.EqualTo(10)); + Assert.That(file.PrsmClusterErrorTolerance, Is.EqualTo(1.2)); + Assert.That(file.UseToppicFeatureFile); + Assert.That(file.EValueComputation, Is.EqualTo("Generating function")); + Assert.That(!file.LocalizationWithMIScore); + Assert.That(file.ThreadNumber, Is.EqualTo(4)); + Assert.That(file.ExecutableFileDirectory, Is.EqualTo(@"C:\Users\Nic\Downloads\toppic-windows-1.6.2\toppic-windows-1.6.2")); + Assert.That(file.StartTime, Is.EqualTo(new DateTime(2023, 9, 29, 10, 14, 12))); + Assert.That(file.EndTime, Is.EqualTo(new DateTime(2023, 9, 30, 3, 0, 8))); + Assert.That(file.Version, Is.EqualTo("1.6.2")); + } + + 
[Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv")] + public void TestToppicProteoformFileResultReading(string path) + { + string filePath = System.IO.Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + Assert.That(file.FileType, + path.Contains("single") + ? Is.EqualTo(SupportedFileType.ToppicProteoformSingle) + : Is.EqualTo(SupportedFileType.ToppicProteoform)); + file.LoadResults(); + + var first = file.First(); + Assert.That(first.FilePath, Is.EqualTo("B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign")); + Assert.That(first.PrsmID, Is.EqualTo(2)); + Assert.That(first.SpectrumId, Is.EqualTo(10)); + Assert.That(first.DissociationType, Is.EqualTo(DissociationType.HCD)); + Assert.That(first.OneBasedScanNumber, Is.EqualTo(354)); + Assert.That(first.RetentionTime, Is.EqualTo(465.09)); + Assert.That(first.PeakCount, Is.EqualTo(16)); + Assert.That(first.PrecursorCharge, Is.EqualTo(1)); + Assert.That(first.PrecursorMass, Is.EqualTo(535.1598)); + Assert.That(first.AdjustedPrecursorMass, Is.EqualTo(534.1675).Within(0.001)); + Assert.That(first.ProteoformId, Is.EqualTo(1346).Within(0.001)); + Assert.That(first.FeatureIntensity, Is.EqualTo(1.038E+07).Within(0.001)); + Assert.That(first.FeatureScore, Is.EqualTo(-1000)); + Assert.That(first.FeatureApexTime, Is.EqualTo(548.52).Within(0.001)); + Assert.That(first.ProteinHitsCount, Is.EqualTo(1)); + Assert.That(first.ProteinAccession, Is.EqualTo("DECOY_sp|Q9NRR8|C42S1_HUMAN")); + Assert.That(first.ProteinDescription, Is.EqualTo("CDC42 small effector protein 1 OS=Homo sapiens OX=9606 GN=CDC42SE1 PE=1 SV=1")); + Assert.That(first.FirstResidue, Is.EqualTo(2)); + Assert.That(first.LastResidue, Is.EqualTo(9)); + Assert.That(first.SpecialAminoAcids, 
Is.EqualTo("")); + Assert.That(first.BaseSequence, Is.EqualTo("SAECTKRF")); + Assert.That(first.FullSequence, Is.EqualTo("M.SAE(C)[Carbamidomethylation](TKRF)[-463.2977].D")); + Assert.That(first.FullSequenceMass, Is.EqualTo(534.1675).Within(0.001)); + Assert.That(first.ProteinNTerminalForm, Is.EqualTo("NME")); + Assert.That(first.FixedPTMs, Is.EqualTo("Carbamidomethylation:[4]")); + Assert.That(first.UnexpectedModificationsCount, Is.EqualTo(1)); + Assert.That(first.UnexpectedModifications, Is.EqualTo("-463.2977:[5-8]")); + Assert.That(first.VariableModificationsCount, Is.EqualTo(0)); + Assert.That(first.VariableModifications, Is.EqualTo(string.Empty)); + Assert.That(first.MIScore, Is.EqualTo(null)); + Assert.That(first.MatchedPeaksCount, Is.EqualTo(1)); + Assert.That(first.MatchedFragmentIonsCount, Is.EqualTo(2)); + Assert.That(first.EValue, Is.EqualTo(1E+300).Within(0.001)); + Assert.That(first.QValueSpectrumLevel, Is.EqualTo(1)); + Assert.That(first.QValueProteoformLevel, Is.EqualTo(1)); + } + + [Test] + public void TestToppicProteoformsFileAlternativeResults() + { + string path = @"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv"; + string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + Assert.That(file.FileType, Is.EqualTo(SupportedFileType.ToppicProteoform)); + file.LoadResults(); + + var containsAlternatives = file.First(p => p.PrsmID == 37); + Assert.That(containsAlternatives.AlternativeIdentifications.Count, Is.EqualTo(3)); + + var alternative = containsAlternatives.AlternativeIdentifications[0]; + Assert.That(alternative.PrsmId, Is.EqualTo(37)); + Assert.That(alternative.Accession, Is.EqualTo("sp|Q14162|SREC_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("Scavenger receptor class F member 1 OS=Homo sapiens OX=9606 GN=SCARF1 PE=1 SV=3")); + Assert.That(alternative.FirstResidue, Is.EqualTo(281)); + 
Assert.That(alternative.LastResidue, Is.EqualTo(285)); + + alternative = containsAlternatives.AlternativeIdentifications[1]; + Assert.That(alternative.PrsmId, Is.EqualTo(37)); + Assert.That(alternative.Accession, Is.EqualTo("DECOY_sp|Q2MKA7|RSPO1_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("R-spondin-1 OS=Homo sapiens OX=9606 GN=RSPO1 PE=1 SV=1")); + Assert.That(alternative.FirstResidue, Is.EqualTo(175)); + Assert.That(alternative.LastResidue, Is.EqualTo(179)); + + alternative = containsAlternatives.AlternativeIdentifications[2]; + Assert.That(alternative.PrsmId, Is.EqualTo(37)); + Assert.That(alternative.Accession, Is.EqualTo("DECOY_sp|Q58EX7|PKHG4_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("Puratrophin-1 OS=Homo sapiens OX=9606 GN=PLEKHG4 PE=1 SV=1")); + Assert.That(alternative.FirstResidue, Is.EqualTo(1174)); + Assert.That(alternative.LastResidue, Is.EqualTo(1178)); + } + + [Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv")] + public void TestToppicPrsmFileResultReading(string path) + { + string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + Assert.That(file.FileType, + path.Contains("single") + ? 
Is.EqualTo(SupportedFileType.ToppicPrsmSingle) + : Is.EqualTo(SupportedFileType.ToppicPrsm)); + + file.LoadResults(); + + var first = file.First(); + Assert.That(first.FilePath, Is.EqualTo("B:/Users/Nic/SharedWithMe/targetBias/Centroided/05-26-17_B7A_yeast_td_fract5_rep1_ms2.msalign")); + Assert.That(first.PrsmID, Is.EqualTo(0)); + Assert.That(first.SpectrumId, Is.EqualTo(7)); + Assert.That(first.DissociationType, Is.EqualTo(DissociationType.HCD)); + Assert.That(first.OneBasedScanNumber, Is.EqualTo(259)); + Assert.That(first.RetentionTime, Is.EqualTo(340.12)); + Assert.That(first.PeakCount, Is.EqualTo(14)); + Assert.That(first.PrecursorCharge, Is.EqualTo(1)); + Assert.That(first.PrecursorMass, Is.EqualTo(576.1219).Within(0.001)); + Assert.That(first.AdjustedPrecursorMass, Is.EqualTo(575.1295).Within(0.001)); + Assert.That(first.ProteoformId, Is.EqualTo(1029).Within(0.001)); + Assert.That(first.FeatureIntensity, Is.EqualTo(3.604E+06).Within(0.001)); + Assert.That(first.FeatureScore, Is.EqualTo(-1000)); + Assert.That(first.FeatureApexTime, Is.EqualTo(536.51).Within(0.001)); + Assert.That(first.ProteinHitsCount, Is.EqualTo(1)); + Assert.That(first.ProteinAccession, Is.EqualTo("DECOY_sp|Q8N5H7|SH2D3_HUMAN")); + Assert.That(first.ProteinDescription, Is.EqualTo("SH2 domain-containing protein 3C OS=Homo sapiens OX=9606 GN=SH2D3C PE=1 SV=1")); + Assert.That(first.FirstResidue, Is.EqualTo(719)); + Assert.That(first.LastResidue, Is.EqualTo(724)); + Assert.That(first.SpecialAminoAcids, Is.EqualTo("")); + Assert.That(first.BaseSequence, Is.EqualTo("GYASDS")); + Assert.That(first.FullSequence, Is.EqualTo("R.GYA(S)[-23.0940]DS.P")); + Assert.That(first.FullSequenceMass, Is.EqualTo(575.1295).Within(0.001)); + Assert.That(first.ProteinNTerminalForm, Is.EqualTo("NONE")); + Assert.That(first.FixedPTMs, Is.EqualTo("")); + Assert.That(first.UnexpectedModificationsCount, Is.EqualTo(1)); + Assert.That(first.UnexpectedModifications, Is.EqualTo("-23.0940:[4]")); + 
Assert.That(first.VariableModificationsCount, Is.EqualTo(0)); + Assert.That(first.VariableModifications, Is.EqualTo(string.Empty)); + Assert.That(first.MIScore, Is.EqualTo(null)); + Assert.That(first.MatchedPeaksCount, Is.EqualTo(2)); + Assert.That(first.MatchedFragmentIonsCount, Is.EqualTo(3)); + Assert.That(first.EValue, Is.EqualTo(1E+300).Within(0.001)); + Assert.That(first.QValueSpectrumLevel, Is.EqualTo(1)); + Assert.That(first.QValueProteoformLevel, Is.EqualTo(1)); + } + + [Test] + public void TestToppicPrsmsFileAlternativeResults() + { + string path = @"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv"; + string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + ToppicSearchResultFile file = new ToppicSearchResultFile(filePath); + Assert.That(file.FileType, Is.EqualTo(SupportedFileType.ToppicPrsm)); + file.LoadResults(); + + var containsAlternatives = file.First(p => p.PrsmID == 1); + Assert.That(containsAlternatives.AlternativeIdentifications.Count, Is.EqualTo(4)); + + var alternative = containsAlternatives.AlternativeIdentifications[0]; + Assert.That(alternative.PrsmId, Is.EqualTo(1)); + Assert.That(alternative.Accession, Is.EqualTo("DECOY_sp|P21781|FGF7_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("Fibroblast growth factor 7 OS=Homo sapiens OX=9606 GN=FGF7 PE=1 SV=1")); + Assert.That(alternative.FirstResidue, Is.EqualTo(101)); + Assert.That(alternative.LastResidue, Is.EqualTo(105)); + + alternative = containsAlternatives.AlternativeIdentifications[1]; + Assert.That(alternative.PrsmId, Is.EqualTo(1)); + Assert.That(alternative.Accession, Is.EqualTo("sp|Q13423|NNTM_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("NAD(P) transhydrogenase, mitochondrial OS=Homo sapiens OX=9606 GN=NNT PE=1 SV=3")); + Assert.That(alternative.FirstResidue, Is.EqualTo(369)); + Assert.That(alternative.LastResidue, Is.EqualTo(373)); + + alternative = 
containsAlternatives.AlternativeIdentifications[2]; + Assert.That(alternative.PrsmId, Is.EqualTo(1)); + Assert.That(alternative.Accession, Is.EqualTo("DECOY_sp|Q6ZT12|UBR3_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("E3 ubiquitin-protein ligase UBR3 OS=Homo sapiens OX=9606 GN=UBR3 PE=2 SV=2")); + Assert.That(alternative.FirstResidue, Is.EqualTo(1805)); + Assert.That(alternative.LastResidue, Is.EqualTo(1809)); + + alternative = containsAlternatives.AlternativeIdentifications[3]; + Assert.That(alternative.PrsmId, Is.EqualTo(1)); + Assert.That(alternative.Accession, Is.EqualTo("sp|Q96RW7|HMCN1_HUMAN")); + Assert.That(alternative.ProteinDescription, + Is.EqualTo("Hemicentin-1 OS=Homo sapiens OX=9606 GN=HMCN1 PE=1 SV=2")); + Assert.That(alternative.FirstResidue, Is.EqualTo(179)); + Assert.That(alternative.LastResidue, Is.EqualTo(183)); + } + + [Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofrom_TopPICv1.6.2_proteoform.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicProteofromSingle_TopPICv1.6.2_proteoform_single.tsv")] + public static void TestTopicProteoformsReadWrite(string path) + { + string filepath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + var testOutputPath = path.Contains("single") + ? 
Path.Combine(directoryPath, "toppic_proteoform_single.tsv") + : Path.Combine(directoryPath, "toppic_proteoform.tsv"); + + ToppicSearchResultFile file = new ToppicSearchResultFile(filepath); + file.LoadResults(); + file.WriteResults(testOutputPath); + var writtenFile = FileReader.ReadFile(testOutputPath); + writtenFile.LoadResults(); + Assert.That(File.Exists(testOutputPath)); + + // check are equivalent + for (int i = 0; i < file.Results.Count; i++) + { + var original = JsonConvert.SerializeObject(file.Results[i]); + var written = JsonConvert.SerializeObject(writtenFile.Results[i]); + Assert.That(original, Is.EqualTo(written)); + } + + var originalLines = File.ReadAllLines(filepath); + var writtenLines = File.ReadAllLines(testOutputPath); + Assert.That(writtenLines.Length, Is.EqualTo(originalLines.Length)); + int paramCount = 0; + for (int i = 0; i < originalLines.Length; i++) + { + if (originalLines[i].Contains("********")) + paramCount++; + if (paramCount >= 2) + break; + Assert.That(writtenLines[i], Is.EqualTo(originalLines[i])); + } + + // test writer still works without specifying extensions + File.Delete(testOutputPath); + var testOutputPathWithoutExtension = Path.Combine(directoryPath, "toppic"); + file.WriteResults(testOutputPathWithoutExtension); + Assert.That(File.Exists(testOutputPath)); + } + + [Test] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsm_TopPICv1.6.2_prsm.tsv")] + [TestCase(@"FileReadingTests\ExternalFileTypes\ToppicPrsmSingle_TopPICv1.6.2_prsm_single.tsv")] + public static void TestToppicPrsmReadWrite(string path) + { + string filepath = Path.Combine(TestContext.CurrentContext.TestDirectory, path); + var testOutputPath = path.Contains("single") + ? 
Path.Combine(directoryPath, "toppic_prsm_single.tsv") + : Path.Combine(directoryPath, "toppic_prsm.tsv"); + + ToppicSearchResultFile file = new ToppicSearchResultFile(filepath); + file.LoadResults(); + file.WriteResults(testOutputPath); + var writtenFile = FileReader.ReadFile(testOutputPath); + writtenFile.LoadResults(); + Assert.That(File.Exists(testOutputPath)); + + // check are equivalent + for (int i = 0; i < file.Results.Count; i++) + { + var original = JsonConvert.SerializeObject(file.Results[i]); + var written = JsonConvert.SerializeObject(writtenFile.Results[i]); + Assert.That(original, Is.EqualTo(written)); + } + + var originalLines = File.ReadAllLines(filepath); + var writtenLines = File.ReadAllLines(testOutputPath); + Assert.That(writtenLines.Length, Is.EqualTo(originalLines.Length)); + int paramCount = 0; + for (int i = 0; i < originalLines.Length; i++) + { + if (originalLines[i].Contains("********")) + paramCount++; + if (paramCount >= 2) + break; + Assert.That(writtenLines[i], Is.EqualTo(originalLines[i])); + } + + // test writer still works without specifying extensions + File.Delete(testOutputPath); + var testOutputPathWithoutExtension = Path.Combine(directoryPath, "toppic"); + file.WriteResults(testOutputPathWithoutExtension); + Assert.That(File.Exists(testOutputPath)); + } + } +} diff --git a/mzLib/Test/Test.csproj b/mzLib/Test/Test.csproj index 2e630d04e..5837b563e 100644 --- a/mzLib/Test/Test.csproj +++ b/mzLib/Test/Test.csproj @@ -285,6 +285,24 @@ Always + + Always + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + + + PreserveNewest + PreserveNewest diff --git a/mzLib/mzLib.nuspec b/mzLib/mzLib.nuspec index 570414d2c..50dc25ed0 100644 --- a/mzLib/mzLib.nuspec +++ b/mzLib/mzLib.nuspec @@ -2,7 +2,7 @@ mzLib - 5.0.540 + 5.0.004 mzLib Stef S. Stef S. 
diff --git a/mzLib/mzLib.sln.DotSettings b/mzLib/mzLib.sln.DotSettings index 424220832..78477fa52 100644 --- a/mzLib/mzLib.sln.DotSettings +++ b/mzLib/mzLib.sln.DotSettings @@ -1,9 +1,11 @@ - + True True True True True + True + True True True True \ No newline at end of file