Skip to content

Commit

Permalink
Add combine_spectra and base_peak_mass functions
Browse files Browse the repository at this point in the history
  • Loading branch information
domdfcoding committed May 2, 2024
1 parent 0c3790d commit 1a3cbcd
Show file tree
Hide file tree
Showing 7 changed files with 34,577 additions and 15 deletions.
31 changes: 30 additions & 1 deletion libgunshotmatch/consolidate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#

# stdlib
from collections import Counter
from collections import Counter, defaultdict
from fnmatch import fnmatch
from itertools import permutations
from multiprocessing import Pool
Expand Down Expand Up @@ -61,6 +61,7 @@
"pairwise_ms_comparisons",
"ConsolidatedPeakFilter",
"InvertedFilter",
"combine_spectra",
)


Expand Down Expand Up @@ -804,3 +805,31 @@ def filter(self, consolidated_peaks: List[ConsolidatedPeak]) -> List[Consolidate
filtered_consolidated_peaks.append(cp)

return filtered_consolidated_peaks


def combine_spectra(peak: ConsolidatedPeak) -> Tuple[List[int], List[float]]:
    """
    Merge all mass spectra of a consolidated peak into one summed spectrum.

    Intensities recorded against the same mass are added together.
    Entries of ``peak.ms_list`` which are :py:obj:`None` are ignored.

    :param peak: The consolidated peak whose ``ms_list`` spectra are combined.

    :returns: List of masses and list of corresponding summed intensities.
        The two lists are index-aligned, with masses in order of first occurrence.

    .. versionadded:: v0.11.0
    """

    # Running total of intensity per mass; unseen masses start at 0.0
    totals: Dict[int, float] = defaultdict(float)

    for spectrum in peak.ms_list:
        if spectrum is None:
            continue

        for mass, intensity in zip(spectrum.mass_list, spectrum.intensity_list):
            totals[mass] += intensity

    # Keys and values of a dict iterate in the same (insertion) order,
    # so the two lists stay aligned.
    return list(totals), list(totals.values())
23 changes: 23 additions & 0 deletions libgunshotmatch/peak.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"filter_peaks",
"peak_from_dict",
"write_alignment",
"base_peak_mass",
)


Expand Down Expand Up @@ -531,3 +532,25 @@ def _to_peak_list(a_list: List[Peak]) -> PeakList:
return a_list
else:
return PeakList(a_list)


def base_peak_mass(peak: Peak) -> float:
    """
    Return the mass of the base peak (the most intense fragment) in the peak's mass spectrum.

    If several fragments share the highest intensity, the mass of the first of them is returned.

    :param peak: The peak whose ``mass_spectrum`` is examined.

    :returns: The entry of ``mass_list`` at the position of the highest value in ``mass_spec``.

    .. versionadded:: v0.11.0
    """

    spectrum = peak.mass_spectrum
    masses = spectrum.mass_list
    intensities = spectrum.mass_spec

    # ``index`` gives the first position holding the maximum intensity,
    # which is the position of the base peak.
    apex_position = intensities.index(max(intensities))

    return masses[apex_position]
20 changes: 20 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# 3rd party
import numpy
from coincidence.regressions import _representer_for
from pytest_regressions.data_regression import RegressionYamlDumper
from yaml.representer import RepresenterError

Expand All @@ -10,3 +12,21 @@ def represent_undefined(self, data): # noqa: MAN001,MAN002


RegressionYamlDumper.represent_undefined = represent_undefined # type: ignore[method-assign]


@_representer_for(numpy.int64, numpy.int32, numpy.float64)
def _represent_numpy(dumper: RegressionYamlDumper, data: int):  # noqa: MAN002
    """
    Represent a numpy scalar in regression YAML output as a plain Python :class:`int`.

    The decorator is given ``numpy.int64``, ``numpy.int32`` and ``numpy.float64``
    (presumably registering this function as the representer for those types).
    Every value, including ``numpy.float64``, is passed through :class:`int`.

    :param dumper: The YAML dumper used to emit the converted value.
    :param data: The numpy scalar to represent.
    """

    plain_value = int(data)
    return dumper.represent_data(plain_value)


# @_representer_for(
# numpy.float64,
# )
# def _represent_mappings(dumper: RegressionYamlDumper, data: float): # noqa: MAN002
# if data == 0:
# return dumper.represent_data(0)
# return dumper.represent_data(str(round_rt(data)))
30 changes: 17 additions & 13 deletions tests/test_consolidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,32 +3,26 @@
from typing import Optional

# 3rd party
import numpy
import pyms_nist_search
import pytest
from coincidence.regressions import AdvancedDataRegressionFixture, AdvancedFileRegressionFixture, _representer_for
from coincidence.regressions import AdvancedDataRegressionFixture, AdvancedFileRegressionFixture
from domdf_python_tools.paths import PathPlus
from domdf_python_tools.typing import PathLike
from pytest_regressions.data_regression import RegressionYamlDumper
from pytest_regressions.dataframe_regression import DataFrameRegressionFixture

# this package
from libgunshotmatch.consolidate import ConsolidatedPeakFilter, InvertedFilter, pairwise_ms_comparisons
from libgunshotmatch.consolidate import (
ConsolidatedPeakFilter,
InvertedFilter,
combine_spectra,
pairwise_ms_comparisons
)
from libgunshotmatch.consolidate._fields import _attrs_convert_reference_data
from libgunshotmatch.project import Project

# Test consolidate process from gsmp file


@_representer_for(numpy.int64, numpy.int32, numpy.float64)
def _represent_numpy(dumper: RegressionYamlDumper, data: int):  # noqa: MAN002
    """
    Represent a numpy scalar in regression YAML output as a plain Python :class:`int`.

    The decorator is given ``numpy.int64``, ``numpy.int32`` and ``numpy.float64``
    (presumably registering this function as the representer for those types).
    Every value, including ``numpy.float64``, is passed through :class:`int`.

    :param dumper: The YAML dumper used to emit the converted value.
    :param data: The numpy scalar to represent.
    """

    plain_value = int(data)
    return dumper.represent_data(plain_value)


class MockEngine(pyms_nist_search.Engine):
"""
Engine that returns :py:obj:`None` for the reference data.
Expand Down Expand Up @@ -197,3 +191,13 @@ def test__attrs_convert_reference_data():
with pytest.raises(TypeError, match="'reference_data' must be a `pyms_nist_search.ReferenceData` object,"):
# Wrong type
_attrs_convert_reference_data([]) # type: ignore[arg-type]


def test_combine_spectra(advanced_data_regression: AdvancedDataRegressionFixture):
    """
    Regression test: combine the spectra of every consolidated peak in the example project.
    """

    project = Project.from_file("tests/Eley Super Game.gsmp")
    consolidated_peaks = project.consolidated_peaks
    assert consolidated_peaks is not None

    # One (masses, intensities) pair per consolidated peak, in project order
    spectra = [combine_spectra(peak) for peak in consolidated_peaks]

    advanced_data_regression.check(spectra)
Loading

0 comments on commit 1a3cbcd

Please sign in to comment.