Skip to content

Commit

Permalink
removed mgf annotation verification
Browse files Browse the repository at this point in the history
  • Loading branch information
Lilferrit committed Aug 12, 2024
2 parents c4cd147 + 34fb4d1 commit 31cc133
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 324 deletions.
43 changes: 1 addition & 42 deletions casanovo/denovo/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import uuid
import warnings
from pathlib import Path
from typing import Iterable, List, Optional, Union, TextIO
from typing import Iterable, List, Optional, Union

import depthcharge.masses
import lightning.pytorch as pl
Expand Down Expand Up @@ -401,21 +401,6 @@ def _get_index(

msg = msg.strip()
filenames = _get_peak_filenames(peak_path, ext)
if annotated:
# Filter unannotated MGF files to avoid Depth Charge exception
filtered_fnames = list()
for fname in filenames:
if Path(
fname
).suffix.lower() == ".mgf" and not _mgf_is_annotated(fname):
warnings.warn(
f"Ignoring unannotated MGF peak file: {fname}",
RuntimeWarning,
)
else:
filtered_fnames.append(fname)
filenames = filtered_fnames

if not filenames:
not_found_err = f"Cound not find {msg} peak files"
logger.error(not_found_err + " from %s", peak_path)
Expand Down Expand Up @@ -459,32 +444,6 @@ def _get_strategy(self) -> Union[str, DDPStrategy]:
return "auto"


def _mgf_is_annotated(mgf_path: Union[str, Path]) -> bool:
    """
    Check whether every spectrum in an MGF file is annotated.

    A spectrum is the block of lines opened by a ``BEGIN IONS`` line and
    closed by an ``END IONS`` line. It counts as annotated when at least
    one line inside that block starts with ``SEQ=``.

    Parameters
    ----------
    mgf_path : Union[str, Path]
        Path of the MGF peak file to check. The value is handed straight
        to ``open``, so it must be a path rather than an open file handle.

    Returns
    -------
    bool
        ``True`` if every spectrum block contains a ``SEQ=`` line (this
        includes a file with no spectra at all), ``False`` as soon as one
        spectrum without a ``SEQ=`` line is found.
    """
    in_spectrum = False
    has_annotation = False

    with open(mgf_path) as f:
        for curr_line in f:
            if curr_line.startswith("BEGIN IONS"):
                # A new block starting while the previous one is still open
                # (missing END IONS) closes the previous block implicitly.
                if in_spectrum and not has_annotation:
                    return False
                in_spectrum = True
                has_annotation = False
            elif curr_line.startswith("END IONS"):
                if in_spectrum and not has_annotation:
                    return False
                in_spectrum = False
            elif in_spectrum and curr_line.startswith("SEQ="):
                # Only SEQ= lines inside a spectrum block count; tracking
                # per block avoids one spectrum's repeated SEQ= lines
                # masking another spectrum that has none.
                has_annotation = True

    # A trailing block without END IONS is judged by what was seen so far.
    return not in_spectrum or has_annotation


def _get_peak_filenames(
paths: Iterable[str], supported_ext: Iterable[str]
) -> List[str]:
Expand Down
Loading

0 comments on commit 31cc133

Please sign in to comment.