-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathsplit_mgf.py
52 lines (38 loc) · 1.26 KB
/
split_mgf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 26 17:07:42 2021
@author: stravsm
"""
from pyteomics import mgf
import argparse
# Command-line interface. --chunk-size and --files are two alternative ways
# to control the split; when both are given the script uses --chunk-size,
# and when neither is given it writes one spectrum per file.
parser = argparse.ArgumentParser(
    description='Helper to split MGF files')
parser.add_argument(
    '--chunk-size', '-c', type=int,
    help='number of spectra per output file (takes precedence over --files)')
parser.add_argument(
    '--files', '-f', type=int,
    help='target number of output files; the chunk size is derived from it')
parser.add_argument(
    '--prefix', '-p', required=False, default="spectra_out_",
    help='output name prefix; files are written as <prefix><n>.mgf')
parser.add_argument(
    '--prefix-title', '-t', action='store_true',
    help='prepend each spectrum title with its index in the input file')
parser.add_argument('filename', help='MGF file to split')
args = parser.parse_args()

mgf_file = args.filename
# Load every spectrum into memory while the file is open; the context manager
# guarantees the handle is closed even if parsing fails part-way through.
with open(mgf_file, 'r') as mgf_handle:
    mgf_data = mgf.read(mgf_handle)
    spectra = list(mgf_data)
def update_title(i, s):
    """Prefix the title of spectrum ``s`` with the index ``i``.

    The ``params`` mapping of ``s`` is modified in place: its ``title`` entry
    becomes ``"<i> <previous title>"``. A missing title is treated as an empty
    string, so the result is then ``"<i> "``. The same object ``s`` is
    returned, which lets the function be used inside a comprehension.
    """
    params = s['params']
    previous = params.get('title', '')
    params.update(title=f'{i} {previous}')
    return s
# Optionally tag every title with the spectrum's 0-based position in the
# input, so spectra stay identifiable after being spread over several files.
if args.prefix_title:
    spectra = [update_title(i, s) for i, s in enumerate(spectra)]

spectra_len = len(spectra)

# Decide how many spectra go into each output file.
if args.chunk_size is not None:
    chunk_size = int(args.chunk_size)
    if chunk_size < 1:
        # A chunk size <= 0 would never consume the spectrum list.
        parser.error("--chunk-size must be a positive integer")
elif args.files is not None:
    n_files = int(args.files)
    if n_files < 1:
        parser.error("--files must be a positive integer")
    # Ceiling division: never produces more than n_files files, and gives
    # exactly n_files when the spectra divide evenly (floor + 1 produced too
    # few files in that case). max() keeps the step valid for empty input.
    chunk_size = max(1, -(-spectra_len // n_files))
else:
    print("No chunk size or file number specified; splitting to individual files")
    chunk_size = 1

# Write consecutive slices to <prefix>1.mgf, <prefix>2.mgf, ...
# Stepping over start offsets avoids re-copying the remaining list each round.
for counter, start in enumerate(range(0, spectra_len, chunk_size), start=1):
    chunk = spectra[start:start + chunk_size]
    # NOTE(review): file_mode='w' is passed explicitly so a re-run overwrites
    # existing output; some pyteomics releases appear to default to append
    # mode - confirm against the installed version.
    mgf.write(chunk, f"{args.prefix}{counter}.mgf", file_mode='w')