-
Notifications
You must be signed in to change notification settings - Fork 66
/
iFeature.py
57 lines (54 loc) · 2.67 KB
/
iFeature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
#_*_coding:utf-8_*_
import argparse
import re
from codes import *
# Alphabetical ordering of the 20 standard amino-acid letters. Used as the
# fallback whenever no ordering (or an invalid user-defined one) is supplied.
DEFAULT_AA_ORDER = 'ACDEFGHIKLMNPQRSTVWY'

# Predefined orderings selectable with --order. The 'userDefined' choice is
# resolved separately in resolve_order().
AA_ORDERS = {
    'alphabetically': DEFAULT_AA_ORDER,
    'polarity': 'DENKRQHSGTAPYVMCWIFL',
    'sideChainVolume': 'GASDPCTNEVHQILMKRFYW',
}


def build_parser():
    """Build the command-line parser for the descriptor generator.

    Returns:
        argparse.ArgumentParser: parser exposing --file and --type (both
        required) plus the optional --path, --train, --label, --order,
        --userDefinedOrder and --out options.
    """
    # NOTE(review): passing usage= replaces argparse's auto-generated usage
    # line with this fixed text; kept as-is to preserve the existing output.
    parser = argparse.ArgumentParser(usage="it's usage tip.",
                                     description="Generating various numerical representation schemes for protein sequences")
    parser.add_argument("--file", required=True, help="input fasta file")
    parser.add_argument("--type", required=True,
                        choices=['AAC', 'EAAC', 'CKSAAP', 'DPC', 'DDE', 'TPC', 'BINARY',
                                 'GAAC', 'EGAAC', 'CKSAAGP', 'GDPC', 'GTPC',
                                 'AAINDEX', 'ZSCALE', 'BLOSUM62',
                                 'NMBroto', 'Moran', 'Geary',
                                 'CTDC', 'CTDT', 'CTDD',
                                 'CTriad', 'KSCTriad',
                                 'SOCNumber', 'QSOrder',
                                 'PAAC', 'APAAC',
                                 'KNNprotein', 'KNNpeptide',
                                 'PSSM', 'SSEC', 'SSEB', 'Disorder', 'DisorderC', 'DisorderB', 'ASA', 'TA'
                                 ],
                        help="the encoding type")
    parser.add_argument("--path", dest='filePath',
                        help="data file path used for 'PSSM', 'SSEB(C)', 'Disorder(BC)', 'ASA' and 'TA' encodings")
    parser.add_argument("--train", dest='trainFile',
                        help="training file in fasta format only used for 'KNNprotein' or 'KNNpeptide' encodings")
    parser.add_argument("--label", dest='labelFile',
                        help="sample label file only used for 'KNNprotein' or 'KNNpeptide' encodings")
    parser.add_argument("--order", dest='order',
                        choices=['alphabetically', 'polarity', 'sideChainVolume', 'userDefined'],
                        help="output order for of Amino Acid Composition (i.e. AAC, EAAC, CKSAAP, DPC, DDE, TPC) descriptors")
    parser.add_argument("--userDefinedOrder", dest='userDefinedOrder',
                        help="user defined output order for of Amino Acid Composition (i.e. AAC, EAAC, CKSAAP, DPC, DDE, TPC) descriptors")
    parser.add_argument("--out", dest='outFile',
                        help="the generated descriptor file")
    return parser


def resolve_order(order, user_defined_order):
    """Return the 20-letter amino-acid ordering selected on the command line.

    Args:
        order: value of --order ('alphabetically', 'polarity',
            'sideChainVolume', 'userDefined') or None when not given.
        user_defined_order: raw value of --userDefinedOrder, or None. It is
            only consulted when ``order`` is 'userDefined'.

    Returns:
        str: the chosen ordering. The alphabetical default is returned when
        ``order`` is None, and also when 'userDefined' is requested but the
        supplied string is not a valid ordering.

    Raises:
        KeyError: if ``order`` is neither None, 'userDefined', nor one of
            the predefined ordering names.
    """
    if order is None:
        return DEFAULT_AA_ORDER
    if order != 'userDefined':
        return AA_ORDERS[order]

    if user_defined_order is None:
        return DEFAULT_AA_ORDER
    # Drop every character that is not one of the 20 upper-case residue
    # letters (separators, whitespace, lower-case letters, ...).
    cleaned = re.sub('[^ACDEFGHIKLMNPQRSTVWY]', '', user_defined_order)
    # A usable ordering names each residue exactly once. A 20-character
    # string with repeats would silently omit some residues, so it is
    # rejected the same way a wrong-length string is: fall back to default.
    if len(cleaned) != 20 or len(set(cleaned)) != 20:
        return DEFAULT_AA_ORDER
    return cleaned


def main(argv=None):
    """Parse arguments, compute the requested encoding and save it as TSV.

    Args:
        argv: list of argument strings; None makes argparse read sys.argv.
    """
    args = build_parser().parse_args(argv)
    fastas = readFasta.readFasta(args.file)
    myOrder = resolve_order(args.order, args.userDefinedOrder)
    kw = {'path': args.filePath, 'train': args.trainFile, 'label': args.labelFile, 'order': myOrder}
    print('Descriptor type: ' + args.type)
    # Look the encoder up by name instead of eval()-ing a source string.
    # This relies on the same thing the eval expression did: the star-import
    # from `codes` must have put an object named after the encoding type in
    # this module's globals, and that object must expose a same-named callable.
    encoder_module = globals()[args.type]
    encodings = getattr(encoder_module, args.type)(fastas, **kw)
    outFile = args.outFile if args.outFile is not None else 'encoding.tsv'
    saveCode.savetsv(encodings, outFile)


if __name__ == '__main__':
    main()