Skip to content

Commit

Permalink
add different enlargement modes for small proteins in CoMatrices
Browse files Browse the repository at this point in the history
  • Loading branch information
anazhmetdin committed Nov 25, 2020
1 parent f5c4cc9 commit 6be62b6
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
7 changes: 6 additions & 1 deletion protencoder/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def main():
parser.add_argument("-M", "--method", default="o",
help="protein encoding method; o: (defult)onehot,\
k: kmers frequency, c: compatibility matrices")
parser.add_argument("-e", "--enlargenMode", default='0',
help="mode to enlarge a small protein in method\
compatibility matrices; pad(default), resize, tile,\
repeat")
parser.add_argument("-k", "--kmerLength", default="3",
help="kmer length in frequency encoder")
parser.add_argument("-f", "--Protfilter", default="",
Expand Down Expand Up @@ -119,6 +123,7 @@ def main():
dsize = int(args.dsize)
flatten = bool(int(args.flatten))
PVmodel = args.PVmodelPath
action = args.enlargenMode

if not (GOfile is None):
outPrefix = args.outPrefix if args.outPrefix != "" else GOfile
Expand All @@ -137,7 +142,7 @@ def main():
elif method == 'k':
encoder = protKmers(k)
elif method == 'c':
encoder = AAcomptability(dsize)
encoder = AAcomptability(dsize, action)
elif method == 'p':
encoder = protvec(PVmodel, flatten)
if Protfilter != "":
Expand Down
30 changes: 23 additions & 7 deletions protencoder/coMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@


class AAcomptability():
def __init__(self, dsize=(500, 500), action='pad'):
    """Set up the compatibility-matrix encoder.

    Args:
        dsize: target output size. Either a single number (used for both
            sides) or a 2-item tuple/list.
        action: how ``co_resize`` enlarges a protein smaller than
            ``dsize``; ``co_resize`` checks for "pad", "resize", "tile"
            and "repeat".
    """
    self.handler = encoder()
    # The default is already a pair while the CLI passes a single int;
    # wrapping a pair again would give ((500, 500), (500, 500)) and break
    # the size comparisons in co_resize, so only wrap scalars.
    if isinstance(dsize, (tuple, list)):
        self.dsize = tuple(dsize)
    else:
        self.dsize = (dsize, dsize)
    self.action = action
    self.SCM, self.HCM, self.CCM = get_data()
    self.matrices = [self.SCM, self.HCM, self.CCM]

Expand All @@ -29,12 +30,27 @@ def encode(self):
self.handler.seqDict[prot] = encoded.astype('uint8')

def co_resize(self, prot):
    """Bring one encoded protein to the configured output size.

    Larger proteins are shrunk with ``resize``/``INTER_AREA``. Smaller
    proteins are enlarged according to ``self.action``:

    * "pad" (also "0", the value the CLI currently sends by default):
      zero-pad at the end of axes 1 and 2.
    * "repeat": repeat every element along axes 1 and 2 as many whole
      times as fit, then zero-pad the remainder.
    * "tile": tile the matrix along axes 1 and 2 and crop.
    * "resize": interpolate with ``resize``/``INTER_LINEAR``.

    Args:
        prot: array indexed as (channel, row, col); the reshapes below
            assume 3 channels and a square matrix -- TODO confirm against
            the encoder that produces it.

    Returns:
        Array of shape ``(self.dsize[0], self.dsize[0], 3)``.

    Raises:
        ValueError: if a small protein must be enlarged and
            ``self.action`` is not one of the supported modes.
    """
    target = self.dsize[0]
    size = prot.shape[1]

    if size > target:
        # resize expects channel-last input, so reshape first (as the
        # code did before enlargement modes were added); its output is
        # already (target, target, 3).
        hwc = prot.reshape((prot.shape[1], prot.shape[2], 3))
        return resize(hwc, self.dsize, interpolation=INTER_AREA)

    if size == target:
        # Nothing to enlarge or shrink; only the final reshape applies.
        x = prot
    elif self.action == "resize":
        hwc = prot.reshape((prot.shape[1], prot.shape[2], 3))
        return resize(hwc, self.dsize, interpolation=INTER_LINEAR)
    elif self.action == "repeat":
        factor = target // size
        x = np.repeat(prot, factor, axis=1)
        x = np.repeat(x, factor, axis=2)
        # Pad the *repeated* array (not the original) up to the target.
        pad = target - x.shape[1]
        x = np.pad(x, ((0, 0), (0, pad), (0, pad)), mode="constant")
    elif self.action == "tile":
        reps = target // size + 1
        # reps must be 1 on the channel axis; 0 would empty the array.
        x = np.tile(prot, (1, reps, reps))
        x = x[:, :target, :target]
    elif self.action in ("pad", "0"):
        # "0" is accepted because the CLI's -e option defaults to '0'
        # while documenting pad as the default mode.
        pad = target - size
        x = np.pad(prot, ((0, 0), (0, pad), (0, pad)), mode="constant")
    else:
        raise ValueError(
            "unknown enlargement mode %r; expected one of "
            "'pad', 'resize', 'tile', 'repeat'" % (self.action,))

    # NOTE(review): this is a reshape, not a transpose, so values are
    # reinterpreted rather than moved channel-last; kept as in the
    # original so downstream consumers see the same layout.
    return x.reshape((x.shape[1], x.shape[2], 3))

def read(self, seqPath):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
test_suite='tests',
tests_require=test_requirements,
url='https://github.com/anegm98/protencoder',
version='1.3.0',
version='1.4.0',
zip_safe=False,
package_data={'protencoder': ['data/*']}
)

0 comments on commit 6be62b6

Please sign in to comment.