From 1b2f08f76040ff699f1c8876d9d30f1735f3fc83 Mon Sep 17 00:00:00 2001 From: Semidan Date: Fri, 1 Oct 2021 21:11:48 +0100 Subject: [PATCH] refac --- code_oven/code_oven.py | 215 ----------------------------------------- code_oven/script.py | 26 ----- setup.py | 2 +- 3 files changed, 1 insertion(+), 242 deletions(-) delete mode 100644 code_oven/code_oven.py delete mode 100644 code_oven/script.py diff --git a/code_oven/code_oven.py b/code_oven/code_oven.py deleted file mode 100644 index c338d0c..0000000 --- a/code_oven/code_oven.py +++ /dev/null @@ -1,215 +0,0 @@ -import re -import numpy as np - - -class reaction: - - def __init__(self, rxn_data): - self.data = rxn_data - - - -def findRxnData(data, ec_number): - idx_1 = data.find(f'ID\t{ec_number}') - idx_2 = data[idx_1:].find('///') - return data[idx_1:idx_2] - - - -class BRENDA: - """ - Provides methods to parse the BRENDA database (https://www.brenda-enzymes.org/) - """ - def __init__(self, path_to_database): - def getECnumberIndices() -> dict: - EC_pattern = '(?<=ID\\t)(.*)(?=\\n)' - -# EC_pattern = '(?<=ID\\t)(.*)(?=///\\nID\\t)' -# EC_pattern = 'ID\t(.+?)///\nID\t' -# return {self.data[m.start():m.end()]: (m.start(), m.end()) -# for m in re.finditer(EC_pattern, self.data)} - - return {self.data[m.start():m.end()]: (z.start(), z.end()) - for m, z in zip(re.finditer(EC_pattern, self.data), - re.finditer(rxn_data_pattern, self.data) - ) - } - - - with open(path_to_database, encoding="iso-8859-1") as file: - self.data = file.read() - self.ECIndices = getECnumberIndices() - - - def getReactionDataIndices(self) -> dict: - """ - Returns a dict with keys equal to EC numbers and values corresponding - to the start and end indices of the data corresponding to that reaction. - """ - '///\nID\t1.1.1.1' - - def getProteins(self, ec_number) -> dict: - """ - Returns a dict listing all proteins for given EC number - """ - def getPRlines(search_indices): - return [p.group(1) - for p in re.finditer("PR\t(.+?)\nPR", - self.data[search_indices[0]:search_indices[1]] - ) - ] - - - def getReferences(self, ec_number: str) -> dict: - """ - Returns a dict listing all references for given EC number - """ - - def getReactionName(self, ec_number: str) -> str: - """ - Returns a the systematic name of the reaction - """ - return self.data[self.ECIndices[ec_number][0]:].find('(?<=SN\\t)(.*)(?=\\n)') - - - - def getNextEnzymeIdx(self, ec_number: str) -> int: - try: - key_idx = list(self.ECIndices).index(ec_number) - if key_idx == len(self.ECIndices): - return len(self.data) - else: - return self.ECIndices[list(self.ECIndices.keys())[key_idx + 1]][0] - except: - raise ValueError("Ec number not in database") - - def getKMvalues(self, ec_number: str, substrate: str=None) -> dict: - """ - Returns a dictionary with all KM values of the enzyme with - given EC number. If a substrate is given, then results are - restricted to that substrate. - """ - search_indices = (self.ECIndices[ec_number][1], self.getNextEnzymeIdx(ec_number)) - - def getEnzymeSubstrates(KM_lines): - substrates = [] - for line in KM_lines: - sub = extractKMInfo(line)['substrate'] - if sub not in substrates: - substrates.append(sub) - return substrates - - def extractKMInfo(KM_line): - res = {} - try: - species = re.search('#(.+?)#', KM_line).group(1).split(',') - res['species'] = species - except: - res['species'] = '' - try: - KM_value = re.search('# (.+?) {', KM_line).group(1) - res['KM'] = KM_value - except: - res['KM'] = np.nan - try: - substrate = re.search('{(.+?)}', KM_line).group(1) - res['substrate'] = substrate - except: - res['substrate'] = '' - try: - meta = re.search('\((.+?)\)', KM_line).group(1) - res['meta'] = meta - except: - res['meta'] = '' - try: - references = re.search('<(.+?)>', KM_line).group(1) - res['references'] = references - except: - res['references'] = '' - return res - - def getKMlines(search_indices): - return [p.group(1) for p in re.finditer("KM\t(.+?)\nKM", - self.data[search_indices[0]:search_indices[1]])] - - KM_lines = getKMlines(search_indices) - enzyme_substrates = getEnzymeSubstrates(KM_lines) - if substrate is None: - res = {s: [] for s in enzyme_substrates} - for line in KM_lines: - KM_info = extractKMInfo(line) - res[KM_info['substrate']].append(float(KM_info['KM'])) - return res - else: - res = {substrate: []} - for line in KM_lines: - KM_info = extractKMInfo(line) - if KM_info['substrate'] == substrate: - res[substrate].append(float(KM_info['KM'])) - return res - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -''' -Provides classes and functions to parse the BRENDA data base -''' -# import re -# import numpy as np -# WorkDir = 'C:/Users/robaina/OneDrive/Documents/BRENDA' -# DataFile = WorkDir + '/brenda_download.txt' - -# with open(DataFile, encoding="iso-8859-1") as file: -# data = file.read() - -# ECidx = [( m.start(), m.end() ) -# for m in re.finditer('(?<=ID\\t)(.*)(?=\\n)', data)] # Enzyme number - -# MEidx = [( m.start(), m.end() ) -# for m in re.finditer('(?<=ME\\t)(.*)(?=\\n)', data)] # Metals/ions - -# CFidx = [( m.start(), m.end() ) -# for m in re.finditer('(?<=CF\\t)(.*)(?=\\n)', data)] # Cofactors - -# i=2000;data[ECidx[i][0]:ECidx[i][1]] -# data[MEidx[i][0]:MEidx[i][1]] -# data[MEidx[i][0]:CFidx[i][1]] diff --git a/code_oven/script.py b/code_oven/script.py deleted file mode 100644 index 430417d..0000000 --- a/code_oven/script.py +++ /dev/null @@ -1,26 +0,0 @@ -from parseBRENDA import BRENDA - -workDir = 'C:/Users/tinta/OneDrive/Documents/Projects/BRENDA' -dataFile = workDir + '/brenda_download.txt' - -brenda = BRENDA(dataFile) -r = brenda.reactions.get_by_id('2.7.1.40') -r -human_enzymes = brenda.reactions.filter_by_organism('Homo sapiens neanderthalensis') -len(human_enzymes) -human_enzymes[1] -human_enzymes[1].getOrganisms() - -lines = r._getDataLines('PR') -for line in lines[:10]: - print(r._extractDataLineInfo(line, numeric_value=False)) -r.getSpecies() -KMs = r.getKMvalues() -[KM['value'] for KM in KMs['D-ribose 5-phosphate']] - -KMs = r.getKMvalues() -KMs -KMs.filter_by_organism('Sus scrofa') - -r.getKMvalues().filter_by_organism('Homo sapiens').keys() -r.getKMvalues()['pyruvate'] diff --git a/setup.py b/setup.py index 34d3905..5c697c3 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ setup( name='brendapyrser', version='0.0.1', - description='Tools to parse de BRENDA database', + description='Tools to parse the BRENDA database', long_description=long_description, long_description_content_type='text/markdown', url='https://github.com/robaina/BRENDA_database',