From 22dc500d5a87790cf06d2a7f3690c7b7fdca2a72 Mon Sep 17 00:00:00 2001 From: SurgeArrester Date: Sun, 7 Mar 2021 10:59:15 +0000 Subject: [PATCH] Added bulk featurizing --- ElM2D/ElM2D.py | 28 ++++++++++++++++++++-------- setup.py | 4 ++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/ElM2D/ElM2D.py b/ElM2D/ElM2D.py index abc1a2b..c2e4d0f 100644 --- a/ElM2D/ElM2D.py +++ b/ElM2D/ElM2D.py @@ -54,7 +54,7 @@ from tqdm import tqdm from tqdm.contrib.concurrent import process_map -from ElMD import ElMD +from ElMD import ElMD, EMD if __name__ == "__main__": mapper = ElM2D() @@ -86,7 +86,6 @@ def __init__(self, formula_list=None, self.dm = None # Stores distance matrix self.metric = metric - self.feature_matrix = def save(self, filepath): # Save all variables except for the distance matrix @@ -347,10 +346,17 @@ def _process_list(self, formula_list, n_proc): ''' pool_list = [] - self.input_mat = np.ndarray(shape=(len(formula_list), 103), dtype=np.float64) - for i, formula in enumerate(formula_list): - self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector + n_elements = len(ElMD().periodic_tab[self.metric]) + self.input_mat = np.ndarray(shape=(len(formula_list), n_elements), dtype=np.float64) + + if self.verbose: + print("Parsing Formula") + for i, formula in tqdm(enumerate(formula_list)): + self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector + else: + for i, formula in enumerate(formula_list): + self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector # Create input pairings if self.verbose: @@ -402,9 +408,6 @@ def __repr__(self): else: return f"ElM2D()" - def features(self): - - def export_dm(self, path): np.savetxt(path, self.dm, delimiter=",") @@ -416,3 +419,12 @@ def export_embedding(self, path): def import_embedding(self, path): self.embedding = np.loadtxt(path, delimiter=",") + + def featurize(self, compositions, how="mean"): + elmd_obj = ElMD(metric=self.metric) + vectors = np.ndarray((len(compositions), len(elmd_obj.periodic_tab[self.metric]))) + + for i, formula in enumerate(compositions): + vectors[i] = ElMD(formula, metric=self.metric, feature_pooling=how).feature_vector + + return vectors \ No newline at end of file diff --git a/setup.py b/setup.py index 7e035d4..9d2866a 100644 --- a/setup.py +++ b/setup.py @@ -2,13 +2,13 @@ setup( name = 'ElM2D', packages = ['ElM2D'], - version = '0.3.2', + version = '0.3.3', license='GPL3', description = 'A high performance mapping class to embed large datasets of ionic compositions with respect to the ElMD metric.', author = 'Cameron Hagreaves', author_email = 'cameron.h@rgreaves.me.uk', url = 'https://github.com/lrcfmd/ElM2D/', - download_url = 'https://github.com/lrcfmd/ElM2D/archive/0.3.2.tar.gz', + download_url = 'https://github.com/lrcfmd/ElM2D/archive/0.3.3.tar.gz', keywords = ['ChemInformatics', 'Materials Science', 'Machine Learning', 'Materials Representation'], install_requires=[ 'cython',