Skip to content

Commit

Permalink
Added bulk featurizing
Browse files Browse the repository at this point in the history
  • Loading branch information
SurgeArrester committed Mar 7, 2021
1 parent 8477660 commit 22dc500
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 10 deletions.
28 changes: 20 additions & 8 deletions ElM2D/ElM2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
from tqdm import tqdm
from tqdm.contrib.concurrent import process_map

from ElMD import ElMD
from ElMD import ElMD, EMD

if __name__ == "__main__":
mapper = ElM2D()
Expand Down Expand Up @@ -86,7 +86,6 @@ def __init__(self, formula_list=None,
self.dm = None # Stores distance matrix

self.metric = metric
self.feature_matrix =

def save(self, filepath):
# Save all variables except for the distance matrix
Expand Down Expand Up @@ -347,10 +346,17 @@ def _process_list(self, formula_list, n_proc):
'''
pool_list = []

self.input_mat = np.ndarray(shape=(len(formula_list), 103), dtype=np.float64)

for i, formula in enumerate(formula_list):
self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector
n_elements = len(ElMD().periodic_tab[self.metric])
self.input_mat = np.ndarray(shape=(len(formula_list), n_elements), dtype=np.float64)

if self.verbose:
print("Parsing Formula")
for i, formula in tqdm(enumerate(formula_list)):
self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector
else:
for i, formula in enumerate(formula_list):
self.input_mat[i] = ElMD(formula, metric=self.metric).ratio_vector

# Create input pairings
if self.verbose:
Expand Down Expand Up @@ -402,9 +408,6 @@ def __repr__(self):
else:
return f"ElM2D()"

def features(self):


def export_dm(self, path):
    """Write the stored distance matrix ``self.dm`` to ``path`` as comma-delimited text."""
    distance_matrix = self.dm
    np.savetxt(path, distance_matrix, delimiter=",")

Expand All @@ -416,3 +419,12 @@ def export_embedding(self, path):

def import_embedding(self, path):
    """Load a comma-delimited text file from ``path`` and store it as ``self.embedding``."""
    loaded = np.loadtxt(path, delimiter=",")
    self.embedding = loaded

def featurize(self, compositions, how="mean"):
    """Build a matrix holding one pooled feature vector per composition.

    Each formula in ``compositions`` is wrapped in an ``ElMD`` object created
    with this mapper's ``metric`` and ``feature_pooling=how``; that object's
    ``feature_vector`` becomes one row of the result.

    Parameters
    ----------
    compositions : sized iterable of formula strings
        The compositions to featurize. Must support ``len()``.
    how : str, default "mean"
        Passed straight through as ``ElMD(..., feature_pooling=how)``.

    Returns
    -------
    numpy.ndarray
        Float64 array with one row per composition. The row width is taken
        from the feature vectors themselves rather than assumed up front.
    """
    if len(compositions) == 0:
        # No vectors to measure, so keep the previous empty-result shape:
        # zero rows, one column per entry in the metric's periodic table.
        n_columns = len(ElMD(metric=self.metric).periodic_tab[self.metric])
        return np.empty((0, n_columns))

    # Collect the rows first and let NumPy infer the width. The earlier
    # implementation preallocated len(periodic_tab[metric]) columns, which
    # raises a broadcast error whenever feature_vector has another length
    # (presumably the case for multi-dimensional descriptors - confirm
    # against ElMD).
    rows = [
        ElMD(formula, metric=self.metric, feature_pooling=how).feature_vector
        for formula in compositions
    ]

    return np.array(rows, dtype=np.float64)
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
setup(
name = 'ElM2D',
packages = ['ElM2D'],
version = '0.3.2',
version = '0.3.3',
license='GPL3',
description = 'A high performance mapping class to embed large datasets of ionic compositions with respect to the ElMD metric.',
author = 'Cameron Hagreaves',
author_email = 'cameron.h@rgreaves.me.uk',
url = 'https://github.com/lrcfmd/ElM2D/',
download_url = 'https://github.com/lrcfmd/ElM2D/archive/0.3.2.tar.gz',
download_url = 'https://github.com/lrcfmd/ElM2D/archive/0.3.3.tar.gz',
keywords = ['ChemInformatics', 'Materials Science', 'Machine Learning', 'Materials Representation'],
install_requires=[
'cython',
Expand Down

0 comments on commit 22dc500

Please sign in to comment.