Skip to content

Commit

Permalink
Update README
Browse files Browse the repository at this point in the history
  • Loading branch information
SurgeArrester committed Mar 7, 2021
1 parent a142e23 commit f6c4873
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 14 deletions.
27 changes: 15 additions & 12 deletions ElM2D/ElM2D.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class ElM2D():
def __init__(self, formula_list=None,
n_proc=None,
n_components=2,
verbose=True):
verbose=True,
metric="mod_petti"):

self.verbose = verbose

Expand All @@ -84,6 +85,9 @@ def __init__(self, formula_list=None,
self.embedding = None # Stores the last embedded coordinates
self.dm = None # Stores distance matrix

self.metric = metric
self.feature_matrix =

def save(self, filepath):
# Save all variables except for the distance matrix
save_dict = {k: v for k, v in self.__dict__.items()}
Expand Down Expand Up @@ -135,28 +139,28 @@ def plot(self, fp=None, color=None, embedding=None):

return fig

def fit(self, X, metric="mod_petti"):
def fit(self, X):
'''
Take an input, either a precomputed distance matrix or
an iterable of composition formula strings, then construct an ElMD distance
matrix and store to self.dm. Can pass a precomputed matrix with
metric="precomputed"
matrix and store to self.dm.
Input
X - A list of compound formula strings, or a precomputed distance matrix
(ensure self.metric = "precomputed")
'''
self.formula_list = X
n = len(X)

if metric == "precomputed":
if self.metric == "precomputed":
self.dm = X

elif n < 1000:
# Do this on a single core for smaller datasets
distances = []

for i in range(n - 1):
x = ElMD(X[i], metric=metric)
x = ElMD(X[i], metric=self.metric)
for j in range(i + 1, n):
distances.append(x.elmd(X[j]))

Expand All @@ -165,20 +169,19 @@ def fit(self, X, metric="mod_petti"):

else:
if self.verbose: print("Constructing distances")
dist_vec = self._process_list(X, metric=metric, n_proc=self.n_proc)
dist_vec = self._process_list(X, n_proc=self.n_proc)
self.dm = squareform(dist_vec)

def fit_transform(self, X, y=None, how="UMAP", n_components=2, metric="mod_petti"):
def fit_transform(self, X, y=None, how="UMAP", n_components=2):
"""
Successively call fit and transform
Parameters:
X - List of compositions to embed
how - "UMAP" or "PCA", the embedding technique to use
n_components - The number of dimensions to embed to
metric - "precomputed" to pass precomputed distance matrices
"""
self.fit(X, metric=metric)
self.fit(X)
embedding = self.transform(how=how, n_components=n_components, y=y)
return embedding

Expand Down Expand Up @@ -335,7 +338,7 @@ def cross_validate(self, y=None, k=5, shuffle=True, seed=42):

return [(X_ret[i][0], X_ret[i][1], y_ret[i][0], y_ret[i][1]) for i in range(k)]

def _process_list(self, formula_list, n_proc, metric="mod_petti"):
def _process_list(self, formula_list, n_proc):
'''
Given an iterable list of formulas in composition form
use multiple processes to convert these to pettifor ratio
Expand All @@ -347,7 +350,7 @@ def _process_list(self, formula_list, n_proc, metric="mod_petti"):
self.input_mat = np.ndarray(shape=(len(formula_list), 103), dtype=np.float64)

for i, formula in enumerate(formula_list):
self.input_mat[i] = ElMD(formula, metric=metric).vector_form
self.input_mat[i] = ElMD(formula, metric=self.metric).vector_form

# Create input pairings
if self.verbose:
Expand Down
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ Embeddings may also be directed towards a particular chemical property in a pand
embedding = mapper.fit_transform(df["formula"], df["property_of_interest"])
```

By default, the [modified Pettifor scale](https://iopscience.iop.org/article/10.1088/1367-2630/18/9/093011/meta) is used as the method of atomic similarity, "atomic", "petti", "mod_petti", and "mendeleev" can be selected through the `metric` attribute.
By default, the [modified Pettifor scale](https://iopscience.iop.org/article/10.1088/1367-2630/18/9/093011/meta) is used as the measure of atomic similarity; this can be changed by passing the `metric` argument when constructing the `ElM2D` object.

```python
embedding = mapper.fit_transform(df["formula"], metric="atomic")
mapper = ElM2D(metric="atomic")
embedding = mapper.fit_transform(df["formula"])
```

These embeddings may be visualized within a jupyter notebook, or exported to HTML to view full page in the web browser.
Expand Down Expand Up @@ -168,6 +169,9 @@ Machine Learnt:
- megnet16
- random_200

Custom Distance Matrix:
- precomputed

## Citing

If you would like to cite this code in your work, please use the following reference
Expand Down

0 comments on commit f6c4873

Please sign in to comment.