Update README

lrcfmd · Mar 7, 2021 · f6c4873 · f6c4873
1 parent a142e23
commit f6c4873
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 14 deletions.
diff --git a/ElM2D/ElM2D.py b/ElM2D/ElM2D.py
@@ -69,7 +69,8 @@ class ElM2D():
     def __init__(self, formula_list=None,
                        n_proc=None,
                        n_components=2,
-                       verbose=True):
+                       verbose=True,
+                       metric="mod_petti"):
 
         self.verbose = verbose
 
@@ -84,6 +85,9 @@ def __init__(self, formula_list=None,
         self.embedding = None    # Stores the last embedded coordinates
         self.dm = None           # Stores distance matrix
 
+        self.metric = metric
+        self.feature_matrix = 
+
     def save(self, filepath):
         # Save all variables except for the distance matrix
         save_dict = {k: v for k, v in self.__dict__.items()}
@@ -135,28 +139,28 @@ def plot(self, fp=None, color=None, embedding=None):
 
         return fig
 
-    def fit(self, X, metric="mod_petti"):
+    def fit(self, X):
         '''
         Take an input vector, either of a precomputed distance matrix, or
         an iterable of strings of composition formula, construct an ElMD distance
-        matrix and store to self.dm. Can pass a precomputed matrix with 
-        metric="precomputed"
+        matrix and store to self.dm.
 
         Input
         X - A list of compound formula strings, or a precomputed distance matrix
+        (ensure self.metric = "precomputed")
         '''
         self.formula_list = X
         n = len(X)
 
-        if metric == "precomputed":
+        if self.metric == "precomputed":
             self.dm = X
 
         elif n < 1000:
             # Do this on a single core for smaller datasets
             distances = []
 
             for i in range(n - 1):
-                x = ElMD(X[i], metric=metric)
+                x = ElMD(X[i], metric=self.metric)
                 for j in range(i + 1, n):
                     distances.append(x.elmd(X[j]))
 
@@ -165,20 +169,19 @@ def fit(self, X, metric="mod_petti"):
 
         else:
             if self.verbose: print("Constructing distances")
-            dist_vec = self._process_list(X, metric=metric, n_proc=self.n_proc)
+            dist_vec = self._process_list(X, n_proc=self.n_proc)
             self.dm = squareform(dist_vec)
 
-    def fit_transform(self, X, y=None, how="UMAP", n_components=2, metric="mod_petti"):
+    def fit_transform(self, X, y=None, how="UMAP", n_components=2):
         """
         Successively call fit and transform
 
         Parameters:
         X - List of compositions to embed 
         how - "UMAP" or "PCA", the embedding technique to use
         n_components - The number of dimensions to embed to
-        metric - "precomputed" to pass precomputed distance matrices
         """
-        self.fit(X, metric=metric)
+        self.fit(X)
         embedding = self.transform(how=how, n_components=n_components, y=y)
         return embedding
 
@@ -335,7 +338,7 @@ def cross_validate(self, y=None, k=5, shuffle=True, seed=42):
 
         return [(X_ret[i][0], X_ret[i][1], y_ret[i][0], y_ret[i][1]) for i in range(k)]
 
-    def _process_list(self, formula_list, n_proc, metric="mod_petti"):
+    def _process_list(self, formula_list, n_proc):
         '''
         Given an iterable list of formulas in composition form
         use multiple processes to convert these to pettifor ratio
@@ -347,7 +350,7 @@ def _process_list(self, formula_list, n_proc, metric="mod_petti"):
         self.input_mat = np.ndarray(shape=(len(formula_list), 103), dtype=np.float64)
 
         for i, formula in enumerate(formula_list):
-            self.input_mat[i] = ElMD(formula, metric=metric).vector_form
+            self.input_mat[i] = ElMD(formula, metric=self.metric).vector_form
 
         # Create input pairings
         if self.verbose: 

diff --git a/README.md b/README.md
@@ -67,10 +67,11 @@ Embeddings may also be directed towards a particular chemical property in a pand
 embedding = mapper.fit_transform(df["formula"], df["property_of_interest"])
 ```
 
-By default, the [modified Pettifor scale](https://iopscience.iop.org/article/10.1088/1367-2630/18/9/093011/meta) is used as the method of atomic similarity, "atomic", "petti", "mod_petti", and "mendeleev" can be selected through the `metric` attribute. 
+By default, the [modified Pettifor scale](https://iopscience.iop.org/article/10.1088/1367-2630/18/9/093011/meta) is used as the measure of atomic similarity; this can be changed through the `metric` attribute, which is set when the mapper is constructed.
 
 ```python
-embedding = mapper.fit_transform(df["formula"], metric="atomic")
+mapper = ElM2D(metric="atomic")
+embedding = mapper.fit_transform(df["formula"])
 ```
 
 These embeddings may be visualized within a jupyter notebook, or exported to HTML to view full page in the web browser.
@@ -168,6 +169,9 @@ Machine Learnt:
 - megnet16 
 - random_200
 
+Custom Distance Matrix:
+- precomputed
+
 ## Citing
 
 If you would like to cite this code in your work, please use the following reference