-
Notifications
You must be signed in to change notification settings - Fork 1
/
Distances.py
76 lines (48 loc) · 1.98 KB
/
Distances.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import numpy as np
def _pairwise_minkowski(x, p):
    """Return the (n, n) Minkowski distances of order ``p`` between rows of ``x``."""
    n = x.shape[0]
    # Preallocate instead of growing with vstack, which re-copies every row.
    res = np.empty((n, n), dtype=np.float64)
    for k, row in enumerate(x):
        res[k] = np.sum(np.abs(x - row) ** p, axis=1) ** (1 / p)
    return res


def _pairwise_chebyshev(x):
    """Return the (n, n) Chebyshev (max-coordinate) distances between rows of ``x``."""
    n = x.shape[0]
    res = np.empty((n, n), dtype=np.float64)
    for k, row in enumerate(x):
        res[k] = np.max(np.abs(x - row), axis=1)
    return res


def _pairwise_mahalanobis(x):
    """Return the (n, n) Mahalanobis distances between rows of ``x``."""
    # np.cov yields a 0-d array for a single feature; lift it to (1, 1) so it
    # can be inverted instead of being misreported as singular.
    cov = np.atleast_2d(np.cov(x.T))
    try:
        inv = np.linalg.inv(cov)
    except np.linalg.LinAlgError as err:
        raise ValueError("Cov matrix is singular matrix") from err
    n = x.shape[0]
    res = np.empty((n, n), dtype=np.float64)
    for k, row in enumerate(x):
        diff = x - row
        # Row-wise quadratic form diff_j . inv . diff_j, computed without
        # materialising the full (n, n) product just to read its diagonal.
        squared = np.sum(np.dot(diff, inv) * diff, axis=1)
        # Clamp tiny negative round-off before taking the root.
        res[k] = np.sqrt(np.maximum(squared, 0.0))
    return res


def _pairwise_cosine(x):
    """Return the (n, n) cosine similarities between rows of ``x``."""
    gram = np.dot(x, x.T)
    norms = np.sqrt(np.diag(gram))
    # A zero row gives 0/0 -> nan for its entries.
    return gram / np.outer(norms, norms)


def distances(x, distance="E", p=None):
    """
    Take a data matrix, return the pairwise distances matrix between its rows.

    Args:
        x: array-like data matrix, one sample per row. A 1-D input is treated
            as a single sample (one row); a scalar as a 1x1 matrix.
        distance: one of
            "E": Euclidean distance (Minkowski with p = 2),
            "Minkowski": Minkowski distance of order ``p``,
            "Manhattan": Manhattan distance (Minkowski with p = 1),
            "C": Chebyshev distance,
            "Mahalanobis": Mahalanobis distance, using the covariance of the
                columns of ``x``,
            "Corr": correlation coefficients between rows (``np.corrcoef``),
            "cos": cosine similarity between rows.
            Default "E".
        p: order of the Minkowski distance; required when
            ``distance == "Minkowski"`` and ignored otherwise. Default None.

    Return:
        An (n, n) numpy array, where n is the number of rows of ``x``.

    Raises:
        TypeError: if ``distance`` is "Minkowski" and ``p`` is None.
        ValueError: if the covariance matrix cannot be inverted for the
            Mahalanobis distance, or if ``distance`` is not a known name.
    """
    # Accept lists as well as arrays; promote scalars / 1-D input to 2-D.
    x = np.atleast_2d(np.asarray(x, dtype=np.float64))
    if distance == "Minkowski" and p is None:
        raise TypeError("Missing p for Minkowski distance.")
    if distance == "E":
        return _pairwise_minkowski(x, 2)
    if distance == "Manhattan":
        return _pairwise_minkowski(x, 1)
    if distance == "Minkowski":
        return _pairwise_minkowski(x, p)
    if distance == "C":
        return _pairwise_chebyshev(x)
    if distance == "Mahalanobis":
        return _pairwise_mahalanobis(x)
    if distance == "Corr":
        return np.corrcoef(x)
    if distance == "cos":
        return _pairwise_cosine(x)
    # Previously an unknown name silently returned None.
    raise ValueError("Unknown distance: %r" % (distance,))