-
Notifications
You must be signed in to change notification settings - Fork 1
/
tools.py
142 lines (106 loc) · 3.96 KB
/
tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import numpy as np
import scipy.io as sio
import scipy.sparse as sp
def load_data(path, dtype=np.float32):
    """Load train/test features and the train-vs-test label matrix from a .mat file.

    All three arrays are cast to *dtype*; both feature splits are then
    zero-centered using the mean computed on the training split only.

    Args:
        path: path to a .mat file with keys 'traindata', 'testdata',
            'cateTrainTest'.
        dtype: numpy scalar type used to cast the loaded arrays.

    Returns:
        Tuple (traindata, testdata, cateTrainTest).
    """
    contents = sio.loadmat(path)
    train = dtype(contents['traindata'])
    test = dtype(contents['testdata'])
    labels = dtype(contents['cateTrainTest'])
    # Center with training statistics so the test split sees no leakage.
    train_mean = np.mean(train, axis=0)
    train = train - train_mean
    test = test - train_mean
    return train, test, labels
def save_sparse_matrix(filename, array):
    """Serialize a compressed scipy sparse matrix (CSR/CSC) to an .npz archive.

    Stores the raw (data, indices, indptr, shape) buffers plus the matrix
    class itself under '_type' so the exact type can be reconstructed later.
    NOTE: the class object is pickled by np.savez, so reading the archive
    back requires np.load(..., allow_pickle=True).
    """
    fields = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
        '_type': array.__class__,
    }
    np.savez(filename, **fields)
def load_sparse_matrix(filename):
    """Rebuild a scipy sparse matrix saved by save_sparse_matrix.

    The archive's '_type' entry holds the matrix class object, which numpy
    pickles on save. Since numpy 1.16.3, np.load refuses pickled payloads by
    default, so this function must pass allow_pickle=True — only load
    archives from trusted sources, as unpickling can execute arbitrary code.

    Args:
        filename: path to the .npz archive.

    Returns:
        A sparse matrix of the originally saved class.
    """
    # Context manager closes the underlying file handle (the original
    # leaked it) and allow_pickle=True fixes the ValueError modern numpy
    # raises when reading the pickled '_type' object array.
    with np.load(filename, allow_pickle=True) as archive:
        matrix_cls = archive['_type'].item(0)
        return matrix_cls((archive['data'], archive['indices'],
                           archive['indptr']), shape=archive['shape'])
def binarize_adj(adj):
    """Clamp every nonzero entry of *adj* to 1, in place, and return it."""
    nonzero_mask = adj != 0
    adj[nonzero_mask] = 1
    return adj
def renormalize_adj(adj):
    """Symmetrically renormalize an adjacency matrix: D^-1/2 · A^T · D^-1/2.

    Rows that sum to zero (isolated nodes) would produce 1/sqrt(0) = inf;
    their scale factor is replaced by 0 so they simply stay zero.
    """
    degrees = np.array(adj.sum(axis=1))
    scale = np.power(degrees, -0.5).flatten()
    scale[np.isinf(scale)] = 0.  # neutralize isolated nodes
    d_inv_sqrt = sp.diags(scale)
    # (A · D^-1/2)^T · D^-1/2 == D^-1/2 · A^T · D^-1/2
    return adj.dot(d_inv_sqrt).transpose().dot(d_inv_sqrt)
def sign_dot(data, func):
    """Project *data* onto *func* and keep only the sign (-1/0/+1) of each entry."""
    projection = np.dot(data, func)
    return np.sign(projection)
def mAP(cateTrainTest, IX, num_return_NN=None):
    """Mean average precision over the top *num_return_NN* retrieved items.

    Args:
        cateTrainTest: 0/1 relevance matrix, shape (numTrain, numTest);
            entry (i, j) marks train item i relevant to test query j.
        IX: ranking matrix, shape (numTrain, numTest); column qid holds
            train indices ordered best-first for query qid.
        num_return_NN: evaluation depth; any falsy value means rank the
            full training set.

    Returns:
        Tuple (mean AP, per-query AP column vector, per-query hit counts).
    """
    num_train, num_test = IX.shape
    depth = num_return_NN if num_return_NN else num_train
    ap_all = np.zeros((num_test, 1))
    yescnt_all = np.zeros((num_test, 1))
    for qid in range(num_test):
        ranked = IX[:, qid]
        hits, precision_sum = 0, 0
        for rank in range(depth):
            if cateTrainTest[ranked[rank], qid]:
                hits += 1
                precision_sum += hits / (rank * 1.0 + 1.0)
        yescnt_all[qid] = hits
        # NOTE: AP is normalized by the ranking depth, not by the number
        # of relevant items — a deliberate convention of this codebase.
        ap_all[qid] = precision_sum / (depth * 1.0) if precision_sum else 0.0
    return np.mean(ap_all), ap_all, yescnt_all
def topK(cateTrainTest, HammingRank, k=500):
    """Precision and recall at rank *k*, averaged over all test queries.

    Args:
        cateTrainTest: 0/1 relevance matrix, shape (numTrain, numTest).
        HammingRank: ranking matrix; column qid lists train indices ordered
            best-first for query qid.
        k: cut-off rank.

    Returns:
        Tuple (mean precision@k, mean recall@k).
    """
    num_test = cateTrainTest.shape[1]
    precision = np.zeros((num_test, 1))
    recall = np.zeros((num_test, 1))
    topk = HammingRank[:k, :]
    for qid in range(num_test):
        retrieved = topk[:, qid]
        relevant_retrieved = np.sum(cateTrainTest[retrieved, qid])
        total_relevant = np.sum(cateTrainTest[:, qid])
        precision[qid] = relevant_retrieved / (k * 1.0)
        # Guard: a query with no relevant items at all used to divide by
        # zero, turning recall.mean() into NaN; define its recall as 0.
        if total_relevant > 0:
            recall[qid] = relevant_retrieved / (total_relevant * 1.0)
        else:
            recall[qid] = 0.0
    return precision.mean(), recall.mean()
def calc_hammingDist(B1, B2):
    """Hamming distance between +/-1 binary codes: 0.5 * (q - B1 · B2^T).

    For q-bit codes in {-1, +1}, the inner product equals q minus twice the
    number of differing bits, so this recovers the Hamming distance exactly.
    """
    code_len = B2.shape[1]
    inner = np.dot(B1, B2.transpose())
    return 0.5 * (code_len - inner)
def calc_map(qB, rB, query_L, retrieval_L):
    """Mean average precision of Hamming-ranked retrieval with binary codes.

    Args:
        qB: query codes in {-1,+1}, shape (m, q).
        rB: retrieval codes in {-1,+1}, shape (n, q).
        query_L: query labels in {0,1}, shape (m, l).
        retrieval_L: retrieval labels in {0,1}, shape (n, l).

    Returns:
        Mean AP over all m queries. Queries with no relevant item
        contribute 0 but still count in the denominator (original behavior).
    """
    num_query = query_L.shape[0]
    total_ap = 0.0
    for qid in range(num_query):
        # Relevant iff query and retrieval item share at least one label.
        gnd = (np.dot(query_L[qid, :], retrieval_L.transpose()) > 0).astype(np.float32)
        tsum = int(np.sum(gnd))
        if tsum == 0:
            continue
        hamm = calc_hammingDist(qB[qid, :], rB)
        order = np.argsort(hamm)
        gnd = gnd[order]
        count = np.linspace(1, tsum, tsum)
        # 1-based ranks of the relevant items in the sorted list.
        # BUGFIX: the original indexed tindex[1], but np.where on the 1-D
        # gnd produced here yields a (1, tsum) array, so tindex[1] raised
        # IndexError; take the single axis of indices instead.
        tindex = np.where(gnd == 1)[0] + 1.0
        total_ap += np.mean(count / tindex)
    return total_ap / num_query
if __name__ == '__main__':
    # Tiny self-check: 8 items with 5-bit hash codes and 4 single-label classes.
    codes = np.array([[1, 0, 1, 1, 0], [0, 1, 0, 1, 0], [0, 0, 1, 0, 1],
                      [1, 0, 0, 1, 0], [1, 0, 0, 0, 1], [0, 1, 0, 1, 0],
                      [0, 0, 1, 1, 0], [0, 1, 0, 1, 0]])
    labels = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0],
                       [1, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0],
                       [0, 0, 1, 0], [0, 1, 0, 0]])
    print('labels:\n', labels)
    # Recode bits as +/-1 so inner products relate to Hamming distance.
    codes = np.where(codes == 0, -1, codes)
    print(codes)
    # d_H(i, j) = 0.5 * (q - <c_i, c_j>) with q = 5 bits.
    hamming_dist = 0.5 * (-np.dot(codes, codes.transpose()) + 5)
    print('hammingDist: \n', hamming_dist)
    # Column j ranks all items by distance to item j (closest first).
    hamming_rank = np.argsort(hamming_dist, axis=0)
    print('Hamming Rank: \n', hamming_rank)
    # Items are mutually relevant iff they share a label.
    sim_matrix = np.dot(labels, labels.transpose())
    print('sim_matrix: \n', sim_matrix)
    result = mAP(sim_matrix, hamming_rank)
    print(result)