supervised.py
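
"""Evaluate CGM reward allocations on a supervised task.

For each data split and inverse temperature, load the pickled CGM results,
train one SVM per party on the party's own data and another on its data plus
its allocated reward points, score both on the other parties' data, and
report the correlation between per-party test accuracy and each party's
value alpha (loaded from the results file).
"""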

import pickle

import numpy as np
from sacred import Experiment
from sacred.observers import FileStorageObserver
from scipy import stats
from sklearn.svm import SVC

ex = Experiment("supervised")
ex.observers.append(FileStorageObserver('runs'))

@ex.named_config
def creditcard():
    ds = 'creditcard'
    num_classes = 5
    party_data_size = 5000
    condition = 'stable'
    num_parties = 5
    d = 4

@ex.named_config
def mnist():
    ds = 'mnist'
    num_classes = 10
    party_data_size = 5000
    condition = 'stable'
    num_parties = 5
    d = 8

@ex.named_config
def cifar():
    ds = 'cifar'
    num_classes = 10
    party_data_size = 5000
    condition = 'stable'
    num_parties = 5
    d = 8
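
# Named configs are selected via sacred's command line, e.g.:
#   python supervised.py with mnist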
@ex.automain
def main(ds, num_classes, party_data_size, condition, num_parties, d):
    args = dict(sorted(locals().items()))
    print("Running with parameters {}".format(args))

    candidate_data_size = 100000  # not used below
    splits = ['equaldisjoint', 'unequal']
    inv_temps = [1, 2, 4, 8]
    result_dir = "data/{}/cgm-results/".format(ds)

    for split in splits:
        print("===========")
        print(split)
        corrs_before = []
        corrs_with_reward = []
        for inv_temp in inv_temps:
            print("-----")
            print("inv_temp = {}:".format(inv_temp))
            file_name = "CGM-{}-{}-invtemp{}-{}.p".format(ds, split, inv_temp, condition)
            # Use a context manager so the results file is closed after loading
            with open(result_dir + file_name, "rb") as f:
                (party_datasets, party_labels, reference_dataset, candidate_datasets,
                 candidate_labels, rewards, deltas, mus, alpha) = pickle.load(f)

            # Trim party_labels to party_data_size
            party_labels = [party_labels[i][:party_data_size] for i in range(num_parties)]
            print("alpha = {}".format(alpha))
            # Recover each reward point's index in the candidate set so that the
            # matching candidate labels can be attached below
            candidates = candidate_datasets[0]
            reward_idxs = [[] for _ in range(num_parties)]
            for party in range(num_parties):
                rews = rewards[party]
                for i in range(len(rews)):
                    # The broadcast comparison flags candidate rows matching in at
                    # least one coordinate; np.allclose then confirms a full match
                    rows, _ = np.where(candidates == rews[i])
                    index_found = False
                    for row in rows:
                        if np.allclose(rews[i], candidates[row]):
                            reward_idxs[party].append(row)
                            index_found = True
                            break
                    if not index_found:
                        raise ValueError("Index not found for point {}".format(i))
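            # The inner search above could be vectorized per party, e.g. (a sketch
            # assuming every reward row appears in the candidate set, at the cost
            # of an (n_rewards x n_candidates x d) boolean temporary):
            #   matches = np.all(np.isclose(rews[:, None, :], candidates[None, :, :]), axis=2)
            #   reward_idxs[party] = list(np.argmax(matches, axis=1))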
            # Append each party's reward points (and their candidate labels) to
            # its own dataset
            party_datasets_rewards = []
            party_labels_rewards = []
            for i in range(num_parties):
                party_datasets_rewards.append(
                    np.concatenate([party_datasets[i], rewards[i]], axis=0))
                party_labels_rewards.append(
                    np.concatenate([party_labels[i], candidate_labels[reward_idxs[i]]], axis=0))
            # Classification with SVM: score each party before and after rewards
            scores_before = []
            scores_with_reward = []
            for i in range(num_parties):
                # Construct the test set as all real data in the system except
                # party i's own, i.e. data unseen by party i
                other_party_datasets = []
                other_party_labels = []
                for j in range(num_parties):
                    if j == i:
                        continue
                    other_party_datasets.append(party_datasets[j])
                    other_party_labels.append(party_labels[j])
                test_ds = np.concatenate(other_party_datasets)
                test_labels = np.concatenate(other_party_labels)

                # Without reward
                train_ds = party_datasets[i]
                train_labels = party_labels[i]
                clf = SVC()
                clf.fit(train_ds, train_labels)
                scores_before.append(clf.score(test_ds, test_labels))

                # With reward
                train_with_reward_ds = party_datasets_rewards[i]
                train_with_reward_labels = party_labels_rewards[i]
                clf_with_reward = SVC()
                clf_with_reward.fit(train_with_reward_ds, train_with_reward_labels)
                scores_with_reward.append(clf_with_reward.score(test_ds, test_labels))
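
                # SVC() uses scikit-learn defaults (RBF kernel, C=1.0); both
                # classifiers are scored on the same held-out test set, so the
                # before/after accuracies are directly comparable.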
print("Classification accuracy before adding rewards:\n{}".format(scores_before))
print("Classification accuracy after adding rewards:\n{}".format(scores_with_reward))
corr_before = np.corrcoef(alpha, scores_before)[0, 1]
print("Correlation with alpha before rewards: {}".format(corr_before))
corr_with_reward = np.corrcoef(alpha, scores_with_reward)[0, 1]
print("Correlation with alpha after rewards: {}".format(corr_with_reward))
corrs_before.append(corr_before)
corrs_with_reward.append(corr_with_reward)
print("Average correlation with alpha before rewards for {} split: {}, stderr: {}".format(split,
np.mean(corrs_before),
stats.sem(corrs_before)
))
print("Average correlation with alpha after rewards for {} split: {}, stderr: {}".format(split,
np.mean(corrs_with_reward),
stats.sem(corrs_with_reward)
))