# bayesian_optimization_for_gwa_ucb1.py
# Bayesian optimization of the GWA-UCB1 hyperparameters (α, m) on simulated multi-armed bandits.
import time
import numpy as np
import matplotlib.pyplot as plt
import mab as mab
import algorithms as algorithms
from mpl_toolkits.mplot3d import Axes3D
from skopt import gp_minimize
from skopt.plots import plot_convergence
from functools import partial
from mab.utils import plot_bayesian_optimization_progress, generate_3d_bayesian_optimization_plot
# Execute a simulation for a Stochastic MAB using the GWA-UCB1 algorithm
def execute_stochastic_mab_with_gwa_ucb1(
    xy,
    random_seed=0,
    n_simulations=1000,
    n_steps=10000,
    n_steps_to_switch_rewards_probabilities=None,
    n_arms=2,
    distribution_of_rewards_probabilities='uniform'
):
    """Return the average final cumulative regret of GWA-UCB1 on a stochastic MAB.

    The value is meant to be minimized (it is used as the objective handed to
    ``gp_minimize`` in this file).

    Args:
        xy: Pair ``(alpha, m)`` forwarded to
            ``algorithms.GeneralizedWeightedAveragesUCB1``.
        random_seed: Seed given to ``np.random.seed`` before the simulations.
        n_simulations: Number of independent runs to average over (>= 1).
        n_steps: Number of steps in every run (>= 1).
        n_steps_to_switch_rewards_probabilities: Forwarded unchanged to
            ``mab.StochasticMAB``.
        n_arms: Number of arms given to both the bandit and the model.
        distribution_of_rewards_probabilities: Forwarded unchanged to
            ``update_distribution_of_rewards_probabilities``.

    Returns:
        The cumulative regret reached at the last step, averaged over all
        simulations, as a float.

    Raises:
        ValueError: If ``n_simulations`` or ``n_steps`` is smaller than 1.
    """
    # Fail early with a clear message instead of returning NaN (0 / 0) or
    # raising an IndexError on an empty per-step history.
    if n_simulations < 1:
        raise ValueError(f'n_simulations must be >= 1, got {n_simulations!r}')
    if n_steps < 1:
        raise ValueError(f'n_steps must be >= 1, got {n_steps!r}')

    alpha, m = xy
    np.random.seed(random_seed)

    # Only the regret at the final step is returned, so a running sum per
    # simulation is enough; no per-step history is kept.
    total_final_regret = 0.0
    for _ in range(n_simulations):
        smab = mab.StochasticMAB(
            n_arms,
            n_steps_to_switch_rewards_probabilities=n_steps_to_switch_rewards_probabilities,
        )
        smab.update_distribution_of_rewards_probabilities(distribution_of_rewards_probabilities)
        smab.update_rewards_probabilities_from_distribution()
        model = algorithms.GeneralizedWeightedAveragesUCB1(n_arms, alpha=alpha, m=m)

        cumulative_regret = 0.0
        for _ in range(n_steps):
            smab.pull_lever()
            selected_arm = model.select_arm()
            reward = smab.reward(selected_arm)
            model.update(selected_arm, reward)
            # float() keeps the accumulation in double precision whatever
            # numeric type regret() hands back.
            cumulative_regret += float(smab.regret(selected_arm))
        total_final_regret += cumulative_regret

    return total_final_regret / n_simulations
# Execute a simulation for a Survival MAB using the GWA-UCB1 algorithm
def execute_survival_mab_with_gwa_ucb1(
    xy,
    random_seed=0,
    n_simulations=1000,
    n_steps=10000,
    n_steps_to_switch_rewards_probabilities=None,
    n_arms=8,
    initial_budgets=80,
    initial_rewards_probabilities=None
):
    """Return the negated average final budget of GWA-UCB1 on a survival MAB.

    The sign is flipped so that a minimizer (``gp_minimize`` in this file)
    effectively maximizes the remaining budget.

    Args:
        xy: Pair ``(alpha, m)`` forwarded to
            ``algorithms.GeneralizedWeightedAveragesUCB1``.
        random_seed: Seed given to ``np.random.seed`` before the simulations.
        n_simulations: Number of independent runs to average over (>= 1).
        n_steps: Number of steps in every run (>= 1).
        n_steps_to_switch_rewards_probabilities: Forwarded unchanged to
            ``mab.SurvivalMAB``.
        n_arms: Number of arms given to both the bandit and the model.
        initial_budgets: Budget available before the first pull.
        initial_rewards_probabilities: Per-arm reward probabilities passed to
            ``update_rewards_probabilities``. ``None`` selects the 8-arm
            default ``[0.55, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45]``.

    Returns:
        ``-1`` times the budget recorded at the last step, averaged over all
        simulations, as a float.

    Raises:
        ValueError: If ``n_simulations`` or ``n_steps`` is smaller than 1.
    """
    # Fail early with a clear message instead of returning NaN (0 / 0) or
    # raising an IndexError on an empty per-step history.
    if n_simulations < 1:
        raise ValueError(f'n_simulations must be >= 1, got {n_simulations!r}')
    if n_steps < 1:
        raise ValueError(f'n_steps must be >= 1, got {n_steps!r}')
    # Build the default per call so no list object is shared between calls.
    if initial_rewards_probabilities is None:
        initial_rewards_probabilities = [0.55, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45, 0.45]

    alpha, m = xy
    np.random.seed(random_seed)

    # Only the budget at the final step is returned, so a running value per
    # simulation is enough; no per-step history is kept.
    total_final_budget = 0.0
    for _ in range(n_simulations):
        smab = mab.SurvivalMAB(
            n_arms,
            n_steps_to_switch_rewards_probabilities=n_steps_to_switch_rewards_probabilities,
        )
        smab.update_rewards_probabilities(initial_rewards_probabilities)
        model = algorithms.GeneralizedWeightedAveragesUCB1(n_arms, alpha=alpha, m=m)

        budget = float(initial_budgets)
        for step in range(n_steps):
            # The bandit is advanced on every step, even after ruin, exactly
            # as before, so the sequence of bandit calls stays unchanged.
            smab.pull_lever()
            # The first pull always happens; afterwards a pull requires a
            # strictly positive budget.
            # NOTE(review): the budget update is treated as part of the
            # "still solvent" branch; confirm against the upstream file.
            if step > 0 and not budget > 0:
                # Ruined: this and every later step record a budget of zero.
                budget = 0.0
                continue
            selected_arm = model.select_arm()
            reward = smab.reward(selected_arm)
            # The model is updated with a 0/1 signal; the raw reward only
            # moves the budget.
            model.update(selected_arm, 1.0 if reward == 1.0 else 0.0)
            budget += float(reward)
        total_final_budget += budget

    # Multiply by -1 for Bayesian optimization (which minimizes)
    return -(total_final_budget / n_simulations)
def _experiment_settings(experiment):
    """Return the configuration of one predefined experiment.

    Args:
        experiment: Experiment id; 1 (stochastic MAB, 2 arms),
            2 (stochastic MAB, 32 arms) or 3 (survival MAB, 8 arms).

    Returns:
        Tuple ``(simulation, simulation_options, graph_title, graph_z_label)``.

    Raises:
        ValueError: If ``experiment`` is not 1, 2 or 3.
    """
    if experiment not in (1, 2, 3):
        raise ValueError(f'experiment must be 1, 2 or 3, got {experiment!r}')

    if experiment == 1:
        # Experiment 1: Stochastic MAB with GWA-UCB1, 2 Arms
        return (
            execute_stochastic_mab_with_gwa_ucb1,
            {
                'random_seed': 0,
                'n_simulations': 1000,
                'n_steps': 10000,
                'n_steps_to_switch_rewards_probabilities': None,
                'n_arms': 2,
                'distribution_of_rewards_probabilities': 'uniform',
            },
            'Experiment 1, Arm 2',
            'Avg. regret at final step',
        )
    if experiment == 2:
        # Experiment 2: Stochastic MAB with GWA-UCB1, 32 Arms
        return (
            execute_stochastic_mab_with_gwa_ucb1,
            {
                'random_seed': 0,
                'n_simulations': 1000,
                'n_steps': 50000,
                'n_steps_to_switch_rewards_probabilities': None,
                'n_arms': 32,
                'distribution_of_rewards_probabilities': 'normal',
            },
            'Experiment 2, Arm 32',
            'Avg. regret at final step',
        )
    # Experiment 3: Survival MAB with GWA-UCB1, 8 Arms
    return (
        execute_survival_mab_with_gwa_ucb1,
        {
            'random_seed': 0,
            'n_simulations': 1000,
            'n_steps': 50000,
            'n_steps_to_switch_rewards_probabilities': None,
            'n_arms': 8,
            'initial_budgets': 8 * 10,
            'initial_rewards_probabilities': np.array([0.55 if i == 0 else 0.45 for i in range(8)]),
        },
        'Experiment 3, Arm 8',
        'Avg. budget at final step',
    )


# Main
def main(experiment=1):
    """Tune (α, m) of GWA-UCB1 with Bayesian optimization and plot the outcome.

    Runs ``gp_minimize`` over the (α, m) search space for the chosen
    experiment, prints the best point next to a fixed comparative setting,
    and hands the results to the two plotting helpers imported from
    ``mab.utils``.

    Args:
        experiment: Which predefined experiment to run (1, 2 or 3).
            Defaults to 1.

    Raises:
        ValueError: If ``experiment`` is not 1, 2 or 3.
    """
    import os

    simulation, simulation_options, graph_title, graph_z_label = _experiment_settings(experiment)
    graph_x_label = 'α'
    graph_y_label = 'm'

    # Define the space for Bayesian optimization (α and m search spaces)
    x_space = (0.0, 1.0)   # α search space
    y_space = (-2.0, 4.0)  # m search space

    # Set Bayesian optimization parameters
    bayesian_optimization_random_seed = 0
    bayesian_optimization_max_iterations = 100

    # Parameters for GWA-UCB1 to compare with Bayesian Optimization results
    comparative_gwaucb1_alpha = 0.21
    comparative_gwaucb1_m = 1.30

    # The survival objective returns the negated budget, so its values are
    # flipped back before being reported; the regret objective is used as is.
    objective_is_negated = simulation is not execute_stochastic_mab_with_gwa_ucb1

    # The plotting helpers are not visible here; create the target directory
    # up front so a missing folder cannot cost a finished optimization run.
    os.makedirs('output', exist_ok=True)

    # Perform Bayesian optimization and track execution time
    objective = partial(simulation, **simulation_options)
    start_time = time.perf_counter()  # monotonic clock, suited to durations
    result = gp_minimize(
        objective,
        [x_space, y_space],
        n_calls=bayesian_optimization_max_iterations,
        random_state=bayesian_optimization_random_seed,
        verbose=True,
    )
    elapsed_seconds = time.perf_counter() - start_time

    # Print the results of Bayesian Optimization for the chosen experiment
    z_best = -result.fun if objective_is_negated else result.fun
    z_vals = -result.func_vals if objective_is_negated else result.func_vals
    print('*************************************************')
    print(f'Execution Time: {elapsed_seconds:.2f} seconds ({elapsed_seconds / 60:.2f} minutes)')
    print(f'Best (α, m): {result.x}')
    print(f'Best {graph_z_label}: {z_best}')

    # Execute GWA-UCB1 for comparison with Bayesian Optimization
    comparative_point = [comparative_gwaucb1_alpha, comparative_gwaucb1_m]
    comparative_value = simulation(comparative_point, **simulation_options)
    if objective_is_negated:
        comparative_value = -comparative_value
    print(f'Comparative (α, m): {comparative_point}')
    print(f'Comparative {graph_z_label}: {comparative_value}')

    # Plot the progress of Bayesian optimization
    plot_bayesian_optimization_progress(
        z_vals=z_vals,
        comparative_value=comparative_value,
        graph_title=graph_title,
        graph_z_label=graph_z_label,
        comparative_gwaucb1_alpha=comparative_gwaucb1_alpha,
        comparative_gwaucb1_m=comparative_gwaucb1_m,
        save_path='output/progress_of_bayesian_optimization.png'
    )

    # Generate a 3D plot of Bayesian optimization results
    generate_3d_bayesian_optimization_plot(
        result=result,
        x_iters=result.x_iters,
        z_vals=z_vals,
        graph_title=graph_title,
        graph_x_label=graph_x_label,
        graph_y_label=graph_y_label,
        graph_z_label=graph_z_label,
        save_path='output/results_of_bayesian_optimization.png',
        x_space=x_space,
        y_space=y_space
    )
# Run the optimization only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()