-
Notifications
You must be signed in to change notification settings - Fork 2
/
dp_theo.m
61 lines (54 loc) · 1.76 KB
/
dp_theo.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
function [theo, simu] = dp_theo()
% DP_THEO  Analyze the optimal policy (partial knowledge) by backward induction.
%
%   [theo, simu] = dp_theo()
%
%   Builds the finite belief sets via sigl_func / beli_prod, then sweeps the
%   slots from T down to 1, keeping for every belief index the smallest
%   action value and the action that attains it.
%
%   Outputs
%     theo - V(1, 1) / T / K, printed as "dp_theo" (the theoretical EWSAoI)
%     simu - value returned by dp_simu when simu_switch is truthy, else 0
%
%   Globals read: K, D, T, channels, weights, simu_switch (see aoi_main.m).

global K D T channels weights simu_switch

% Sizes of the action and observation index ranges used below
nActions = K;
nObservations = D + 1;

% POMDP functions of each end node, and the finite belief sets built from them
[transF, obserF] = sigl_func();
[h_set, B_set, num_belief, mapping] = beli_prod(transF, obserF);

% Number of stages stored along the third dimension of h_set; beyond this
% stage the recursion keeps reusing the last stored belief set
lastStage = numel(h_set(1, 1, :));

% costToGo : value function of the optimal policy (belief index x slot)
% policy   : minimising action index for each (belief index, slot)
nTerminal = num_belief(lastStage);
costToGo = zeros(nTerminal, T);
policy = zeros(nTerminal, T);

% Final slot: only the weighted immediate cost remains
for b = 1:nTerminal
    costToGo(b, T) = dot(h_set(:, b, lastStage), weights);
end

% Backward sweep over the remaining slots
for slot = (T - 1):-1:1
    stage = min(slot, lastStage);
    for b = 1:num_belief(stage)
        % Every action starts from the same weighted immediate cost
        stageCost = dot(h_set(:, b, stage), weights);
        actionCost = repmat(stageCost, 1, nActions);

        for a = 1:nActions
            for o = 1:nObservations
                % mapping yields the belief index used at the next slot;
                % non-positive entries contribute nothing
                nextB = mapping(a, o, b, stage);
                if ~(nextB > 0)
                    continue;
                end

                if o == nObservations
                    % Last observation index: weighted by 1 - channels(a)
                    prob = 1 - channels(a);
                else
                    prob = B_set(a, o, b, stage) * channels(a);
                end

                actionCost(a) = actionCost(a) + prob * costToGo(nextB, slot + 1);
            end
        end

        % Keep the cheapest action and its value
        [bestCost, bestAction] = min(actionCost);
        costToGo(b, slot) = bestCost;
        policy(b, slot) = bestAction;
    end
end

% EWSAoI (theo): value at belief 1, slot 1, normalised by T and then K
theo = costToGo(1, 1) / T / K;
fprintf("dp_theo = %.6f\n", theo);

% EWSAoI (simu): only computed when the simulation switch is on
if simu_switch
    simu = dp_simu(h_set, B_set, num_belief, policy);
else
    simu = 0;
end