diff --git a/lecture_slides/main.tex b/lecture_slides/main.tex index 5afd8f5..fc4fecf 100644 --- a/lecture_slides/main.tex +++ b/lecture_slides/main.tex @@ -177,7 +177,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%Lecture Include Onlys%%% -%\includeonly{tex/Lecture02} +%\includeonly{tex/Lecture03} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \begin{document} diff --git a/lecture_slides/tex/Lecture02.tex b/lecture_slides/tex/Lecture02.tex index 980dcd8..520e4ff 100644 --- a/lecture_slides/tex/Lecture02.tex +++ b/lecture_slides/tex/Lecture02.tex @@ -598,6 +598,7 @@ \section{Finite Markov Decision Processes} \frame{\frametitle{Bellman Expectation Equation (3)} Inserting \eqref{eq:q_MDP_finite} into \eqref{eq:v_MDP_finite} directly results in: \begin{equation} +\label{eq:Bellman_MDP_linear_non_matrix} v_\pi(x_k) = \sum_{u_k\in\mathcal{U}}\pi(u_k|x_k)\left(\mathcal{R}^u_x + \gamma\sum_{x_{k+1}\in\mathcal{X}}p_{xx'}^u v_\pi(x_{k+1})\right) \, . \end{equation} \pause diff --git a/lecture_slides/tex/Lecture03.tex b/lecture_slides/tex/Lecture03.tex index 530383b..d71c2b1 100644 --- a/lecture_slides/tex/Lecture03.tex +++ b/lecture_slides/tex/Lecture03.tex @@ -221,11 +221,11 @@ \section{Policy Evaluation} %% Iterative Policy Evaluation by Richardson Iteration (1)%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \frame{\frametitle{Iterative Policy Evaluation by Richardson Iteration (1)} -General form for any $x_k\in\mathcal{X}$ at iteration $i$ is given as: +Applying the Richardson iteration \eqref{eq:richardson_general} to the Bellman equation \eqref{eq:Bellman_MDP_linear_non_matrix} for any $x_k\in\mathcal{X}$ at iteration $i$ results in: \begin{equation} v_{i+1}(x_k) = \sum_{u_k\in\mathcal{U}}\bm{\pi}(u_k|x_k)\left(\mathcal{R}^u_x + \gamma\sum_{x_{k+1}\in\mathcal{X}}p_{xx'}^u v_{i}(x_{k+1})\right)\, . 
\end{equation}\pause -Matrix form then is: +The corresponding matrix form based on \eqref{eq:Bellman_MDP_linear} then is: \begin{equation} \label{eq:iterative_policy_eval_matrix} \bm{v}_{\mathcal{X},i+1}^{\pi} =\bm{r}_{\mathcal{X}}^{\pi}+\gamma\bm{\mathcal{P}}_{xx'}^{\pi}\bm{v}_{\mathcal{X},i}^{\pi}\, .