typo and wording corrections within lecture03

upb-lea · Mar 25, 2023 · adbd4d0 · adbd4d0
1 parent 71e7a51
commit adbd4d0
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 3 deletions.
diff --git a/lecture_slides/main.tex b/lecture_slides/main.tex
@@ -177,7 +177,7 @@
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 %%%Lecture Include Onlys%%%
-%\includeonly{tex/Lecture02}
+%\includeonly{tex/Lecture03}
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \begin{document}

diff --git a/lecture_slides/tex/Lecture02.tex b/lecture_slides/tex/Lecture02.tex
@@ -598,6 +598,7 @@ \section{Finite Markov Decision Processes}
 \frame{\frametitle{Bellman Expectation Equation (3)}
 Inserting \eqref{eq:q_MDP_finite} into \eqref{eq:v_MDP_finite} directly results in:
 \begin{equation}
+\label{eq:Bellman_MDP_linear_non_matrix}
 	v_\pi(x_k)	= \sum_{u_k\in\mathcal{U}}\pi(u_k|x_k)\left(\mathcal{R}^u_x + \gamma\sum_{x_{k+1}\in\mathcal{X}}p_{xx'}^u v_\pi(x_{k+1})\right) \, .
 \end{equation}
 \pause

diff --git a/lecture_slides/tex/Lecture03.tex b/lecture_slides/tex/Lecture03.tex
@@ -221,11 +221,11 @@ \section{Policy Evaluation}
 %% Iterative Policy Evaluation by Richardson Iteration (1)%%
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \frame{\frametitle{Iterative Policy Evaluation by Richardson Iteration (1)}
-General form for any $x_k\in\mathcal{X}$ at iteration $i$ is given as:
+Applying the Richardson iteration~\eqref{eq:richardson_general} to the Bellman equation~\eqref{eq:Bellman_MDP_linear_non_matrix} for any $x_k\in\mathcal{X}$ at iteration $i$ results in:
 \begin{equation}
 	v_{i+1}(x_k)	= \sum_{u_k\in\mathcal{U}}\bm{\pi}(u_k|x_k)\left(\mathcal{R}^u_x + \gamma\sum_{x_{k+1}\in\mathcal{X}}p_{xx'}^u v_{i}(x_{k+1})\right)\, .
 \end{equation}\pause
-Matrix form then is:
+The corresponding matrix form, based on~\eqref{eq:Bellman_MDP_linear}, is then:
 \begin{equation}
 \label{eq:iterative_policy_eval_matrix}
 	\bm{v}_{\mathcal{X},i+1}^{\pi} =\bm{r}_{\mathcal{X}}^{\pi}+\gamma\bm{\mathcal{P}}_{xx'}^{\pi}\bm{v}_{\mathcal{X},i}^{\pi}\, .