Commit

rename and fix

Marek Petrik committed Dec 28, 2024
1 parent 1c90249 commit 564761c
Showing 4 changed files with 82 additions and 82 deletions.
27 changes: 13 additions & 14 deletions LeanMDPs/Expected.lean
@@ -5,11 +5,11 @@ import LeanMDPs.Histories

open NNReal

variable {σ α : Type}
variable [Inhabited σ] [Inhabited α]
variable [DecidableEq σ] [DecidableEq α]

variable {m : MDP σ α}
/- state -/
variable {σ : Type} [Inhabited σ] [DecidableEq σ]
/- action -/
variable {α : Type} [Inhabited α] [DecidableEq α]
variable {M : MDP σ α}

open Finprob

@@ -19,25 +19,24 @@ Value function type for value functions that are
- for a specific policy
- and a horizon T
-/
def ValueH (m : MDP σ α) : Type := Hist m → ℝ
def ValueH (M : MDP σ α) : Type := Hist M → ℝ

/-- Bellman operator on history-dependent value functions -/
def DPhπ (π : PolicyHR m) (vₜ : ValueH m) : ValueH m
| h => ∑ a ∈ m.A, ∑ s' ∈ m.S,
((π h).p a * (m.P h.last a).p s') * (m.r h.last a s' + vₜ h)

def DPhπ (π : PolicyHR M) (vₜ : ValueH M) : ValueH M
| h => ∑ a ∈ M.A, ∑ s' ∈ M.S,
((π h).p a * (M.P h.last a).p s') * (M.r h.last a s' + vₜ h)

/-- Finite-horizon value function definition, history dependent -/
def value_π (π : PolicyHR m) : ℕ → ValueH m
def value_π (π : PolicyHR M) : ℕ → ValueH M
| Nat.zero => fun _ ↦ 0
| Nat.succ t => fun h ↦ 𝔼_ h π t.succ reward
| Nat.succ t => fun h ↦ 𝔼ₕ[ reward // h, π, t.succ ]

/-- Dynamic program value function, finite-horizon history dependent -/
def value_dp_π (π : PolicyHR m) : ℕ → ValueH m
def value_dp_π (π : PolicyHR M) : ℕ → ValueH M
| Nat.zero => fun _ ↦ 0
| Nat.succ t => DPhπ π (value_dp_π π t)

theorem dp_correct_vf (π : PolicyHR m) (T : ℕ) (h : Hist m) :
theorem dp_correct_vf (π : PolicyHR M) (T : ℕ) (h : Hist M) :
value_π π T h = value_dp_π π T h :=
match T with
| Nat.zero => rfl
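
As an editorial aside (not part of this commit), here is a minimal sketch of how the dynamic program above unfolds, assuming the `value_dp_π` and `DPhπ` definitions and the variable block shown above are in scope; both equalities are intended to hold definitionally (`rfl`), or by `simp [value_dp_π]` if needed.

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- At horizon zero the DP value is identically zero, and one step of the
-- recursion is exactly an application of the Bellman operator DPhπ.
example (π : PolicyHR M) (h : Hist M) : value_dp_π π 0 h = 0 := rfl
example (π : PolicyHR M) (t : ℕ) :
    value_dp_π π (t + 1) = DPhπ π (value_dp_π π t) := rfl
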
42 changes: 19 additions & 23 deletions LeanMDPs/FinPr.lean
@@ -87,8 +87,10 @@ def probability (B : Finrv P Bool) : ℝ≥0 :=

notation "ℙ[" B "]" => probability B

/-- Expected value 𝔼[X|B] conditional on a Bool random variable
IMPORTANT: conditional expectation for zero probability event is zero -/
/--
Expected value 𝔼[X|B] conditional on a Bool random variable.
IMPORTANT: the conditional expectation for a zero-probability event is zero.
-/
noncomputable
def expect_cnd (X : Finrv P ρ) (B : Finrv P Bool) : ρ :=
let F : Finrv P ρ := ⟨fun ω ↦ 𝕀 B.val ω * X.val ω⟩
@@ -113,7 +115,7 @@ notation "𝔼[" X "|ᵥ" Y "]" => expect_cnd_rv X Y

/-- Conditional version of the Law of the unconscious statistician -/
theorem unconscious_statistician_cnd (X : Finrv P ρ) (Y : Finrv P V) :
∀ ω ∈ P.Ω, Finrv.val 𝔼[X |ᵥ Y ] ω = ∑ y ∈ V, ℙ[ Y ᵣ== (Y.val ω) ]* 𝔼[X | Y ᵣ== (Y.val ω) ] :=
∀ ω ∈ P.Ω, (𝔼[X |ᵥ Y ]).val ω = ∑ y ∈ V, ℙ[Y ᵣ== (Y.val ω)]* 𝔼[X | Y ᵣ== (Y.val ω)] :=
sorry


@@ -125,12 +127,20 @@ theorem total_expectation (X : Finrv P ρ) (Y : Finrv P V) :

/- ---------------------- Supporting Results -----------------/


/-- Construct a dirac distribution -/
def dirac_ofsingleton (t : τ) : Findist {t} :=
let p := fun _ ↦ 1
{p := p, sumsto := Finset.sum_singleton p t}


/--
The product of a probability distribution with a dependent probability
distribution is a probability distribution.
-/
lemma prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
(h1 : ∑ t₁ ∈ T₁, f t₁ = 1) (h2 : ∀ t₁ ∈ T₁, ∑ t₂ ∈ T₂, g t₁ t₂ = 1) :
(h1 : ∑ t₁ ∈ T₁, f t₁ = 1)
(h2 : ∀ t₁ ∈ T₁, ∑ t₂ ∈ T₂, g t₁ t₂ = 1) :
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁) * (g t₁ t₂) = 1 :=
calc
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁)*(g t₁ t₂)
@@ -139,21 +149,14 @@ lemma prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
_ = ∑ t₁ ∈ T₁, (f t₁) := by simp_all [Finset.sum_congr, congrArg]
_ = 1 := h1

/-- Construct a dirac distribution -/
def dirac_ofsingleton (t : τ) : Findist {t} :=
let p := fun _ ↦ 1
{p := p, sumsto := Finset.sum_singleton p t}

/--
Probability distribution as a product of a probability distribution and a dependent probability
distribution.
-/
Probability distribution as a product of a probability distribution and a
dependent probability distribution. -/
def product_dep {Ω₁ : Finset τ₁}
(P₁ : Findist Ω₁) (Ω₂ : Finset τ₂) (p : τ₁ → τ₂ → ℝ≥0)
(h1: ∀ ω₁ ∈ Ω₁, (∑ ω₂ ∈ Ω₂, p ω₁ ω₂) = 1) :
Findist (Ω₁ ×ˢ Ω₂) :=
{p := fun ⟨ω₁,ω₂⟩ ↦
P₁.p ω₁ * p ω₁ ω₂,
{p := fun ⟨ω₁,ω₂⟩ ↦ P₁.p ω₁ * p ω₁ ω₂,
sumsto := prob_prod_prob P₁.p p P₁.sumsto h1}

/--
@@ -164,8 +167,7 @@ def product_dep_pr {Ω₁ : Finset τ₁}
(P₁ : Findist Ω₁) (Ω₂ : Finset τ₂) (Q : τ₁ → Findist Ω₂) : Findist (Ω₁ ×ˢ Ω₂) :=
let g ω₁ ω₂ := (Q ω₁).p ω₂
have h1 : ∀ ω₁ ∈ Ω₁, ∑ ω₂ ∈ Ω₂, g ω₁ ω₂ = 1 := fun ω₁ _ ↦ (Q ω₁).sumsto
{p := fun ⟨ω₁,ω₂⟩ ↦
P₁.p ω₁ * (Q ω₁).p ω₂,
{p := fun ⟨ω₁,ω₂⟩ ↦ P₁.p ω₁ * (Q ω₁).p ω₂,
sumsto := prob_prod_prob P₁.p (fun ω₁ => (Q ω₁).p) (P₁.sumsto) h1}


@@ -185,14 +187,8 @@ lemma embed_preserve (T₁ : Finset τ₁) (p : τ₁ → ℝ≥0) (f : τ₁
-- see embed_preserve
/-- Embed the probability in a new space using e. Needs an inverse -/
def embed {Ω₁ : Finset τ₁} (P : Findist Ω₁) (e : τ₁ ↪ τ₂) (e_linv : τ₂ → τ₁)
(h : Function.LeftInverse e_linv e):
Findist (Ω₁.map e) :=
(h : Function.LeftInverse e_linv e): Findist (Ω₁.map e) :=
{p := fun t₂ ↦ (P.p∘e_linv) t₂,
sumsto := Eq.trans (embed_preserve Ω₁ P.p e e_linv h) P.sumsto}

end Finprob

example : (1:ℝ) / (0:ℝ) = (0:ℝ) := div_zero (1:ℝ)
example : (0:ℚ) * (0:ℚ) = (0:ℚ) := Rat.zero_mul 0
example : (0:ℚ) / (0:ℚ) = (0:ℚ) := zero_div 0
example : 0 / 0 = 0 := rfl
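
An editorial illustration (not part of this commit) of the Dirac construction above, assuming the `Findist` structure with the `p` and `sumsto` fields used throughout this file is in scope:

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- The Dirac distribution on the singleton {t} assigns mass 1 to its only
-- element, so its probability-mass function is constantly 1.
example (t : ℕ) : (dirac_ofsingleton t).p t = 1 := rfl
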
82 changes: 44 additions & 38 deletions LeanMDPs/Histories.lean
@@ -46,56 +46,56 @@ structure MDP (σ α : Type) : Type where
/-- reward function s, a, s' -/
r : σ → α → σ → ℝ -- TODO: change to S → A → S → ℝ

variable {m : MDP σ α}
variable {M : MDP σ α}

/-- Represents a history. The state is type σ and action is type α. -/
inductive Hist {σ α : Type} (m : MDP σ α) : Type where
| init : σ → Hist m
| prev : Hist m → α → σ → Hist m
inductive Hist {σ α : Type} (M : MDP σ α) : Type where
| init : σ → Hist M
| prev : Hist M → α → σ → Hist M

/-- The length of the history corresponds to the zero-based step of the decision -/
def Hist.length : Hist m → ℕ
def Hist.length : Hist M → ℕ
| init _ => 0
| prev h _ _ => 1 + length h

/-- Nonempty histories -/
abbrev HistNE {σ α : Type} (m : MDP σ α) : Type := {h : Hist m // h.length ≥ 1}

/-- Returns the last state of the history -/
def Hist.last : Hist m → σ
def Hist.last : Hist M → σ
| init s => s
| prev _ _ s => s

/-- Appends the state and action to the history --/
def Hist.append (h : Hist m) (as : α × σ) : Hist m :=
def Hist.append (h : Hist M) (as : α × σ) : Hist M :=
Hist.prev h as.fst as.snd

def tuple2hist : Hist m × α × σ → HistNE m
def tuple2hist : Hist M × α × σ → HistNE M
| ⟨h, as⟩ => ⟨h.append as, Nat.le.intro rfl⟩

def hist2tuple : HistNE m → Hist m × α × σ
def hist2tuple : HistNE M → Hist M × α × σ
| ⟨Hist.prev h a s, _ ⟩ => ⟨h, a, s⟩

/-- Proves that history append has a left inverse. -/
lemma linv_hist2tuple_tuple2hist :
Function.LeftInverse (hist2tuple (m := m)) tuple2hist := fun _ => rfl
Function.LeftInverse (hist2tuple (M := M)) tuple2hist := fun _ => rfl

lemma inj_tuple2hist_l1 : Function.Injective (tuple2hist (m:=m)) :=
lemma inj_tuple2hist_l1 : Function.Injective (tuple2hist (M:=M)) :=
Function.LeftInverse.injective linv_hist2tuple_tuple2hist

lemma inj_tuple2hist :
Function.Injective ((Subtype.val) ∘ (tuple2hist (m:=m))) :=
Function.Injective ((Subtype.val) ∘ (tuple2hist (M:=M))) :=
Function.Injective.comp (Subtype.val_injective) inj_tuple2hist_l1

/-- New history from a tuple. -/
def emb_tuple2hist_l1 : Hist m × α × σ ↪ HistNE m :=
def emb_tuple2hist_l1 : Hist M × α × σ ↪ HistNE M :=
{ toFun := tuple2hist, inj' := inj_tuple2hist_l1 }

def emb_tuple2hist : Hist m × α × σ ↪ Hist m :=
def emb_tuple2hist : Hist M × α × σ ↪ Hist M :=
{ toFun := fun x => tuple2hist x, inj' := inj_tuple2hist }

/-- Checks if pre is the prefix of h. -/
def isprefix : Hist m → Hist m → Prop
def isprefix : Hist M → Hist M → Prop
| Hist.init s₁, Hist.init s₂ => s₁ = s₂
| Hist.init s₁, Hist.prev hp _ _ => isprefix (Hist.init s₁) hp
| Hist.prev _ _ _, Hist.init _ => False
@@ -113,35 +113,37 @@ def PolicyHR (m : MDP σ α) : Type := Hist m → Δ m.A
-- TODO: define also the set of all policies for an MDP

/-- Set of all histories of additional length T that follow history `h`. -/
def Histories (h : Hist m) : ℕ → Finset (Hist m)
def Histories (h : Hist M) : ℕ → Finset (Hist M)
| Nat.zero => {h}
| Nat.succ t => ((Histories h t) ×ˢ m.A ×ˢ m.S).map emb_tuple2hist
| Nat.succ t => ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist

abbrev ℋ : Hist m → ℕ → Finset (Hist m) := Histories
abbrev ℋ : Hist M → ℕ → Finset (Hist M) := Histories

/-- Probability distribution over histories induced by the policy and transition probabilities -/
def HistDist (hₖ : Hist m) (π : PolicyHR m) (T : ℕ) : Δ (ℋ hₖ T) :=
/-- Probability distribution over histories induced by the policy and
transition probabilities -/
def HistDist (h : Hist M) (π : PolicyHR M) (T : ℕ) : Δ (ℋ h T) :=
match T with
| Nat.zero => dirac_ofsingleton hₖ
| Nat.zero => dirac_ofsingleton h
| Nat.succ t =>
let prev := HistDist hₖ π t -- previous history
let prev := HistDist h π t -- previous history
-- probability of the history
let f h (as : α × σ) := ((π h).p as.1 * (m.P h.last as.1).p as.2)
let f h' (as : α × σ) := ((π h').p as.1 * (M.P h'.last as.1).p as.2)
-- the second parameter below is the proof of being in Phist pre t; not used
let sumsto_as (h' : Hist m) _ : ∑ as ∈ m.A ×ˢ m.S, f h' as = 1 :=
prob_prod_prob (π h').p (fun a =>(m.P h'.last a).p )
(π h').sumsto (fun a _ => (m.P h'.last a).sumsto)
let sumsto : ∑ ⟨h,as⟩ ∈ ((Histories hₖ t) ×ˢ m.A ×ˢ m.S), prev.p h * f h as = 1 :=
let sumsto_as (h' : Hist M) _ : ∑ as ∈ M.A ×ˢ M.S, f h' as = 1 :=
prob_prod_prob (π h').p (fun a =>(M.P h'.last a).p )
(π h').sumsto (fun a _ => (M.P h'.last a).sumsto)
let sumsto : ∑ ⟨h',as⟩ ∈ ((Histories h t) ×ˢ M.A ×ˢ M.S), prev.p h' * f h' as = 1 :=
prob_prod_prob prev.p f prev.sumsto sumsto_as
let HAS := ((Histories hₖ t) ×ˢ m.A ×ˢ m.S).map emb_tuple2hist
let p : Hist m → ℝ≥0
let HAS := ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist
let p : Hist M → ℝ≥0
| Hist.init _ => 0 --ignored
| Hist.prev h' a s => prev.p h' * f h' ⟨a,s⟩
let sumsto_fin : ∑ h ∈ HAS, p h = 1 :=
(Finset.sum_map ((Histories hₖ t) ×ˢ m.A ×ˢ m.S) emb_tuple2hist p) ▸ sumsto
let sumsto_fin : ∑ h' ∈ HAS, p h' = 1 :=
(Finset.sum_map ((Histories h t) ×ˢ M.A ×ˢ M.S) emb_tuple2hist p) ▸ sumsto
{p := p, sumsto := sumsto_fin}

abbrev Δℋ (h : Hist m) (π : PolicyHR m) (T : ℕ) : Finprob (Hist m) := ⟨ℋ h T, HistDist h π T⟩
abbrev Δℋ (h : Hist M) (π : PolicyHR M) (T : ℕ) : Finprob (Hist M) :=
⟨ℋ h T, HistDist h π T⟩

/- Computes the probability of a history -/
/-def probability (π : PolicyHR m) : Hist m → ℝ≥0
@@ -150,19 +152,23 @@
-/

/-- Computes the reward of a history -/
def reward : Hist m → ℝ
def reward : Hist M → ℝ
| Hist.init _ => 0
| Hist.prev hp a s' => (m.r hp.last a s') + (reward hp)
| Hist.prev hp a s' => (M.r hp.last a s') + (reward hp)

/-- The probability of a history -/
def ℙₕ (hₖ : Hist m) (π : PolicyHR m) (T : ℕ) (h : ℋ hₖ T) : ℝ≥0 := (Δℋ hₖ π T).2.p h
def prob_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (h' : ℋ h T) : ℝ≥0 := (Δℋ h π T).2.p h'

/- ----------- Expectations ---------------- -/

variable {ρ : Type}
variable [HMul ℝ≥0 ρ ρ] [HMul ℕ ρ ρ] [AddCommMonoid ρ]
variable {ρ : Type} [HMul ℝ≥0 ρ ρ] [HMul ℕ ρ ρ] [AddCommMonoid ρ]

/-- Expectation over histories for a random variable f -/
def 𝔼_ (h : Hist m) (π : PolicyHR m) (T : ℕ) := expect (ρ := ρ) (Δℋ h π T)
def expect_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (X : Hist M → ρ) : ρ :=
have P := Δℋ h π T
expect (⟨X⟩ : Finrv P ρ)

notation "𝔼ₕ[" X "//" h "," π "," T "]" => expect_h h π T X

/- Conditional expectation with future singletons -/
/-theorem hist_tower_property {hₖ : Hist m} {π : PolicyHR m} {t : ℕ} {f : Hist m → ℝ}
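
An editorial sketch (not part of this commit) of how `reward` and the `𝔼ₕ` notation above behave, assuming the definitions and variable block shown in this file are in scope:

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- The reward of the one-step history s₀ --a₀--> s₁ reduces to the single
-- transition reward plus the zero reward of the initial state.
example (s₀ s₁ : σ) (a₀ : α) :
    reward (M := M) (Hist.prev (Hist.init s₀) a₀ s₁) = M.r s₀ a₀ s₁ + 0 := rfl
-- The 𝔼ₕ notation is, by its definition above, literally expect_h h π T X.
example (π : PolicyHR M) (h : Hist M) (T : ℕ) :
    𝔼ₕ[ reward // h, π, T ] = expect_h h π T reward := rfl
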
13 changes: 6 additions & 7 deletions blueprint/src/content.tex
@@ -11,7 +11,7 @@ \section{Probability Spaces and Expectation}

In this section, we define the basic probability concepts on finite sets. Eventually, this section should be replaced by Mathlib's probability theory and measure theory results.

\begin{definition}[A Finite Probability Measure] \label{def:probability-measure}
\begin{definition}[Probability Measure] \label{def:probability-measure}
For a finite set $\Omega$, a finite probability measure is a function $p\colon \Omega \to \Real_+$, written $p \in \Delta(\Omega)$, such that
\[
\sum_{\omega \in \Omega } p(\omega) = 1.
@@ -21,7 +21,7 @@ \section{Probability Spaces and Expectation}
\leanok
\end{definition}

\begin{definition}[A Finite Probability Space] \label{def:probability-space}
\begin{definition}[Probability Space] \label{def:probability-space}
A finite probability space comprises a finite set $\Omega$ along with a finite probability measure as defined in \cref{def:probability-measure}.
\lean{Fin}
\leanok
@@ -32,7 +32,7 @@ \section{Probability Spaces and Expectation}
\[
\E \left[ \tilde{x} \right] := \sum_{\omega \in \Omega } p(\omega ) \cdot \tilde{x}(\omega).
\]
\lean{Finprob.expectP}
\lean{Finprob.expect}
\leanok
\end{definition}
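
As an editorial illustration (not part of the blueprint), a two-point instance of the expectation formula above: take $\Omega = \{\omega_1, \omega_2\}$ with $p(\omega_1) = p(\omega_2) = \tfrac{1}{2}$, $\tilde{x}(\omega_1) = 0$ and $\tilde{x}(\omega_2) = 4$; then
% Editorial illustration, not part of the blueprint.
\[
\E\left[ \tilde{x} \right]
  = \sum_{\omega \in \Omega} p(\omega) \cdot \tilde{x}(\omega)
  = \tfrac{1}{2} \cdot 0 + \tfrac{1}{2} \cdot 4
  = 2.
\]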

@@ -50,14 +50,13 @@ \section{Probability Spaces and Expectation}
\end{definition}

\begin{definition}[Probability]
The probability of $\tilde{x}\colon \Omega \to \mathcal{X} \subseteq \Real$ on a probability space $(\Omega, p)$ is defined as
The probability of $\tilde{b}\colon \Omega \to \mathcal{B}$ on a probability space $P = (\Omega, p)$ is defined as
\[
\P \left[ \tilde{x} \right] := \sum_{\omega \in \Omega } p(\omega) \cdot \tilde{c}(\omega),
\P \left[ \tilde{b} \right] := \E\left[\mathbb{I}\left\{ \tilde{b} \right\}\right],
\]
where $x / 0 = 0$ for each $x\in \Real$ (see \texttt{div\_zero}).
\end{definition}
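
For instance (an editorial note, not part of the blueprint), for a fair-coin indicator $\tilde{b}$ the probability is recovered as the expectation of its indicator:
% Editorial illustration, not part of the blueprint: a fair-coin indicator.
\[
\P\left[ \tilde{b} \right]
  = \E\left[ \mathbb{I}\left\{ \tilde{b} \right\} \right]
  = \tfrac{1}{2} \cdot 1 + \tfrac{1}{2} \cdot 0
  = \tfrac{1}{2}.
\]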


\begin{definition}[Conditional Expectation]
The conditional expectation of a random variable $\tilde{x} \colon \Omega \to \Real$ given an indicator $\tilde{c} \colon \Omega \to \left\{ 0, 1 \right\}$, both defined on a probability space $(\Omega, p)$, is defined as
\[
@@ -76,7 +75,7 @@ \section{Probability Spaces and Expectation}
\frac{1}{\P[\tilde{y} = \tilde{y}(\omega)]} \sum_{\omega' \in \Omega } p(\omega') \cdot \tilde{x}(\omega') \cdot \mathbb{I}\left\{ \tilde{y}(\omega') = \tilde{y}(\omega) \right\}, \quad \forall \omega \in \Omega.
\]
where $x / 0 = 0$ for each $x\in \Real$ (see \texttt{div\_zero}).
\lean{Finprob.expect_cnd_rv_P}
\lean{Finprob.expect_cnd_rv}
\leanok
\end{definition}
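
A small worked instance of the conditional expectation above (editorial, not part of the blueprint): take $\Omega = \{1,2,3,4\}$ with the uniform measure $p(\omega) = \tfrac{1}{4}$, $\tilde{x}(\omega) = \omega$, and $\tilde{y}(\omega) = \mathbb{I}\{\omega \text{ is even}\}$. For an even $\omega$ we have $\P[\tilde{y} = \tilde{y}(\omega)] = \tfrac{1}{2}$, and the right-hand side evaluates to
% Editorial illustration, not part of the blueprint.
\[
\frac{1}{\P[\tilde{y} = \tilde{y}(\omega)]}
  \sum_{\omega' \in \Omega } p(\omega') \cdot \tilde{x}(\omega') \cdot
  \mathbb{I}\left\{ \tilde{y}(\omega') = \tilde{y}(\omega) \right\}
  = \frac{1}{1/2} \left( \tfrac{1}{4} \cdot 2 + \tfrac{1}{4} \cdot 4 \right)
  = 3.
\]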

