Commit 4e392bf: history tweaks

Marek Petrik committed Jan 4, 2025
1 parent 084c75c

Showing 4 changed files with 183 additions and 80 deletions.
LeanMDPs/Expected.lean (2 changes: 0 additions & 2 deletions)
@@ -43,5 +43,3 @@ theorem dp_correct_vf (π : PolicyHR M) (T : ℕ) (h : Hist M) :
| Nat.succ t =>
let hp h' := dp_correct_vf π t h'
sorry
--calc
-- value_π π T h =
LeanMDPs/Finprob.lean (12 changes: 6 additions & 6 deletions)
@@ -224,15 +224,15 @@ variable {T₁ : Finset τ₁} {T₂ : Finset τ₂}

/-- Product of a probability distribution with a dependent probability
distribution is a probability distribution. -/
lemma prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
theorem prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
(h1 : ∑ t₁ ∈ T₁, f t₁ = 1)
(h2 : ∀ t₁ ∈ T₁, ∑ t₂ ∈ T₂, g t₁ t₂ = 1) :
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁) * (g t₁ t₂) = 1 :=
calc
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁)*(g t₁ t₂)
= ∑ t₁ ∈ T₁, ∑ t₂ ∈ T₂, (f t₁)*(g t₁ t₂) := by simp [Finset.sum_product]
_ = ∑ t₁ ∈ T₁, (f t₁) * (∑ t₂ ∈ T₂, (g t₁ t₂)) := by simp [Finset.sum_congr, Finset.mul_sum]
_ = ∑ t₁ ∈ T₁, (f t₁) := by simp_all [Finset.sum_congr, congrArg]
calc
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), f t₁ * g t₁ t₂
= ∑ t₁ ∈ T₁, ∑ t₂ ∈ T₂, f t₁ * g t₁ t₂ := by simp only [Finset.sum_product]
_ = ∑ t₁ ∈ T₁, f t₁ * (∑ t₂ ∈ T₂, (g t₁ t₂)) := by simp only [Finset.mul_sum]
_ = ∑ t₁ ∈ T₁, f t₁ := by simp_all only [mul_one]
_ = 1 := h1
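
Read informally (a paraphrase of the statement, not part of this commit), the calc chain shows that a mixture of probability distributions is again a probability distribution:

  ∑ (t₁,t₂) ∈ T₁ ×ˢ T₂, f t₁ * g t₁ t₂
    = ∑ t₁ ∈ T₁, f t₁ * (∑ t₂ ∈ T₂, g t₁ t₂)  -- sum over the product, factor f t₁ out of the inner sum
    = ∑ t₁ ∈ T₁, f t₁ * 1                     -- each inner sum equals 1 by h2
    = 1                                        -- by h1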

LeanMDPs/Histories.lean (128 changes: 86 additions & 42 deletions)
@@ -42,20 +42,27 @@ structure MDP (σ α : Type) : Type where
P : σ → α → Δ S -- TODO : change to S → A → Δ S
/-- reward function s, a, s' -/
r : σ → α → σ → ℝ -- TODO: change to S → A → S → ℝ

end Definitions

variable {M : MDP σ α}

section Histories

/-- Represents a history. The state has type σ and the action has type α. -/
inductive Hist {σ α : Type} (M : MDP σ α) : Type where
| init : σ → Hist M
| prev : Hist M → α → σ → Hist M

/-- Coerces a state to a history -/
instance : Coe σ (Hist M) where
coe s := Hist.init s

/-- The length of the history corresponds to the zero-based step of the decision -/
def Hist.length : Hist M → ℕ
| init _ => 0
| prev h _ _ => 1 + length h


/-- Nonempty histories -/
abbrev HistNE {σ α : Type} (m : MDP σ α) : Type := {h : Hist m // h.length ≥ 1}

@@ -70,27 +77,31 @@ def Hist.append (h : Hist M) (as : α × σ) : Hist M :=

def tuple2hist : Hist M × α × σ → HistNE M
| ⟨h, as⟩ => ⟨h.append as, Nat.le.intro rfl⟩

def hist2tuple : HistNE M → Hist M × α × σ
| ⟨Hist.prev h a s, _ ⟩ => ⟨h, a, s⟩

open Function

/-- Proves that history append has a left inverse. -/
lemma linv_hist2tuple_tuple2hist :
Function.LeftInverse (hist2tuple (M := M)) tuple2hist := fun _ => rfl

lemma inj_tuple2hist_l1 : Function.Injective (tuple2hist (M:=M)) :=
Function.LeftInverse.injective linv_hist2tuple_tuple2hist
LeftInverse (hist2tuple (M := M)) tuple2hist := fun _ => rfl
lemma inj_tuple2hist_l1 : Injective (tuple2hist (M:=M)) :=
LeftInverse.injective linv_hist2tuple_tuple2hist
lemma inj_tuple2hist : Injective ((Subtype.val) ∘ (tuple2hist (M:=M))) :=
Injective.comp (Subtype.val_injective) inj_tuple2hist_l1

def emb_tuple2hist_l1 : Hist M × α × σ ↪ HistNE M := ⟨tuple2hist, inj_tuple2hist_l1⟩
def emb_tuple2hist : Hist M × α × σ ↪ Hist M := ⟨λ x ↦ tuple2hist x, inj_tuple2hist⟩

--- state
def state2hist (s : σ) : Hist M := Hist.init s
def hist2state : Hist M → σ | Hist.init s => s | Hist.prev _ _ s => s

lemma linv_hist2state_state2hist : LeftInverse (hist2state (M:=M)) state2hist := fun _ => rfl
lemma inj_state2hist : Injective (state2hist (M:=M)) :=
LeftInverse.injective linv_hist2state_state2hist

def state2hist_emb : σ ↪ Hist M := ⟨state2hist, inj_state2hist⟩

lemma inj_tuple2hist :
Function.Injective ((Subtype.val) ∘ (tuple2hist (M:=M))) :=
Function.Injective.comp (Subtype.val_injective) inj_tuple2hist_l1

/-- New history from a tuple. -/
def emb_tuple2hist_l1 : Hist M × α × σ ↪ HistNE M :=
{ toFun := tuple2hist, inj' := inj_tuple2hist_l1 }

def emb_tuple2hist : Hist M × α × σ ↪ Hist M :=
{ toFun := fun x => tuple2hist x, inj' := inj_tuple2hist }

/-- Checks if pre is the prefix of h. -/
def isprefix : Hist M → Hist M → Prop
@@ -106,19 +117,41 @@ def isprefix : Hist M → Hist M → Prop
else
(a₁ = a₂) ∧ (s₁' = s₂') ∧ (isprefix h₁ h₂)

/-- All histories of additional length T that follow history h -/
def Histories (h : Hist M) : ℕ → Finset (Hist M)
| Nat.zero => {h}
| Nat.succ t => ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist

abbrev ℋ : Hist M → ℕ → Finset (Hist M) := Histories

/-- All histories of a given length -/
def HistoriesHorizon : ℕ → Finset (Hist M)
| Nat.zero => M.S.map state2hist_emb
| Nat.succ t => ((HistoriesHorizon t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist

abbrev ℋₜ : ℕ → Finset (Hist M) := HistoriesHorizon

--theorem Histories

end Histories
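
As an informal sanity check (not proved in this commit), mapping along the embedding emb_tuple2hist preserves cardinality, so Finset.card_map and Finset.card_product give

  |ℋ h 0| = 1,      |ℋ h (t+1)| = |ℋ h t| * |M.A| * |M.S|
  |ℋₜ 0| = |M.S|,   |ℋₜ (t+1)| = |ℋₜ t| * |M.A| * |M.S|

so ℋ h T holds (|M.A| * |M.S|)^T histories extending h by exactly T decision steps.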

section Policies

/-- Decision rule -/
def DecisionRule (M : MDP σ α) := σ → M.A

abbrev 𝒟 : MDP σ α → Type := DecisionRule

/-- A randomized history-dependent policy -/
def PolicyHR (M : MDP σ α) : Type := Hist M → Δ M.A
abbrev Phr : MDP σ α → Type := PolicyHR

/-- A deterministic Markov policy -/
def PolicyMD (M : MDP σ α) : Type := ℕ → σ → M.A
def PolicyMD (M : MDP σ α) : Type := ℕ → DecisionRule M
abbrev Pmd : MDP σ α → Type := PolicyMD

/-- A deterministic stationary policy -/
def PolicySD (M : MDP σ α) : Type := σ → M.A
def PolicySD (M : MDP σ α) : Type := DecisionRule M
abbrev Psd : MDP σ α → Type := PolicySD

instance [DecidableEq α] : Coe (PolicySD M) (PolicyHR M) where
@@ -127,35 +160,31 @@ instance [DecidableEq α] : Coe (PolicySD M) (PolicyHR M) where
instance [DecidableEq α] : Coe (PolicyMD M) (PolicyHR M) where
coe d := fun h ↦ dirac_dist M.A (d h.length h.last)

/-- Set of all histories of additional length T that follow history `h`. -/
def Histories (h : Hist M) : ℕ → Finset (Hist M)
| Nat.zero => {h}
| Nat.succ t => ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist
end Policies

abbrev ℋ : Hist M → ℕ → Finset (Hist M) := Histories
section Distribution

/-- Probability distribution over histories induced by the policy and
transition probabilities -/
def HistDist (h : Hist M) (π : PolicyHR M) (T : ℕ) : Δ (ℋ h T) :=
match T with
| Nat.zero => dirac_ofsingleton h
| Nat.succ t =>
let prev := HistDist h π t -- previous history
-- probability of the history
let f h' (as : α × σ) := ((π h').p as.1 * (M.P h'.last as.1).p as.2)
-- the second parameter below is the proof of being in Phist pre t; not used
let sumsto_as (h' : Hist M) _ : ∑ as ∈ M.A ×ˢ M.S, f h' as = 1 :=
let prev := HistDist h π t
let update h' (as : α × σ) := ((π h').p as.1 * (M.P h'.last as.1).p as.2)
let probability : Hist M → ℝ≥0
| Hist.init _ => 0 --ignored
| Hist.prev h' a s => prev.p h' * update h' ⟨a,s⟩
-- proof of probability
let sumsto_as (h' : Hist M) _ : ∑ as ∈ M.A ×ˢ M.S, update h' as = 1 :=
prob_prod_prob (π h').p (fun a =>(M.P h'.last a).p )
(π h').sumsto (fun a _ => (M.P h'.last a).sumsto)
let sumsto : ∑ ⟨h',as⟩ ∈ ((Histories h t) ×ˢ M.A ×ˢ M.S), prev.p h' * f h' as = 1 :=
prob_prod_prob prev.p f prev.sumsto sumsto_as
let HAS := ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist
let p : Hist M → ℝ≥0
| Hist.init _ => 0 --ignored
| Hist.prev h' a s => prev.p h' * f h' ⟨a,s⟩
have sumsto_fin : ∑ h' ∈ HAS, p h' = 1 :=
(Finset.sum_map ((Histories h t) ×ˢ M.A ×ˢ M.S) emb_tuple2hist p) ▸ sumsto
⟨p, sumsto_fin⟩
let sumsto : ∑ ⟨h',as⟩ ∈ ((ℋ h t) ×ˢ M.A ×ˢ M.S), prev.p h' * update h' as = 1 :=
prob_prod_prob prev.p update prev.sumsto sumsto_as
let HAS := ((ℋ h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist
have sumsto_fin : ∑ h' ∈ HAS, probability h' = 1 :=
(Finset.sum_map ((ℋ h t) ×ˢ M.A ×ˢ M.S) emb_tuple2hist probability) ▸ sumsto
⟨probability, sumsto_fin⟩
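
Unfolded (an informal reading of the definition above, not a lemma in the commit), the induced distribution satisfies the recursion

  p₀ h = 1
  pₜ₊₁ (Hist.prev h' a s) = pₜ h' * (π h').p a * (M.P h'.last a).p s   for h' ∈ ℋ h t, a ∈ M.A, s ∈ M.S

with histories not of this form receiving probability 0. Normalization combines the sum-to-one proofs of pₜ, π h', and M.P h'.last a via prob_prod_prob, and the resulting sum over ℋ h (t+1) is transported along emb_tuple2hist with Finset.sum_map.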

abbrev Δℋ (h : Hist M) (π : PolicyHR M) (T : ℕ) : Finprob (Hist M) :=
⟨ℋ h T, HistDist h π T⟩
@@ -174,19 +203,34 @@ def reward : Hist M → ℝ
/-- The probability of a history -/
def prob_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (h' : ℋ h T) : ℝ≥0 := (Δℋ h π T).2.p h'

--theorem hdist_eq_prod_pi_P (h : Hist M) (π : Π)

/- ----------- Expectations ---------------- -/


--variable {ρ : Type} [HMulZero ρ] [HMul ℕ ρ ρ] [AddCommMonoid ρ]

/-- Expectation over histories for a r.v. X for horizon T and policy π -/
def expect_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (X : Hist M → ℝ) : ℝ :=
have P := Δℋ h π T
expect (⟨X⟩ : Finrv P ℝ)

notation "𝔼ₕ[" X "//" h "," π "," T "]" => expect_h h π T X
notation "𝔼ₕ[" X "//" h "," π "," t "]" => expect_h h π t X

/-- The k-th state of a history -/
def state [Inhabited σ] (k : ℕ) (h : Hist M) : σ :=
match h with
| Hist.init s => if h.length = k then s else Inhabited.default
| Hist.prev h' _ s => if h.length = k then s else state k h'


/-- The k-th action of a history -/
def action [Inhabited α] (k : ℕ) (h : Hist M) : α :=
match h with
| Hist.init _ => Inhabited.default
| Hist.prev h' a _ => if h.length = k then a else action k h'
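
A small worked example with hypothetical states s₀, s₁, s₂ and actions a₀, a₁: for h = Hist.prev (Hist.prev (Hist.init s₀) a₀ s₁) a₁ s₂ we get h.length = 2 and

  state 0 h = s₀,  state 1 h = s₁,  state 2 h = s₂
  action 1 h = a₀, action 2 h = a₁

while action 0 h, and state k h for k > h.length, fall back to Inhabited.default, since no action precedes the initial state.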


#check List ℕ

end Distribution

/- Conditional expectation with future singletons -/
/-theorem hist_tower_property {hₖ : Hist m} {π : PolicyHR m} {t : ℕ} {f : Hist m → ℝ}