Commit

rename and fix

Marek Petrik committed Dec 28, 2024
1 parent 1c90249 commit 564761c
Showing 4 changed files with 82 additions and 82 deletions.
27 changes: 13 additions & 14 deletions LeanMDPs/Expected.lean
@@ -5,11 +5,11 @@ import LeanMDPs.Histories

open NNReal

variable {σ α : Type}
variable [Inhabited σ] [Inhabited α]
variable [DecidableEq σ] [DecidableEq α]

variable {m : MDP σ α}
/- state -/
variable {σ : Type} [Inhabited σ] [DecidableEq σ]
/- action -/
variable {α : Type} [Inhabited α] [DecidableEq α]
variable {M : MDP σ α}

open Finprob

@@ -19,25 +19,24 @@ Value function type for value functions that are
- for a specific policy
- and a horizon T
-/
def ValueH (m : MDP σ α) : Type := Hist m → ℝ
def ValueH (M : MDP σ α) : Type := Hist M → ℝ

/-- Bellman operator on history-dependent value functions -/
def DPhπ (π : PolicyHR m) (vₜ : ValueH m) : ValueH m
| h => ∑ a ∈ m.A, ∑ s' ∈ m.S,
((π h).p a * (m.P h.last a).p s') * (m.r h.last a s' + vₜ h)

def DPhπ (π : PolicyHR M) (vₜ : ValueH M) : ValueH M
| h => ∑ a ∈ M.A, ∑ s' ∈ M.S,
((π h).p a * (M.P h.last a).p s') * (M.r h.last a s' + vₜ h)

/-- Finite-horizon value function definition, history dependent -/
def value_π (π : PolicyHR m) : ℕ → ValueH m
def value_π (π : PolicyHR M) : ℕ → ValueH M
| Nat.zero => fun _ ↦ 0
| Nat.succ t => fun h ↦ 𝔼_ h π t.succ reward
| Nat.succ t => fun h ↦ 𝔼ₕ[ reward // h, π, t.succ ]

/-- Dynamic program value function, finite-horizon history dependent -/
def value_dp_π (π : PolicyHR m) : ℕ → ValueH m
def value_dp_π (π : PolicyHR M) : ℕ → ValueH M
| Nat.zero => fun _ ↦ 0
| Nat.succ t => DPhπ π (value_dp_π π t)

theorem dp_correct_vf (π : PolicyHR m) (T : ℕ) (h : Hist m) :
theorem dp_correct_vf (π : PolicyHR M) (T : ℕ) (h : Hist M) :
value_π π T h = value_dp_π π T h :=
match T with
| Nat.zero => rfl
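
As an editorial aside (not part of this commit), here is a minimal sketch of how the dynamic program above unfolds, assuming the `value_dp_π` and `DPhπ` definitions and the variable block shown above are in scope; both equalities are intended to hold definitionally (`rfl`), or by `simp [value_dp_π]` if needed.

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- At horizon zero the DP value is identically zero, and one step of the
-- recursion is exactly an application of the Bellman operator DPhπ.
example (π : PolicyHR M) (h : Hist M) : value_dp_π π 0 h = 0 := rfl
example (π : PolicyHR M) (t : ℕ) :
    value_dp_π π (t + 1) = DPhπ π (value_dp_π π t) := rfl
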
42 changes: 19 additions & 23 deletions LeanMDPs/FinPr.lean
@@ -87,8 +87,10 @@ def probability (B : Finrv P Bool) : ℝ≥0 :=

notation "ℙ[" B "]" => probability B

/-- Expected value 𝔼[X|B] conditional on a Bool random variable
IMPORTANT: conditional expectation for zero probability event is zero -/
/--
Expected value 𝔼[X|B] conditional on a Bool random variable.
IMPORTANT: the conditional expectation for a zero-probability event is zero.
-/
noncomputable
def expect_cnd (X : Finrv P ρ) (B : Finrv P Bool) : ρ :=
let F : Finrv P ρ := ⟨fun ω ↦ 𝕀 B.val ω * X.val ω⟩
@@ -113,7 +115,7 @@ notation "𝔼[" X "|ᵥ" Y "]" => expect_cnd_rv X Y

/-- Conditional version of the Law of the unconscious statistician -/
theorem unconscious_statistician_cnd (X : Finrv P ρ) (Y : Finrv P V) :
∀ ω ∈ P.Ω, Finrv.val 𝔼[X |ᵥ Y ] ω = ∑ y ∈ V, ℙ[ Y ᵣ== (Y.val ω) ]* 𝔼[X | Y ᵣ== (Y.val ω) ] :=
∀ ω ∈ P.Ω, (𝔼[X |ᵥ Y ]).val ω = ∑ y ∈ V, ℙ[Y ᵣ== (Y.val ω)]* 𝔼[X | Y ᵣ== (Y.val ω)] :=
sorry


@@ -125,12 +127,20 @@ theorem total_expectation (X : Finrv P ρ) (Y : Finrv P V) :

/- ---------------------- Supporting Results -----------------/


/-- Construct a dirac distribution -/
def dirac_ofsingleton (t : τ) : Findist {t} :=
let p := fun _ ↦ 1
{p := p, sumsto := Finset.sum_singleton p t}


/--
The product of a probability distribution with a dependent probability
distribution is a probability distribution.
-/
lemma prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
(h1 : ∑ t₁ ∈ T₁, f t₁ = 1) (h2 : ∀ t₁ ∈ T₁, ∑ t₂ ∈ T₂, g t₁ t₂ = 1) :
(h1 : ∑ t₁ ∈ T₁, f t₁ = 1)
(h2 : ∀ t₁ ∈ T₁, ∑ t₂ ∈ T₂, g t₁ t₂ = 1) :
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁) * (g t₁ t₂) = 1 :=
calc
∑ ⟨t₁,t₂⟩ ∈ (T₁ ×ˢ T₂), (f t₁)*(g t₁ t₂)
@@ -139,21 +149,14 @@ lemma prob_prod_prob (f : τ₁ → ℝ≥0) (g : τ₁ → τ₂ → ℝ≥0)
_ = ∑ t₁ ∈ T₁, (f t₁) := by simp_all [Finset.sum_congr, congrArg]
_ = 1 := h1

/-- Construct a dirac distribution -/
def dirac_ofsingleton (t : τ) : Findist {t} :=
let p := fun _ ↦ 1
{p := p, sumsto := Finset.sum_singleton p t}

/--
Probability distribution as a product of a probability distribution and a dependent probability
distribution.
-/
Probability distribution as a product of a probability distribution and a
dependent probability distribution. -/
def product_dep {Ω₁ : Finset τ₁}
(P₁ : Findist Ω₁) (Ω₂ : Finset τ₂) (p : τ₁ → τ₂ → ℝ≥0)
(h1: ∀ ω₁ ∈ Ω₁, (∑ ω₂ ∈ Ω₂, p ω₁ ω₂) = 1) :
Findist (Ω₁ ×ˢ Ω₂) :=
{p := fun ⟨ω₁,ω₂⟩ ↦
P₁.p ω₁ * p ω₁ ω₂,
{p := fun ⟨ω₁,ω₂⟩ ↦ P₁.p ω₁ * p ω₁ ω₂,
sumsto := prob_prod_prob P₁.p p P₁.sumsto h1}

/--
@@ -164,8 +167,7 @@ def product_dep_pr {Ω₁ : Finset τ₁}
(P₁ : Findist Ω₁) (Ω₂ : Finset τ₂) (Q : τ₁ → Findist Ω₂) : Findist (Ω₁ ×ˢ Ω₂) :=
let g ω₁ ω₂ := (Q ω₁).p ω₂
have h1 : ∀ ω₁ ∈ Ω₁, ∑ ω₂ ∈ Ω₂, g ω₁ ω₂ = 1 := fun ω₁ _ ↦ (Q ω₁).sumsto
{p := fun ⟨ω₁,ω₂⟩ ↦
P₁.p ω₁ * (Q ω₁).p ω₂,
{p := fun ⟨ω₁,ω₂⟩ ↦ P₁.p ω₁ * (Q ω₁).p ω₂,
sumsto := prob_prod_prob P₁.p (fun ω₁ => (Q ω₁).p) (P₁.sumsto) h1}


@@ -185,14 +187,8 @@ lemma embed_preserve (T₁ : Finset τ₁) (p : τ₁ → ℝ≥0) (f : τ₁
-- see embed_preserve
/-- Embed the probability in a new space using e. Needs an inverse -/
def embed {Ω₁ : Finset τ₁} (P : Findist Ω₁) (e : τ₁ ↪ τ₂) (e_linv : τ₂ → τ₁)
(h : Function.LeftInverse e_linv e):
Findist (Ω₁.map e) :=
(h : Function.LeftInverse e_linv e): Findist (Ω₁.map e) :=
{p := fun t₂ ↦ (P.p∘e_linv) t₂,
sumsto := Eq.trans (embed_preserve Ω₁ P.p e e_linv h) P.sumsto}

end Finprob

example : (1:ℝ) / (0:ℝ) = (0:ℝ) := div_zero (1:ℝ)
example : (0:ℚ) * (0:ℚ) = (0:ℚ) := Rat.zero_mul 0
example : (0:ℚ) / (0:ℚ) = (0:ℚ) := zero_div 0
example : 0 / 0 = 0 := rfl
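
An editorial illustration (not part of this commit) of the Dirac construction above, assuming the `Findist` structure with the `p` and `sumsto` fields used throughout this file is in scope:

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- The Dirac distribution on the singleton {t} assigns mass 1 to its only
-- element, so its probability-mass function is constantly 1.
example (t : ℕ) : (dirac_ofsingleton t).p t = 1 := rfl
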
82 changes: 44 additions & 38 deletions LeanMDPs/Histories.lean
@@ -46,56 +46,56 @@ structure MDP (σ α : Type) : Type where
/-- reward function s, a, s' -/
r : σ → α → σ → ℝ -- TODO: change to S → A → S → ℝ

variable {m : MDP σ α}
variable {M : MDP σ α}

/-- Represents a history. The state is type σ and action is type α. -/
inductive Hist {σ α : Type} (m : MDP σ α) : Type where
| init : σ → Hist m
| prev : Hist m → α → σ → Hist m
inductive Hist {σ α : Type} (M : MDP σ α) : Type where
| init : σ → Hist M
| prev : Hist M → α → σ → Hist M

/-- The length of the history corresponds to the zero-based step of the decision -/
def Hist.length : Hist m → ℕ
def Hist.length : Hist M → ℕ
| init _ => 0
| prev h _ _ => 1 + length h

/-- Nonempty histories -/
abbrev HistNE {σ α : Type} (m : MDP σ α) : Type := {h : Hist m // h.length ≥ 1}

/-- Returns the last state of the history -/
def Hist.last : Hist m → σ
def Hist.last : Hist M → σ
| init s => s
| prev _ _ s => s

/-- Appends the state and action to the history --/
def Hist.append (h : Hist m) (as : α × σ) : Hist m :=
def Hist.append (h : Hist M) (as : α × σ) : Hist M :=
Hist.prev h as.fst as.snd

def tuple2hist : Hist m × α × σ → HistNE m
def tuple2hist : Hist M × α × σ → HistNE M
| ⟨h, as⟩ => ⟨h.append as, Nat.le.intro rfl⟩

def hist2tuple : HistNE m → Hist m × α × σ
def hist2tuple : HistNE M → Hist M × α × σ
| ⟨Hist.prev h a s, _ ⟩ => ⟨h, a, s⟩

/-- Proves that history append has a left inverse. -/
lemma linv_hist2tuple_tuple2hist :
Function.LeftInverse (hist2tuple (m := m)) tuple2hist := fun _ => rfl
Function.LeftInverse (hist2tuple (M := M)) tuple2hist := fun _ => rfl

lemma inj_tuple2hist_l1 : Function.Injective (tuple2hist (m:=m)) :=
lemma inj_tuple2hist_l1 : Function.Injective (tuple2hist (M:=M)) :=
Function.LeftInverse.injective linv_hist2tuple_tuple2hist

lemma inj_tuple2hist :
Function.Injective ((Subtype.val) ∘ (tuple2hist (m:=m))) :=
Function.Injective ((Subtype.val) ∘ (tuple2hist (M:=M))) :=
Function.Injective.comp (Subtype.val_injective) inj_tuple2hist_l1

/-- New history from a tuple. -/
def emb_tuple2hist_l1 : Hist m × α × σ ↪ HistNE m :=
def emb_tuple2hist_l1 : Hist M × α × σ ↪ HistNE M :=
{ toFun := tuple2hist, inj' := inj_tuple2hist_l1 }

def emb_tuple2hist : Hist m × α × σ ↪ Hist m :=
def emb_tuple2hist : Hist M × α × σ ↪ Hist M :=
{ toFun := fun x => tuple2hist x, inj' := inj_tuple2hist }

/-- Checks if pre is the prefix of h. -/
def isprefix : Hist m → Hist m → Prop
def isprefix : Hist M → Hist M → Prop
| Hist.init s₁, Hist.init s₂ => s₁ = s₂
| Hist.init s₁, Hist.prev hp _ _ => isprefix (Hist.init s₁) hp
| Hist.prev _ _ _, Hist.init _ => False
@@ -113,35 +113,37 @@ def PolicyHR (m : MDP σ α) : Type := Hist m → Δ m.A
-- TODO: define also the set of all policies for an MDP

/-- Set of all histories of additional length T that follow history `h`. -/
def Histories (h : Hist m) : ℕ → Finset (Hist m)
def Histories (h : Hist M) : ℕ → Finset (Hist M)
| Nat.zero => {h}
| Nat.succ t => ((Histories h t) ×ˢ m.A ×ˢ m.S).map emb_tuple2hist
| Nat.succ t => ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist

abbrev ℋ : Hist m → ℕ → Finset (Hist m) := Histories
abbrev ℋ : Hist M → ℕ → Finset (Hist M) := Histories

/-- Probability distribution over histories induced by the policy and transition probabilities -/
def HistDist (hₖ : Hist m) (π : PolicyHR m) (T : ℕ) : Δ (ℋ hₖ T) :=
/-- Probability distribution over histories induced by the policy and
transition probabilities -/
def HistDist (h : Hist M) (π : PolicyHR M) (T : ℕ) : Δ (ℋ h T) :=
match T with
| Nat.zero => dirac_ofsingleton hₖ
| Nat.zero => dirac_ofsingleton h
| Nat.succ t =>
let prev := HistDist hₖ π t -- previous history
let prev := HistDist h π t -- previous history
-- probability of the history
let f h (as : α × σ) := ((π h).p as.1 * (m.P h.last as.1).p as.2)
let f h' (as : α × σ) := ((π h').p as.1 * (M.P h'.last as.1).p as.2)
-- the second parameter below is the proof of being in Phist pre t; not used
let sumsto_as (h' : Hist m) _ : ∑ as ∈ m.A ×ˢ m.S, f h' as = 1 :=
prob_prod_prob (π h').p (fun a =>(m.P h'.last a).p )
(π h').sumsto (fun a _ => (m.P h'.last a).sumsto)
let sumsto : ∑ ⟨h,as⟩ ∈ ((Histories hₖ t) ×ˢ m.A ×ˢ m.S), prev.p h * f h as = 1 :=
let sumsto_as (h' : Hist M) _ : ∑ as ∈ M.A ×ˢ M.S, f h' as = 1 :=
prob_prod_prob (π h').p (fun a =>(M.P h'.last a).p )
(π h').sumsto (fun a _ => (M.P h'.last a).sumsto)
let sumsto : ∑ ⟨h',as⟩ ∈ ((Histories h t) ×ˢ M.A ×ˢ M.S), prev.p h' * f h' as = 1 :=
prob_prod_prob prev.p f prev.sumsto sumsto_as
let HAS := ((Histories hₖ t) ×ˢ m.A ×ˢ m.S).map emb_tuple2hist
let p : Hist m → ℝ≥0
let HAS := ((Histories h t) ×ˢ M.A ×ˢ M.S).map emb_tuple2hist
let p : Hist M → ℝ≥0
| Hist.init _ => 0 --ignored
| Hist.prev h' a s => prev.p h' * f h' ⟨a,s⟩
let sumsto_fin : ∑ h ∈ HAS, p h = 1 :=
(Finset.sum_map ((Histories hₖ t) ×ˢ m.A ×ˢ m.S) emb_tuple2hist p) ▸ sumsto
let sumsto_fin : ∑ h' ∈ HAS, p h' = 1 :=
(Finset.sum_map ((Histories h t) ×ˢ M.A ×ˢ M.S) emb_tuple2hist p) ▸ sumsto
{p := p, sumsto := sumsto_fin}

abbrev Δℋ (h : Hist m) (π : PolicyHR m) (T : ℕ) : Finprob (Hist m) := ⟨ℋ h T, HistDist h π T⟩
abbrev Δℋ (h : Hist M) (π : PolicyHR M) (T : ℕ) : Finprob (Hist M) :=
⟨ℋ h T, HistDist h π T⟩

/- Computes the probability of a history -/
/-def probability (π : PolicyHR m) : Hist m → ℝ≥0
@@ -150,19 +152,23 @@
-/

/-- Computes the reward of a history -/
def reward : Hist m → ℝ
def reward : Hist M → ℝ
| Hist.init _ => 0
| Hist.prev hp a s' => (m.r hp.last a s') + (reward hp)
| Hist.prev hp a s' => (M.r hp.last a s') + (reward hp)

/-- The probability of a history -/
def ℙₕ (hₖ : Hist m) (π : PolicyHR m) (T : ℕ) (h : ℋ hₖ T) : ℝ≥0 := (Δℋ hₖ π T).2.p h
def prob_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (h' : ℋ h T) : ℝ≥0 := (Δℋ h π T).2.p h'

/- ----------- Expectations ---------------- -/

variable {ρ : Type}
variable [HMul ℝ≥0 ρ ρ] [HMul ℕ ρ ρ] [AddCommMonoid ρ]
variable {ρ : Type} [HMul ℝ≥0 ρ ρ] [HMul ℕ ρ ρ] [AddCommMonoid ρ]

/-- Expectation over histories for a random variable f -/
def 𝔼_ (h : Hist m) (π : PolicyHR m) (T : ℕ) := expect (ρ := ρ) (Δℋ h π T)
def expect_h (h : Hist M) (π : PolicyHR M) (T : ℕ) (X : Hist M → ρ) : ρ :=
have P := Δℋ h π T
expect (⟨X⟩ : Finrv P ρ)

notation "𝔼ₕ[" X "//" h "," π "," T "]" => expect_h h π T X

/- Conditional expectation with future singletons -/
/-theorem hist_tower_property {hₖ : Hist m} {π : PolicyHR m} {t : ℕ} {f : Hist m → ℝ}
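
An editorial sketch (not part of this commit) of how `reward` and the `𝔼ₕ` notation above behave, assuming the definitions and variable block shown in this file are in scope:

-- Editorial sketch, not part of this commit; assumes the definitions above.
-- The reward of the one-step history s₀ --a₀--> s₁ reduces to the single
-- transition reward plus the zero reward of the initial state.
example (s₀ s₁ : σ) (a₀ : α) :
    reward (M := M) (Hist.prev (Hist.init s₀) a₀ s₁) = M.r s₀ a₀ s₁ + 0 := rfl
-- The 𝔼ₕ notation is, by its definition above, literally expect_h h π T X.
example (π : PolicyHR M) (h : Hist M) (T : ℕ) :
    𝔼ₕ[ reward // h, π, T ] = expect_h h π T reward := rfl
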
13 changes: 6 additions & 7 deletions blueprint/src/content.tex
@@ -11,7 +11,7 @@ \section{Probability Spaces and Expectation}

In this section, we define the basic probability concepts on finite sets. Eventually, this section should be replaced by Mathlib's probability theory and measure theory results.

\begin{definition}[A Finite Probability Measure] \label{def:probability-measure}
\begin{definition}[Probability Measure] \label{def:probability-measure}
For a finite set $\Omega$, a finite probability measure is a function $p\colon \Omega \to \Real_+$, written $p \in \Delta(\Omega)$, such that
\[
\sum_{\omega \in \Omega } p(\omega) = 1.
@@ -21,7 +21,7 @@ \section{Probability Spaces and Expectation}
\leanok
\end{definition}

\begin{definition}[A Finite Probability Space] \label{def:probability-space}
\begin{definition}[Probability Space] \label{def:probability-space}
A finite probability space comprises a finite set $\Omega$ along with a finite probability measure as defined in \cref{def:probability-measure}.
\lean{Fin}
\leanok
@@ -32,7 +32,7 @@ \section{Probability Spaces and Expectation}
\[
\E \left[ \tilde{x} \right] := \sum_{\omega \in \Omega } p(\omega ) \cdot \tilde{x}(\omega).
\]
\lean{Finprob.expectP}
\lean{Finprob.expect}
\leanok
\end{definition}
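
As an editorial illustration (not part of the blueprint), a two-point instance of the expectation formula above: take $\Omega = \{\omega_1, \omega_2\}$ with $p(\omega_1) = p(\omega_2) = \tfrac{1}{2}$, $\tilde{x}(\omega_1) = 0$ and $\tilde{x}(\omega_2) = 4$; then
% Editorial illustration, not part of the blueprint.
\[
\E\left[ \tilde{x} \right]
  = \sum_{\omega \in \Omega} p(\omega) \cdot \tilde{x}(\omega)
  = \tfrac{1}{2} \cdot 0 + \tfrac{1}{2} \cdot 4
  = 2.
\]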

@@ -50,14 +50,13 @@ \section{Probability Spaces and Expectation}
\end{definition}

\begin{definition}[Probability]
The probability of $\tilde{x}\colon \Omega \to \mathcal{X} \subseteq \Real$ on a probability space $(\Omega, p)$ is defined as
The probability of $\tilde{b}\colon \Omega \to \mathcal{B}$ on a probability space $P = (\Omega, p)$ is defined as
\[
\P \left[ \tilde{x} \right] := \sum_{\omega \in \Omega } p(\omega) \cdot \tilde{c}(\omega),
\P \left[ \tilde{b} \right] := \E\left[\mathbb{I}\left\{ \tilde{b} \right\}\right],
\]
where $x / 0 = 0$ for each $x\in \Real$ (see \texttt{div\_zero}).
\end{definition}
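
For instance (an editorial note, not part of the blueprint), for a fair-coin indicator $\tilde{b}$ the probability is recovered as the expectation of its indicator:
% Editorial illustration, not part of the blueprint: a fair-coin indicator.
\[
\P\left[ \tilde{b} \right]
  = \E\left[ \mathbb{I}\left\{ \tilde{b} \right\} \right]
  = \tfrac{1}{2} \cdot 1 + \tfrac{1}{2} \cdot 0
  = \tfrac{1}{2}.
\]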


\begin{definition}[Conditional Expectation]
The conditional expectation of a random variable $\tilde{x} \colon \Omega \to \Real$ given an indicator $\tilde{c} \colon \Omega \to \left\{ 0, 1 \right\}$, both defined on a probability space $(\Omega, p)$, is defined as
\[
@@ -76,7 +75,7 @@ \section{Probability Spaces and Expectation}
\frac{1}{\P[\tilde{y} = \tilde{y}(\omega)]} \sum_{\omega' \in \Omega } p(\omega') \cdot \tilde{x}(\omega') \cdot \mathbb{I}\left\{ \tilde{y}(\omega') = \tilde{y}(\omega) \right\}, \quad \forall \omega \in \Omega.
\]
where $x / 0 = 0$ for each $x\in \Real$ (see \texttt{div\_zero}).
\lean{Finprob.expect_cnd_rv_P}
\lean{Finprob.expect_cnd_rv}
\leanok
\end{definition}
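
A small worked instance of the conditional expectation above (editorial, not part of the blueprint): take $\Omega = \{1,2,3,4\}$ with the uniform measure $p(\omega) = \tfrac{1}{4}$, $\tilde{x}(\omega) = \omega$, and $\tilde{y}(\omega) = \mathbb{I}\{\omega \text{ is even}\}$. For an even $\omega$ we have $\P[\tilde{y} = \tilde{y}(\omega)] = \tfrac{1}{2}$, and the right-hand side evaluates to
% Editorial illustration, not part of the blueprint.
\[
\frac{1}{\P[\tilde{y} = \tilde{y}(\omega)]}
  \sum_{\omega' \in \Omega } p(\omega') \cdot \tilde{x}(\omega') \cdot
  \mathbb{I}\left\{ \tilde{y}(\omega') = \tilde{y}(\omega) \right\}
  = \frac{1}{1/2} \left( \tfrac{1}{4} \cdot 2 + \tfrac{1}{4} \cdot 4 \right)
  = 3.
\]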

