From 6adb65b493e7fdb54cd4e9d9243471f4322073ba Mon Sep 17 00:00:00 2001 From: antoine_galataud Date: Mon, 22 Apr 2024 14:34:57 +0200 Subject: [PATCH] OPS with PDIS/SNPDIS --- tests/test_evaluation.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 62c12e8..fd51ec6 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -1,13 +1,6 @@ import unittest -import numpy as np - -from hopes.ope.estimators import ( - InverseProbabilityWeighting, - SelfNormalizedInverseProbabilityWeighting, - SelfNormalizedTrajectoryWiseImportanceSampling, - TrajectoryWiseImportanceSampling, -) +from hopes.ope.estimators import * from hopes.ope.evaluation import OffPolicyEvaluation from hopes.ope.selection import OffPolicySelection from hopes.policy import ClassificationBasedPolicy, RandomPolicy @@ -66,6 +59,7 @@ def test_ops(self): obs = np.random.rand(num_samples, num_obs) act = np.random.randint(num_actions, size=num_samples) rew = np.random.normal(10, 2.0, num_samples) + gamma = 0.99 # create the behavior policy behavior_policy = ClassificationBasedPolicy( @@ -86,10 +80,16 @@ def test_ops(self): InverseProbabilityWeighting(), SelfNormalizedInverseProbabilityWeighting(), TrajectoryWiseImportanceSampling( - steps_per_episode=steps_per_episode, discount_factor=0.99 + steps_per_episode=steps_per_episode, discount_factor=gamma ), SelfNormalizedTrajectoryWiseImportanceSampling( - steps_per_episode=steps_per_episode, discount_factor=0.99 + steps_per_episode=steps_per_episode, discount_factor=gamma + ), + PerDecisionImportanceSampling( + steps_per_episode=steps_per_episode, discount_factor=gamma + ), + SelfNormalizedPerDecisionImportanceSampling( + steps_per_episode=steps_per_episode, discount_factor=gamma ), ]