Allow setting the critic PaLM from RLHFTrainer

lucidrains · Mar 12, 2023 · 82fa3d0
1 parent 12f3467
commit 82fa3d0
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 1 deletion.
diff --git a/palm_rlhf_pytorch/ppo.py b/palm_rlhf_pytorch/ppo.py
@@ -291,6 +291,7 @@ def __init__(
         tokenizer: Callable = None,
         palm: PaLM,
         reward_model: RewardModel,
+        critic_palm: Optional[PaLM] = None,
         actor_critic: Optional[ActorCritic] = None,
         actor_lr = 1e-4,
         critic_lr = 1e-4,
@@ -345,6 +346,7 @@ def __init__(
         if not exists(actor_critic):
             actor_critic = ActorCritic(
                 palm = palm,
+                critic_palm = critic_palm,
                 actor_lora = actor_lora,
                 critic_lora = critic_lora,
                 actor_lora_r = actor_lora_r,

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'PaLM-rlhf-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.0.65',
+  version = '0.0.66',
   license='MIT',
   description = 'PaLM + Reinforcement Learning with Human Feedback - Pytorch',
   author = 'Phil Wang',