diff --git a/palm_rlhf_pytorch/__init__.py b/palm_rlhf_pytorch/__init__.py index ecfa3a1..6399eda 100644 --- a/palm_rlhf_pytorch/__init__.py +++ b/palm_rlhf_pytorch/__init__.py @@ -1,3 +1,5 @@ from palm_rlhf_pytorch.palm import PaLM -from palm_rlhf_pytorch.reward import RewardModel from palm_rlhf_pytorch.ppo import RLHFTrainer, ActorCritic + +from palm_rlhf_pytorch.reward import RewardModel +from palm_rlhf_pytorch.implicit_process_reward import ImplicitPRM diff --git a/setup.py b/setup.py index cc014a4..233bc98 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 'PaLM-rlhf-pytorch', packages = find_packages(exclude=[]), - version = '0.3.1', + version = '0.3.2', license='MIT', description = 'PaLM + Reinforcement Learning with Human Feedback - Pytorch', author = 'Phil Wang',