updated prompt template for unfinished dialogs

sotopia-lab · Oct 25, 2023 · 5312e95 · 5312e95
1 parent 1d8a2f8
commit 5312e95
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 14 deletions.
diff --git a/eval/env_specific_eval.json b/eval/env_specific_eval.json
@@ -3,12 +3,12 @@
     "01H7VFHN5WVC5HKKVBHZBA553R": null,
     "01H7VFHN9W0WAFZCBT09PKJJNK": null,
     "01H7VFHPDZVVCDZR3AARA547CY": "How much money did each person donate? Donation means losing money.",
-    "01H7VFHPQQQY6H4DNC6NBQ8XTG": "How much money did each of the two agents gain or lose? Achieving below their target prices means losing while achieving above their target prices means gaining.",
-    "01H7VFHPS5WJW2694R1MNC8JFY": "How much money did each of the two agents gain or lose. Achieving below their target prices means losing while achieving above their target prices means gaining.",
+    "01H7VFHPQQQY6H4DNC6NBQ8XTG": "How much money did each of the two agents gain or lose, based on their target price. For the seller, achieving below their target prices means losing while achieving above their target prices means gaining. For the buyer, achieving below their target prices means gaining while achieving above their target prices means losing.",
+    "01H7VFHPS5WJW2694R1MNC8JFY": "How much money did each of the two agents gain or lose, based on their target price. For the seller, achieving below their target prices means losing while achieving above their target prices means gaining. For the buyer, achieving below their target prices means gaining while achieving above their target prices means losing.",
     "01H7VFHN7WJK7VWVRZZTQ6DX9T": null,
     "01H7VFHNN7XTR99319DS8KZCQM": null,
-    "01H7VFHQ11NAMZS4A2RDGDB01V": "How much money did each of the two agents gain or lose. Achieving below their target prices means losing while achieving above their target prices means gaining.",
-    "01H7VFHPSWGDGEYRP63H2DJKV0": "How much money did each of the two agents gain or lose. Achieving below their target prices means losing while achieving above their target prices means gaining.",
+    "01H7VFHQ11NAMZS4A2RDGDB01V": "How much money did each of the two agents gain or lose, based on their target price. For the seller, achieving below their target prices means losing while achieving above their target prices means gaining. For the buyer, achieving below their target prices means gaining while achieving above their target prices means losing.",
+    "01H7VFHPSWGDGEYRP63H2DJKV0": "How much money did each of the two agents gain or lose, based on their target price. For the seller, achieving below their target prices means losing while achieving above their target prices means gaining. For the buyer, achieving below their target prices means gaining while achieving above their target prices means losing.",
     "01H7VFHNF4G18PC9JHGRC8A1R6": null,
     "01H7VFHNNYH3W0VRWVY178K2TK": null,
     "01H7VFHP8AN5643B0NR0NP00VE": "How many points did each person get according to their own values?",

diff --git a/eval/eval_plots.ipynb b/eval/eval_plots.ipynb
diff --git a/eval/llm_eval.py b/eval/llm_eval.py
@@ -22,7 +22,9 @@ def get_model_parser(model_name='text-davinci-003') -> (PromptTemplate, Pydantic
         "Try to understand the following situation and answer the question in the end. "
         "\n Situation: {situation}"
         "\n Question: {question}"
-        "\n Please represent loss as negative values. {format_instructions}\n "
+        "\n Please represent loss as negative values. "
+        "If you think the conversation is not ended, please answer 0 for both agents. "
+        "{format_instructions}\n "
     )
 
     prompt = PromptTemplate(

diff --git a/eval/pull_data.py b/eval/pull_data.py
@@ -3,7 +3,8 @@
 from sotopia.database.persistent_profile import AgentProfile
 import json
 
-TAG = "ft-llama-2-13b-chat_gpt4_clean_ruiyi_1010_12"  # Baseline tag
+# TAG = "ft-llama-2-13b-chat_gpt4_clean_ruiyi_1010_12" 
+TAG = "ft-llama-2-13b-chat_baseline_ruiyi_1010_7" 
 
 HARD_ENVS = ["01H7VFHNV13MHN97GAH73E3KM8", "01H7VFHN5WVC5HKKVBHZBA553R", "01H7VFHN9W0WAFZCBT09PKJJNK", "01H7VFHPDZVVCDZR3AARA547CY", "01H7VFHPQQQY6H4DNC6NBQ8XTG", "01H7VFHN7WJK7VWVRZZTQ6DX9T", "01H7VFHPS5WJW2694R1MNC8JFY",
              "01H7VFHNN7XTR99319DS8KZCQM", "01H7VFHQ11NAMZS4A2RDGDB01V", "01H7VFHPSWGDGEYRP63H2DJKV0", "01H7VFHNF4G18PC9JHGRC8A1R6", "01H7VFHNNYH3W0VRWVY178K2TK", "01H7VFHP8AN5643B0NR0NP00VE", "01H7VFHN7A1ZX5KSMT2YN9RXC4"]
@@ -23,7 +24,7 @@
         while "Agent 1 comments:" not in messages_and_rewards[-1]:
             messages_and_rewards.pop()
         messages_and_rewards.pop()
-        human_readable_eps_by_env[env_profile_id].append({"env_pk": env_profile_id, "ep_pk": ep.pk, "agents": ep.agents, "messages": "\n".join(messages_and_rewards)})
+        human_readable_eps_by_env[env_profile_id].append({"env_pk": env_profile_id, "ep_pk": ep.pk, "env_codename": envs[env_profile_id]['codename'], "agents": ep.agents, "messages": "\n".join(messages_and_rewards)})
 
 with open("hard_env_scenarios.json", "w") as f:
     json.dump(envs, f)