"""Convert Sotopia fine-tuning samples into FastChat conversation records.

Recovered from the patch "add fastchat ft preprocess", which creates
``data_process/data/fastchat_data/fastchat_data_preprocess.py``.

Every sample file in the input directory is read as a JSON object and its
``"prompt"`` and ``"result"`` values become one two-turn conversation
(``human`` then ``gpt``).  All conversations are written to a single JSON
file as a list.

Usage::

    python fastchat_data_preprocess.py [DATA_DIR [OUTPUT_PATH]]

With no arguments the script uses the defaults defined below.
"""

import argparse
import json
import os

# Default input directory.  This is a machine-specific path; pass DATA_DIR on
# the command line (or call the functions directly) to use another location.
sotopia_data_dir = "/Users/pamela/Documents/capstone/sotopia-ft-data/ft-data-gpt4-gpt4-easy-2-side-partial"

# Default output file, written relative to the current working directory.
# NOTE(review): "gp4" looks like a typo for "gpt4"; the name is left as-is so
# anything that already reads this file keeps working -- confirm before renaming.
DEFAULT_OUTPUT_PATH = "fastchat-ft-gp4-gpt4-easy-2-side-partial.json"


def build_fastchat_records(data_dir):
    """Return the list of conversation records built from *data_dir*.

    Args:
        data_dir: Directory whose regular, non-hidden files each contain one
            JSON object with ``"prompt"`` and ``"result"`` keys.

    Returns:
        A list of dicts shaped like
        ``{"id": "identity_<n>", "conversations": [human_turn, gpt_turn]}``,
        where ``<n>`` counts up from 0 in sorted-filename order.

    Raises:
        FileNotFoundError: If *data_dir* does not exist.
        json.JSONDecodeError: If a sample file is not valid JSON.
        KeyError: If a sample lacks ``"prompt"`` or ``"result"``.
    """
    records = []
    # os.listdir() yields names in arbitrary order; sorting makes the
    # identity_<n> ids reproducible from run to run.
    for name in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, name)
        # Hidden entries (e.g. ".DS_Store") and sub-directories are not
        # samples; feeding them to json.load would abort the whole run.
        if name.startswith(".") or not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8") as sample_file:
            sample = json.load(sample_file)
        records.append(
            {
                "id": f"identity_{len(records)}",
                "conversations": [
                    {"from": "human", "value": sample["prompt"]},
                    {"from": "gpt", "value": sample["result"]},
                ],
            }
        )
    return records


def write_fastchat_file(data_dir=sotopia_data_dir, output_path=DEFAULT_OUTPUT_PATH):
    """Convert every sample in *data_dir* and write the result to *output_path*.

    Args:
        data_dir: Directory of sample files (see ``build_fastchat_records``).
        output_path: Destination JSON file; overwritten if it already exists.

    Returns:
        The number of conversation records written.
    """
    records = build_fastchat_records(data_dir)
    with open(output_path, "w", encoding="utf-8") as out_file:
        out_file.write(json.dumps(records, indent=4))
    return len(records)


def main(argv=None):
    """Command-line entry point; *argv* defaults to ``sys.argv[1:]``."""
    parser = argparse.ArgumentParser(
        description="Convert Sotopia fine-tuning samples to FastChat format."
    )
    parser.add_argument(
        "data_dir",
        nargs="?",
        default=sotopia_data_dir,
        help="directory containing the sample JSON files",
    )
    parser.add_argument(
        "output_path",
        nargs="?",
        default=DEFAULT_OUTPUT_PATH,
        help="path of the JSON file to write",
    )
    args = parser.parse_args(argv)
    write_fastchat_file(args.data_dir, args.output_path)


if __name__ == "__main__":
    main()