Skip to content

Commit

Permalink
add converting file for fastchat and together
Browse files Browse the repository at this point in the history
  • Loading branch information
ruiyiw committed Oct 11, 2023
1 parent 14003d4 commit d769317
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
20 changes: 20 additions & 0 deletions data_process/data/fastchat_data/fastchat_data_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import json
import os

# Directory holding one JSON file per fine-tuning example; each file must
# provide the "prompt" and "result" keys read below.
# NOTE(review): machine-specific absolute path -- pass another directory to
# build_fastchat_records() when running on a different machine.
sotopia_data_dir = "/Users/pamela/Documents/capstone/sotopia-ft-data/ft-data-gpt4-gpt4-easy-2-side-partial"

# NOTE(review): "gp4" looks like a typo for "gpt4"; the name is kept unchanged
# because other tooling may already refer to this file.
fastchat_output_path = "fastchat-ft-gp4-gpt4-easy-2-side-partial.json"


def _list_data_files(data_dir):
    """Return paths of the regular, non-hidden files in *data_dir*, sorted by name."""
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # generated ids reproducible from one run to the next.
    return [
        os.path.join(data_dir, name)
        for name in sorted(os.listdir(data_dir))
        # Hidden entries (e.g. ".DS_Store") and sub-directories are not
        # example files and would make json.load() fail.
        if not name.startswith(".")
        and os.path.isfile(os.path.join(data_dir, name))
    ]


def build_fastchat_records(data_dir=sotopia_data_dir):
    """Convert every example file in *data_dir* into a conversation record.

    Each record has the shape
    ``{"id": "identity_<n>", "conversations": [human turn, gpt turn]}``
    where the human turn carries the file's "prompt" and the gpt turn its
    "result". ``<n>`` is the zero-based position of the file in name order.

    Args:
        data_dir: Directory containing the example JSON files.

    Returns:
        A list of record dicts, one per example file (empty if there are none).

    Raises:
        OSError: If *data_dir* or one of its files cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If a file lacks the "prompt" or "result" key.
    """
    records = []
    for index, path in enumerate(_list_data_files(data_dir)):
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(path, "r", encoding="utf-8") as f:
            file_dict = json.load(f)
        records.append(
            {
                "id": f"identity_{index}",
                "conversations": [
                    {"from": "human", "value": file_dict["prompt"]},
                    {"from": "gpt", "value": file_dict["result"]},
                ],
            }
        )
    return records


def write_fastchat_json(records, output_path=fastchat_output_path):
    """Write *records* to *output_path* as one JSON array indented by 4 spaces."""
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=4)


def main():
    """Convert the default data directory and write the default output file."""
    write_fastchat_json(build_fastchat_records())


if __name__ == "__main__":
    main()
17 changes: 17 additions & 0 deletions data_process/data/together_data/together_data_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import json
import os

# Directory (relative to the working directory) holding one JSON file per
# fine-tuning example; each file must provide "prompt" and "result" keys.
# The original author annotated the read loop with "2510" -- presumably the
# number of files in this directory; TODO confirm.
sotopia_data_dir = "ft-data-gpt4-gpt4-easy-2-side-partial/"

together_output_path = "human-bot-train-gpt4-gpt4-easy-2-side-partial.jsonl"


def _list_data_files(data_dir):
    """Return paths of the regular, non-hidden files in *data_dir*, sorted by name."""
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # output file reproducible from one run to the next.
    return [
        os.path.join(data_dir, name)
        for name in sorted(os.listdir(data_dir))
        # Hidden entries (e.g. ".DS_Store") and sub-directories are not
        # example files and would make json.load() fail.
        if not name.startswith(".")
        and os.path.isfile(os.path.join(data_dir, name))
    ]


def build_together_texts(data_dir=sotopia_data_dir):
    """Return one training string per example file in *data_dir*.

    Each string is the file's "prompt" and "result" joined by a single space.

    Args:
        data_dir: Directory containing the example JSON files.

    Returns:
        A list of strings in file-name order (empty if there are no files).

    Raises:
        OSError: If *data_dir* or one of its files cannot be read.
        json.JSONDecodeError: If a file does not contain valid JSON.
        KeyError: If a file lacks the "prompt" or "result" key.
    """
    texts = []
    for path in _list_data_files(data_dir):
        # Explicit UTF-8 so the result does not depend on the platform's
        # default encoding.
        with open(path, "r", encoding="utf-8") as f:
            file_dict = json.load(f)
        texts.append(file_dict["prompt"] + " " + file_dict["result"])
    return texts


def write_together_jsonl(texts, output_path=together_output_path):
    """Write *texts* to *output_path* as JSON Lines: one ``{"text": ...}`` object per line."""
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps({"text": text}) + "\n" for text in texts)


def main():
    """Convert the default data directory and write the default output file."""
    write_together_jsonl(build_together_texts())


if __name__ == "__main__":
    main()

0 comments on commit d769317

Please sign in to comment.