Skip to content

Commit

Permalink
rename data process files and add together multiturn data preprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
ruiyiw committed Nov 6, 2023
1 parent 5248438 commit 5b5720c
Show file tree
Hide file tree
Showing 9 changed files with 19 additions and 711 deletions.
72 changes: 0 additions & 72 deletions data_process/data/data_process.py

This file was deleted.

Empty file removed data_process/dummyfile
Empty file.
File renamed without changes.
19 changes: 19 additions & 0 deletions together_ai_ft/data_process/multiturn_data_preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import json
import os

# Directory holding one JSON file per training example. Each file is read as
# an object with at least a "prompt" and a "result" key.
sotopia_data_dir = "/Users/pamela/Documents/capstone/sotopia-ft-data/GPT4-4_Redis_Easy_No_Filter"

# Template for one training example: the prompt goes between the [INST] tags
# and the result follows, all wrapped in <s> ... </s>.
together_data_template = """<s>[INST] {user_msg} [/INST] {assistant_msg} </s>"""

# Destination JSONL file, resolved relative to the current working directory.
output_path = "together-ft-gpt4-gpt4-easy-no-filter.jsonl"


def build_training_texts(data_dir):
    """Return one formatted training string per JSON file in *data_dir*.

    Entries are visited in sorted name order so the result is reproducible
    (``os.listdir`` makes no ordering guarantee). Hidden entries (names
    starting with ``"."``, e.g. macOS ``.DS_Store``) and anything that is not
    a regular file are skipped instead of being handed to the JSON parser.

    Args:
        data_dir: Path of the directory to scan (not recursive).

    Returns:
        A list of strings produced by ``together_data_template``.

    Raises:
        FileNotFoundError: If *data_dir* does not exist.
        json.JSONDecodeError: If a visited file is not valid JSON.
        KeyError: If a record lacks the "prompt" or "result" key.
    """
    texts = []
    for name in sorted(os.listdir(data_dir)):
        path = os.path.join(data_dir, name)
        # Skip OS metadata files and sub-directories; they are not examples.
        if name.startswith(".") or not os.path.isfile(path):
            continue
        with open(path, 'r', encoding="utf-8") as f:
            record = json.load(f)
        texts.append(together_data_template.format(
            user_msg=record["prompt"], assistant_msg=record["result"]))
    return texts


def write_jsonl(texts, path):
    """Write *texts* to *path* as JSON Lines, one ``{"text": ...}`` per line.

    Args:
        texts: Iterable of strings to serialize.
        path: Output file path; an existing file is overwritten.
    """
    with open(path, 'w', encoding="utf-8") as f:
        # json.dumps escapes embedded newlines, so each record stays on one
        # physical line of the output file.
        f.writelines(json.dumps({"text": text}) + '\n' for text in texts)


def main():
    """Convert the configured data directory into the fine-tuning file."""
    write_jsonl(build_training_texts(sotopia_data_dir), output_path)


if __name__ == "__main__":
    main()
100 changes: 0 additions & 100 deletions together_ai_ft/together_ai_data.jsonl

This file was deleted.

Loading

0 comments on commit 5b5720c

Please sign in to comment.