Skip to content

Commit

Permalink
adding py
Browse files Browse the repository at this point in the history
  • Loading branch information
sharonwx54 committed Sep 27, 2023
1 parent 1ab49e8 commit e2e6e7e
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions together_ai_ft/prompt_res_data_process.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import json
import os
import re
import ast

# Template for one training sample: the user message goes between the
# [INST] ... [/INST] tags and the model answer follows, wrapped in <s> ... </s>.
TGTAI_FORMAT: str = "<s>[INST] {user_msg} [/INST] {model_answer} </s>"

# Default input location; point this at the directory holding the parsed
# chat data (one JSON file per prompt/result pair).
data_dir: str = "data-pair-store/"

# Module-level accumulator of {'text': ...} records; run_processing appends
# to it.
full_data: list = []

def run_processing(data_dir):
    """Convert a directory of prompt/result JSON files into one JSONL file.

    Every file in ``data_dir`` is expected to hold a JSON object with the
    keys ``"prompt"`` and ``"result"``. Each usable file is rendered through
    ``TGTAI_FORMAT`` and appended to the module-level ``full_data`` list as
    ``{'text': <formatted string>}``. Files that cannot be read, are not
    valid JSON, or lack the expected keys are counted as unusable and
    skipped.

    The number of directory entries and the number of unusable files are
    printed, and every record in ``full_data`` is written to
    ``full-data.jsonl`` in the current working directory, one JSON object
    per line.

    Args:
        data_dir: Path of the directory containing the input JSON files.

    Raises:
        OSError: If ``data_dir`` cannot be listed or the output file cannot
            be written.

    Note:
        ``full_data`` is module-level state, so calling this function more
        than once in the same process accumulates records across calls.
    """
    # Sort so the output order is reproducible; os.listdir order is arbitrary.
    file_list = sorted(os.listdir(data_dir))
    print(len(file_list))

    unusable = 0
    for data_file in file_list:
        try:
            with open(os.path.join(data_dir, data_file), 'r', encoding='utf-8') as f:
                dic = json.load(f)
            format_str = TGTAI_FORMAT.format(
                user_msg=dic["prompt"], model_answer=dic["result"]
            )
        except (OSError, ValueError, KeyError, TypeError):
            # OSError: unreadable entry (e.g. a sub-directory);
            # ValueError: malformed JSON or undecodable bytes;
            # KeyError / TypeError: JSON without the expected prompt/result
            # object shape. Anything else (KeyboardInterrupt, real bugs)
            # is allowed to propagate instead of being silently counted.
            unusable += 1
            continue
        full_data.append({'text': format_str})

    print(unusable)

    # JSON Lines: exactly one compact JSON object per line. A single
    # indented array would not be line-delimited despite the .jsonl name.
    with open("full-data.jsonl", "w", encoding="utf-8") as f:
        for record in full_data:
            f.write(json.dumps(record) + "\n")

0 comments on commit e2e6e7e

Please sign in to comment.