Skip to content

Commit

Permalink
move running into function
Browse files (browse the repository at this point in the history)
  • Loading branch information
sharonwx54 committed Sep 27, 2023
1 parent 2737866 commit 67711c1
Showing 1 changed file with 18 additions and 17 deletions.
35 changes: 18 additions & 17 deletions together_ai_ft/prompt_res_data_process.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4,24 +4,25 @@
import ast

# Module-level configuration and accumulator for the prompt/result dataset.
data_dir = "data-pair-store/" # change this to the directory of parse chat data
file_list = os.listdir(data_dir)
print(len(file_list))
# Collected records, each of the form {'text': <formatted prompt/answer pair>}.
full_data = []
# Template wrapping one prompt/answer pair in [INST] ... [/INST] markers.
TGTAI_FORMAT = """<s>[INST] {user_msg} [/INST] {model_answer} </s>"""

# Number of files that could not be read, parsed, or lacked a required key.
unusable = 0
for data_file in file_list:
    try:
        # JSON text is read as UTF-8 rather than the platform default encoding.
        with open(os.path.join(data_dir, data_file), 'r', encoding="utf-8") as f:
            dic = json.load(f)
        prompt = dic["prompt"]
        result = dic["result"]
        format_str = TGTAI_FORMAT.format(user_msg=prompt, model_answer=result)
        full_data.append({'text': format_str})
    except Exception:
        # Best effort: a bad file is counted and skipped. Catching Exception
        # (not a bare except) lets Ctrl-C and SystemExit still stop the run.
        unusable += 1
def run_processing(data_dir):
    """Convert the JSON files in *data_dir* into one formatted dataset file.

    Every entry of *data_dir* is opened and parsed as JSON. The values under
    its "prompt" and "result" keys are substituted into the module-level
    ``TGTAI_FORMAT`` template, and the resulting string is appended to the
    module-level ``full_data`` list as ``{'text': ...}``. Entries that cannot
    be opened, parsed, or that lack either key are counted and skipped.

    The number of directory entries and the number of skipped entries are
    printed, and ``full_data`` is then written to ``full-data.jsonl`` in the
    current working directory.

    Args:
        data_dir: Path of the directory holding the per-sample JSON files.

    Raises:
        OSError: If *data_dir* cannot be listed or the output file cannot be
            written.
    """
    file_list = os.listdir(data_dir)
    print(len(file_list))
    unusable = 0
    for data_file in file_list:
        try:
            # JSON text is read as UTF-8 rather than the platform default.
            with open(os.path.join(data_dir, data_file), 'r', encoding="utf-8") as f:
                dic = json.load(f)
            prompt = dic["prompt"]
            result = dic["result"]
            format_str = TGTAI_FORMAT.format(user_msg=prompt, model_answer=result)
            full_data.append({'text': format_str})
        except Exception:
            # Best effort: a bad file is counted and skipped. Catching
            # Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            unusable += 1

    print(unusable)
    # NOTE(review): despite the ".jsonl" name this writes a single indented
    # JSON array, not one object per line. The format is left unchanged here;
    # confirm what the consumer of this file expects.
    # NOTE(review): full_data is shared module state, so calling this function
    # more than once accumulates records across calls.
    with open("full-data.jsonl", "w") as f:
        json.dump(full_data, f, indent=4)

0 comments on commit 67711c1

Please sign in to comment.