diff --git a/data_process/data/data_process.py b/data_process/data/data_process.py
deleted file mode 100644
index d95ee753..00000000
--- a/data_process/data/data_process.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import json
-import os
-
-data_dir = "sotopia-data/"
-file_list = os.listdir(data_dir)
-full_data = []
-
-for data_file in file_list:
- with open(os.path.join(data_dir, data_file), 'r') as f:
- # with open("sotopia-data/01H8D2KJXFPCNB7GDJMVVCQ45Z.json", 'r') as f:
- dic = json.load(f)
- id = dic["pk"]
- messages = dic["messages"]
-
- human_conv, ai_conv = [], []
- for i in range(len(messages)-1):
- msg = messages[i]
- if len(msg) != 4:
- break
- if i == 0:
- # env_to_human = msg[0][2]
- env_to_ai = msg[1][2]
- # human_conv.append(env_to_human)
- human_conv.append(env_to_ai)
- # ai_conv.append(env_to_ai)
- ai_conv.append("Sure!")
-
- if i % 2 == 0:
- if msg[2][2][0] == "s":
- human_to_ai = msg[2][2][6:]
- else:
- human_to_ai = msg[2][2]
- human_conv.append(human_to_ai)
- else:
- if msg[3][2][0] == "s":
- ai_to_human = msg[3][2][6:]
- else:
- ai_to_human = msg[3][2]
- ai_conv.append(ai_to_human)
-
- conv_dic = {}
- conv_dic["id"] = id
- conversations = []
-
- for i in range(min(len(human_conv), len(ai_conv))):
- human_dic = {}
- human_dic["from"] = "human"
- human_dic["value"] = human_conv[i]
- ai_dic = {}
- ai_dic["from"] = "gpt"
- ai_dic["value"] = ai_conv[i]
-
- conversations.append(human_dic)
- conversations.append(ai_dic)
-
- if len(human_conv) > len(ai_conv):
- human_dic = {}
- human_dic["from"] = "human"
- human_dic["value"] = human_conv[i]
- conversations.append(human_dic)
- elif len(human_conv) < len(ai_conv):
- ai_dic = {}
- ai_dic["from"] = "gpt"
- ai_dic["value"] = ai_conv[i]
- conversations.append(ai_dic)
-
- conv_dic["conversations"] = conversations
-
- full_data.append(conv_dic)
-
-with open("full-data.json", "w") as f:
- json.dump(full_data, f, indent=4)
diff --git a/data_process/dummyfile b/data_process/dummyfile
deleted file mode 100644
index e69de29b..00000000
diff --git a/data_process/data/fastchat_data/fastchat_data_preprocess.py b/data_process/fastchat_data_preprocess.py
similarity index 100%
rename from data_process/data/fastchat_data/fastchat_data_preprocess.py
rename to data_process/fastchat_data_preprocess.py
diff --git a/together_ai_ft/data_process/completion_data_preprocess.py b/together_ai_ft/data_process/completion_data_preprocess.py
new file mode 100644
index 00000000..c5df559f
--- /dev/null
+++ b/together_ai_ft/data_process/completion_data_preprocess.py
@@ -0,0 +1,17 @@
+import json
+import os
+
+sotopia_data_dir = "ft-data-gpt4-gpt4-easy-2-side-partial/"
+
+ft_data_list = []
+for file in os.listdir(sotopia_data_dir):
+ with open(os.path.join(sotopia_data_dir, file), 'r') as f: # 2510
+ file_dict = json.load(f)
+ output = file_dict["prompt"] + " " + file_dict["result"]
+ ft_data_list.append(output)
+
+
+with open("human-bot-train-gpt4-gpt4-easy-2-side-partial.jsonl", 'w') as f:
+ for data in ft_data_list:
+ f.write(json.dumps({"text": data}))
+ f.write('\n')
diff --git a/together_ai_ft/data_process/multiturn_data_preprocess.py b/together_ai_ft/data_process/multiturn_data_preprocess.py
new file mode 100644
index 00000000..1154e038
--- /dev/null
+++ b/together_ai_ft/data_process/multiturn_data_preprocess.py
@@ -0,0 +1,19 @@
+import json
+import os
+
+sotopia_data_dir = "/Users/pamela/Documents/capstone/sotopia-ft-data/GPT4-4_Redis_Easy_No_Filter"
+
+together_data_template = """[INST] {user_msg} [/INST] {assistant_msg} """
+
+lines = []
+for file in os.listdir(sotopia_data_dir):
+ with open(os.path.join(sotopia_data_dir, file), 'r') as f:
+ file_dict = json.load(f)
+ text = together_data_template.format(
+ user_msg=file_dict["prompt"], assistant_msg=file_dict["result"])
+ lines.append(text)
+
+with open("together-ft-gpt4-gpt4-easy-no-filter.jsonl", 'w') as f:
+ for line in lines:
+ f.write(json.dumps({"text": line}))
+ f.write('\n')
diff --git a/together_ai_ft/together_ai_data.jsonl b/together_ai_ft/together_ai_data.jsonl
deleted file mode 100644
index 75367e46..00000000
--- a/together_ai_ft/together_ai_data.jsonl
+++ /dev/null
@@ -1,100 +0,0 @@
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
-{"text":"[INST] how old are you? [/INST] one yaer old. "}
\ No newline at end of file