-
Notifications
You must be signed in to change notification settings - Fork 0
/
bot_user_dataset.py
47 lines (37 loc) · 1.46 KB
/
bot_user_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import pandas as pd
import json
from config import domain_name
def _split_by_role(utterances, roles):
    """Partition one dialogue's utterances into ``(bot, user)`` lists.

    An utterance goes to the bot list when the role at the same position is
    exactly ``'BOT'``; every other role value is treated as the user.
    """
    bot_turns = []
    user_turns = []
    # Index into ``roles`` (instead of zipping) so that a roles list shorter
    # than the utterances list raises IndexError rather than silently
    # dropping the unmatched utterances.
    for position, utterance in enumerate(utterances):
        if roles[position] == 'BOT':
            bot_turns.append(utterance)
        else:
            user_turns.append(utterance)
    return bot_turns, user_turns


def _write_json_lines(frame, path):
    """Write every row of ``frame`` to ``path`` as one JSON object per line."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(
            json.dumps(row) + '\n' for row in frame.to_dict(orient='records')
        )


def seperate_bot_user(input_path=None, bot_path='df_bot.txt',
                      user_path='df_user.txt'):
    """Split each dialogue's utterances by speaker and save both halves.

    The input is a JSON-lines file in which every record carries an
    ``utterances`` list and a parallel ``roles`` list.  Two JSON-lines files
    are written: one whose ``utterances`` field keeps only the turns whose
    role is ``'BOT'``, and one that keeps all remaining turns.  Every other
    field of a record is copied unchanged and the ``roles`` field is dropped.

    Args:
        input_path: File to read.  When ``None`` (the default) the path
            ``meta_woz_{domain_name}.txt`` is used.
        bot_path: Destination for the bot-only records.
        user_path: Destination for the user-only records.

    Returns:
        None.  The results are the two files written to disk.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
        KeyError: If the records have no ``utterances`` or ``roles`` field.
        IndexError: If a record has fewer roles than utterances.
    """
    if input_path is None:
        input_path = f'meta_woz_{domain_name}.txt'

    with open(input_path, 'r', encoding='utf-8') as f:
        # Skip blank lines (e.g. a trailing newline at the end of the file),
        # which would otherwise make json.loads fail.
        dataset = [json.loads(line) for line in f if line.strip()]

    if not dataset:
        # Nothing to split: still create the (empty) output files so that
        # downstream steps find them.
        empty = pd.DataFrame()
        _write_json_lines(empty, bot_path)
        _write_json_lines(empty, user_path)
        return

    # Create a dataframe from the dataset
    df = pd.DataFrame(dataset)

    bot_column = []
    user_column = []
    for utterances, roles in zip(df['utterances'], df['roles']):
        bot_turns, user_turns = _split_by_role(utterances, roles)
        bot_column.append(bot_turns)
        user_column.append(user_turns)

    # Drop the utterances and roles columns, then attach the filtered
    # utterances as a whole column.  Assigning the full column at once avoids
    # chained per-cell assignment, which is not guaranteed to update the
    # frame (and does not under pandas Copy-on-Write).
    metadata = df.drop(['utterances', 'roles'], axis=1)
    df_bot = metadata.copy()
    df_user = metadata.copy()
    df_bot['utterances'] = pd.Series(bot_column, index=df.index, dtype=object)
    df_user['utterances'] = pd.Series(user_column, index=df.index, dtype=object)

    # Save df_bot and df_user to txt files
    _write_json_lines(df_bot, bot_path)
    _write_json_lines(df_user, user_path)
# Run the split only when this file is executed as a script, not on import.
# The guard must test the module's __name__; comparing the literal
# "__init__" to '__main__' is always False and the function never ran.
if __name__ == '__main__':
    seperate_bot_user()