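"""chatbot.py

SoulSearcher: an interactive interview chatbot. A language-model "interviewer"
persona (biographer, mother, journalist, poet, manager, or grandfather) asks a
fixed set of soul-searching questions and, after each answer, offers an analysis
and/or a follow-up question. Completed turns are written to conversation_turns.csv.
"""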
import os
from lmsampler import LMSampler
from subject import Subject
from pdb import set_trace as breakpoint
import pandas as pd


class SoulSearcher:
    def __init__(
        self,
        interviewer_style,
        lm="gpt2",
        temperature=0.7,
        pplm=False,
        bio_dry_run=False,
        dry_run=False,
        analysis_question_config="both",
        evaluation=False,
        print_prompt=False,
    ):
"""
interviewer_style: str - which persona SoulSearcher uses to analyze/question the subject
'biographer'
'mother'
'journalist'
'poet'
'manager'
'grandfather'
lm: str - which language model to use
'gpt2'
'gpt2-medium'
'gpt2-large'
'gpt2-xl'
'distilgpt2'
'EleutherAI/gpt-j-6B'
'EleutherAI/gpt-neo-2.7B'
'EleutherAI/gpt-neo-1.3B'
'EleutherAI/gpt-neo-125M'
'j1-jumbo' #Jurassic
'j1-large'
'gpt3-ada'
'gpt3-babbage'
'gpt3-curie'
'gpt3-davinci'
'gpt3-text-davinci-001'
'gpt3-text-davinci-002'
temperature: float between 0 and 1
pplm: bool - whether to use plug and play language model by Uber AI
dry_run: bool - whether to use the real model or not
"""
        self.model = LMSampler(lm)
        self.subject = Subject(dry_run=bio_dry_run, evaluation=evaluation)
        self.temperature = temperature
        self.analysis_question_config = analysis_question_config
        self.analysis_question_config_dict = {
            "analysis": ["analysis"],
            "question": ["question"],
            "both": ["analysis", "question"],
        }
        self._init_templates()
        if interviewer_style not in self.templates:
            raise ValueError("Interviewer style not found")
        self.interviewer_style = interviewer_style
        self._carry_out_interview(
            dry_run=dry_run, evaluation=evaluation, print_prompt=print_prompt
        )

    def _init_templates(self):
        self.templates = {
            "biographer": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm a biographer, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm a biographer, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
            "mother": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s mother, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s mother, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
            "journalist": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm a journalist, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm a journalist, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
            "poet": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm a poet, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm a poet, and when I asked "
                    f"{subject_name} '{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
            "manager": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s manager, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s manager, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
            "grandfather": {
                "analysis": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s grandfather, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f"When I heard that, I wanted to tell them"
                ),
                "question": lambda subject_name, question, answer: (
                    f"I'm {subject_name}'s grandfather, and when I asked them "
                    f"'{question}', they told me '{answer}'. "
                    f'When I heard that, I wanted to ask them the question "'
                ),
            },
        }
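        # For illustration only (hypothetical values): templates["biographer"]["question"]
        # applied to ("Alice", "What do you like to do for fun?", "I paint") produces:
        #   I'm a biographer, and when I asked Alice 'What do you like to do for fun?',
        #   they told me 'I paint'. When I heard that, I wanted to ask them the question "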

    def _carry_out_interview(self, dry_run=False, evaluation=False, print_prompt=False):
        analysis_question_config = self.analysis_question_config
        soul_searching_questions = [
            "What do you like to do for fun?",
            "What is the most important thing that ever happened to you?",
            "What is the accomplishment you are most proud of?",
            "Who is the person you admire the most?",
            "What is the thing you would tell an 18-year old version of yourself?",
            "What are your greatest passions in life?",
            "What is the best gift that you've ever been given?",
            "What has been the happiest time of your life so far?",
        ]
        soul_searching_answers_evaluation = [
            "I like to make furniture. I find it very relaxing, very therapeutic, "
            "to work with my hands and to make something that exists in the "
            "physical world.",
            "Probably my high school English class. I was a big football player "
            "and felt like that was my identity, but I found out that I was much better "
            "at thinking and answering questions and writing than I was at tackling, "
            "and it's changed the course of my whole life.",
            "I had children. Raising children is the thing that made me feel "
            "that I had a real, meaningful role in the world.",
            "I really admire John Mayer, embarrassingly. He is a great musician, "
            "and he is a great intellect. He feels like a person that I'd "
            "really like in real life.",
            "I would tell my 18-year-old self that advice is highly over-rated, "
            "and the most important thoughts you'll ever find are your own, so "
            "to make them good, whatever that means to you.",
            "I love music, I love language, I love science, I love religion. I "
            "feel pretty basic when I talk about things that I love.",
            "A girlfriend gave me a book called 'The Crucible of Doubt' and symphony "
            "tickets for valentine's day. I felt very known that day.",
            "Again, the happiest day I've ever had was when my kids were born. "
            "I felt like everything changed that day.",
        ]
        if evaluation:
            templates = list(self.templates.keys())
        else:
            templates = [self.interviewer_style]
        conversation_str = ""
        conversations_dict = {}
        # Options to present to messenger/subject after soul-searcher has asked a question
        q_options = """
        Options:
        "pass" - Skip this question
        "end" - End the interview
        """
        # Options to present to messenger after soul-searcher has weighed in on subject's response to question
        a_options = """
        Options:
        Press enter to continue, else type
        "again" - to have soul-searcher resample the analysis / question
        "analysis" - to force soul-searcher to offer analysis instead of asking a follow-up question
        "question" - to force soul-searcher to offer a follow-up question instead of offering an analysis
        "both" - to have soul-searcher offer both an analysis and a follow-up question
        """
        turns_df = pd.DataFrame()
        for template in templates:
            for ix, question in enumerate(soul_searching_questions):
                print(question, q_options)
                if evaluation:
                    answer = soul_searching_answers_evaluation[ix]
                else:
                    answer = input()
                if answer == "pass":
                    continue
                elif answer == "end":
                    break
                turns_dict = {
                    "template": template,
                    "question": question,
                    "answer": answer,
                }
                backstory = self.subject.print_backstory()
                analysis = ""
                fupquestion = ""
                while True:
                    # TODO pass in prompt according to analysis_question_config
                    for answer_type in self.analysis_question_config_dict[
                        analysis_question_config
                    ]:
                        prompt = (
                            backstory
                            + "\n\n"
                            + self.templates[template][answer_type](
                                self.subject.name, question, answer
                            )
                        )
                        if dry_run:
                            response = "I'm a bot, and I don't know what to say."
                        else:
                            print("Hmm...")
                            response = self.model.sample_several(
                                prompt,
                                temperature=self.temperature,
                                n_tokens=50,
                                # stop_tokens=["\n"],
                            )
                        if answer_type == "analysis":
                            analysis = f"Interviewer (analysis): {response}"
                            turns_dict["analysis"] = analysis
                        elif answer_type == "question":
                            fupquestion = f"Interviewer (question): {response}"
                            turns_dict["followup_question"] = fupquestion
                        else:
                            raise ValueError("Answer type not found")
                        # response = openai.Completion.create(
                        #     engine="text-davinci-002",
                        #     prompt=prompt,
                        #     max_tokens=64,
                        #     stop=["\n"],
                        # )
                        # gpt3_followup = response.choices[0].text
                        if print_prompt:
                            print(f"\nPrompt: {prompt}\n\n{answer_type}: {response}")
                        else:
                            print(f"\n{answer_type}: {response}")
                    print(a_options)
                    if evaluation:
                        messenger_input = ""
                    else:
                        messenger_input = input()
                    if messenger_input == "":
                        # conversation_str += f"Interviewer: {question}\nSubject:{answer}\n{analysis}\n{fupquestion}\n\n"
                        # DataFrame.append was removed in pandas 2.0; build the row with concat instead.
                        turns_df = pd.concat(
                            [turns_df, pd.DataFrame([turns_dict])], ignore_index=True
                        )
                        break
                    elif messenger_input == "again":
                        continue
                    elif messenger_input == "analysis":
                        analysis_question_config = "analysis"
                        continue
                    elif messenger_input == "question":
                        analysis_question_config = "question"
                        continue
                    elif messenger_input == "both":
                        analysis_question_config = "both"
                        continue
print("_______________________________\n")
conversations_dict[template] = conversation_str
print("Thanks for the interview! It was nice getting to know you!")
try:
# df = pd.DataFrame(
# conversations_dict.items(), columns=["template", "conversation"]
# )
turns_df.to_csv("conversation_turns.csv")
except Exception as e:
breakpoint()
pd.to_csv(conversations_dict, "conversations.csv")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", type=str, default="gpt2")
    parser.add_argument("--temperature", type=float, default=0.7)
    args = parser.parse_args()

    soulsearcher = SoulSearcher(
        "biographer",
        lm=args.model,
        dry_run=False,
        bio_dry_run=False,
        evaluation=False,
        temperature=args.temperature,
        print_prompt=False,
    )
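
# Example command-line invocation (assumes the lmsampler and subject modules are importable):
#   python chatbot.py --model gpt2 --temperature 0.7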