forked from lachlanpage/Markov-Chain-Sentence-Generator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchatGptApiCall.py
336 lines (247 loc) · 12.8 KB
/
chatGptApiCall.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
import json
import os
import time
import requests
from openai import OpenAI
from colorama import init, Fore, Style
from pygments import highlight
from pygments.formatters import TerminalFormatter
from pygments.lexers import JsonLexer
from config import Config
from log_config import configure_logger
from pdf_utilities import convert_pdf_to_text_file
from similarity_check import check_similarity
from text_utilities import TextGenerator
# Shared TextGenerator instance; used below to generate the Markov text
# (call_openai_api) and to load the corpus text (print_similarity_check).
text_generator = TextGenerator()
# Module-level logger, used by the functions below to report API and extraction errors.
logger = configure_logger(__name__)
# Initialize colorama for coloured terminal output.
# NOTE(review): autoreset=True should reset colours after each print (per colorama docs) - confirm.
init(autoreset=True)
def call_openai_api(max_tokens,
                    input_file_name=None,
                    raw_markov=False,
                    similarity_check=False,
                    seed_words=None,
                    no_chat_gpt=False):
    """
    Generate a Markov-chain sentence from a training corpus and optionally have OpenAI correct it.

    Args:
        max_tokens (int): Forwarded to setup_api_request as the completion's token limit.
        input_file_name (str | None): Training corpus file. A '.pdf' file is first converted with
            convert_pdf_to_text_file; any other name is used as given. None selects
            Config.TRAINING_CORPUS.
        raw_markov (bool): Forwarded to the output helpers; when true the raw Markov sentence is
            printed as well.
        similarity_check (bool): Forwarded to the output helpers to request a similarity analysis.
        seed_words: Forwarded unchanged to text_generator.generate_text.
        no_chat_gpt (bool): When true, no request is sent to the API and only the local output
            is printed.

    Returns:
        str: The value returned by make_api_request, or "" when no_chat_gpt is set.

    Raises:
        SystemExit: With status 1 when resolving the corpus raises FileNotFoundError or IOError.
    """
    # Resolve which training corpus to use.
    try:
        if input_file_name is None:
            # If the user did not specify a training corpus, use the default.
            input_file_name = Config.TRAINING_CORPUS
        elif input_file_name.lower().endswith('.pdf'):
            if Config.VERBOSE:
                print(f"{Fore.GREEN}[+] Extracting text "
                      f"from '{input_file_name}'{Style.RESET_ALL}")
            # Convert the PDF and continue with the file name the converter returns.
            input_file_name = convert_pdf_to_text_file(input_file_name)
        # Any other file name (e.g. a '.txt' file) is used exactly as given.
    except FileNotFoundError:
        print(f"File not found: '{input_file_name}'")
        # Raise SystemExit directly: the exit() builtin is injected by the site module
        # and is not guaranteed to exist outside interactive sessions.
        raise SystemExit(1)
    except IOError as e:
        print(f"IOError occurred while reading the file '{input_file_name}': {e}")
        raise SystemExit(1)

    if Config.VERBOSE:
        print(f"{Fore.GREEN}[+] Using training corpus: '{input_file_name}'{Style.RESET_ALL}")

    markov_words = text_generator.generate_text(
        input_file_name, Config.MARKOV_ORDER, Config.RESULT_LENGTH, seed_words)
    # Convert the word list to a string
    sentence = text_generator.convert_word_list_to_string(markov_words)

    # Prepare the API request.
    # NOTE: the request is built (and printed in verbose mode) even when no_chat_gpt is set,
    # so setup_api_request's requirements apply in that mode as well.
    data, headers = setup_api_request(max_tokens, sentence)
    if Config.VERBOSE:
        print_verbose_api_request(data)

    if no_chat_gpt:
        dont_make_api_request(input_file_name, raw_markov, sentence, similarity_check)
        return ""
    # Send the API request
    return make_api_request(input_file_name, data, headers, raw_markov, sentence, similarity_check)
def make_api_request(training_corpus, data, headers, raw_markov, sentence, similarity_check):
    """
    Send a POST request to the OpenAI completions endpoint, then print the result and analysis.

    Args:
        training_corpus (str): Training corpus file name, forwarded to print_similarity_check.
        data (dict): The request payload containing the prompt and other parameters.
        headers (dict): The request headers containing the API key.
        raw_markov (bool): If True, the raw Markov chain generated input is printed as well.
        sentence (str): The input sentence on which completion is performed.
        similarity_check (bool): If True, a similarity analysis of the output against the
            corpus is performed and printed.

    Returns:
        str: The corrected sentence. When Config.NUM_OF_RESPONSES > 1, every returned choice is
        included, each followed by a blank line. "" is returned when the API call does not
        answer with HTTP 200.

    Response JSON structure (only the parts read here):
        {
            "choices": [
                {"text": "<corrected_sentence>", ...},
                ...
            ],
            ...
        }
    """
    # Default result so that every failure path has something to return.
    corrected_sentence = ""

    # A timeout keeps an unresponsive server from blocking the program forever.
    response = requests.post("https://api.openai.com/v1/completions",
                             headers=headers, json=data, timeout=60)

    if response.status_code != 200:
        if response.status_code == 429:
            logger.error("Error: Too many requests. Please try again later.")
        logger.error(f"Error: API call failed with status code {response.status_code}.")
        logger.error(f"Response: {response.text}")
        return corrected_sentence

    # Parse the body once; fall back to a single empty choice when none are present.
    choices = response.json().get("choices") or [{}]

    if Config.NUM_OF_RESPONSES > 1:
        # Use only the choices that actually came back, even if fewer than requested.
        texts = [choice.get("text", "").strip() for choice in choices[:Config.NUM_OF_RESPONSES]]
        corrected_sentence = "".join(f"{text}\n\n" for text in texts)
    else:
        corrected_sentence = choices[0].get("text", "").strip()

    # This will only run the similarity check if the user asked for it via --similarity-check
    print_similarity_check(training_corpus, corrected_sentence, similarity_check, no_chat_gpt=False)
    print_corrected_sentence(corrected_sentence, raw_markov, response, sentence, no_chat_gpt=False)
    return corrected_sentence
def dont_make_api_request(training_corpus, raw_markov, sentence, similarity_check):
    """
    Print the output of a run without sending a POST request to the OpenAI API.

    Placeholder text stands in for the API response so the same print helpers can be used.

    Args:
        training_corpus (str): Training corpus file name, forwarded to print_similarity_check.
        raw_markov (bool): Forwarded to print_corrected_sentence.
        sentence (str): The input sentence on which completion would be performed.
        similarity_check (bool): Forwarded to print_similarity_check, which is called with
            no_chat_gpt=True.
    """
    if Config.NUM_OF_RESPONSES > 1:
        # One placeholder per response that would have been requested, each followed by a
        # blank line. Built as a str from the start (the accumulator must not begin as None).
        corrected_sentence = "Expected response text...\n\n" * Config.NUM_OF_RESPONSES
    else:
        # There is no response text because we didn't call the API
        corrected_sentence = ""

    print_similarity_check(training_corpus, corrected_sentence, similarity_check, no_chat_gpt=True)
    print_corrected_sentence(corrected_sentence,
                             raw_markov,
                             "Expected response...",  # Placeholder instead of an actual response
                             sentence,
                             no_chat_gpt=True)
    # Uncomment the lines below if you want to print error messages
    # logger.error("Error: Too many requests. Please try again later.")
    # logger.error(f"Error: API call failed with status code {429}.")
    # logger.error(f"Response: {'Expected error response...'}")  # Expected error response
def print_corrected_sentence(corrected_sentence, raw_markov, response, sentence, no_chat_gpt):
    """
    Print the final sentence, optionally preceded by the raw Markov text and the API response.

    Args:
        corrected_sentence (str | None): Text to print; None is logged as an extraction error.
        raw_markov (bool): When true, the raw Markov sentence is printed first.
        response: Either a response object exposing .json(), or a placeholder value such as the
            string passed by dont_make_api_request. Only used when Config.VERBOSE is set.
        sentence (str): The raw Markov sentence.
        no_chat_gpt (bool): When true, the raw Markov sentence is printed first.
    """
    if corrected_sentence is None:
        logger.error("Error: Could not extract the corrected sentence.")
        return

    if raw_markov or no_chat_gpt:
        print(f"[{Fore.YELLOW}RAW MARKOV{Style.RESET_ALL}]\n'{sentence}'\n")

    # TODO: Strip off surrounding quotes if present. They are intermittently present in the response
    if Config.VERBOSE:
        print(f"[{Fore.YELLOW}OPENAI API RESPONSE{Style.RESET_ALL}]")
        # A placeholder response (e.g. a plain string) has no .json(); dump it as it is
        # instead of failing with AttributeError.
        if callable(getattr(response, "json", None)):
            payload = response.json()
        else:
            payload = response
        # Convert the Python object to a formatted JSON string
        pretty_json_str = json.dumps(payload, default=str, indent=4, sort_keys=True)
        # Colorize and print the JSON string
        print(highlight(pretty_json_str, JsonLexer(), TerminalFormatter()))

    print(f"{Fore.LIGHTGREEN_EX}{corrected_sentence}{Fore.RESET}")
def print_similarity_check(training_corpus, corrected_sentence, similarity_check, no_chat_gpt):
    """
    Print a similarity analysis of the output text against the training corpus, if requested.

    Args:
        training_corpus (str): Passed to text_generator.return_corpus_text to obtain the
            corpus text.
        corrected_sentence (str): The output text to compare with the corpus.
        similarity_check (bool): When false, nothing is done.
        no_chat_gpt (bool): When true, no analysis is run and only a notice is printed.
    """
    if not similarity_check:
        return

    if no_chat_gpt:
        print(f"{Fore.YELLOW}[!] Similarity analysis is not possible when --no-chat-gpt is specified.{Style.RESET_ALL}")
        return

    # TODO: How to pass reference without calling this again?
    input_text = text_generator.return_corpus_text(training_corpus)
    (highest_similarity_score,
     average_similarity_score,
     too_similar,
     overly_similar_phrases) = check_similarity(
        input_text, corrected_sentence, Config.SIMILARITY_WINDOW, Config.SIMILARITY_THRESHOLD)

    print(f"[{Fore.YELLOW}SIMILARITY ANALYSIS{Style.RESET_ALL}]")
    print(f" Window size: {Fore.LIGHTCYAN_EX}{Config.SIMILARITY_WINDOW}{Style.RESET_ALL} words")
    print(f" Similarity threshold: {Fore.LIGHTCYAN_EX}{Config.SIMILARITY_THRESHOLD}{Style.RESET_ALL}")

    if too_similar:
        print(
            f" Average exceeding similarity score: {Fore.RED}{average_similarity_score:.2f}{Style.RESET_ALL}")
        print(
            f" Highest exceeding similarity score: {Fore.RED}{highest_similarity_score:.2f}{Style.RESET_ALL}")
        # Put each overly similar phrase on its own indented line
        formatted_list = '\n '.join(overly_similar_phrases)
        print(
            f" Output text is too similar to these phrases:\n "
            f"{Fore.RED}{formatted_list}{Style.RESET_ALL}")
    else:
        print(f" Average similarity score: {Fore.GREEN}{average_similarity_score:.2f}{Style.RESET_ALL}")
        print(f" Highest similarity score: {Fore.GREEN}{highest_similarity_score:.2f}{Style.RESET_ALL}")
        print(f" {Fore.GREEN}Output text is adequately dissimilar.{Style.RESET_ALL}")

    # Sleep for a second so that this output doesn't print below the final output
    # NOTE(review): presumably works around stdout/stderr interleaving - confirm it is still needed.
    time.sleep(1)
def print_verbose_api_request(data):
    """
    Print the outgoing API request payload as colourised, pretty-printed JSON.

    Args:
        data: JSON-serialisable request payload.
    """
    print(f"[{Fore.YELLOW}OPENAI API REQUEST{Style.RESET_ALL}]")
    # Serialise with sorted keys and a 4-space indent, then colourise for the terminal
    print(highlight(json.dumps(data, indent=4, sort_keys=True), JsonLexer(), TerminalFormatter()))
def setup_api_request(max_tokens, sentence):
    """
    Build the payload and headers for an OpenAI completion request about one sentence.

    Args:
        max_tokens (int): Value sent as the request's "max_tokens".
        sentence (str): The sentence embedded in the prompt.

    Returns:
        tuple: (data, headers), where data is the dictionary of request parameters and headers
        is the dictionary carrying the bearer authorization and content type.

    Raises:
        KeyError: If the GPT_API_KEY environment variable is not set.
    """
    # Read the key first so a missing GPT_API_KEY fails before anything else is built
    bearer_token = os.environ["GPT_API_KEY"]

    prompt_text = (
        f'The following sentence may be missing something: "{sentence}". '
        'Please make the sentence make more sense. '
        "And don't return anything but a single sentence. I only want to see one version of the sentence."
    )

    request_headers = {}
    request_headers["Authorization"] = "Bearer " + bearer_token
    request_headers["Content-Type"] = "application/json"

    payload = dict(
        model="gpt-3.5-turbo-instruct",
        prompt=prompt_text,
        temperature=Config.TEMPERATURE,
        max_tokens=max_tokens,
        n=Config.NUM_OF_RESPONSES,
    )
    return payload, request_headers
def test_openai_api():
    """
    Tests if the user can successfully call the OpenAI API.

    Returns:
        bool: True if the API call is successful, False otherwise. This includes the case where
        the GPT_API_KEY environment variable is missing or empty.
    """
    # Look the key up without raising, so a missing key is reported as a failed test
    # (False) rather than escaping as a KeyError.
    api_key = os.environ.get("GPT_API_KEY")
    if not api_key:
        print(f"{Fore.RED}[-] Error while testing OpenAI API: {Style.RESET_ALL}"
              f"the GPT_API_KEY environment variable is not set.")
        return False

    try:
        # Client construction is inside the try so its failures are reported the same way.
        client = OpenAI(api_key=api_key)
        # Perform a simple API call.
        # FYI this is currently testing via the openai module,
        # whereas the actual mimic.py program calls the API via the Requests module.
        # Also, we're using the gpt-3.5-turbo-instruct model now because the previous model is deprecated.
        response = client.completions.create(model="gpt-3.5-turbo-instruct",
                                             prompt="Hello, world!",
                                             max_tokens=5)
        if response.choices:
            print(f"{Fore.GREEN}[+] OpenAI API call was successful.{Style.RESET_ALL}")
            return True
        return False
    except Exception as e:
        # Deliberately broad: this is a connectivity self-test and any failure means "not working".
        print(f"{Fore.RED}[-] Error while testing OpenAI API: {Style.RESET_ALL}{str(e)}")
        return False