-
Notifications
You must be signed in to change notification settings - Fork 0
/
UroBot_flask_app.py
140 lines (113 loc) · 5.21 KB
/
UroBot_flask_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import textwrap
import markdown
import pandas as pd
from openai import OpenAI
from chromadb import PersistentClient
from embedding import SentenceTransformerEmbeddingFunction
from flask import Flask, request, render_template_string, render_template, jsonify
# WSGI application object; the route handlers in this module are registered on it.
app = Flask(__name__)
def initialize_components():
    """Build the shared service objects and publish them as module globals.

    Sets ``openai_client``, ``embedding_func``, ``db_client`` and
    ``collection``. The embedding function has ``initialize_model()`` called
    on it before it is handed to the collection lookup.
    """
    global openai_client, embedding_func, db_client, collection

    # Chat-completion client (constructed with its defaults).
    openai_client = OpenAI()

    # Embedding function used by the vector store for query texts.
    embedding_func = embedder = SentenceTransformerEmbeddingFunction()
    embedder.initialize_model()

    # On-disk vector database and the collection that holds the documents.
    db_client = store = PersistentClient(path="UroBot_database")
    collection = store.get_collection(
        name="UroBot_v1.0",
        embedding_function=embedder,
    )
# Executed at import time so the module-level clients exist before any request is served.
initialize_components()
def _split_table_row(line):
    """Return the whitespace-stripped cell texts of one Markdown table line."""
    # Strip whitespace first so the outer pipes are really at the ends.
    return [cell.strip() for cell in line.strip().strip('|').split('|')]


def _is_separator_row(line):
    """Return True if *line* is a header separator such as ``|:--|--:|``."""
    stripped = line.strip()
    return (
        '|' in stripped
        and '-' in stripped
        and all(char in '|:- ' for char in stripped)
    )


def _column_alignments(separator_line):
    """Translate a separator row into a list of CSS ``text-align`` values."""
    alignments = []
    for cell in _split_table_row(separator_line):
        if cell.startswith(':') and cell.endswith(':'):
            alignments.append('center')
        elif cell.endswith(':'):
            alignments.append('right')
        else:
            alignments.append('left')
    return alignments


def _render_table_row(cells, cell_tag, alignments):
    """Render one complete ``<tr>`` element using *cell_tag* for each cell."""
    rendered = ['  <tr>\n']
    for idx, cell in enumerate(cells):
        # A row may have more cells than the separator declared; those extra
        # cells simply get no alignment instead of raising IndexError.
        style = (
            f' style="text-align: {alignments[idx]};"'
            if idx < len(alignments) else ''
        )
        rendered.append(f'    <{cell_tag}{style}>{cell}</{cell_tag}>\n')
    rendered.append('  </tr>\n')
    return ''.join(rendered)


def convert_markdown_to_html_or_text(input_text):
    """Convert Markdown pipe tables in *input_text* to HTML tables.

    A table starts at a line containing ``|`` that is immediately followed by
    a separator row (only ``|``, ``:``, ``-`` and spaces) and runs until the
    next blank line or the end of the text. The first line becomes a row of
    ``<th>`` cells, every following line a row of ``<td>`` cells, and the
    colons in the separator row select the ``text-align`` style per column.

    Text that directly precedes a table is wrapped in ``<p>...</p>``; text
    after the last table is appended unchanged. Input without any table is
    returned as-is apart from surrounding whitespace being stripped.
    Cell and paragraph contents are not HTML-escaped.

    Args:
        input_text: Markdown or plain text.

    Returns:
        The converted string with leading and trailing whitespace removed.
    """
    lines = input_text.strip().split('\n')
    parts = []          # finished output fragments, joined at the end
    pending_text = []   # non-table lines seen since the last table
    i = 0

    while i < len(lines):
        line = lines[i]
        starts_table = (
            '|' in line
            and not _is_separator_row(line)
            and i + 1 < len(lines)
            and _is_separator_row(lines[i + 1])
        )
        if not starts_table:
            pending_text.append(line)
            i += 1
            continue

        # Emit the text collected so far exactly once, as a paragraph.
        preceding = '\n'.join(pending_text).strip()
        if preceding:
            parts.append(f"<p>{preceding}</p>\n")
        pending_text = []

        alignments = _column_alignments(lines[i + 1])
        parts.append('<table>\n')
        parts.append(_render_table_row(_split_table_row(line), 'th', alignments))
        i += 2  # skip the header and the separator row

        # Body rows continue until a blank line ends the table.
        while i < len(lines) and lines[i].strip():
            parts.append(
                _render_table_row(_split_table_row(lines[i]), 'td', alignments)
            )
            i += 1
        parts.append('</table>\n')

    parts.append('\n'.join(pending_text).strip())
    return ''.join(parts).strip()
def process_query(query):
    """Answer *query* from the nine closest documents in the collection.

    The retrieved documents are concatenated into a context that is placed in
    the system prompt of a ``gpt-4o`` chat completion; the user's query is
    sent as the user message.

    Args:
        query: The question text entered by the user.

    Returns:
        A ``(answer, documents)`` tuple: the model's reply text and a list
        with one display string per retrieved document. Documents whose
        metadata marks them as ``"table"`` are rendered from their CSV via
        pandas, all others go through ``convert_markdown_to_html_or_text``.
    """
    query_results = collection.query(query_texts=[query], n_results=9)

    context_parts = []
    documents = []
    # Results are nested one level per query text; only one query was sent.
    retrieved = zip(
        query_results["ids"][0],
        query_results["documents"][0],
        query_results["metadatas"][0],
    )
    for doc_id, text, metadata in retrieved:
        # The first two characters of the stored ID are dropped for display
        # (presumably a fixed prefix; verify against the ingestion code).
        label = doc_id[2:]
        context_parts.append(f"\nDocument ID {label}:\n{text}\n")

        if metadata["paragraph_type"] == "table":
            # NOTE(review): "dataframe" is passed straight to read_csv, so it
            # is assumed to be a path (or URL) to a CSV file; confirm.
            table_html = pd.read_csv(metadata["dataframe"]).to_html(index=False)
            documents.append(f"Document ID {label}:\n \n{table_html} \n")
        else:
            rendered = convert_markdown_to_html_or_text(text)
            documents.append(f"Document ID {label}:\n \n{rendered} \n")

    context = "".join(context_parts)

    # Prompt wording fixes relative to the previous version: "aks" -> "ask",
    # and the fallback sentence is now terminated before the next instruction.
    system_prompt = (
        "You are a helpful and understanding urologist answering questions to the patient."
        " Use full sentences and answer human-like and ask if you can answer more questions after"
        " giving an answer based on the following context: \n"
        "---"
        f"{context}"
        "--- \n"
        "If the context does not provide information on the question respond with"
        " 'Sorry my knowledge base does not include information on that topic'. "
        "Ensure your answer is annotated with the Document IDs of the context that were used to answer the question. "
        "Make sure you use the following format for the annotations: (Document ID 'number_given_in_context')."
        " You must use the words Document ID for each annotation."
    )

    completion = openai_client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ],
        temperature=0.2,
        max_tokens=2000,
    )
    return completion.choices[0].message.content, documents
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the main page; on POST, answer the submitted query first.

    A GET request renders the template with ``answer``, ``query`` and
    ``documents`` all set to ``None``. A POST request reads the ``query``
    form field, runs it through ``process_query`` and passes the results on.
    """
    page = {"answer": None, "query": None, "documents": None}

    if request.method == 'POST':
        page["query"] = request.form['query']
        page["answer"], page["documents"] = process_query(page["query"])

    return render_template('index.html', **page)
# Start the built-in development server only when this file is run directly.
# NOTE(review): debug=True turns on Flask's interactive debugger and reloader;
# make sure this entry point is never used for a publicly reachable deployment.
if __name__ == '__main__':
    app.run(debug=True)