app.py
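
"""Streamlit dashboard (ElectionAspectAnalyzer) for the 2024 Indonesian election:
renders a candidate / author / topic-sentiment graph with streamlit_agraph and
answers free-text questions about the data via zero-shot prompting of an OpenAI
chat model."""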
import openai
import pandas as pd
import streamlit as st
from src.secret import OPENAI_KEY
from typing import Tuple, List, Dict
from streamlit_agraph import agraph, Node, Edge, Config

openai.api_key = OPENAI_KEY
st.set_page_config(layout="wide")


@st.cache
def load_clean_data(dataset: str = "dataset/result.csv") -> pd.DataFrame:
    """Load the scraped dataset, deduplicate it, and balance it per candidate keyword."""
    data = pd.read_csv(dataset)
    data = data.drop_duplicates(subset=['keyword', 'author', 'topic'])
    data['keyword'] = data["keyword"].replace("Mahfud Md", "Mahfud MD")
    # data = data.sample(500).reset_index(drop=True)
    # Resample 100 rows per candidate (with replacement) to keep the graph size manageable.
    data = pd.concat([df.sample(100, replace=True) for _, df in data.groupby("keyword")])
    return data
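
# NOTE: the expected columns of dataset/result.csv are inferred from how this
# file uses the DataFrame: `keyword` (candidate name), `author`, `topic`, and
# `sentiment` (one of "positive" / "negative" / "neutral").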


@st.cache
def create_graph(data_filter: pd.DataFrame, use_sentiment_aspect: bool = False, mapping_sentiment: Dict[str, str] = {
    "positive": "#B3FFAE", "negative": "#FF7D7D", "neutral": "#F8FFDB"
}) -> Tuple[List[Node], List[Edge]]:
    """Build agraph nodes and edges: candidate nodes (diamonds), author nodes,
    and optionally topic nodes coloured by their sentiment."""
    # Earlier entity-based implementation, kept for reference:
    # aspect_global = []
    # nodes, edges = [], []
    # candidates, organizations = [], []
    # for _, i in data_filter.iterrows():
    #     candidate_name = i['name']
    #     organization = i['entity']
    #     if organization != "" and isinstance(organization, str):
    #         for person in organization.split(","):
    #             if candidate_name not in candidates:
    #                 nodes.append(Node(id=candidate_name, label=candidate_name, symbolType="diamond", color="#FFF6F6", size=20))
    #                 candidates.append(candidate_name)
    #             if person not in organizations and person not in candidates and person not in aspect_global:
    #                 nodes.append(Node(id=person, label=person, color="#A7D397", size=15))
    #                 organizations.append(person)
    #             edges.append(Edge(source=person, target=candidate_name))
    #             if use_sentiment_aspect:
    #                 sentiments = i['sentiment']
    #                 aspects = i['aspect']
    #                 if aspects != "" and isinstance(aspects, str):
    #                     for aspect, sentiment in zip(aspects.split(","), sentiments.split(",")):
    #                         if aspect not in aspect_global and aspect not in organizations and aspect not in candidates:
    #                             # print(f'[ASPECT] - {aspect} is not available in, orgs: {aspect not in organizations} asp: {aspect not in aspect_global}')
    #                             nodes.append(Node(id=aspect, label=aspect, size=10, color=mapping_sentiment.get(sentiment)))
    #                             edges.append(Edge(source=aspect, target=person, label=sentiment))
    #                             aspect_global.append(aspect)
    aspect_global = []
    nodes, edges = [], []
    candidates, authors = [], []
    for _, i in data_filter.iterrows():
        candidate = i['keyword']
        author = i['author']
        if author != "" and isinstance(author, str):
            # Add the candidate and author nodes once each, then connect them.
            if candidate not in candidates:
                nodes.append(Node(id=candidate, label=candidate, symbolType="diamond", color="#FFF6F6", size=25))
                candidates.append(candidate)
            if author not in authors:
                nodes.append(Node(id=author, label=author, symbolType="diamond", color="#A7D397", size=15))
                authors.append(author)
            # elif author in authors and candidate in candidates:
            edges.append(Edge(source=author, target=candidate))
            if use_sentiment_aspect:
                sentiment = i['sentiment']
                aspect = i['topic']
                if aspect != "" and isinstance(aspect, str) and aspect not in aspect_global:
                    nodes.append(Node(id=aspect, label=aspect, size=10, color=mapping_sentiment.get(sentiment)))
                    edges.append(Edge(source=aspect, target=author, label=sentiment))
                    aspect_global.append(aspect)
    return nodes, edges


def prompt_qa(data: pd.DataFrame, query: str) -> str:
    """Build the zero-shot QA prompt (in Indonesian): answer the question from the
    records above with sensible reasoning, replying in the `jawaban:` format."""
    prompt = f"""
    data = {data.to_dict('records')}
    jawab pertanyaan berikut berdasarkan informasi di atas yang diolah sesuai reasoning yang masuk akal.
    pertanyaan: Siapa Pemenang pemilu 2024?
    jawaban: Ganjar Pranowo
    pertanyaan: {query}
    dengan format di bawah:
    jawaban:
    """
    return prompt


def agent_qa_zero_shot(data: pd.DataFrame, query: str, model_base: str = "gpt-3.5-turbo-16k") -> Tuple[str, int]:
    """Ask the model the user's question over the sampled records.
    Returns the answer text and total token usage (empty string / 0 on failure)."""
    token_usage = 0
    response_extraction = ""
    try:
        response = openai.ChatCompletion.create(
            model=model_base,
            messages=[{"role": "user", "content": prompt_qa(data, query)}],
            temperature=0.5, max_tokens=512, top_p=1.0,
            frequency_penalty=0.0, presence_penalty=0.0
        )
        response_extraction = response["choices"][0]["message"]["content"]
        token_usage = response["usage"]["total_tokens"]
    except Exception as E:
        print(f"[ERROR] - {E}")
        print("Retry with Recursive Func")
        # agent_qa_zero_shot(data, query)
    return response_extraction, token_usage


def app(data: pd.DataFrame, config: Config):
    # Interface section
    st.sidebar.header("ElectionAspectAnalyzer v.0.1")
    # Sidebar section: pick which candidates to show in the graph.
    candidates = data["keyword"].unique().tolist()
    filter_candidate = st.sidebar.multiselect(
        "Select Candidates:",
        options=candidates,
        default=candidates[:3]
    )
    filter_data = data[data['keyword'].isin(filter_candidate)].reset_index(drop=True)
    use_aspect_sentiment = st.sidebar.checkbox("Use Aspect-Sentiment")
    # Graph section
    with st.spinner("Preprocess Data..."):
        filter_node, filter_edge = create_graph(filter_data, use_sentiment_aspect=use_aspect_sentiment)
    st.success(f"Total Nodes Loaded: {len(filter_node)}")
    return_value = agraph(
        nodes=filter_node,
        edges=filter_edge,
        config=config
    )
    # QnA section
    # NOTE: sample only a few rows per candidate to reduce OpenAI token usage (cost).
    data_sample = pd.concat([df.sample(3, replace=False) for _, df in filter_data.groupby("keyword")])
    query = st.sidebar.text_input(label="Any Question about Election 2024?")
    if query != "":
        response, _ = agent_qa_zero_shot(data=data_sample, query=query)
        st.sidebar.success(response)


if __name__ == "__main__":
    config = Config(
        width=1000, height=500,
        directed=True, physics=True, hierarchical=False
    )
    data = load_clean_data()
    app(data=data, config=config)
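
# How to run (a sketch, assuming the repository layout implied by the imports and
# defaults above: src/secret.py providing OPENAI_KEY, and dataset/result.csv):
#
#   pip install streamlit streamlit-agraph pandas "openai<1"
#   streamlit run app.py
#
# The "openai<1" pin is assumed because this file uses the pre-1.0
# openai.ChatCompletion API. src/secret.py is expected to look roughly like:
#   OPENAI_KEY = "sk-..."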