Skip to content

Commit

Permalink
Merge pull request #10 from argrecsys/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ansegura7 authored Dec 12, 2023
2 parents 42d3d5b + a364207 commit f99c045
Show file tree
Hide file tree
Showing 103 changed files with 19,490 additions and 8,576 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Digital Government - DM Visual Analytics with ChatGPT
![version](https://img.shields.io/badge/version-0.3.0-blue)
![last-update](https://img.shields.io/badge/last_update-11/27/2023-orange)
![version](https://img.shields.io/badge/version-0.3.5-blue)
![last-update](https://img.shields.io/badge/last_update-12/12/2023-orange)
![license](https://img.shields.io/badge/license-Apache_2.0-brightgreen)

## Dependencies
Expand Down
53 changes: 7 additions & 46 deletions code/etls/generate_argument_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""
Created by: Andrés Segura-Tinoco
Version: 0.3
Version: 0.4
Created on: Nov 23, 2023
Updated on: Nov 27, 2023
Description: Generate argument data.
Updated on: Dec 12, 2023
Description: Generate argument data by topics.
"""

import util_libs as ul
Expand All @@ -13,8 +13,8 @@
MAX_TEXT_SIZE = 200


def create_argument_data(proposal_id: int, arg_data: dict, comments: dict):
prop_text = get_comment_text(comments[proposal_id])
def create_argument_data(proposal_id: int, prop_data: dict, arg_data: dict):
prop_text = get_comment_text(prop_data["text"])
prop_short_text = get_comment_short_text(prop_text)

argument_data = {
Expand All @@ -37,7 +37,6 @@ def create_argument_data(proposal_id: int, arg_data: dict, comments: dict):

for argument in arguments:
arg_desc = argument["arg_description"]
comment_ids = argument["comment_ids"]
arg_intent = "support" if argument["arg_intent"] == "A favor" else "attack"
arg_text = get_comment_text(f"[{arg_intent.upper()}] {arg_desc}")
arg_short_text = get_comment_short_text(arg_text)
Expand All @@ -50,48 +49,13 @@ def create_argument_data(proposal_id: int, arg_data: dict, comments: dict):
"short_text": arg_short_text,
}

for comment_id in comment_ids:
comment_id = int(comment_id)

if comment_id in comments:
comment_text = get_comment_text(comments[comment_id])
comment_text = f"[{comment_id}] " + comment_text
comment_short_text = get_comment_short_text(comment_text)

if len(comment_text) > 0:
comment_item = {
"name": str(comment_id),
"value": 100,
"text": comment_text,
"short_text": comment_short_text,
}
arg_item["children"].append(comment_item)
else:
print(
f"- Incorrect comment id: {comment_id} for proposal: {proposal_id}"
)

arg_cat_item["children"].append(arg_item)

argument_data["children"].append(arg_cat_item)

return argument_data


def get_proposal_comments(prop_data: list) -> dict:
    """Collect the text of a proposal and its comments, keyed by integer id.

    Every item must provide a "text" entry plus either a "proposal_id" or a
    "comment_id" entry; "proposal_id" takes precedence when both are present.
    Items that resolve to the same id have their texts joined with ". " in
    input order.

    Args:
        prop_data: Dict-like records describing one proposal and its comments.

    Returns:
        Dict mapping each integer id to its stripped (possibly merged) text.

    Raises:
        KeyError: If an item has no "text", or has neither id key.
        ValueError: If the selected id cannot be converted with ``int()``.
    """
    comments = {}

    for item in prop_data:
        text = item["text"].strip()

        # Named ``item_id`` (not ``id``) so the builtin ``id()`` is not shadowed.
        raw_id = item["proposal_id"] if "proposal_id" in item else item["comment_id"]
        item_id = int(raw_id)

        if item_id in comments:
            # Same id seen before: append this fragment to the existing text.
            comments[item_id] += ". " + text
        else:
            comments[item_id] = text

    return comments


def get_comment_text(comment: str, size: int = 10000):
text = comment.strip()
text = text.replace("..", ".")
Expand Down Expand Up @@ -147,6 +111,7 @@ def main():

for prop_name, prop_data in proposals.items():
prop_id = int(prop_name)
print(f"Proposal: {prop_id}")

# Filters
if prop_id not in arguments:
Expand All @@ -155,13 +120,9 @@ def main():
arg_data = arguments[prop_id]
print(f"Argument types: {len(arg_data)}")

comments = get_proposal_comments(prop_data)
print(f"Number of comments: {len(comments)}")

json_data = create_argument_data(prop_id, arg_data, comments)
json_data = create_argument_data(prop_id, prop_data[0], arg_data)
print(f"Number of items: {len(json_data)}")

output_file = f"{solution_path}/result/json_data/{prop_name}.json"
output_file = f"{solution_path}/result/json_data/arguments/{prop_name}.json"
ul.save_dict_to_json(output_file, json_data, 2)

Expand Down
23 changes: 19 additions & 4 deletions code/etls/generate_network_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
import datetime


def create_argument_data(proposal_id: int, arg_data: dict):
proposal_name = f"proposal {proposal_id}"
def create_argument_data(proposal_name: str, arg_data: dict):
nodes = [{"id": proposal_name, "group": 0}]
links = []

Expand Down Expand Up @@ -89,16 +88,32 @@ def load_arguments(input_path: str):
def main():
solution_path = "C:/Dev Projects/dgov-visual-analytics"
input_path = f"{solution_path}/data/gpt_data"
output_path = f"{solution_path}/result/json_data/networks"
arguments = load_arguments(input_path)
print(f"Number of arguments: {len(arguments)}")

graph_root = "decide madrid"
nodes = [{"id": graph_root, "group": 0}]
links = []

for prop_id, arg_data in arguments.items():
json_data = create_argument_data(prop_id, arg_data)
prop_name = f"proposal {prop_id}"
json_data = create_argument_data(prop_name, arg_data)
print(f"Number of items: {len(json_data)}")

output_file = f"{solution_path}/result/json_data/networks/{prop_id}.json"
output_file = f"{output_path}/{prop_id}.json"
ul.save_dict_to_json(output_file, json_data, 2)

# Create super graph
nodes.extend(json_data["nodes"])
link = {"source": graph_root, "target": prop_name, "value": 3}
links.append(link)
links.extend(json_data["links"])

super_graph = {"nodes": nodes, "links": links}
output_file = f"{output_path}/super_graph.json"
ul.save_dict_to_json(output_file, super_graph, 2)


#####################
### START PROGRAM ###
Expand Down
79 changes: 70 additions & 9 deletions code/etls/generate_proposal_data.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""
Created by: Andrés Segura-Tinoco
Version: 0.3
Version: 0.4
Created on: Nov 23, 2023
Updated on: Nov 27, 2023
Description: Generate proposal data.
Updated on: Dec 12, 2023
Description: Generate proposal data with identified arguments.
"""

import util_libs as ul
Expand All @@ -13,7 +13,9 @@
MAX_TEXT_SIZE = 200


def create_proposal_data(proposal_id: int, proposal_hierarchy: dict, comments: dict):
def create_proposal_data(
proposal_id: int, proposal_hierarchy: dict, comments: dict, arg_data: dict
):
prop_text = get_comment_text(comments[proposal_id])
prop_short_text = get_comment_short_text(prop_text)

Expand All @@ -26,7 +28,7 @@ def create_proposal_data(proposal_id: int, proposal_hierarchy: dict, comments: d
parent_id = -1

proposal_data["children"] = __create_proposal_data_inner(
proposal_data["children"], proposal_hierarchy, comments, parent_id
proposal_data["children"], proposal_hierarchy, comments, arg_data, parent_id
)

return proposal_data
Expand All @@ -36,6 +38,7 @@ def __create_proposal_data_inner(
proposal_data: list,
proposal_hierarchy: dict,
comments: dict,
arg_data: dict,
parent_id: int = -1,
):
comment_ids = proposal_hierarchy["comment_ids"]
Expand All @@ -45,25 +48,40 @@ def __create_proposal_data_inner(
if curr_parent_id == parent_id:
prop_text = get_comment_text(comments[curr_comment_id])
prop_short_text = get_comment_short_text(prop_text)
argument = arg_data.get(curr_comment_id, {})
arg_intent = (
{"A favor": "support", "En contra": "attack"}
.get(argument.get("arg_intent", ""), "")
.upper()
)
arg_category = argument.get("arg_cat", "")

if curr_comment_id in parent_ids:
item = {
"name": str(curr_comment_id),
"children": [],
"text": prop_text,
"short_text": prop_short_text,
"short_text": f"[{arg_intent} - {arg_category}] {prop_short_text}"
if arg_intent != ""
else prop_short_text,
}
proposal_data.append(item)
__create_proposal_data_inner(
item["children"], proposal_hierarchy, comments, curr_comment_id
item["children"],
proposal_hierarchy,
comments,
arg_data,
curr_comment_id,
)
else:
if len(prop_text) > 0:
item = {
"name": str(curr_comment_id),
"value": 100,
"text": prop_text,
"short_text": prop_short_text,
"short_text": f"[{arg_intent} - {arg_category}] {prop_short_text}"
if arg_intent != ""
else prop_short_text,
}
proposal_data.append(item)

Expand Down Expand Up @@ -98,6 +116,37 @@ def get_comment_short_text(comment: str):
return short_comment


def load_arguments(input_path: str):
    """Load the argument annotations of every proposal from ``arguments.csv``.

    Reads ``{input_path}/arguments.csv`` via ``ul.read_csv_with_encoding`` and
    walks the result with ``iterrows()``. Each row names one proposal, a
    bracketed comma-separated list of comment ids (e.g. ``"[12, 15]"``) and
    the argument category, description and intent shared by those comments.

    Args:
        input_path: Directory that contains ``arguments.csv``.

    Returns:
        Dict mapping the row's ``proposal_id`` value (used as-is) to a dict of
        ``comment_id (int) -> {"arg_cat", "arg_description", "arg_intent"}``.
        A comment id that appears in several rows of the same proposal keeps
        the values of the last such row.

    Raises:
        ValueError: If a non-empty comment id token is not an integer.
    """
    arguments = {}
    file_path = f"{input_path}/arguments.csv"
    data = ul.read_csv_with_encoding(file_path)

    for _, row in data.iterrows():
        # Drop the list decoration ("[", "]", spaces) before splitting on commas.
        raw_ids = row["comment_id"].replace("[", "").replace("]", "").replace(" ", "")
        # Skip empty tokens: "[]" or a trailing comma would otherwise reach
        # int("") and abort the whole load with a ValueError.
        comment_ids = [int(token) for token in raw_ids.split(",") if token]

        arg_cat = row["arguments name"]
        arg_desc = row["argument_description"]
        arg_intent = row["argument types"]

        # Register the proposal even when the row lists no comment ids.
        proposal = arguments.setdefault(row["proposal_id"], {})
        for comment_id in comment_ids:
            # A fresh dict per comment so entries never alias each other.
            proposal[comment_id] = {
                "arg_cat": arg_cat,
                "arg_description": arg_desc,
                "arg_intent": arg_intent,
            }

    return arguments


def read_comment_hierarchy(input_path: str):
hierarchy = {}
file_path = f"{input_path}/comment_hierarchy.csv"
Expand All @@ -122,14 +171,26 @@ def main():
input_path = f"{solution_path}/data/raw_data"
proposals = ul.read_jsonl_data(input_path)
hierarchy = read_comment_hierarchy(input_path)
input_path = f"{solution_path}/data/gpt_data"
arguments = load_arguments(input_path)
print(f"Number of proposals: {len(proposals)}")
print(f"Number of arguments: {len(arguments)}")

for prop_name, prop_data in proposals.items():
prop_id = int(prop_name)

# Filters
if prop_id not in arguments:
continue

arg_data = arguments[prop_id]
print(f"Argument types: {len(arg_data)}")

comments = get_proposal_comments(prop_data)

json_data = create_proposal_data(prop_id, hierarchy[prop_id], comments)
json_data = create_proposal_data(
prop_id, hierarchy[prop_id], comments, arg_data
)
n_items = len(json_data["children"])
print(f"Proposal: {prop_id} and number of items: {n_items}")

Expand Down
Loading

0 comments on commit f99c045

Please sign in to comment.