Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
xingjian-zhang committed Jun 14, 2024
2 parents f3e1b09 + d82dc28 commit 16b2c0d
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 227 deletions.
90 changes: 25 additions & 65 deletions benchmark/aspect_prediction/eval.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Evaluate experiment results based on the model generated output (file)."""
import pandas as pd
from massw.metrics import compute_metrics, flatten_metrics
from massw.api.api_gpt import raw_output_to_dict_gpt
from massw.api.api_mistral import raw_output_to_dict_mistral
from utils import postprocess_cot, TASK_NAMES, TASK2GT

import argparse
import json
import sys
Expand All @@ -7,100 +13,54 @@
sys.path.append("..")
nest_asyncio.apply()

import pandas as pd
from massw.metrics import compute_metrics, flatten_metrics
from massw.api.api_gpt import raw_output_to_dict_gpt
from massw.api.api_mistral import raw_output_to_dict_mistral
from utils import postprocess_cot, TASK_NAMES, TASK2GT


def postprocess_output(model_output_dir,
                       reference_path,
                       used_cot=False,
                       model_type="gpt"):
    """
    Process model output files to match predictions with references.

    Args:
        model_output_dir (str): Directory containing the output files.
        reference_path (str): Path to the JSONL file with reference data
            (one JSON object per line, each carrying a "pid" key).
        used_cot (bool): Flag to determine if COT processing is needed.
        model_type (str): Type of model used to adjust processing logic
            ("gpt" or "mistral").

    Returns:
        dict: A dictionary containing predictions and references by task.

    Raises:
        ValueError: If ``model_type`` is neither "gpt" nor "mistral".
    """
    results = {}
    with open(reference_path, "r", encoding="utf-8") as f:
        references = [json.loads(line) for line in f]
    id2ref = {r["pid"]: r for r in references}

    for task_name in TASK_NAMES:
        gt_name = TASK2GT[task_name]
        model_path = f"{model_output_dir}/{task_name}.tsv"

        if model_type == "gpt":
            id2predictions = raw_output_to_dict_gpt(model_path)
        elif model_type == "mistral":
            id2predictions = raw_output_to_dict_mistral(model_path)
        else:
            raise ValueError(f"Model type {model_type} not supported.")

        if used_cot:
            # COT outputs wrap the answer in reasoning text; strip it.
            for pid in id2predictions:
                id2predictions[pid] = postprocess_cot(id2predictions[pid])

        # Iterate ONE shared pid sequence for both lists so that
        # predictions[i] always corresponds to references[i]; pids with
        # no reference entry are dropped from both sides, keeping the
        # two lists the same length.
        pids = [pid for pid in id2predictions if pid in id2ref]
        results[task_name] = {
            "predictions": [id2predictions[pid] for pid in pids],
            "references": [id2ref[pid][gt_name] for pid in pids],
        }

    return results


def main():
"""
Main function to process the model outputs and compute evaluation metrics.
"""
"""Run main function to process and compute evaluation metrics."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_output_dir",
Expand Down Expand Up @@ -132,10 +92,10 @@ def main():

metrics = {}

for task_name in results:
for task_name, task_results in results.items():
print(f"Processing task: {task_name}")
predictions = results[task_name]["predictions"]
references = results[task_name]["references"]
predictions = task_results["predictions"]
references = task_results["references"]
metrics[task_name] = flatten_metrics(
compute_metrics(
predictions,
Expand Down
80 changes: 47 additions & 33 deletions benchmark/aspect_prediction/prompts.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,50 @@
"""
This program defines a series of functions that automate the generation \
of prompts for academic research tasks.
This program defines a series of functions that automate the generation \
of prompts for academic research tasks.
Each function corresponds to a specific aspect of research paper generation, \
such as idea generation, method
recommendation, outcome prediction, future work recommendation, and title prediction.
such as idea generation, method \
recommendation, outcome prediction, future work recommendation, \
and title prediction.
"""

SYSTEM_PROMPT = """
You are an expert in research tasked with generating detailed prompts for \
various aspects of academic research papers.
Each task involves creating a specific type of prompt based on the provided information. \
Here are the definitions of
various aspects of academic research papers.
Each task involves creating a specific type of prompt \
based on the provided information. Here are the definitions of \
each part you will work with:
- Concept
- Definition
- Relative Time
- Context: The status quo of related literature or reality which motivated this study.
- Context: The status quo of related literature or reality \
which motivated this study.
This could normally be a problem, a research question, or a research gap \
that has not been successfully addressed by previous work. This is anything that
happened before this study.
that has not been successfully addressed by previous work. \
This is anything that happened before this study.
- Key Idea: The main intellectual merit of this paper, often in comparison to the context.
- Key Idea: The main intellectual merit of this paper, \
often in comparison to the context.
This could normally be a novel idea or solution proposed in this paper \
that distinguishes it from what's already done in literature.
that distinguishes it from what's already done in literature.
This is proposed in this study.
- Method: The specific research method that investigates and validates the key idea.
- Method: The specific research method that investigates \
and validates the key idea.
This could be an experimental setup, a theoretical framework, or \
other necessary methodology to implement and/or evaluate the key idea.
other necessary methodology to implement and/or evaluate the key idea.
This is performed in this study.
- Outcome: The factual statement about the study output.
This could be the experiment results and any other measurable outcome that has occurred.
It marks whether the key hypothesis is testified or not. This is produced in this study.
- Outcome: The factual statement about the study output.
This could be the experiment results and any other measurable \
outcome that has occurred.
It marks whether the key hypothesis is testified or not. \
This is produced in this study.
- Projected Impact: The author-anticipated impact of the work on the field, \
and potential further research identified by the author that may improve or extend this study.
and potential further research identified by the author \
that may improve or extend this study.
This is anything being anticipated but has not happened yet.
"""

Expand All @@ -50,7 +57,8 @@ def idea_generation(data):
data (dict): Contains 'context' and 'key_idea' from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for idea generation.
tuple: A tuple containing the prompt and the ground truth for
idea generation.
"""
context = data['context']
prompt = f"Given the context: '{context}', generate key ideas \
Expand All @@ -61,13 +69,15 @@ def idea_generation(data):

def method_recommendation(data):
"""
Recommend a method to validate a key idea based on the provided context and key idea.
Recommend a method to validate a key idea.
Args:
data (dict): Contains 'context', 'key_idea', and 'method' from the research data.
data (dict): Contains 'context', 'key_idea', and 'method'
from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for method recommendation.
tuple: A tuple containing the prompt and the ground truth
for method recommendation.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -79,33 +89,36 @@ def method_recommendation(data):

def outcome_prediction(data):
    """
    Predict the potential outcome of a research.

    Args:
        data (dict): Contains 'context', 'key_idea', 'method', and 'outcome'.

    Returns:
        tuple: A tuple containing the prompt and the ground truth
            for outcome prediction.
    """
    context = data['context']
    key_idea = data['key_idea']
    method = data['method']
    # Implicit string concatenation instead of backslash continuation:
    # a backslash inside an f-string splices the next line's leading
    # whitespace into the user-facing prompt (or forces the continuation
    # to column 0). Parenthesized parts keep the prompt text exact.
    prompt = (f"Based on the context: '{context}', "
              f"the key idea: '{key_idea}', "
              f"and the recommended method: '{method}', "
              "predict the potential outcome of this research. ")
    ground_truth = data['outcome']
    return prompt, ground_truth


def future_work_recommendation(data):
"""
Suggest projected impact for the research based on context, key idea, method, and outcome.
Suggest projected impact for the research.
Args:
data (dict): Contains 'context', 'key_idea', 'method', 'outcome', \
and 'future_impact' from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for future work recommendation.
tuple: A tuple containing the prompt and the ground truth
for future work.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -120,14 +133,14 @@ def future_work_recommendation(data):

def predict_title(data):
"""
Predict the title of a research paper based on context, key idea, \
method, outcome, and projected impact.
Predict the title of a research paper.
Args:
data (dict): Contains all necessary information from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for title prediction.
tuple: A tuple containing the prompt and the ground truth
for title prediction.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -136,7 +149,8 @@ def predict_title(data):
future_impact = data['future_impact']
prompt = f"Given the context: '{context}', the key idea: '{key_idea}', \
the method: '{method}', the outcome: '{outcome}', \
and the future impact: '{future_impact}', predict the title of this research paper. \
The title should be concise and reflective of the core aspects."
and the future impact: '{future_impact}', \
predict the title of this research paper. \
The title should be concise and reflective of the core aspects."
ground_truth = data.get('title', '')
return prompt, ground_truth
Loading

0 comments on commit 16b2c0d

Please sign in to comment.