Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
xingjian-zhang committed Jun 14, 2024
2 parents f3e1b09 + d82dc28 commit 16b2c0d
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 227 deletions.
90 changes: 25 additions & 65 deletions benchmark/aspect_prediction/eval.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
"""Evaluate experiment results based on the model generated output (file)."""
import pandas as pd
from massw.metrics import compute_metrics, flatten_metrics
from massw.api.api_gpt import raw_output_to_dict_gpt
from massw.api.api_mistral import raw_output_to_dict_mistral
from utils import postprocess_cot, TASK_NAMES, TASK2GT

import argparse
import json
import sys
Expand All @@ -7,100 +13,54 @@
sys.path.append("..")
nest_asyncio.apply()

import pandas as pd
from massw.metrics import compute_metrics, flatten_metrics
from massw.api.api_gpt import raw_output_to_dict_gpt
from massw.api.api_mistral import raw_output_to_dict_mistral
from utils import postprocess_cot, TASK_NAMES, TASK2GT


def postprocess_output(model_output_dir,
                       reference_path,
                       used_cot=False,
                       model_type="gpt"):
    """
    Process model output files to match predictions with references.

    Args:
        model_output_dir (str): Directory containing the output files.
        reference_path (str): Path to the JSONL file with reference data
            (one JSON object per line, each carrying a "pid" key).
        used_cot (bool): Flag to determine if COT processing is needed.
        model_type (str): Type of model used to adjust processing logic
            ("gpt" or "mistral").

    Returns:
        dict: A dictionary containing predictions and references by task.

    Raises:
        ValueError: If ``model_type`` is neither "gpt" nor "mistral".
    """
    results = {}
    with open(reference_path, "r", encoding="utf-8") as f:
        references = [json.loads(line) for line in f]
    id2ref = {r["pid"]: r for r in references}

    for task_name in TASK_NAMES:
        gt_name = TASK2GT[task_name]
        model_path = f"{model_output_dir}/{task_name}.tsv"

        if model_type == "gpt":
            id2predictions = raw_output_to_dict_gpt(model_path)
        elif model_type == "mistral":
            id2predictions = raw_output_to_dict_mistral(model_path)
        else:
            raise ValueError(f"Model type {model_type} not supported.")

        if used_cot:
            # COT outputs wrap the answer in reasoning text; strip it.
            for pid in id2predictions:
                id2predictions[pid] = postprocess_cot(id2predictions[pid])

        # Iterate ONE shared pid sequence for both lists so that
        # predictions[i] always corresponds to references[i]; pids with
        # no reference entry are dropped from both sides, keeping the
        # two lists the same length.
        pids = [pid for pid in id2predictions if pid in id2ref]
        results[task_name] = {
            "predictions": [id2predictions[pid] for pid in pids],
            "references": [id2ref[pid][gt_name] for pid in pids],
        }

    return results


def main():
"""
Main function to process the model outputs and compute evaluation metrics.
"""
"""Run main function to process and compute evaluation metrics."""
parser = argparse.ArgumentParser()
parser.add_argument(
"--model_output_dir",
Expand Down Expand Up @@ -132,10 +92,10 @@ def main():

metrics = {}

for task_name in results:
for task_name, task_results in results.items():
print(f"Processing task: {task_name}")
predictions = results[task_name]["predictions"]
references = results[task_name]["references"]
predictions = task_results["predictions"]
references = task_results["references"]
metrics[task_name] = flatten_metrics(
compute_metrics(
predictions,
Expand Down
80 changes: 47 additions & 33 deletions benchmark/aspect_prediction/prompts.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,50 @@
"""
This program defines a series of functions that automate the generation \
of prompts for academic research tasks.
This program defines a series of functions that automate the generation \
of prompts for academic research tasks.
Each function corresponds to a specific aspect of research paper generation, \
such as idea generation, method
recommendation, outcome prediction, future work recommendation, and title prediction.
such as idea generation, method \
recommendation, outcome prediction, future work recommendation, \
and title prediction.
"""

SYSTEM_PROMPT = """
You are an expert in research tasked with generating detailed prompts for \
various aspects of academic research papers.
Each task involves creating a specific type of prompt based on the provided information. \
Here are the definitions of
various aspects of academic research papers.
Each task involves creating a specific type of prompt \
based on the provided information. Here are the definitions of \
each part you will work with:
- Concept
- Definition
- Relative Time
- Context: The status quo of related literature or reality which motivated this study.
- Context: The status quo of related literature or reality \
which motivated this study.
This could normally be a problem, a research question, or a research gap \
that has not been successfully addressed by previous work. This is anything that
happened before this study.
that has not been successfully addressed by previous work. \
This is anything that happened before this study.
- Key Idea: The main intellectual merit of this paper, often in comparison to the context.
- Key Idea: The main intellectual merit of this paper, \
often in comparison to the context.
This could normally be a novel idea or solution proposed in this paper \
that distinguishes it from what's already done in literature.
that distinguishes it from what's already done in literature.
This is proposed in this study.
- Method: The specific research method that investigates and validates the key idea.
- Method: The specific research method that investigates \
and validates the key idea.
This could be an experimental setup, a theoretical framework, or \
other necessary methodology to implement and/or evaluate the key idea.
other necessary methodology to implement and/or evaluate the key idea.
This is performed in this study.
- Outcome: The factual statement about the study output.
This could be the experiment results and any other measurable outcome that has occurred.
It marks whether the key hypothesis is testified or not. This is produced in this study.
- Outcome: The factual statement about the study output.
This could be the experiment results and any other measurable \
outcome that has occurred.
It marks whether the key hypothesis is testified or not. \
This is produced in this study.
- Projected Impact: The author-anticipated impact of the work on the field, \
and potential further research identified by the author that may improve or extend this study.
and potential further research identified by the author \
that may improve or extend this study.
This is anything being anticipated but has not happened yet.
"""

Expand All @@ -50,7 +57,8 @@ def idea_generation(data):
data (dict): Contains 'context' and 'key_idea' from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for idea generation.
tuple: A tuple containing the prompt and the ground truth for
idea generation.
"""
context = data['context']
prompt = f"Given the context: '{context}', generate key ideas \
Expand All @@ -61,13 +69,15 @@ def idea_generation(data):

def method_recommendation(data):
"""
Recommend a method to validate a key idea based on the provided context and key idea.
Recommend a method to validate a key idea.
Args:
data (dict): Contains 'context', 'key_idea', and 'method' from the research data.
data (dict): Contains 'context', 'key_idea', and 'method'
from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for method recommendation.
tuple: A tuple containing the prompt and the ground truth
for method recommendation.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -79,33 +89,36 @@ def method_recommendation(data):

def outcome_prediction(data):
    """
    Predict the potential outcome of a research.

    Args:
        data (dict): Contains 'context', 'key_idea', 'method', and 'outcome'.

    Returns:
        tuple: A tuple containing the prompt and the ground truth
            for outcome prediction.
    """
    context = data['context']
    key_idea = data['key_idea']
    method = data['method']
    # Implicit string concatenation instead of backslash continuation:
    # a backslash inside an f-string splices the next line's leading
    # whitespace into the user-facing prompt (or forces the continuation
    # to column 0). Parenthesized parts keep the prompt text exact.
    prompt = (f"Based on the context: '{context}', "
              f"the key idea: '{key_idea}', "
              f"and the recommended method: '{method}', "
              "predict the potential outcome of this research. ")
    ground_truth = data['outcome']
    return prompt, ground_truth


def future_work_recommendation(data):
"""
Suggest projected impact for the research based on context, key idea, method, and outcome.
Suggest projected impact for the research.
Args:
data (dict): Contains 'context', 'key_idea', 'method', 'outcome', \
and 'future_impact' from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for future work recommendation.
tuple: A tuple containing the prompt and the ground truth
for future work.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -120,14 +133,14 @@ def future_work_recommendation(data):

def predict_title(data):
"""
Predict the title of a research paper based on context, key idea, \
method, outcome, and projected impact.
Predict the title of a research paper.
Args:
data (dict): Contains all necessary information from the research data.
Returns:
tuple: A tuple containing the prompt and the ground truth for title prediction.
tuple: A tuple containing the prompt and the ground truth
for title prediction.
"""
context = data['context']
key_idea = data['key_idea']
Expand All @@ -136,7 +149,8 @@ def predict_title(data):
future_impact = data['future_impact']
prompt = f"Given the context: '{context}', the key idea: '{key_idea}', \
the method: '{method}', the outcome: '{outcome}', \
and the future impact: '{future_impact}', predict the title of this research paper. \
The title should be concise and reflective of the core aspects."
and the future impact: '{future_impact}', \
predict the title of this research paper. \
The title should be concise and reflective of the core aspects."
ground_truth = data.get('title', '')
return prompt, ground_truth
Loading

0 comments on commit 16b2c0d

Please sign in to comment.