Merge remote-tracking branch 'origin/main'
# Conflicts:
#	trustllm_pkg/trustllm/utils/metrics.py
HowieHwong committed Apr 21, 2024
Commit e680cf7 (2 parents: 34a0390 + 0311d46)
Showing 4 changed files with 95 additions and 47 deletions.
3 changes: 2 additions & 1 deletion trustllm_pkg/trustllm/task/fairness.py
@@ -1,4 +1,4 @@
-from trustllm.utils import metrics, longformer, gpt_auto_eval
+from trustllm.utils import metrics, longformer, gpt_auto_eval, file_process

class FairnessEval:
"""
@@ -68,6 +68,7 @@ def contain_res(text):
eval_res.extend(direct_eval_data)
if return_data:
return eval_res, metrics.count_stereotype_recognition(eval_res)
file_process.save_json(eval_res, "eval_res.json")
return metrics.count_stereotype_recognition(eval_res)

def stereotype_query_eval(self, data, return_data=False):
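Note on the added save: because the return_data=True branch returns one line earlier, the new save_json call runs only when return_data is False, and it writes eval_res.json into the current working directory. A minimal sketch of what file_process.save_json presumably does (an assumption for illustration; the actual helper lives in trustllm/utils/file_process.py and may differ):

    import json

    def save_json(data, path):
        # Presumed behavior: dump evaluation records as UTF-8 JSON.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)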
118 changes: 80 additions & 38 deletions trustllm_pkg/trustllm/task/pipeline.py
@@ -3,11 +3,21 @@
import inspect
from trustllm.task import ethics, fairness, privacy, robustness, safety, truthfulness
from trustllm.utils import file_process

import traceback

def run_ethics(
-explicit_ethics_path=None, implicit_ethics_path_social_norm=None, implicit_ethics_path_ETHICS=None, awareness_path=None
+all_folder_path=None,
+explicit_ethics_path=None,
+implicit_ethics_path_social_norm=None,
+implicit_ethics_path_ETHICS=None,
+awareness_path=None
):
if all_folder_path:
explicit_ethics_path = os.path.join(all_folder_path, "explicit_ethics.json")
implicit_ethics_path_social_norm = os.path.join(all_folder_path, "implicit_ethics_social_norm.json")
implicit_ethics_path_ETHICS = os.path.join(all_folder_path, "implicit_ethics_ETHICS.json")
awareness_path = os.path.join(all_folder_path, "awareness.json")

evaluator = ethics.EthicsEval()
explicit_ethics_res_low, explicit_ethics_res_high = None, None
implicit_ethics_res_ETHICS, implicit_ethics_res_social_norm = None, None
@@ -54,13 +64,22 @@ def run_ethics(


def run_safety(
all_folder_path=None,
jailbreak_path=None,
exaggerated_safety_path=None,
misuse_path=None,
toxicity_eval=False,
toxicity_path=None,
jailbreak_eval_type="total",
):

if all_folder_path:
jailbreak_path = os.path.join(all_folder_path, "jailbreak.json")
exaggerated_safety_path = os.path.join(all_folder_path, "exaggerated_safety.json")
misuse_path = os.path.join(all_folder_path, "misuse.json")
# toxicity_path is resolved unconditionally here; the toxicity evaluation
# itself still runs only when toxicity_eval=True.
toxicity_path = os.path.join(all_folder_path, "toxicity.json")

evaluator = safety.SafetyEval()

jailbreak_res, toxicity_res, exaggerated_res, misuse_res = None, None, None, None
@@ -96,11 +115,18 @@ def run_safety(


def run_robustness(
all_folder_path=None,
advglue_path=None,
advinstruction_path=None,
ood_detection_path=None,
ood_generalization_path=None,
):
if all_folder_path:
advglue_path = os.path.join(all_folder_path, "advglue.json")
advinstruction_path = os.path.join(all_folder_path, "advinstruction.json")
ood_detection_path = os.path.join(all_folder_path, "ood_detection.json")
ood_generalization_path = os.path.join(all_folder_path, "ood_generalization.json")

evaluator = robustness.RobustnessEval()

advglue_res, advinstruction_res, ood_detection_res, ood_generalization_res = (
@@ -135,11 +161,19 @@ def run_robustness(


def run_privacy(
all_folder_path=None,
privacy_confAIde_path=None,
privacy_awareness_query_path=None,
privacy_leakage_path=None,
):

if all_folder_path:
privacy_confAIde_path = os.path.join(all_folder_path, "privacy_awareness_confAIde.json")
privacy_awareness_query_path = os.path.join(all_folder_path, "privacy_awareness_query.json")
privacy_leakage_path = os.path.join(all_folder_path, "privacy_leakage.json")

evaluator = privacy.PrivacyEval()


(
privacy_confAIde_res,
@@ -181,12 +215,20 @@ def run_privacy(


def run_truthfulness(
all_folder_path=None,
internal_path=None,
external_path=None,
hallucination_path=None,
sycophancy_path=None,
advfact_path=None,
):
if all_folder_path:
internal_path = os.path.join(all_folder_path, "internal.json")
external_path = os.path.join(all_folder_path, "external.json")
hallucination_path = os.path.join(all_folder_path, "hallucination.json")
sycophancy_path = os.path.join(all_folder_path, "sycophancy.json")
advfact_path = os.path.join(all_folder_path, "advfact.json")

evaluator = truthfulness.TruthfulnessEval()

(
@@ -205,31 +247,34 @@ def run_truthfulness(
None,
)

-if internal_path is not None:
-    internal_data = file_process.load_json(internal_path)
-    internal_res = evaluator.internal_eval(internal_data)
-
-if external_path is not None:
-    external_data = file_process.load_json(external_path)
-    external_res = evaluator.external_eval(external_data)
-
-if hallucination_path is not None:
-    hallucination_data = file_process.load_json(hallucination_path)
-    hallucination_res = evaluator.hallucination_eval(hallucination_data)
-
-if sycophancy_path is not None:
-    sycophancy_data = file_process.load_json(sycophancy_path)
-    sycophancy_persona_res = evaluator.sycophancy_eval(
-        sycophancy_data, eval_type="persona"
-    )
-    sycophancy_preference_res = evaluator.sycophancy_eval(
-        sycophancy_data, eval_type="preference"
-    )
-
-if advfact_path is not None:
-    advfact_data = file_process.load_json(advfact_path)
-    advfact_res = evaluator.advfact_eval(advfact_data)
+try:
+    if internal_path is not None:
+        internal_data = file_process.load_json(internal_path)
+        internal_res = evaluator.internal_eval(internal_data)
+
+    if external_path is not None:
+        external_data = file_process.load_json(external_path)
+        external_res = evaluator.external_eval(external_data)
+
+    if hallucination_path is not None:
+        hallucination_data = file_process.load_json(hallucination_path)
+        hallucination_res = evaluator.hallucination_eval(hallucination_data)
+
+    if sycophancy_path is not None:
+        sycophancy_data = file_process.load_json(sycophancy_path)
+        sycophancy_persona_res = evaluator.sycophancy_eval(
+            sycophancy_data, eval_type="persona"
+        )
+        sycophancy_preference_res = evaluator.sycophancy_eval(
+            sycophancy_data, eval_type="preference"
+        )
+
+    if advfact_path is not None:
+        advfact_data = file_process.load_json(advfact_path)
+        advfact_res = evaluator.advfact_eval(advfact_data)
+except Exception as e:
+    # Log the full traceback; execution falls through to the return below,
+    # yielding whatever sub-evaluation results were computed before the failure.
+    traceback.print_exc()
+    print(e)
return {
"misinformation_internal": internal_res,
"misinformation_external": external_res,
@@ -267,17 +312,14 @@ def run_fairness(
disparagement_path=None,
preference_path=None,
):
# NOTE: a generic auto_assign_paths(all_folder_path, param_names) helper could
# replace the per-function path blocks in this module; see the sketch after
# this file's diff.

if all_folder_path:
stereotype_recognition_path = os.path.join(all_folder_path, "stereotype_recognition.json")
stereotype_agreement_path = os.path.join(all_folder_path, "stereotype_agreement.json")
stereotype_query_test_path = os.path.join(all_folder_path, "stereotype_query_test.json")
disparagement_path = os.path.join(all_folder_path, "disparagement.json")
preference_path = os.path.join(all_folder_path, "preference.json")

evaluator = fairness.FairnessEval()

(
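Taken together, every run_* helper in pipeline.py now accepts all_folder_path and resolves its input files from that one directory. A usage sketch (the directory names are placeholders; the JSON file names come from the os.path.join calls above):

    from trustllm.task import pipeline

    ethics_res = pipeline.run_ethics(all_folder_path="results/ethics")
    safety_res = pipeline.run_safety(all_folder_path="results/safety", toxicity_eval=True)

The generic resolver hinted at by the draft comment in run_fairness could look like the following (auto_assign_paths and its naming rule are assumptions, not part of this commit; Python 3.9+ for str.removesuffix):

    import os

    def auto_assign_paths(all_folder_path, param_names):
        # Map each "<name>_path" parameter to "<all_folder_path>/<name>.json".
        return {
            name: os.path.join(all_folder_path, name.removesuffix("_path") + ".json")
            for name in param_names
        }

One wrinkle: the existing names do not all follow that rule (privacy_confAIde_path resolves to privacy_awareness_confAIde.json, and implicit_ethics_path_social_norm carries _path in the middle), which may be why the explicit per-function blocks were kept.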
12 changes: 7 additions & 5 deletions trustllm_pkg/trustllm/task/truthfulness.py
@@ -146,11 +146,13 @@ def sycophancy_eval(self, data, eval_type):
similarities = []
Embedder = embedder.DataEmbedder()
for item in tqdm(persona_data, total=len(persona_data), desc="Evaluating persona sycophancy"):
-r_embedding = Embedder.get_embeddings(item['res'])
-n_embedding = Embedder.get_embeddings(item['n_completion'])
-s_embedding = Embedder.get_embeddings(item['s_completion'])
-similarities.append(metrics.sycophancy_sim(r_embedding, n_embedding, s_embedding))
+try:
+    r_embedding = Embedder.get_embeddings(item['res'])
+    n_embedding = Embedder.get_embeddings(item['n_completion'])
+    s_embedding = Embedder.get_embeddings(item['s_completion'])
+    similarities.append(metrics.sycophancy_sim(r_embedding, n_embedding, s_embedding))
+except Exception as e:
+    # Skip items whose embeddings fail rather than aborting the whole run.
+    print(f"Error in embedding: {e}")
return round(mean(similarities), 3)

elif eval_type == 'preference':
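A caveat on the new try/except in the persona branch: if every embedding call fails, similarities stays empty and, assuming mean here is statistics.mean, round(mean(similarities), 3) raises StatisticsError. A small guard, as a sketch:

    from statistics import mean

    def safe_round_mean(values, digits=3):
        # Return the rounded mean, or None when no item survived evaluation.
        return round(mean(values), digits) if values else None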
9 changes: 6 additions & 3 deletions trustllm_pkg/trustllm/utils/longformer.py
@@ -3,7 +3,7 @@
import os
from trustllm.utils import file_process
from tqdm import tqdm

import torch
# Configure basic logging
logging.basicConfig(filename='huggingface_evaluator.log', level=logging.INFO,
format='%(asctime)s:%(levelname)s:%(message)s')
@@ -13,7 +13,7 @@ class HuggingFaceEvaluator:
"""
A class for evaluating text using a Hugging Face model.
"""
-def __init__(self, model_name='LibrAI/longformer-harmful-ro', device='mps', save_dir='saved_evaluations'):
+def __init__(self, model_name='LibrAI/longformer-harmful-ro', device=None, save_dir='saved_evaluations'):
"""
Initialize the HuggingFaceEvaluator class.
@@ -22,10 +22,13 @@ def __init__(self, model_name='LibrAI/longformer-harmful-ro', device='mps', save_dir='saved_evaluations'):
device (str, optional): Device to run the model on (e.g., 'cuda:0'). When None, falls back to 'cuda' if available, otherwise 'cpu'.
save_dir (str): Directory to save the evaluation results.
"""
if device is None:
    # Prefer CUDA when available; otherwise fall back to CPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
self.device = device
# Load the model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
-self.classifier = pipeline('text-classification', model=model, tokenizer=tokenizer, device=device)
+self.classifier = pipeline('text-classification', model=model, tokenizer=tokenizer, device=self.device)


self.save_dir = save_dir
# Create save directory if it doesn't exist
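Since the previous default device was 'mps', Apple-silicon users now need to pass device explicitly. A device-selection sketch that also probes MPS (assumes PyTorch >= 1.12, where torch.backends.mps is available):

    import torch

    def pick_device(preferred=None):
        # Explicit choice first, then CUDA, then Apple MPS, then CPU.
        if preferred is not None:
            return preferred
        if torch.cuda.is_available():
            return 'cuda'
        if torch.backends.mps.is_available():
            return 'mps'
        return 'cpu'

    evaluator = HuggingFaceEvaluator(device=pick_device())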
