
Commit

Merge remote-tracking branch 'origin/main'
HowieHwong committed Apr 19, 2024
2 parents a4764f1 + f517193 commit af9f322
Showing 17 changed files with 28,950 additions and 58 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -178,3 +178,4 @@ pyrightconfig.json

# End of https://www.toptal.com/developers/gitignore/api/python
test.ipynb
test.py
966 changes: 966 additions & 0 deletions generation_results/llama3-70b/robustness/ood_detection.json

Large diffs are not rendered by default.

4,202 changes: 4,202 additions & 0 deletions saved_embeddings/embeddings.json

Large diffs are not rendered by default.

20 changes: 16 additions & 4 deletions test.py
@@ -6,9 +6,21 @@



config.openai_key= "1f462c580d06407eb49954553ab22ff7"
from trustllm.generation.generation import LLMGeneration
from trustllm.utils import file_process
from trustllm import config

import concurrent.futures

config.replicate_api= "r8_EBJDSqZ6KDR2EmN0NWRodPva3018ceS0nd6BF"

evaluator = truthfulness.TruthfulnessEval()
internal= file_process.load_json('/Users/admin/Downloads/mixtral-8x7B/truthfulness/internal.json')
def run_task(test_type):
run_robu=LLMGeneration(test_type=test_type,
online_model=True,
use_replicate=True,
model_path='meta/meta-llama-3-8b-instruct',
data_path='/Users/admin/Documents/dataset')
run_robu.generation_results()

evaluator.eval_internal_squad(internal)
with concurrent.futures.ThreadPoolExecutor() as executor:
executor.map(run_task,['safety','fairness','truthfulness','privacy','robustness','ethics'])
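
For reference, a cleaned-up sketch of the driver pattern this commit lands in test.py, with the committed credentials and local paths replaced by placeholders (the diff above interleaves removed and added lines):

import concurrent.futures

from trustllm import config
from trustllm.generation.generation import LLMGeneration

config.replicate_api = "<your-replicate-token>"  # placeholder, not the committed key

def run_task(test_type):
    # One generation run per trust dimension, served via Replicate.
    runner = LLMGeneration(
        test_type=test_type,
        online_model=True,
        use_replicate=True,
        model_path="meta/meta-llama-3-8b-instruct",
        data_path="/path/to/dataset",  # placeholder dataset root
    )
    runner.generation_results()

with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(run_task, ["safety", "fairness", "truthfulness",
                            "privacy", "robustness", "ethics"])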
6,002 changes: 6,002 additions & 0 deletions trustllm_pkg/generation_results/llama3-70b/fairness/stereotype_agreement.json

Large diffs are not rendered by default.


9,122 changes: 9,122 additions & 0 deletions trustllm_pkg/generation_results/llama3-70b/robustness/AdvGLUE.json

Large diffs are not rendered by default.

4,202 changes: 4,202 additions & 0 deletions trustllm_pkg/generation_results/llama3-70b/robustness/AdvInstruction.json

Large diffs are not rendered by default.


3,002 changes: 3,002 additions & 0 deletions trustllm_pkg/generation_results/llama3-70b/robustness/ood_generalization.json

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion trustllm_pkg/trustllm/config.py
@@ -14,9 +14,10 @@

max_worker = 1

##only for auto evaluation
##only support azure api for auto evaluation
azure_openai = True
azure_engine = "TrustLLM-GPT-4"
azure_embedding_engine='TrustLLM-embedding'
azure_api_version = "2023-08-01-preview"
azure_api_base = "https://trustllm-gpt-4.openai.azure.com/"
azure_api_key=''
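
A hedged sketch of wiring up the Azure-only auto-evaluation settings introduced above; the endpoint and key values are placeholders for your own deployment:

from trustllm import config

# Placeholders below; substitute your own Azure OpenAI deployment values.
config.azure_openai = True
config.azure_engine = "TrustLLM-GPT-4"                # chat deployment name
config.azure_embedding_engine = "TrustLLM-embedding"  # embedding deployment name
config.azure_api_version = "2023-08-01-preview"
config.azure_api_base = "https://<your-resource>.openai.azure.com/"
config.azure_api_key = "<your-azure-key>"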
41 changes: 16 additions & 25 deletions trustllm_pkg/trustllm/generation/generation.py
@@ -38,11 +38,12 @@ def __init__(self,
self.num_gpus = num_gpus
self.max_new_tokens = max_new_tokens
self.debug = debug
self.online_model_dict = get_models()[1]
self.online_model_list = get_models()[1]
self.model_mapping = get_models()[0]
self.device = device
self.use_replicate = use_replicate
self.use_deepinfra = use_deepinfra
self.model_name = model_mapping.get(self.model_path, "")

def _generation_hf(self, prompt, tokenizer, model, temperature):
"""
@@ -55,7 +56,7 @@ def _generation_hf(self, prompt, tokenizer, model, temperature):
:return: The generated text as a string.
"""

prompt = self._prompt2conversation(prompt)
prompt = prompt2conversation(prompt)
inputs = tokenizer([prompt])
inputs = {k: torch.tensor(v).to(self.device) for k, v in inputs.items()}
output_ids = model.generate(
@@ -74,14 +75,7 @@ def _generation_hf(self, prompt, tokenizer, model, temperature):
)
return outputs

def _prompt2conversation(self, prompt):
msg = prompt
conv = get_conversation_template(self.model_path)
conv.set_system_message('')
conv.append_message(conv.roles[0], msg)
conv.append_message(conv.roles[1], None)
conversation = conv.get_prompt()
return conversation


def generation(self, model_name, prompt, tokenizer, model, temperature=None):
"""
@@ -96,7 +90,7 @@ def generation(self, model_name, prompt, tokenizer, model, temperature=None):
"""

try:
if (model_name in self.online_model_dict) and ((self.online_model and self.use_replicate) or (self.online_model and self.use_deepinfra)):
if (model_name in self.online_model_list) and ((self.online_model and self.use_replicate) or (self.online_model and self.use_deepinfra)):
ans = gen_online(model_name, prompt, temperature, replicate=self.use_replicate, deepinfra=self.use_deepinfra)
else:
ans = self._generation_hf(prompt, tokenizer, model, temperature)
@@ -172,7 +166,7 @@ def process_file(self, data_path, save_path, model_name, tokenizer, model, file_
t.join()
file_process.save_json(saved_data, f"{save_path}")

def run_task(self, model_name, model, tokenizer, base_dir, file_config, key_name='prompt'):
def _run_task(self, model_name, model, tokenizer, base_dir, file_config, key_name='prompt'):
"""
Runs a specific evaluation task based on provided parameters.
@@ -205,7 +199,7 @@ def run_ethics(self, model_name, model, tokenizer):
"implicit_ETHICS.json": 0.0,
"implicit_SocialChemistry101.json": 0.0
}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_privacy(self, model_name, model, tokenizer):
base_dir = os.path.join(self.data_path, 'privacy')
@@ -214,7 +208,7 @@ def run_privacy(self, model_name, model, tokenizer):
'privacy_awareness_query.json': 1.0,
'privacy_leakage.json': 1.0,
}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_fairness(self, model_name, model, tokenizer):
base_dir = os.path.join(self.data_path, 'fairness')
@@ -225,7 +219,7 @@ def run_fairness(self, model_name, model, tokenizer):
'stereotype_query_test.json': 1.0,
'stereotype_recognition.json': 0.0,
}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_truthfulness(self, model_name, model, tokenizer):
base_dir = os.path.join(self.data_path, 'truthfulness')
@@ -236,7 +230,7 @@ def run_truthfulness(self, model_name, model, tokenizer):
"internal.json": 1.0,
"sycophancy.json": 1.0
}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_robustness(self, model_name, model, tokenizer):
base_dir = os.path.join(self.data_path, 'robustness')
@@ -246,7 +240,7 @@ def run_robustness(self, model_name, model, tokenizer):
'AdvGLUE.json': 0.0,
'AdvInstruction.json': 1.0,
}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_safety(self, model_name, model, tokenizer):
base_dir = os.path.join(self.data_path, 'safety')
@@ -256,9 +250,9 @@ def run_safety(self, model_name, model, tokenizer):
'misuse.json': 1.0,

}
self.run_task(model_name, model, tokenizer, base_dir, file_config)
self._run_task(model_name, model, tokenizer, base_dir, file_config)

def run_single_test(self):
def _run_single_test(self):
"""
Executes a single test based on specified parameters.
@@ -268,7 +262,7 @@ def run_single_test(self):
model_name = self.model_name
print(f"Beginning generation with {self.test_type} evaluation at temperature {self.temperature}.")
print(f"Evaluation target model: {model_name}")
if (model_name in self.online_model_dict) and ((self.online_model and self.use_replicate) or (self.online_model and self.use_deepinfra)):
if (model_name in self.online_model_list) and ((self.online_model and self.use_replicate) or (self.online_model and self.use_deepinfra)):
model, tokenizer = (None, None)
else:
model, tokenizer = load_model(
@@ -308,13 +302,10 @@ def generation_results(self, max_retries=10, retry_interval=3):
print(f"Dataset path {self.data_path} does not exist.")
return None

if self.use_replicate:
self.model_name = self.model_path
else:
self.model_name = model_mapping.get(self.model_path, "")

for attempt in range(max_retries):
try:
state = self.run_single_test()
state = self._run_single_test()
if state:
print(f"Test function successful on attempt {attempt + 1}")
return state
34 changes: 34 additions & 0 deletions trustllm_pkg/trustllm/task/pipeline.py
@@ -1,3 +1,6 @@
import os
import difflib
import inspect
from trustllm.task import ethics, fairness, privacy, robustness, safety, truthfulness
from trustllm.utils import file_process

@@ -233,13 +236,44 @@ def run_truthfulness(
}



def find_best_match(keyword, file_list):
"""Find the best match for a keyword in a list of filenames."""
matches = difflib.get_close_matches(keyword, file_list, n=1, cutoff=0.1)
return matches[0] if matches else None

def auto_assign_paths(all_folder_path, param_names):
"""Automatically assign paths based on parameter names and files in the given folder."""
files = os.listdir(all_folder_path)
paths = {}
for name in param_names:
# Convert parameter name to expected file name pattern
key = name.replace('_path', '')
expected_filename = f"{key}.json"
matched_file = find_best_match(expected_filename, files)
if matched_file:
paths[name] = os.path.join(all_folder_path, matched_file)
return paths

def run_fairness(
all_folder_path=None,
stereotype_recognition_path=None,
stereotype_agreement_path=None,
stereotype_query_test_path=None,
disparagement_path=None,
preference_path=None,
):
# param_info = inspect.signature(run_fairness).parameters
# paths = {param: None for param in param_info if param != 'all_folder_path'}

# # Auto-assign paths if all_folder_path is provided
# if all_folder_path is not None:
# auto_paths = auto_assign_paths(all_folder_path, paths.keys())
# paths.update((k, v) for k, v in auto_paths.items() if v is not None)

# # Update paths with explicitly provided paths
# local_vars = locals()
# paths.update((k, local_vars[k]) for k in paths if local_vars[k] is not None)
evaluator = fairness.FairnessEval()

(
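
A hypothetical use of the two path helpers added to pipeline.py above; the folder and parameter names are illustrative only, not from this commit:

from trustllm.task.pipeline import auto_assign_paths

# Fuzzy-match each *_path parameter to a file in a results folder
# (folder contents are hypothetical).
params = ["stereotype_recognition_path", "stereotype_agreement_path"]
paths = auto_assign_paths("generation_results/llama3-70b/fairness", params)
# e.g. {"stereotype_recognition_path": ".../stereotype_recognition.json", ...}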
45 changes: 27 additions & 18 deletions trustllm_pkg/trustllm/utils/embedder.py
@@ -1,4 +1,4 @@
import openai
from openai import OpenAI,AzureOpenAI
import os
import logging
from tqdm import tqdm
@@ -24,26 +24,35 @@ def __init__(self, save_dir='saved_embeddings'):
# Create the directory if it does not exist
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
openai.api_key = trustllm.config.openai_key


@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(6))
def get_embeddings(self, string):
"""
Retrieve embeddings for a given string.
@retry(wait=wait_random_exponential(min=1, max=5), stop=stop_after_attempt(6))
def get_embeddings(self,string, embedding_model='text-embedding-ada-002',):

if trustllm.config.azure_openai:
azure_endpoint = trustllm.config.azure_api_base
api_key = trustllm.config.azure_api_key
api_version = trustllm.config.azure_api_version
model = trustllm.config.azure_embedding_engine
client = AzureOpenAI(
azure_endpoint=azure_endpoint,
api_key=api_key,
api_version=api_version,
)
response = client.embeddings.create(
model=model,
input=string
)
else:
api_key = trustllm.config.openai_key
client = OpenAI(api_key=api_key,)
response = client.embeddings.create(
model=embedding_model,
input=string
)
return response.data[0].embedding

Args:
string (str): The text to embed.

Returns:
list: The embedding vector.
"""
if string is None:
string = ""
response = openai.Embedding.create(
model='text-embedding-ada-002', # Example model
input=string
)
return response["data"][0]["embedding"]

def save_embeddings(self, embeddings, filename):
"""
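
A minimal sketch of calling the rewritten embedding helper; the DataEmbedder class name is assumed from the surrounding file, and the key is a placeholder:

import trustllm.config
from trustllm.utils.embedder import DataEmbedder  # class name assumed

trustllm.config.azure_openai = False              # take the plain-OpenAI branch
trustllm.config.openai_key = "<your-openai-key>"  # placeholder

embedder = DataEmbedder(save_dir="saved_embeddings")
vector = embedder.get_embeddings("hello world")   # text-embedding-ada-002 by default
print(len(vector))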
1 change: 1 addition & 0 deletions trustllm_pkg/trustllm/utils/file_process.py
@@ -1,4 +1,5 @@
import json
import os


def load_json(file_path):
17 changes: 8 additions & 9 deletions trustllm_pkg/trustllm/utils/generation_utils.py
@@ -15,7 +15,7 @@
# deepinfra_model = trustllm.config.model_info['deepinfra_model']

model_info = trustllm.config.model_info
online_model = model_info['online_model']
online_model_list = model_info['online_model']
model_mapping = model_info['model_mapping']
rev_model_mapping = {value: key for key, value in model_mapping.items()}

@@ -51,7 +51,7 @@


def get_models():
return model_mapping, online_model
return model_mapping, online_model_list

def get_access_token():
url = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={}&client_secret=".format(
@@ -108,19 +108,19 @@ def get_res_openai(string, model, temperature):
return response



def deepinfra_api(string, model, temperature):
api_token = trustllm.config.deepinfra_api


top_p = 1 if temperature <= 1e-5 else 0.9

client = OpenAI(api_key=api_token,api_base="https://api.deepinfra.com/v1/openai")
OpenAI(api_key=api_token,api_base="https://api.deepinfra.com/v1/openai")
stream = client.chat.completions.create(
model=rev_model_mapping[model],
messages=[{"role": "user", "content": string}],
max_tokens=5192,
max_tokens=5192,
temperature=temperature,
top_p=top_p,)
top_p=top_p,)
response = stream.choices[0].message.content
return response

@@ -133,8 +133,7 @@ def replicate_api(string, model, temperature):
else:
input["prompt"]=prompt2conversation(rev_model_mapping[model],string)
os.environ["REPLICATE_API_TOKEN"] = trustllm.config.replicate_api
res = replicate.run(
model,
res = replicate.run(rev_model_mapping[model],
input=input
)
res = "".join(res)
@@ -204,7 +203,7 @@ def zhipu_api(string, model, temperature):
return response.choices[0].message.content


@retry(wait=wait_random_exponential(min=1, max=3), stop=stop_after_attempt(3))
@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(5))
def gen_online(model_name, prompt, temperature, replicate=False, deepinfra=False):
if model_name in model_info['wenxin_model']:
res = get_ernie_res(prompt, temperature=temperature)
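
In isolation, the retry-policy change on gen_online amounts to the tenacity decorator below: up to five attempts, sleeping a random exponential backoff of 1 to 10 seconds between them. The wrapped function here is only a stand-in:

import random
from tenacity import retry, stop_after_attempt, wait_random_exponential

@retry(wait=wait_random_exponential(min=1, max=10), stop=stop_after_attempt(5))
def call_online_model():
    # Stand-in for a flaky network call such as gen_online(...).
    if random.random() < 0.5:
        raise ConnectionError("transient failure")
    return "ok"

print(call_online_model())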
2 changes: 1 addition & 1 deletion trustllm_pkg/trustllm/utils/gpt_auto_eval.py
@@ -80,7 +80,7 @@ def __init__(self, save_dir='saved_evaluations'):
self.max_worker = trustllm.config.max_worker
if not os.path.exists(self.save_dir):
os.makedirs(self.save_dir)
openai.api_key = trustllm.config.openai_key
#openai.api_key = trustllm.config.openai_key

def save_progress(self, data, filename='auto_eval.json'):
"""
