Skip to content

Commit

Permalink
update government benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: IcyFeather <mengzhuo.happy@gmail.com>
  • Loading branch information
IcyFeather233 committed Sep 9, 2024
1 parent 171d59e commit 189f36a
Show file tree
Hide file tree
Showing 15 changed files with 9 additions and 1,685 deletions.
600 changes: 0 additions & 600 deletions dataset/government/objective/test_data/data.jsonl

This file was deleted.

4 changes: 0 additions & 4 deletions dataset/government/objective/test_data/data_info.json

This file was deleted.

5 changes: 0 additions & 5 deletions dataset/government/objective/test_data/prompts.json

This file was deleted.

5 changes: 0 additions & 5 deletions dataset/government/subjective/test_data/data.jsonl

This file was deleted.

1,045 changes: 0 additions & 1,045 deletions dataset/government/subjective/test_data/data_full.jsonl

This file was deleted.

4 changes: 0 additions & 4 deletions dataset/government/subjective/test_data/data_info.json

This file was deleted.

6 changes: 0 additions & 6 deletions dataset/government/subjective/test_data/prompts.json

This file was deleted.

10 changes: 0 additions & 10 deletions dataset/llm_simple_qa/test_data/data.jsonl

This file was deleted.

Empty file.
2 changes: 2 additions & 0 deletions examples/government/singletask_learning_bench/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ This Benchmark consists of two parts: subjective evaluation data and objective e

## Prepare Datasets

You can download the dataset from [Kaggle](https://www.kaggle.com/datasets/hsj576/government-bench-master).

```
dataset/government
├── objective
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,14 @@ def predict(self, data, input_shape=None, **kwargs):
infer_system_prompt = data.prompts['infer_system_prompt']

answer_list = []
for line in tqdm(data.question, desc="Processing", unit="question"):
for line in tqdm(data.x, desc="Processing", unit="question"):
# 3-shot
indices = random.sample([i for i, l in enumerate(data.question) if l != line], 3)
indices = random.sample([i for i, l in enumerate(data.x) if l != line], 3)
history = []
if infer_system_prompt:
history.append({"role": "system", "content": infer_system_prompt})
for idx in indices:
history.append({"role": "user", "content": data.question[idx]})
history.append({"role": "user", "content": data.x[idx]})
history.append({"role": "assistant", "content": data.y[idx]})
history.append({"role": "user", "content": line})
response = self._infer(history)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def predict(self, data, input_shape=None, **kwargs):
infer_system_prompt = data.prompts['infer_system_prompt']

answer_list = []
for line in tqdm(data.question, desc="Processing", unit="question"):
for line in tqdm(data.x, desc="Processing", unit="question"):
history = []
query = line.split('||')[0]
if infer_system_prompt:
Expand All @@ -78,7 +78,7 @@ def predict(self, data, input_shape=None, **kwargs):

# evaluate by llm
for index in tqdm(range(len(answer_list)), desc="Evaluating", ascii=False, ncols=75):
prompt = data.prompts['eval_user_template'].replace('{question}', data.question[index].split('||')[0]).replace('{reference}', data.question[index].split('||')[1]).replace('{answer}', answer_list[index])
prompt = data.prompts['eval_user_template'].replace('{question}', data.x[index].split('||')[0]).replace('{reference}', data.x[index].split('||')[1]).replace('{answer}', answer_list[index])
print(prompt)
judgement = self._openai_generate(prompt)
print(judgement)
Expand Down Expand Up @@ -117,7 +117,8 @@ def _infer(self, messages):


def _openai_generate(self, user_question, system=None):
client = OpenAI(api_key="", base_url="https://api.deepseek.com")
key = os.getenv("DEEPSEEK_API_KEY")
client = OpenAI(api_key=key, base_url="https://api.deepseek.com")

messages = []
if system:
Expand Down
Binary file not shown.
Binary file modified examples/resources/sedna-jsondatainfo.zip
Binary file not shown.
Binary file removed examples/resources/sedna-with-jsonl.zip
Binary file not shown.

0 comments on commit 189f36a

Please sign in to comment.