Skip to content

Commit

Permalink
add government benchmark
Browse files Browse the repository at this point in the history
Signed-off-by: IcyFeather <mengzhuo.happy@gmail.com>
  • Loading branch information
IcyFeather233 committed Sep 9, 2024
1 parent 147ce57 commit 171d59e
Show file tree
Hide file tree
Showing 31 changed files with 2,318 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -84,5 +84,8 @@ def _inference(self, job, trained_model):
inference_output_dir = os.path.join(self.workspace, "output/inference/")
os.environ["RESULT_SAVED_URL"] = inference_output_dir
job.load(trained_model)
infer_res = job.predict(inference_dataset.x)
if hasattr(inference_dataset, 'need_other_info'):
infer_res = job.predict(inference_dataset)
else:
infer_res = job.predict(inference_dataset.x)
return infer_res
39 changes: 36 additions & 3 deletions core/testenvmanager/dataset/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@

import os
import tempfile
import json

import pandas as pd
from sedna.datasources import CSVDataParse, TxtDataParse, JSONDataParse, JsonlDataParse
from sedna.datasources import CSVDataParse, TxtDataParse, JSONDataParse, JsonlDataParse, JSONDataInfoParse

from core.common import utils
from core.common.constant import DatasetFormat
Expand All @@ -42,6 +43,8 @@ def __init__(self, config):
self.test_index: str = ""
self.train_data: str = ""
self.test_data: str = ""
self.train_data_info: str = ""
self.test_data_info: str = ""
self.label: str = ""
self._parse_config(config)

Expand All @@ -54,6 +57,10 @@ def _check_fields(self):
self._check_dataset_url(self.train_data)
if self.test_data:
self._check_dataset_url(self.test_data)
if self.train_data_info:
self._check_dataset_url(self.train_data_info)
if self.test_data_info:
self._check_dataset_url(self.test_data_info)

def _parse_config(self, config):
for attr, value in config.items():
Expand Down Expand Up @@ -120,6 +127,13 @@ def _process_data_file(self, file_url):

return None

def _process_data_info_file(self, file_url):
    """
    Resolve the dataset url for a data-info file.

    The format of ``file_url`` is looked up with ``utils.get_file_format``.
    When it equals ``DatasetFormat.JSON.value`` the url is handed back
    unchanged; for any other format ``None`` is returned.
    """
    is_json = utils.get_file_format(file_url) == DatasetFormat.JSON.value
    return file_url if is_json else None


def process_dataset(self):
"""
process dataset:
Expand All @@ -130,13 +144,24 @@ def process_dataset(self):
"""
if self.train_index:
self.train_url = self._process_index_file(self.train_index)
else:
elif self.train_data:
self.train_url = self._process_data_file(self.train_data)
elif self.train_data_info:
self.train_url = self._process_data_info_file(self.train_data_info)
# raise NotImplementedError('to be done')
else:
raise NotImplementedError('not one of train_index/train_data/train_data_info')

if self.test_index:
self.test_url = self._process_index_file(self.test_index)
else:
elif self.test_data:
self.test_url = self._process_data_file(self.test_data)
elif self.test_data_info:
self.test_url = self._process_data_info_file(self.test_data_info)
# raise NotImplementedError('to be done')
else:
raise NotImplementedError('not one of test_index/test_data/test_data_info')


# pylint: disable=too-many-arguments
def split_dataset(self, dataset_url, dataset_format, ratio, method="default",
Expand Down Expand Up @@ -411,6 +436,14 @@ def load_data(cls, file: str, data_type: str, label=None, use_raw=False, feature
e.g.: TxtDataParse, CSVDataParse.
"""
print("file:")
print(file)
if file.split('/')[-1] == "data_info.json":
print('This is data_info.json')
data = JSONDataInfoParse(data_type=data_type, func=feature_process)
data.parse(file)
return data

data_format = utils.get_file_format(file)

data = None
Expand Down
600 changes: 600 additions & 0 deletions dataset/government/objective/test_data/data.jsonl

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions dataset/government/objective/test_data/data_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"keys": ["question", "A", "B", "C", "D", "answer"],
"answer_key": "answer"
}
5 changes: 5 additions & 0 deletions dataset/government/objective/test_data/prompts.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"infer_system_prompt": "你是一个中国的政务大模型助手,需要结合中国政务的一些知识来回答下面的问题。",
"infer_user_template": "Question:{question}\n请从下面四个选项中选出正确的选项:\nA:{A}\nB:{B}\nC:{C}\nD:{D}\n",
"infer_answer_template": "Answer:{answer}\n"
}
5 changes: 5 additions & 0 deletions dataset/government/subjective/test_data/data.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{"question": "竞业限制适用于哪些员工?", "reference": "用人单位与劳动者可以在劳动合同中约定保守用人单位的商业秘密和与知识产权相关的保密事项。对负有保密义务的劳动者,用人单位可以在劳动合同或者保密协议中与劳动者约定竞业限制条款,并约定在解除或者终止劳动合同后,在竞业限制期限内按月给予劳动者经济补偿。劳动者违反竞业限制约定的,应当按照约定向用人单位支付违约金。 竞业限制的人员限于用人单位的高级管理人员、高级技术人员和其他负有保密义务的人员。"}
{"question": "职工在多个单位就业,工伤保险费由谁缴纳?", "reference": "职工(包括非全日制从业人员)在两个或者两个以上用人单位同时就业的,各用人单位应当分别为职工缴纳工伤保险费。 职工发生工伤,由职工受到伤害时工作的单位依法承担工伤保险责任。"}
{"question": "承诺放弃社保还能享受工伤保险待遇吗?", "reference": "根据《工伤保险条例》第二条规定,中华人民共和国境内的企业、事业单位、社会团体、民办非企业单位、基金会、律师事务所、会计师事务所等组织和有雇工的个体工商户应当依照本条例规定参加工伤保险,为本单位全部职工或者雇工缴纳工伤保险费。用人单位的雇工,均有依照本条例的规定享受工伤保险待遇的权利。 工伤保险是社会保险之一,不同于商业保险,属于国家强制性的保险。用人单位为职工参加工伤保险是为了保障职工在工作中受到事故伤害时,能依法从国家和社会获得物质帮助,也是法律明确规定用人单位应履行的义务,并不能由用人单位和职工自由协商决定放弃或免除。《劳动法》第七十二条规定,用人单位和劳动者必须依法参加社会保险,缴纳社会保险费。《社会保险法》第六十条规定,用人单位应当自行申报、按时足额缴纳社会保险费,非因不可抗力等法定事由不得缓缴、减免。"}
{"question": "工伤职工进行劳动能力鉴定需符合什么条件?", "reference": "职工发生工伤,经治疗伤情相对稳定后存在残疾、影响劳动能力的,应当进行劳动能力鉴定。具体应符合以下条件: 一是经过治疗后,伤情处于相对稳定状态,这样便于劳动能力鉴定机构聘请的医疗卫生专家对伤情进行鉴定; 二是职工经治疗后,确认是因工伤原因造成职工身体上的残疾; 三是工伤职工的残疾将对以后的工作、生活产生直接影响,并且伤残程度已经影响职工本人的劳动能力。"}
{"question": "工伤认定决定可以口头传达吗?", "reference": "不可以。工伤认定决定是工伤职工能否享受工伤保险待遇的依据,也是当事人进行行政复议和行政诉讼申请的依据。因此,工伤认定决定必须以书面方式送达。"}
Loading

0 comments on commit 171d59e

Please sign in to comment.