Skip to content

Commit

Permalink
PLT add nlp case (#3002)
Browse files Browse the repository at this point in the history
* plt update, test=model

* plt update, test=model

* plt update, test=model

* plt update testing reporter, test=model

* plt update testing reporter, test=model

* plt update testing reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt add pr_info

* plt fix ocr case, test=model

* plt fix ocr case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add more nlp case, test=model

* plt add more nlp case, test=model

* plt add nlp, test=model

* plt fix case, test=model

* fix plt builder data, test=model

* fix plt builder data, test=model

* fix plt builder data, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model
  • Loading branch information
Zeref996 authored Dec 16, 2024
1 parent dce0761 commit 2950680
Show file tree
Hide file tree
Showing 248 changed files with 6,001 additions and 12 deletions.
22 changes: 17 additions & 5 deletions framework/e2e/PaddleLT_new/TestingReporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import json
from pltools.res_save import xlsx_save
from pltools.logger import Logger
from pltools.upload_bos import UploadBos
from db.layer_db import LayerBenchmarkDB
from db.info_map import precision_md5, precision_flags, performance_md5

Expand Down Expand Up @@ -38,6 +39,8 @@ def __init__(self, task_list=list(precision_md5.keys()), date_interval=None):
self.date_interval = date_interval
self.logger.get_log().info(f"self.date_interval: {self.date_interval}")

self.AGILE_PIPELINE_BUILD_ID = os.environ.get("AGILE_PIPELINE_BUILD_ID", 0)

def get_fail_case_info(self):
"""
获取失败case信息
Expand Down Expand Up @@ -169,6 +172,14 @@ def binary_search(self, fail_dict, loop_num=1):
# print("commit list is: {}".format(commit_list))
# print("commit list origin is: {}".format(commit_list_origin))

def _bs_upload(self):
"""二分结果上传bos"""
bos_path = f"PaddleLT/binary_search/build_{self.AGILE_PIPELINE_BUILD_ID}"
excel_file = "binary_search_result.xlsx"
if os.path.exists(excel_file):
UploadBos().upload_to_bos(bos_path="paddle-qa/{}".format(bos_path), file_path=excel_file)
self.logger.get_log().info("表格下载链接: https://paddle-qa.bj.bcebos.com/{}/{}".format(bos_path, excel_file))


if __name__ == "__main__":
import argparse
Expand All @@ -180,13 +191,14 @@ def binary_search(self, fail_dict, loop_num=1):
reporter = TestingReporter(date_interval=args.date_interval) # date_interval=2024-11-13,2024-11-14
# 打印出相对失败case信息
relative_fail_dict, absolute_fail_dict = reporter.get_fail_case_info()
print(f"relative_fail_dict:{relative_fail_dict}")
# print(f"relative_fail_dict:{relative_fail_dict}")
relative_fail_num_dict = reporter.get_fail_case_num(fail_dict=relative_fail_dict)
print(f"relative_fail_num_dict:{relative_fail_num_dict}")
# print(f"relative_fail_num_dict:{relative_fail_num_dict}")
absolute_fail_num_dict = reporter.get_fail_case_num(fail_dict=absolute_fail_dict)
print(f"absolute_fail_num_dict:{absolute_fail_num_dict}")
# print(f"absolute_fail_num_dict:{absolute_fail_num_dict}")
# exit(0)
# 打印出commit定位结果
res_dict = reporter.binary_search(fail_dict=relative_fail_dict, loop_num=args.loop_num)
print("binary search end")
print(f"res_dict:{res_dict}")
reporter._bs_upload()
# print("binary search end")
# print(f"res_dict:{res_dict}")
16 changes: 16 additions & 0 deletions framework/e2e/PaddleLT_new/db/info_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
"paddlelt_train_api_dy2stcinn_inputspec": "76017cfeb6074f7188253df556e9fef9",
"paddlelt_train_prim_inputspec": "33f3b8b4505041abe5ae221f2abd8932",
"paddlelt_train_pir_infersymbolic_inputspec": "07da2ef04135d7ec5d42987705204e1f",
# "paddlelt_train_nlp_dy2stcinn_inputspec": "65e6644742b2653427cdaf51035e5ef6",
# "paddlelt_train_ocr_dy2stcinn_inputspec": "d3cd6b167556057f25568d93e0592529",
}

precision_flags = {
Expand Down Expand Up @@ -75,6 +77,20 @@
"FLAGS_check_infer_symbolic": "1",
"FLAGS_prim_forward_blacklist": "pd_op.dropout",
},
# "paddlelt_train_nlp_dy2stcinn_inputspec": {
# "MIN_GRAPH_SIZE": "0",
# "FLAGS_prim_all": "true",
# "FLAGS_use_cinn": "1",
# "FLAGS_prim_enable_dynamic": "true",
# "FLAGS_prim_forward_blacklist": "pd_op.dropout",
# },
# "paddlelt_train_ocr_dy2stcinn_inputspec": {
# "MIN_GRAPH_SIZE": "0",
# "FLAGS_prim_all": "true",
# "FLAGS_use_cinn": "1",
# "FLAGS_prim_enable_dynamic": "true",
# "FLAGS_prim_forward_blacklist": "pd_op.dropout",
# },
}

performance_md5 = {
Expand Down
2 changes: 2 additions & 0 deletions framework/e2e/PaddleLT_new/generator/builder_layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
elif os.environ.get("USE_PADDLE_MODEL", "None") == "PaddleNLP":
import layerNLPcase
import paddlenlp

os.system("cd /root/.paddlenlp && rm -rf models")
elif os.environ.get("FRAMEWORK") == "torch":
import torch
import layerTorchcase
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Auto-import every case module in this directory.

Importing the package registers all layer-case modules that live next to
this ``__init__.py``.
"""
import os
import glob

# Directory that contains this package's modules.
current_dir = os.path.dirname(__file__)

# Every .py file in this directory (includes this __init__.py itself).
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import each case module.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.basename(py_file)[:-3]
    # Bug fix: skip the package initializer — importing "<pkg>.__init__"
    # would execute this file a second time under a different module name.
    if module_name == "__init__":
        continue
    __import__("layerNLPcase.debug.case_bug.transformers.blenderbot." + module_name, globals(), locals(), [])
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import paddle
import numpy as np
from paddlenlp.transformers import BlenderbotModel, BlenderbotTokenizer


def LayerCase():
    """Build the pretrained Blenderbot model under test (model-zoo intermediate form)."""
    return BlenderbotModel.from_pretrained("blenderbot-3B")


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids /
    attention mask — mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence: attention mask on, token type ids off.
    tokenizer = BlenderbotTokenizer.from_pretrained("blenderbot-3B")
    return tokenizer(
        "My friends are cool but they eat too many carbs.", return_attention_mask=True, return_token_type_ids=False
    )


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Auto-import every case module in this directory.

Importing the package registers all layer-case modules that live next to
this ``__init__.py``.
"""
import os
import glob

# Directory that contains this package's modules.
current_dir = os.path.dirname(__file__)

# Every .py file in this directory (includes this __init__.py itself).
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import each case module.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.basename(py_file)[:-3]
    # Bug fix: skip the package initializer — importing "<pkg>.__init__"
    # would execute this file a second time under a different module name.
    if module_name == "__init__":
        continue
    __import__("layerNLPcase.debug.case_bug.transformers.ernie." + module_name, globals(), locals(), [])
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import paddle
import numpy as np
from paddlenlp.transformers import ErnieModel, ErnieTokenizer


def LayerCase():
    """Build the pretrained Ernie model under test (model-zoo intermediate form)."""
    return ErnieModel.from_pretrained('ernie-3.0-tiny-mini-v2-en')


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids —
    mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence with the matching tokenizer.
    tokenizer = ErnieTokenizer.from_pretrained('ernie-3.0-tiny-mini-v2-en')
    return tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import paddle
import numpy as np
from paddlenlp.transformers import ErnieModel, ErnieTokenizer


def LayerCase():
    """Build the pretrained RocketQA cross-encoder under test (model-zoo intermediate form)."""
    return ErnieModel.from_pretrained('rocketqa-v1-marco-cross-encoder')


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids —
    mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence with the matching tokenizer.
    tokenizer = ErnieTokenizer.from_pretrained('rocketqa-v1-marco-cross-encoder')
    return tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import paddle
import numpy as np
from paddlenlp.transformers import ErnieModel, ErnieTokenizer


def LayerCase():
    """Build the pretrained RocketQA paragraph encoder under test (model-zoo intermediate form)."""
    return ErnieModel.from_pretrained('rocketqa-v1-marco-para-encoder')


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids —
    mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence with the matching tokenizer.
    tokenizer = ErnieTokenizer.from_pretrained('rocketqa-v1-marco-para-encoder')
    return tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import paddle
import numpy as np
from paddlenlp.transformers import ErnieModel, ErnieTokenizer


def LayerCase():
    """Build the pretrained RocketQA query encoder under test (model-zoo intermediate form)."""
    return ErnieModel.from_pretrained('rocketqa-v1-marco-query-encoder')


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids —
    mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence with the matching tokenizer.
    tokenizer = ErnieTokenizer.from_pretrained('rocketqa-v1-marco-query-encoder')
    return tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Auto-import every case module in this directory.

Importing the package registers all layer-case modules that live next to
this ``__init__.py``.
"""
import os
import glob

# Directory that contains this package's modules.
current_dir = os.path.dirname(__file__)

# Every .py file in this directory (includes this __init__.py itself).
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import each case module.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.basename(py_file)[:-3]
    # Bug fix: skip the package initializer — importing "<pkg>.__init__"
    # would execute this file a second time under a different module name.
    if module_name == "__init__":
        continue
    __import__("layerNLPcase.debug.case_bug.transformers.fnet." + module_name, globals(), locals(), [])
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import paddle
import numpy as np
from paddlenlp.transformers.fnet.modeling import FNetModel
from paddlenlp.transformers.fnet.tokenizer import FNetTokenizer


def LayerCase():
    """Build the pretrained FNet model under test (model-zoo intermediate form)."""
    return FNetModel.from_pretrained('fnet-large')


def create_inputspec():
    """Static-graph input specs: two (-1, 15) tensors with gradients enabled.

    NOTE(review): dtype is float32 although the fed fields are token ids —
    mirrors the original case definition; confirm intended.
    """
    def _spec():
        return paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False)

    return (_spec(), _spec())


def _encode():
    # Tokenize the fixed sample sentence with the matching tokenizer.
    tokenizer = FNetTokenizer.from_pretrained('fnet-large')
    return tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    return tuple(paddle.to_tensor([value], stop_gradient=False) for value in _encode().values())


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    return tuple(np.array([value]) for value in _encode().values())
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""Auto-import every case module in this directory.

Importing the package registers all layer-case modules that live next to
this ``__init__.py``.
"""
import os
import glob

# Directory that contains this package's modules.
current_dir = os.path.dirname(__file__)

# Every .py file in this directory (includes this __init__.py itself).
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import each case module.
for py_file in py_files:
    # File name without the ".py" extension.
    module_name = os.path.basename(py_file)[:-3]
    # Bug fix: skip the package initializer — importing "<pkg>.__init__"
    # would execute this file a second time under a different module name.
    if module_name == "__init__":
        continue
    __import__("layerNLPcase.debug.case_bug.transformers.gpt." + module_name, globals(), locals(), [])
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import paddle
import numpy as np
from paddlenlp.transformers import GPTModel, GPTTokenizer


def LayerCase():
    """Build the pretrained GPT model under test (model-zoo intermediate form)."""
    model = GPTModel.from_pretrained('gpt2-en')
    return model


def create_inputspec():
    """Static-graph input spec: a single (-1, 13) tensor with gradients enabled.

    NOTE(review): dtype is float32 although the fed field is token ids —
    mirrors the sibling NLP cases; confirm intended.
    """
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec


def create_tensor_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 paddle tensors."""
    tokenizer = GPTTokenizer.from_pretrained('gpt2-en')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!", return_token_type_ids=False)
    inputs = tuple(paddle.to_tensor([v], stop_gradient=False) for (k, v) in inputs_dict.items())
    return inputs


def create_numpy_inputs():
    """Tokenized sample wrapped as a tuple of batch-size-1 numpy arrays."""
    tokenizer = GPTTokenizer.from_pretrained('gpt2-en')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!", return_token_type_ids=False)
    inputs = tuple(np.array([v]) for (k, v) in inputs_dict.items())
    # Bug fix: removed leftover debug `print("inputs.shape", ...)` that
    # polluted stdout on every call.
    return inputs
Loading

0 comments on commit 2950680

Please sign in to comment.