diff --git a/framework/e2e/PaddleLT_new/TestingReporter.py b/framework/e2e/PaddleLT_new/TestingReporter.py
index 17c3c9dd66..dd9fb4781e 100644
--- a/framework/e2e/PaddleLT_new/TestingReporter.py
+++ b/framework/e2e/PaddleLT_new/TestingReporter.py
@@ -7,10 +7,11 @@
 """
 
 import os
+import json
 from pltools.res_save import xlsx_save
 from pltools.logger import Logger
 from db.layer_db import LayerBenchmarkDB
-from db.info_map import precision_md5, performance_md5
+from db.info_map import precision_md5, precision_flags, performance_md5
 from binary_search import BinarySearch


@@ -29,8 +30,13 @@ def __init__(self, task_list=list(precision_md5.keys()), date_interval=None):
         self.storage = "./apibm_config.yml"
         self.task_list = task_list
-        self.date_interval = date_interval
         self.logger = Logger("PLTReporter")
+        self.logger.get_log().info(f"task list: {task_list}")
+        self.logger.get_log().info(f"date interval: {date_interval}")
+        if "," in date_interval:
+            date_interval = date_interval.split(",")
+        self.date_interval = date_interval
+        self.logger.get_log().info(f"self.date_interval: {self.date_interval}")

     def get_fail_case_info(self):
         """
         Get failed case info
         """
@@ -41,40 +47,64 @@
             task_list=self.task_list, date_interval=self.date_interval
         )
         xlsx_save(relative_fail_dict, "./relative_fail_dict.xlsx")
+        xlsx_save(absolute_fail_dict, "./absolute_fail_dict.xlsx")
         return relative_fail_dict, absolute_fail_dict

-    def get_fail_case_num(self):
+    def get_fail_case_num(self, fail_dict):
         """
         Get failed case count
         """
-        layer_db = LayerBenchmarkDB(storage=self.storage)
-        relative_fail_dict, absolute_fail_dict = layer_db.get_precision_fail_case_dict(
-            task_list=self.task_list, date_interval=self.date_interval
-        )
+        # layer_db = LayerBenchmarkDB(storage=self.storage)
+        # relative_fail_dict, absolute_fail_dict = layer_db.get_precision_fail_case_dict(
+        #     task_list=self.task_list, date_interval=self.date_interval
+        # )

-        absolute_fail_num_dict = {}
-        for task, value_dict in absolute_fail_dict.items():
-            absolute_fail_num_dict[task] = len(value_dict["absolute_fail_list"])
-        return absolute_fail_num_dict
+        fail_num_dict = {}
+        for task, value_dict in fail_dict.items():
+            fail_num_dict[task] = len(value_dict["fail_list"])
+        return fail_num_dict

-    def binary_search(self, loop_num=1):
+    def _set_flags(self, task):
+        """
+        Set environment variables for a task
+        """
+        task_flags_dict = precision_flags[task]
+        for key, value in task_flags_dict.items():
+            os.environ[key] = value
+            self.logger.get_log().info(f"_set_flags setting env var: {key}={value}")
+
+    def _unset_flags(self, task):
+        """
+        Unset environment variables for a task
+        """
+        task_flags_dict = precision_flags[task]
+        for key, value in task_flags_dict.items():
+            if key in os.environ:
+                self.logger.get_log().info(f"_unset_flags unsetting env var: {key}={os.environ[key]}")
+                del os.environ[key]
+
+    def binary_search(self, fail_dict, loop_num=1):
         """
         Run the binary search tool
         """
-        relative_fail_dict, absolute_fail_dict = self.get_fail_case_info()
+        # relative_fail_dict, absolute_fail_dict = self.get_fail_case_info()
         res_dict = {}
-        relative_fail_info_dict = {}
-        for task, value_dict in relative_fail_dict.items():
-            relative_fail_info_dict[task] = value_dict
-            relative_fail_info_dict[task]["relative_fail_info"] = {}
+        fail_info_dict = {}
+        for task, value_dict in fail_dict.items():
+            fail_info_dict[task] = value_dict
+            fail_info_dict[task]["fail_info"] = {}
             res_dict[task] = {}
-            if len(value_dict["relative_fail_list"]) == 0:
+            # set env flags for this task
+            self._set_flags(task=task)
+            if len(value_dict["fail_list"]) == 0:
+                self.logger.get_log().info(f"{task} has no failing cases, skipping binary search")
                 continue
             else:
+                self.logger.get_log().info(f"{task} has failing cases, starting binary search")
                 baseline_commit = value_dict["baseline_commit"]
                 latest_commit = value_dict["latest_commit"]
                 testing = value_dict["testing"]
-                for layer_file in value_dict["relative_fail_list"]:
+                for layer_file in value_dict["fail_list"]:
                     bs = BinarySearch(
                         good_commit=baseline_commit,
                         bad_commit=latest_commit,
@@ -89,7 +119,7 @@ def binary_search(self, loop_num=1):
                         "commit_list_origin": commit_list_origin,
                         "check_info": check_info,
                     }
-                    relative_fail_info_dict[task]["relative_fail_info"].update(
+                    fail_info_dict[task]["fail_info"].update(
                         {
                             layer_file: {
                                 "final_commit": final_commit,
@@ -97,8 +127,10 @@ def binary_search(self, loop_num=1):
                             }
                         }
                     )
+            # unset env flags for this task
+            self._unset_flags(task=task)

-        xlsx_save(relative_fail_info_dict, "./relative_fail_info_dict.xlsx")
+        xlsx_save(fail_info_dict, "./binary_search_result.xlsx")
         return res_dict

     # def binary_search_old(self):
@@ -139,15 +171,22 @@ def binary_search(self, loop_num=1):


 if __name__ == "__main__":
-    # reporter = TestingReporter(date_interval=["2024-11-05", "2024-11-07"])
-    reporter = TestingReporter()
+    import argparse
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--date_interval", type=str, default="None", help="date interval")
+    parser.add_argument("--loop_num", type=int, default=1, help="number of verification loops")
+    args = parser.parse_args()
+    reporter = TestingReporter(date_interval=args.date_interval)  # date_interval=2024-11-13,2024-11-14
     # print relative-failure case info
     relative_fail_dict, absolute_fail_dict = reporter.get_fail_case_info()
     print(f"relative_fail_dict:{relative_fail_dict}")
-    absolute_fail_num_dict = reporter.get_fail_case_num()
+    relative_fail_num_dict = reporter.get_fail_case_num(fail_dict=relative_fail_dict)
+    print(f"relative_fail_num_dict:{relative_fail_num_dict}")
+    absolute_fail_num_dict = reporter.get_fail_case_num(fail_dict=absolute_fail_dict)
     print(f"absolute_fail_num_dict:{absolute_fail_num_dict}")
     # exit(0)
     # print commit bisection results
-    res_dict = reporter.binary_search(loop_num=10)
-    print("test end")
+    res_dict = reporter.binary_search(fail_dict=relative_fail_dict, loop_num=args.loop_num)
+    print("binary search end")
     print(f"res_dict:{res_dict}")
diff --git a/framework/e2e/PaddleLT_new/binary_search.py b/framework/e2e/PaddleLT_new/binary_search.py
index 96bb5d328f..e655bd405e 100644
--- a/framework/e2e/PaddleLT_new/binary_search.py
+++ b/framework/e2e/PaddleLT_new/binary_search.py
@@ -41,6 +41,8 @@ def __init__(self, good_commit, bad_commit, layerfile, testing, loop_num=1, perf
                 "wget -q https://xly-devops.bj.bcebos.com/PaddleTest/Paddle/Paddle-develop.tar.gz \
                 && tar -xzf Paddle-develop.tar.gz"
             )
+        else:
+            os.system("cd Paddle-develop && git pull")

         self.good_commit = good_commit
         self.bad_commit = bad_commit
@@ -49,6 +51,7 @@ def __init__(self, good_commit, bad_commit, layerfile, testing, loop_num=1, perf
             "Develop-GpuSome-LinuxCentos-Gcc82-Cuda118-Cudnn86-Trt85-Py310-CINN-Compile/{}/paddle"
             "paddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl"
         )
+        self.whl = "paddlepaddle_gpu-0.0.0-cp310-cp310-linux_x86_64.whl"

         self.layerfile = layerfile
         self.title = self.layerfile.replace(".py", "").replace("/", "^").replace(".", "^")
@@ -154,7 +157,8 @@ def _install_paddle(self, commit_id):
         whl_link = self.whl_link_template.replace("{}", commit_id)
-        exit_code = os.system(f"{self.py_cmd} -m pip install {whl_link}")
+        # exit_code = os.system(f"{self.py_cmd} -m pip install {whl_link}")
+        exit_code = os.system(f"rm -rf {self.whl} && wget -q {whl_link} && {self.py_cmd} -m pip install {self.whl}")
         self._status_print(exit_code=exit_code, status_str="install paddlepaddle-gpu")
         self.logger.get_log().info("commit {} install done".format(commit_id))
         return 0
@@ -240,6 +244,17 @@ def _run(self):
         """
         Run entry
         """
+        # initial check
+        self._install_paddle(self.good_commit)
+        bool_res_init_good_commit = self._precision_debug(self.good_commit)  # should be True
+        self._install_paddle(self.bad_commit)
+        bool_res_init_bad_commit = self._precision_debug(self.bad_commit)  # should be False
+
+        if not bool_res_init_good_commit or bool_res_init_bad_commit:
+            check_info = f"initial commits are invalid, good_commit result is {bool_res_init_good_commit}, bad_commit result is {bool_res_init_bad_commit}"
+            self.logger.get_log().info(check_info)
+            return "none", "none", "none", check_info
+
         commit_list_origin = self._get_commits()
         self.logger.get_log().info(f"original commit list is: {commit_list_origin}")
         save_pickle(data=commit_list_origin, filename="commit_list_origin.pickle")
@@ -291,10 +306,10 @@


 if __name__ == "__main__":
     bs = BinarySearch(
         good_commit="2e963d2bd2ca03626bb46cccbd0119b8873523a6",
-        bad_commit="c4a91627a61a5a723850857600eed15dfde08a62",
-        layerfile="layercase/sublayer1000/Clas_cases/Twins_alt_gvt_base/SIR_136.py",
+        bad_commit="651e66ba06f3ae26c3cf649f83a9a54b486ce75d",
+        layerfile="layercase/sublayer1000/Clas_cases/EfficientNet_EfficientNetB0/SIR_140.py",
         testing="yaml/dy^dy2stcinn_train_inputspec.yml",
-        loop_num=10,
+        loop_num=1,
         perf_decay=None,  # ["dy2st_eval_cinn_perf", 0.042814, -0.3]
         test_obj=LayerTest,
     )
diff --git a/framework/e2e/PaddleLT_new/db/info_map.py b/framework/e2e/PaddleLT_new/db/info_map.py
index a009dc9ac8..da1f62672b 100644
--- a/framework/e2e/PaddleLT_new/db/info_map.py
+++ b/framework/e2e/PaddleLT_new/db/info_map.py
@@ -17,6 +17,66 @@
     "paddlelt_train_pir_infersymbolic_inputspec": "07da2ef04135d7ec5d42987705204e1f",
 }

+precision_flags = {
+    "paddlelt_eval_cinn": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_cinn": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_eval_cinn_inputspec": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_cinn_inputspec": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_api_dy2stcinn_static_inputspec": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_api_dy2stcinn_inputspec": {
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_prim_all": "true",
+        "FLAGS_use_cinn": "1",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_prim_inputspec": {
+        "FLAGS_prim_all": "true",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_enable_pir_in_executor": "1",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+    "paddlelt_train_pir_infersymbolic_inputspec": {
+        "FLAGS_prim_all": "true",
+        "FLAGS_prim_enable_dynamic": "true",
+        "FLAGS_enable_pir_api": "1",
+        "FLAGS_enable_pir_in_executor": "1",
+        "MIN_GRAPH_SIZE": "0",
+        "FLAGS_check_infer_symbolic": "1",
+        "FLAGS_prim_forward_blacklist": "pd_op.dropout",
+    },
+}
+
 performance_md5 = {
     "paddlelt_perf_1000_cinn_cinn_A100_latest_as_baseline": "1f7253c6a9014bacc74d07cfd3b14ab2",
     "paddlelt_train_perf_1000_cinn_cinn_A100_latest_as_baseline": "1f7253c6a9014bacc74d07cfd3b14ab2",
diff --git a/framework/e2e/PaddleLT_new/db/layer_db.py b/framework/e2e/PaddleLT_new/db/layer_db.py
index e80f1ec551..9b1e1cff73 100644
--- a/framework/e2e/PaddleLT_new/db/layer_db.py
+++ b/framework/e2e/PaddleLT_new/db/layer_db.py
@@ -246,7 +246,7 @@ def get_precision_fail_case_dict(self, task_list=list(precision_md5.keys()), dat
             "testing": latest_testing,
             "baseline_update_time": baseline_update_time,
             "latest_update_time": latest_update_time,
-            "relative_fail_list": relative_fail_list,
+            "fail_list": relative_fail_list,
         }

         absolute_fail_dict[task] = {
@@ -255,7 +255,7 @@ def get_precision_fail_case_dict(self, task_list=list(precision_md5.keys()), dat
             "testing": latest_testing,
             "baseline_update_time": baseline_update_time,
             "latest_update_time": latest_update_time,
-            "absolute_fail_list": absolute_fail_list,
+            "fail_list": absolute_fail_list,
         }

         return relative_fail_dict, absolute_fail_dict
diff --git a/framework/e2e/PaddleLT_new/support/pr_info.py b/framework/e2e/PaddleLT_new/support/pr_info.py
new file mode 100644
index 0000000000..9990772452
--- /dev/null
+++ b/framework/e2e/PaddleLT_new/support/pr_info.py
@@ -0,0 +1,58 @@
+#!/bin/env python
+# -*- coding: utf-8 -*-
+# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python
+"""
+Fetch PR info
+"""
+
+import argparse
+import json
+import os
+import re
+import sys
+
+import requests
+
+
+class PRInfo(object):
+    """
+    PR info fetcher
+    """
+
+    def __init__(self, pr_id, title_keyword="CINN"):
+        """
+        init
+        """
+        self.pr_id = pr_id
+        self.title_keyword = title_keyword
+
+    def get_pr_title(self):
+        """
+        Fetch the PR title
+        """
+        response = requests.get(
+            f"https://api.github.com/repos/PaddlePaddle/Paddle/pulls/{self.pr_id}",
+            headers={"Accept": "application/vnd.github.v3+json"},
+        )
+        data = json.loads(response.text)
+        title = data["title"]
+        return title
+
+    def gen_skip_log(self):
+        """
+        Decide whether the PR should skip CI
+        """
+        title = self.get_pr_title()
+        if self.title_keyword in title:
+            os.system("echo 1 > pr_title.log")
+        else:
+            os.system("echo 0 > pr_title.log")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("--pr_id", type=int, default=0, help="PR number")
+    parser.add_argument("--title_keyword", type=str, default="CINN", help="keyword in the PR title that triggers CI")
+    args = parser.parse_args()
+    reporter = PRInfo(pr_id=args.pr_id, title_keyword=args.title_keyword)
+    reporter.gen_skip_log()