plt fix builder_data (#3000)
* plt update, test=model

* plt update, test=model

* plt update, test=model

* plt update testing reporter, test=model

* plt update testing reporter, test=model

* plt update testing reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt update test reporter, test=model

* plt add pr_info

* plt fix ocr case, test=model

* plt fix ocr case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add nlp case, test=model

* plt add more nlp case, test=model

* plt add more nlp case, test=model

* plt add nlp, test=model

* plt fix case, test=model

* fix plt builder data, test=model

* fix plt builder data, test=model

* fix plt builder data, test=model
Zeref996 authored Dec 2, 2024
1 parent 5a39cad commit 3ce7082
Showing 20 changed files with 341 additions and 23 deletions.
5 changes: 4 additions & 1 deletion framework/e2e/PaddleLT_new/generator/builder_data.py
@@ -68,11 +68,14 @@ def get_single_data(self):
                    data.append(torch.tensor(i, requires_grad=False))
                else:
                    data.append(torch.tensor(i, requires_grad=True))
            elif isinstance(i, float):
                data.append(paddle.to_tensor(i, stop_gradient=False))
            elif isinstance(i, int):
                data.append(paddle.to_tensor(i, stop_gradient=True))
            else:
                data.append(i)
        else:
            data = self.get_single_tensor()

        return data

    def get_single_tensor(self):
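Not part of the commit: a minimal standalone sketch of the scalar-handling rule this hunk adds, to show the intent (the helper name wrap_scalar is hypothetical). Python floats are wrapped as differentiable Paddle tensors, ints as non-differentiable ones, and anything else is passed through unchanged.

import paddle

def wrap_scalar(i):
    # Floats take part in autograd (stop_gradient=False)
    if isinstance(i, float):
        return paddle.to_tensor(i, stop_gradient=False)
    # Ints (ids, lengths, flags) stay out of autograd
    if isinstance(i, int):
        return paddle.to_tensor(i, stop_gradient=True)
    # Everything else (tensors, arrays, strings) passes through unchanged
    return i

data = [wrap_scalar(x) for x in (0.5, 3, "unchanged")]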
18 changes: 18 additions & 0 deletions framework/e2e/PaddleLT_new/layerNLPcase/debug/__init__.py
@@ -0,0 +1,18 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all folders under the current directory (note: no trailing slash is needed here)
folders = glob.glob(os.path.join(current_dir, '*'))

# Keep only directories (excluding files and __pycache__)
folders = [folder for folder in folders if os.path.isdir(folder) and not os.path.basename(folder) == '__pycache__']

# Dynamically import every subpackage
for folder in folders:
    # Get the folder name
    module_name = os.path.basename(folder)
    # Import the module
    __import__('layerNLPcase.debug.' + module_name, globals(), locals(), [])
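The pattern above eagerly imports every subdirectory as a subpackage, so importing layerNLPcase.debug pulls in all of its case groups as a side effect. For reference only (not committed code), an equivalent sketch of the same idea using importlib instead of the low-level __import__ call:

import importlib
import os

package_dir = os.path.dirname(__file__)
for name in sorted(os.listdir(package_dir)):
    path = os.path.join(package_dir, name)
    # Import each subdirectory (skipping __pycache__) as layerNLPcase.debug.<name>
    if os.path.isdir(path) and name != "__pycache__":
        importlib.import_module("layerNLPcase.debug." + name)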
18 changes: 18 additions & 0 deletions framework/e2e/PaddleLT_new/layerNLPcase/debug/case_bug/__init__.py
@@ -0,0 +1,18 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all folders under the current directory (note: no trailing slash is needed here)
folders = glob.glob(os.path.join(current_dir, '*'))

# Keep only directories (excluding files and __pycache__)
folders = [folder for folder in folders if os.path.isdir(folder) and not os.path.basename(folder) == '__pycache__']

# Dynamically import every subpackage
for folder in folders:
    # Get the folder name
    module_name = os.path.basename(folder)
    # Import the module
    __import__('layerNLPcase.debug.case_bug.' + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -0,0 +1,18 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all folders under the current directory (note: no trailing slash is needed here)
folders = glob.glob(os.path.join(current_dir, '*'))

# Keep only directories (excluding files and __pycache__)
folders = [folder for folder in folders if os.path.isdir(folder) and not os.path.basename(folder) == '__pycache__']

# Dynamically import every subpackage
for folder in folders:
    # Get the folder name
    module_name = os.path.basename(folder)
    # Import the module
    __import__('layerNLPcase.debug.case_bug.transformers.' + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -12,4 +12,4 @@
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.transformers.layoutlmv2." + module_name, globals(), locals(), [])
    __import__("layerNLPcase.debug.case_bug.transformers.layoutlmv2." + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -10,8 +10,8 @@ def LayerCase():
def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
        None,
        None,
        paddle.static.InputSpec(shape=(-1, 13, 4), dtype=paddle.int64, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 3, 224, 224), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec
@@ -21,10 +21,10 @@ def create_tensor_inputs():
    tokenizer = LayoutLMv2Tokenizer.from_pretrained('layoutlmv2-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        paddle.to_tensor(inputs_dict['input_ids'], stop_gradient=False),
        None,
        None,
        paddle.to_tensor(inputs_dict['token_type_ids'], stop_gradient=False),
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 13, 4)).astype("int64"), stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 3, 224, 224)), stop_gradient=False),
        paddle.to_tensor([inputs_dict['token_type_ids']], stop_gradient=False),
    )
    return inputs

@@ -34,8 +34,8 @@ def create_numpy_inputs():
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        np.array([inputs_dict['input_ids']]),
        None,
        None,
        np.random.random((1, 13, 4)).astype("int64"),
        np.random.random((1, 3, 224, 224)),
        np.array([inputs_dict['token_type_ids']]),
    )
    return inputs
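Not part of the commit: a hedged smoke test of the reworked input layout above. It assumes PaddleNLP's LayoutLMv2Model.forward accepts (input_ids, bbox, image, token_type_ids, ...) positionally in that order, which is what the four-element tuples appear to target; pretrained weights are downloaded on first use.

import numpy as np
import paddle
from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2Tokenizer

tokenizer = LayoutLMv2Tokenizer.from_pretrained("layoutlmv2-base-uncased")
model = LayoutLMv2Model.from_pretrained("layoutlmv2-base-uncased")
model.eval()

enc = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
input_ids = paddle.to_tensor([enc["input_ids"]])                                  # (1, seq_len)
bbox = paddle.to_tensor(np.zeros((1, len(enc["input_ids"]), 4), dtype="int64"))   # dummy boxes
image = paddle.to_tensor(np.random.random((1, 3, 224, 224)).astype("float32"))    # dummy image
token_type_ids = paddle.to_tensor([enc["token_type_ids"]])

outputs = model(input_ids, bbox, image, token_type_ids)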
(diff for another changed file; file path not shown)
@@ -12,4 +12,4 @@
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.transformers.layoutxlm." + module_name, globals(), locals(), [])
    __import__("layerNLPcase.debug.case_bug.transformers.layoutxlm." + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -9,10 +9,9 @@ def LayerCase():

def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
        None,
        None,
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 15, 4), dtype=paddle.int64, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 3, 224, 224), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec

@@ -21,10 +20,9 @@ def create_tensor_inputs():
    tokenizer = LayoutXLMTokenizer.from_pretrained('layoutxlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        paddle.to_tensor(inputs_dict['input_ids'], stop_gradient=False),
        None,
        None,
        paddle.to_tensor(inputs_dict['token_type_ids'], stop_gradient=False),
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 15, 4)).astype("int64"), stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 3, 224, 224)), stop_gradient=False),
    )
    return inputs

@@ -34,8 +32,7 @@ def create_numpy_inputs():
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        np.array([inputs_dict['input_ids']]),
        None,
        None,
        np.array([inputs_dict['token_type_ids']]),
        np.random.random((1, 15, 4)).astype("int64"),
        np.random.random((1, 3, 224, 224)),
    )
    return inputs
(diff for another changed file; file path not shown)
@@ -12,4 +12,4 @@
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.transformers.reformer." + module_name, globals(), locals(), [])
    __import__("layerNLPcase.debug.case_bug.transformers.reformer." + module_name, globals(), locals(), [])
18 changes: 18 additions & 0 deletions framework/e2e/PaddleLT_new/layerNLPcase/debug/real_bug/__init__.py
@@ -0,0 +1,18 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all folders under the current directory (note: no trailing slash is needed here)
folders = glob.glob(os.path.join(current_dir, '*'))

# Keep only directories (excluding files and __pycache__)
folders = [folder for folder in folders if os.path.isdir(folder) and not os.path.basename(folder) == '__pycache__']

# Dynamically import every subpackage
for folder in folders:
    # Get the folder name
    module_name = os.path.basename(folder)
    # Import the module
    __import__('layerNLPcase.debug.real_bug.' + module_name, globals(), locals(), [])
15 changes: 15 additions & 0 deletions framework/e2e/PaddleLT_new/layerNLPcase/llm/__init__.py
@@ -0,0 +1,15 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all .py files under the current directory
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import all .py files
for py_file in py_files:
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.llm." + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -0,0 +1,30 @@
import paddle
import numpy as np
from paddlenlp.transformers import AutoModelForCausalLM, AutoTokenizer

def LayerCase():
    """Intermediate form of a model from the model zoo"""
    model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-chat')
    return model

def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 14), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 14), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 14), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec


def create_tensor_inputs():
    tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = tuple(paddle.to_tensor([v], stop_gradient=False) for (k, v) in inputs_dict.items())
    return inputs


def create_numpy_inputs():
    tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = tuple(np.array([v]) for (k, v) in inputs_dict.items())
    return inputs
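Worth noting for the case above: the input tuple is built from whatever keys the tokenizer returns, in dictionary order, so its arity and layout depend on the tokenizer configuration rather than being fixed like the other cases. A quick check sketch (hypothetical, assuming the gated meta-llama/Llama-2-7b-chat tokenizer files are available locally):

from paddlenlp.transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat")
enc = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
print(list(enc.keys()))       # the key order here fixes the tuple layout above
print(len(enc["input_ids"]))  # should line up with the 14-token InputSpec shapes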
(diff for another changed file; file path not shown)
@@ -0,0 +1,41 @@
import paddle
import numpy as np
from paddlenlp.transformers import LayoutLMModel, LayoutLMTokenizer

def LayerCase():
    """Intermediate form of a model from the model zoo"""
    model = LayoutLMModel.from_pretrained('layoutlm-base-uncased')
    return model

def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 13, 4), dtype=paddle.int64, stop_gradient=False),
        # paddle.static.InputSpec(shape=(-1, 3, 224, 224), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec


def create_tensor_inputs():
    tokenizer = LayoutLMTokenizer.from_pretrained('layoutlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 13, 4)).astype("int64"), stop_gradient=False),
        # paddle.to_tensor(np.random.random((1, 3, 224, 224)), stop_gradient=False),
        paddle.to_tensor([inputs_dict['token_type_ids']], stop_gradient=False),
    )
    return inputs


def create_numpy_inputs():
    tokenizer = LayoutLMTokenizer.from_pretrained('layoutlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        np.array([inputs_dict['input_ids']]),
        np.random.random((1, 13, 4)).astype("int64"),
        # np.random.random((1, 3, 224, 224)),
        np.array([inputs_dict['token_type_ids']]),
    )
    return inputs
(diff for another changed file; file path not shown)
@@ -0,0 +1,15 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all .py files under the current directory
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import all .py files
for py_file in py_files:
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.transformers.layoutlmv2." + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -0,0 +1,41 @@
import paddle
import numpy as np
from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2Tokenizer

def LayerCase():
    """Intermediate form of a model from the model zoo"""
    model = LayoutLMv2Model.from_pretrained('layoutlmv2-base-uncased')
    return model

def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 13, 4), dtype=paddle.int64, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 3, 224, 224), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 13), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec


def create_tensor_inputs():
    tokenizer = LayoutLMv2Tokenizer.from_pretrained('layoutlmv2-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 13, 4)).astype("int64"), stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 3, 224, 224)), stop_gradient=False),
        paddle.to_tensor([inputs_dict['token_type_ids']], stop_gradient=False),
    )
    return inputs


def create_numpy_inputs():
    tokenizer = LayoutLMv2Tokenizer.from_pretrained('layoutlmv2-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        np.array([inputs_dict['input_ids']]),
        np.random.random((1, 13, 4)).astype("int64"),
        np.random.random((1, 3, 224, 224)),
        np.array([inputs_dict['token_type_ids']]),
    )
    return inputs
(diff for another changed file; file path not shown)
@@ -0,0 +1,15 @@
import os
import glob

# Get the directory containing the current file
current_dir = os.path.dirname(__file__)

# Get the paths of all .py files under the current directory
py_files = glob.glob(os.path.join(current_dir, "*.py"))

# Dynamically import all .py files
for py_file in py_files:
    # Get the file name (without the extension)
    module_name = os.path.basename(py_file)[:-3]
    # Import the module
    __import__("layerNLPcase.transformers.layoutxlm." + module_name, globals(), locals(), [])
(diff for another changed file; file path not shown)
@@ -0,0 +1,38 @@
import paddle
import numpy as np
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer

def LayerCase():
    """Intermediate form of a model from the model zoo"""
    model = LayoutXLMModel.from_pretrained('layoutxlm-base-uncased')
    return model

def create_inputspec():
    inputspec = (
        paddle.static.InputSpec(shape=(-1, 15), dtype=paddle.float32, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 15, 4), dtype=paddle.int64, stop_gradient=False),
        paddle.static.InputSpec(shape=(-1, 3, 224, 224), dtype=paddle.float32, stop_gradient=False),
    )
    return inputspec


def create_tensor_inputs():
    tokenizer = LayoutXLMTokenizer.from_pretrained('layoutxlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        paddle.to_tensor([inputs_dict['input_ids']], stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 15, 4)).astype("int64"), stop_gradient=False),
        paddle.to_tensor(np.random.random((1, 3, 224, 224)), stop_gradient=False),
    )
    return inputs


def create_numpy_inputs():
    tokenizer = LayoutXLMTokenizer.from_pretrained('layoutxlm-base-uncased')
    inputs_dict = tokenizer("Welcome to use PaddlePaddle and PaddleNLP!")
    inputs = (
        np.array([inputs_dict['input_ids']]),
        np.random.random((1, 15, 4)).astype("int64"),
        np.random.random((1, 3, 224, 224)),
    )
    return inputs
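Every case module added or moved in this commit exposes the same entry points (LayerCase, create_inputspec, create_tensor_inputs, create_numpy_inputs). Purely as an illustration of how such a module can be consumed, not code from the repository, a minimal dynamic-import runner might look like this (the example module path is hypothetical):

import importlib

def run_case(module_path: str):
    case = importlib.import_module(module_path)
    model = case.LayerCase()        # build the pretrained layer/model
    model.eval()
    inputs = case.create_tensor_inputs()
    return model(*inputs)           # feed the tuple positionally

# e.g. run_case("layerNLPcase.transformers.layoutxlm.some_case")  # hypothetical module name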