Skip to content

Commit

Permalink
plt test Giant tensor torch (#3010)
Browse files Browse the repository at this point in the history
* Plt Support torch test, test=model

* Plt Support torch test, test=model

* plt update torch, test=model

* fix plt torch, test=model
  • Loading branch information
Zeref996 authored Jan 2, 2025
1 parent 2022323 commit ae8ded4
Show file tree
Hide file tree
Showing 34 changed files with 1,306 additions and 64 deletions.
28 changes: 27 additions & 1 deletion framework/e2e/PaddleLT_new/diy/loss/diy_loss.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,13 @@
"""
diy loss list
"""
import paddle
import os

# Import only the framework(s) named in the FRAMEWORK environment variable.
# Defaulting to "" keeps the membership test valid when FRAMEWORK is unset;
# without a default, `"paddle" in None` raises TypeError at import time.
if "paddle" in os.environ.get("FRAMEWORK", ""):
    import paddle

if "torch" in os.environ.get("FRAMEWORK", ""):
    import torch


def naive_loss_list(logit, loss_list):
Expand Down Expand Up @@ -35,3 +41,23 @@ def mean_loss(logit):
return loss
else:
raise Exception("something wrong with mean_loss!!")


def torch_mean_loss(logit):
    """Reduce a torch output (tensor, or list/tuple of tensors) to a scalar mean loss.

    Args:
        logit: Either a single ``torch.Tensor`` or a list/tuple whose items
            may be tensors. Items that are not tensors, or that are empty
            tensors (``numel() == 0``), are ignored.

    Returns:
        For a single tensor, ``torch.mean(logit)``. For a list/tuple, the
        average of the per-tensor means over the tensors that were kept.

    Raises:
        ValueError: If a list/tuple contains no non-empty tensor, so there is
            nothing to average (previously a bare ZeroDivisionError).
        Exception: If ``logit`` is neither a tensor nor a list/tuple.
    """
    if isinstance(logit, torch.Tensor):
        return torch.mean(logit)

    if not isinstance(logit, (list, tuple)):
        raise Exception("something wrong with torch_mean_loss!!")

    # Average over the tensors actually used, not len(logit), so skipped
    # items do not dilute the loss.
    means = [
        torch.mean(item)
        for item in logit
        if isinstance(item, torch.Tensor) and item.numel() > 0
    ]
    if not means:
        raise ValueError("torch_mean_loss got no non-empty torch.Tensor to average")
    return sum(means, 0.0) / len(means)
14 changes: 13 additions & 1 deletion framework/e2e/PaddleLT_new/diy/optimizer/diy_opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,22 @@
"""
diy optimizer
"""
import paddle
import os

# Import only the framework(s) named in the FRAMEWORK environment variable.
# Defaulting to "" keeps the membership test valid when FRAMEWORK is unset;
# without a default, `"paddle" in None` raises TypeError at import time.
if "paddle" in os.environ.get("FRAMEWORK", ""):
    import paddle

if "torch" in os.environ.get("FRAMEWORK", ""):
    import torch


def naive_opt(net, opt_api, learning_rate):
    """Naive optimizer factory.

    Evaluates the ``opt_api`` string to obtain a callable, then calls it with
    ``learning_rate=`` and ``parameters=net.parameters()`` as keyword
    arguments and returns the result.

    NOTE(review): ``opt_api`` is passed to ``eval``; it must come from
    trusted test configuration only.
    """
    optimizer_factory = eval(opt_api)
    return optimizer_factory(
        learning_rate=learning_rate,
        parameters=net.parameters(),
    )


def torch_opt(net, opt_api, learning_rate):
    """Torch optimizer factory.

    Evaluates the ``opt_api`` string to obtain a callable, then calls it with
    ``net.parameters()`` as the first positional argument and
    ``lr=learning_rate`` as a keyword argument, and returns the result.

    NOTE(review): ``opt_api`` is passed to ``eval``; it must come from
    trusted test configuration only.
    """
    optimizer_factory = eval(opt_api)
    return optimizer_factory(net.parameters(), lr=learning_rate)
7 changes: 6 additions & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@

from pltools.res_save import save_tensor

from strategy.ordered_dict import OrderedDictProcess


class LayerEval(object):
"""
构建Layer评估的通用类
"""

# def __init__(self, testing, layerfile, device_id):
def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand All @@ -37,6 +39,7 @@ def __init__(self, testing, layerfile, device_place_id, upstream_net):

self.testing = testing
self.upstream_net = upstream_net
self.orderdict_usage = orderdict_usage
self.return_net_instance = self.testing.get("return_net_instance", "False")
self.model_dtype = self.testing.get("model_dtype")
paddle.set_default_dtype(self.model_dtype)
Expand Down Expand Up @@ -65,6 +68,8 @@ def _net_instant(self):
net = self.upstream_net
else:
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.orderdict_usage != "None":
net = OrderedDictProcess(net=net, layerfile=self.layerfile, orderdict_usage=self.orderdict_usage).process()
return net

def _net_input_and_spec(self):
Expand Down
2 changes: 1 addition & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_eval_bm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class LayerEvalBM(object):
"""

# def __init__(self, testing, layerfile, device_id):
def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand Down
2 changes: 1 addition & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class LayerExport(object):
构建Layer导出的通用类
"""

def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand Down
2 changes: 1 addition & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ class LayerInfer(object):
构建Layer预测的通用类
"""

def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand Down
7 changes: 6 additions & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,16 @@

from pltools.logger import Logger

from strategy.ordered_dict import OrderedDictProcess


class LayerTrain(object):
"""
构建Layer训练的通用类
"""

# def __init__(self, testing, layerfile, device_id):
def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand All @@ -37,6 +39,7 @@ def __init__(self, testing, layerfile, device_place_id, upstream_net):

self.testing = testing
self.upstream_net = upstream_net
self.orderdict_usage = orderdict_usage
self.return_net_instance = self.testing.get("return_net_instance", "False")
self.model_dtype = self.testing.get("model_dtype")
paddle.set_default_dtype(self.model_dtype)
Expand All @@ -63,6 +66,8 @@ def _net_instant(self):
net = self.upstream_net
else:
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.orderdict_usage != "None":
net = OrderedDictProcess(net=net, layerfile=self.layerfile, orderdict_usage=self.orderdict_usage).process()
return net

def _net_optimizer(self):
Expand Down
2 changes: 1 addition & 1 deletion framework/e2e/PaddleLT_new/engine/paddle_train_bm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class LayerTrainBM(object):
"""

# def __init__(self, testing, layerfile, device_id):
def __init__(self, testing, layerfile, device_place_id, upstream_net):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
Expand Down
9 changes: 6 additions & 3 deletions framework/e2e/PaddleLT_new/engine/torch_engine_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@
"""

from engine.torch_eval import TorchLayerEval
from engine.torch_eval_bm import TorchLayerEvalBM
from engine.torch_train import TorchLayerTrain

# from engine.torch_eval_bm import TorchLayerEvalBM

# from interpreter.testing_trans import TrainTrans, EvalTrans


torch_engine_map = {
"dy_eval": TorchLayerEval,
"dy_eval_perf": TorchLayerEvalBM, # 动态图评估性能
"torch_dy_eval": TorchLayerEval,
"torch_dy_train": TorchLayerTrain,
# "torch_dy_eval_perf": TorchLayerEvalBM, # 动态图评估性能
}
43 changes: 33 additions & 10 deletions framework/e2e/PaddleLT_new/engine/torch_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,49 +6,72 @@
eval 方法
"""
import os
import traceback
import numpy as np
import torch
from engine.torch_xtools import reset
from generator.builder_layer import BuildLayer
from generator.builder_data import BuildData

from strategy.ordered_dict import OrderedDictProcess


class TorchLayerEval(object):
"""
构建Layer评估的通用类
"""

# def __init__(self, testing, layerfile, device_id):
def __init__(self, testing, layerfile):
def __init__(self, testing, layerfile, device_place_id, upstream_net, orderdict_usage="None"):
"""
初始化
"""
self.seed = 33
reset(self.seed)

self.device = os.environ.get("PLT_SET_DEVICE")
torch.device(str(self.device))
# torch.device(f"cuda:{device_place_id}")
device = torch.device(f"cuda:{device_place_id}")
# device = torch.device('cuda:0')
# torch.cuda.set_device(device)
torch.set_default_device(device)

self.testing = testing
self.upstream_net = upstream_net
self.orderdict_usage = orderdict_usage
self.return_net_instance = self.testing.get("return_net_instance", "False")
self.model_dtype = self.testing.get("model_dtype")

# torch.set_default_dtype(self.model_dtype) # torch不支持字符串dtype, 测试框架暂时没兼容

self.layerfile = layerfile
self.data = BuildData(layerfile=self.layerfile).get_single_data()
# self.data = BuildData(layerfile=self.layerfile).get_single_data()

def _net_input(self):
    """Return the input data built for ``self.layerfile`` with framework="torch".

    ``reset(self.seed)`` is called first, before the data is built.
    """
    reset(self.seed)
    return BuildData(layerfile=self.layerfile).get_single_data(framework="torch")

def _net_instant(self):
"""get net and data"""
"""get net"""
reset(self.seed)
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.upstream_net:
net = self.upstream_net
else:
net = BuildLayer(layerfile=self.layerfile).get_layer()
if self.orderdict_usage != "None":
net = OrderedDictProcess(net=net, layerfile=self.layerfile, orderdict_usage=self.orderdict_usage).process()
return net

def dy_eval(self):
def torch_dy_eval(self):
"""dygraph eval"""
net = self._net_instant()
# net.eval()
logit = net(*self.data)
return {"logit": logit}
net.eval()
logit = net(*self._net_input())
if self.return_net_instance == "True":
return {"res": {"logit": logit}, "net": net}
else:
return {"res": {"logit": logit}, "net": None}

# def dy2st_eval(self):
# """dy2st eval"""
Expand Down
Loading

0 comments on commit ae8ded4

Please sign in to comment.