Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev fast resnet50 #207

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions projects/fast_resnet50/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import os

from oneflow.utils.data import DataLoader

from flowvision import datasets, transforms
from flowvision.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from flowvision.transforms.functional import str_to_interp_mode


def build_loader(config):
    """Build the train/val datasets and their data loaders.

    The class count returned for the training dataset is written back to
    ``config.MODEL.NUM_CLASSES``; the config is defrosted only around that
    assignment and frozen again afterwards.

    Args:
        config: Config node providing ``defrost()``/``freeze()`` and the
            ``DATA.BATCH_SIZE`` and ``DATA.NUM_WORKERS`` entries, plus
            whatever ``build_dataset`` reads from it.

    Returns:
        The tuple ``(dataset_train, dataset_val, data_loader_train,
        data_loader_val)``.
    """
    config.defrost()
    dataset_train, config.MODEL.NUM_CLASSES = build_dataset(
        is_train=True, config=config
    )
    config.freeze()
    dataset_val, _ = build_dataset(is_train=False, config=config)

    data_loader_train = DataLoader(
        dataset_train,
        batch_size=config.DATA.BATCH_SIZE,
        # Without shuffling, training batches follow the dataset's storage
        # order (class by class for an image-folder layout).
        shuffle=True,
        num_workers=config.DATA.NUM_WORKERS,
        # Drop the trailing partial batch so every training step sees a
        # full batch.
        drop_last=True,
    )
    data_loader_val = DataLoader(
        dataset_val,
        batch_size=config.DATA.BATCH_SIZE,
        shuffle=False,
        num_workers=config.DATA.NUM_WORKERS,
        # Keep every validation sample, including the last partial batch.
        drop_last=False,
    )
    return dataset_train, dataset_val, data_loader_train, data_loader_val


def build_dataset(is_train, config):
    """Create the dataset selected by ``config.DATA.DATASET``.

    Args:
        is_train: ``True`` for the training split, ``False`` for validation.
        config: Config node providing ``DATA.DATASET`` and ``DATA.DATA_PATH``
            (plus whatever ``build_transform`` reads from it).

    Returns:
        A ``(dataset, nb_classes)`` pair, where ``nb_classes`` is 1000 for
        ``"imagenet"`` and 100 for ``"cifar100"``.

    Raises:
        NotImplementedError: If the configured dataset name is neither
            ``"imagenet"`` nor ``"cifar100"``.
    """
    transform = build_transform(is_train, config)
    dataset_name = config.DATA.DATASET

    if dataset_name == "imagenet":
        # The data path is expected to hold "train" and "val" sub-folders.
        split = "train" if is_train else "val"
        split_root = os.path.join(config.DATA.DATA_PATH, split)
        return datasets.ImageFolder(split_root, transform=transform), 1000

    if dataset_name == "cifar100":
        cifar = datasets.CIFAR100(
            root=config.DATA.DATA_PATH,
            train=is_train,
            transform=transform,
            download=True,
        )
        return cifar, 100

    raise NotImplementedError("We only support ImageNet and CIFAR100 Now.")


def build_transform(is_train, config):
    """Compose the preprocessing pipeline for training or evaluation.

    Training: random resized crop to ``IMG_SIZE`` x ``IMG_SIZE``, random
    horizontal flip, tensor conversion and ImageNet mean/std normalization.

    Evaluation: when ``IMG_SIZE`` is larger than 32, either resize then
    center-crop (``config.TEST.CROP`` truthy) or resize directly to
    ``IMG_SIZE`` x ``IMG_SIZE``; followed by tensor conversion and
    normalization. Images of size 32 or smaller are not resized.

    Args:
        is_train: Selects the training (``True``) or evaluation pipeline.
        config: Config node providing ``DATA.IMG_SIZE``,
            ``DATA.INTERPOLATION`` and, for evaluation, ``TEST.CROP``.

    Returns:
        A ``transforms.Compose`` wrapping the selected steps.
    """
    img_size = config.DATA.IMG_SIZE
    needs_resize = img_size > 32

    if is_train:
        return transforms.Compose(
            [
                transforms.RandomResizedCrop(
                    size=(img_size, img_size),
                    interpolation=str_to_interp_mode(config.DATA.INTERPOLATION),
                ),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ToTensor(),
                transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
            ]
        )

    steps = []
    if needs_resize:
        if config.TEST.CROP:
            # Scale up first so the center crop keeps the same ratio that
            # 256 -> 224 gives for standard ImageNet evaluation.
            scaled_size = int((256 / 224) * img_size)
            steps.append(
                transforms.Resize(
                    scaled_size,
                    interpolation=str_to_interp_mode(config.DATA.INTERPOLATION),
                )
            )
            steps.append(transforms.CenterCrop(img_size))
        else:
            steps.append(
                transforms.Resize(
                    (img_size, img_size),
                    interpolation=str_to_interp_mode(config.DATA.INTERPOLATION),
                )
            )

    steps.append(transforms.ToTensor())
    steps.append(transforms.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD))
    return transforms.Compose(steps)
76 changes: 76 additions & 0 deletions projects/fast_resnet50/graph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import oneflow as flow

def make_grad_scaler():
    """Return a ``flow.amp.GradScaler`` with this project's fixed settings.

    The scale starts at 2**30, is halved on back-off and doubled on growth,
    with a growth interval of 2000.
    """
    scaler_options = {
        "init_scale": 2 ** 30,
        "growth_factor": 2.0,
        "backoff_factor": 0.5,
        "growth_interval": 2000,
    }
    return flow.amp.GradScaler(**scaler_options)

class TrainGraph(flow.nn.Graph):
    """nn.Graph that runs one training step: load, forward, loss, backward.

    Args:
        model: Module called on the image batch to produce logits.
        loss: Callable invoked as ``loss(logits, label)``.
        optimizer: Optimizer registered on the graph via ``add_optimizer``.
        lr_scheduler: Scheduler passed to ``add_optimizer`` as ``lr_sch``.
        data_loader: Callable returning an ``(image, label)`` pair.
        config: Object exposing the boolean flags ``use_fp16``,
            ``scale_grad``, ``fuse_add_to_output``,
            ``fuse_model_update_ops``, ``conv_try_run`` and
            ``fuse_pad_to_conv``.
    """

    def __init__(self, model, loss, optimizer, lr_scheduler, data_loader, config):
        super().__init__()

        if config.use_fp16:
            # Use nn.Graph's automatic mixed precision training, together
            # with the shared dynamic gradient scaler.
            self.config.enable_amp(True)
            self.set_grad_scaler(make_grad_scaler())
        elif config.scale_grad:
            # Without AMP, optionally apply a static gradient scale equal
            # to the world size.
            self.set_grad_scaler(
                flow.amp.StaticGradScaler(flow.env.get_world_size())
            )

        if config.fuse_add_to_output:
            # Use nn.Graph's add-operator fusion.
            self.config.allow_fuse_add_to_output(True)

        if config.fuse_model_update_ops:
            self.config.allow_fuse_model_update_ops(True)

        if config.conv_try_run:
            # Use nn.Graph's convolution try-run optimization: heuristic
            # algorithm search is turned off.
            self.config.enable_cudnn_conv_heuristic_search_algo(False)

        if config.fuse_pad_to_conv:
            # Use nn.Graph's pad-operator fusion.
            self.config.allow_fuse_pad_to_conv(True)

        self.model = model
        self.loss = loss
        self.add_optimizer(optimizer, lr_sch=lr_scheduler)
        self.data_loader = data_loader

    def build(self):
        """Run one training step and return the loss."""
        image, label = self.data_loader()
        image = image.to("cuda")
        label = label.to("cuda")
        logits = self.model(image)
        # The criterion is stored as ``self.loss`` in ``__init__``.
        loss = self.loss(logits, label)
        loss.backward()
        return loss


class EvalGraph(flow.nn.Graph):
    """nn.Graph that runs one evaluation step and returns predictions.

    Args:
        model: Module called on the image batch to produce logits.
        data_loader: Callable returning an ``(image, label)`` pair.
        config: Object exposing the boolean flags ``use_fp16`` and
            ``fuse_add_to_output``.
    """

    def __init__(self, model, data_loader, config):
        super().__init__()

        # Use nn.Graph's automatic mixed precision.
        if config.use_fp16:
            self.config.enable_amp(True)

        # Use nn.Graph's add-operator fusion.
        if config.fuse_add_to_output:
            self.config.allow_fuse_add_to_output(True)

        self.data_loader = data_loader
        self.model = model

    def build(self):
        """Return ``(softmax predictions, labels)`` for the next batch."""
        images, labels = self.data_loader()
        images = images.to("cuda")
        labels = labels.to("cuda")
        return self.model(images).softmax(), labels
17 changes: 17 additions & 0 deletions projects/fast_resnet50/lr_scheduler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import oneflow as flow


def build_lr_scheduler(config, optimizer, n_iter_per_epoch):
    """Build a cosine-decay LR scheduler with optional linear warm-up.

    Args:
        config: Config object providing ``train.epochs`` and
            ``train.warmup_epochs``.
        optimizer: Optimizer whose learning rate is scheduled.
        n_iter_per_epoch: Number of iterations in one epoch, used to convert
            epoch counts into step counts.

    Returns:
        A ``CosineDecayLR`` scheduler, wrapped in ``WarmUpLR`` when the
        configured warm-up amounts to at least one step.
    """
    num_steps = int(config.train.epochs * n_iter_per_epoch)
    warmup_steps = int(config.train.warmup_epochs * n_iter_per_epoch)

    lr_scheduler = flow.optim.lr_scheduler.CosineDecayLR(
        optimizer, decay_steps=num_steps
    )
    # Decide from the same ``config.train.warmup_epochs`` value the step
    # count is derived from, and skip the wrapper when warm-up would be
    # zero steps long.
    if warmup_steps > 0:
        lr_scheduler = flow.optim.lr_scheduler.WarmUpLR(
            lr_scheduler,
            warmup_factor=0.01,
            warmup_iters=warmup_steps,
            warmup_method="linear",
        )
    return lr_scheduler
Loading