From 851c0c9fb9c876aeaf1ed7ee4ffc0847eeaa1bb6 Mon Sep 17 00:00:00 2001
From: Amit Klinger
Date: Wed, 11 Oct 2023 15:18:56 +0300
Subject: [PATCH 01/13] added dummy prune + full_pp_head PostProcess

---
 configs/_base_/datasets/cityscapes.py     |  2 +-
 mmseg/models/decode_heads/__init__.py     |  3 +-
 mmseg/models/decode_heads/full_pp_head.py | 79 +++++++++++++++++++++++
 tools/pytorch2onnx.py                     | 41 ++++++++++++
 4 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 mmseg/models/decode_heads/full_pp_head.py

diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py
index 2f88208b51..7be4a9c561 100644
--- a/configs/_base_/datasets/cityscapes.py
+++ b/configs/_base_/datasets/cityscapes.py
@@ -1,6 +1,6 @@
 # dataset settings
 dataset_type = 'CityscapesDataset'
-data_root = '/data/cityscapes/'
+data_root = '/data/data/cityscapes10classes/'
 crop_size = (512, 1024)
 train_pipeline = [
     dict(type='LoadImageFromFile'),
diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py
index 0f43aad55d..2a94aa437a 100644
--- a/mmseg/models/decode_heads/__init__.py
+++ b/mmseg/models/decode_heads/__init__.py
@@ -35,9 +35,10 @@
 from .setr_up_head import SETRUPHead
 from .stdc_head import STDCHead
 from .uper_head import UPerHead
+from .full_pp_head import PostProcess
 
 __all__ = [
-    'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
+    'FCNHead', 'PostProcess', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead',
     'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead',
     'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'FCNGenHead', 'EMAHead',
     'DNLHead', 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead',
diff --git a/mmseg/models/decode_heads/full_pp_head.py b/mmseg/models/decode_heads/full_pp_head.py
new file mode 100644
index 0000000000..305d47319d
--- /dev/null
+++ b/mmseg/models/decode_heads/full_pp_head.py
@@ -0,0 +1,79 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import torch
+import torch.nn as nn
+import numpy as np
+
+from ..builder import HEADS
+from .decode_head import BaseDecodeHead
+
+
+
+@HEADS.register_module()
+class PostProcess(BaseDecodeHead):
+    def __init__(self, num_convs, **kwargs):
+        super(PostProcess, self).__init__(**kwargs)
+
+        self.num_convs = num_convs
+        self.output_channels = self.channels
+        self.conv_layers = nn.ModuleList()
+        in_channels = self.in_channels
+        self.num_classes = kwargs['num_classes']
+        for i in range(self.num_convs):
+            self.conv_layers.append(nn.Conv2d(in_channels, self.output_channels, kernel_size=3, padding=1))
+            in_channels = self.output_channels
+
+        self.dw = torch.nn.Conv2d(in_channels=self.num_classes - 1, out_channels=self.num_classes - 1, kernel_size=(1, 2), groups=self.num_classes - 1, bias=False)
+        w = np.ones((self.num_classes - 1, 1, 1, 2), dtype=np.float32)
+        # w[:, 0, 0, 0] = -1
+        w[:, :, :, 1] = -1
+        self.dw.weight = torch.nn.Parameter(torch.Tensor(w))
+        self.relu = torch.nn.ReLU()
+        self.argmax_star = True  # Argmax* (Argmax from bottom) -> will not compile, so default is False
+
+    def forward(self, x):
+
+        # channels in -> channels out (several, default is 1 convs layers after backbone)
+        for conv_layer in self.conv_layers:
+            x = nn.functional.relu(conv_layer(x))
+
+        # #channels -> #classes
+        x = self.cls_seg(x)
+
+        # input is (BxCxHxW): 1x10x92x120 output is 1x10x736x240
+        x = torch.nn.functional.interpolate(x, size=(736, 240), mode='bilinear', align_corners=True)
+
+        # argmax on channels. output is 1x1x736x240
+        x = torch.argmax(x, dim=1, keepdim=True)
+
+        # H<->W transpose. output is 1x1x240x736
+        x = torch.transpose(x, 2, 3)
+
+        # torch.nn.functional.one_hot adds an extra dim at the end of the tensor so output is 1x1x240x736x10
+        x = torch.nn.functional.one_hot(x, num_classes=self.num_classes)
+        x = torch.transpose(x, 1, 4)  # output is 1x10x240x736x1
+        x = torch.squeeze(x, dim=-1)  # output is 1x10x240x736
+
+        # First output edge detector
+        out1 = x[:, :-1, :, :]  # output is 1x9x240x736. Assuming raindrop is last class
+        # out1 = torch.nn.functional.pad(out1, [1, 0, 0, 0]) # output is 1x9x240x737
+        out1 = out1.to(torch.float32)
+        out1 = torch.nn.functional.pad(out1, [0, 1, 0, 0], mode='constant', value=0.5)  # output is 1x9x240x737
+        out1 = self.relu(self.dw(out1))  # output is 1x9x240x736
+
+        # W<->C transpose. output is 1x736x240x9
+        out1 = torch.transpose(out1, 1, 3)
+
+        if self.argmax_star:
+            # argmax* support: Flip the 736 axis. output is 1x736x240x9
+            out1 = torch.flip(out1, dims=(1,))
+        # argmax on channels. final output is 1x1x240x9
+        out1 = torch.argmax(out1, dim=1, keepdim=True)
+
+        # second output is to reduce sum on final class. output is 4 integers of 1x1x1x1 so each one would be represented by 16 bit integer. Assuming raindrop is last class
+        out2, out3, out4, out5 = x[:, -1:, :, :184], x[:, -1:, :, 184:368], x[:, -1:, :, 368:552], x[:, -1:, :, 552:]
+        out2 = torch.sum(torch.sum(out2, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        out3 = torch.sum(torch.sum(out3, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        out4 = torch.sum(torch.sum(out4, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        out5 = torch.sum(torch.sum(out5, dim=-1, keepdim=True), dim=-2, keepdim=True)
+
+        return out1, out2, out3, out4, out5
diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index 78851f0d2a..6954044e3d 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -64,6 +64,44 @@ def load_pretrained_weights_soft(model, checkpoint):
     )
 
 
+def dummy_prune_ckpt(ckpt, prune_ratio=0.5):
+    pass
+    for k, v in ckpt['state_dict'].items():
+        if k.startswith('backbone.') and k.endswith('.rbr_dense.conv.weight'):
+            # Sparsify layer:
+            v = dummy_prune_layer(v, prune_ratio)
+    calc_sparsity(ckpt['state_dict'])
+    return ckpt
+
+def dummy_prune_layer(layer, prune_ratio=0.5):
+    # Flatten the tensor
+    flattened_layer = layer.flatten()
+    # Get the absolute values
+    abs_values = torch.abs(flattened_layer)
+    # Get indices sorted by absolute values
+    sorted_indices = torch.argsort(abs_values)
+    # Determine the threshold index
+    threshold_index = int(prune_ratio * len(sorted_indices))
+    # Set values below the threshold to zero
+    flattened_layer[sorted_indices[:threshold_index]] = 0
+    # Reshape the tensor back to its original shape
+    pruned_tensor = flattened_layer.reshape(layer.shape)
+
+    return pruned_tensor
+
+def calc_sparsity(model_dict):
+    weights_layers_num, total_weights, total_zeros = 0, 0, 0
+    for k, v in model_dict.items():
+        if k.startswith('backbone.') and k.endswith('weight'):
+            weights_layers_num += 1
+            total_weights += v.numel()
+            total_zeros += (v.numel() - v.count_nonzero())
+            zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0
+            print(f"[{weights_layers_num:>2}] {k:<51}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros")
+    print(f"Model has {weights_layers_num} weight layers")
+    print(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%")
+
+
 def parse_args():
     parser.add_argument('config', help='train config file path')
     parser.add_argument('--work-dir', help='the dir to save logs and models')
@@ -149,6 +187,7 @@ def main():
     if args.checkpoint:
         ckpt = torch.load(args.checkpoint, map_location='cpu')
         if args.soft_weights_loading:
+            ckpt = dummy_prune_ckpt(ckpt, 0.6)
             load_pretrained_weights_soft(model, ckpt)
         else:
             if 'state_dict' in ckpt:
@@ -156,10 +195,12 @@ def main():
             else:
                 model.load_state_dict(ckpt)
 
+    print("Switching to deployment model")
     # if repvgg style -> deploy
     for module in model.modules():
         if hasattr(module, 'switch_to_deploy'):
             module.switch_to_deploy()
+    calc_sparsity(model.state_dict())
 
     # to onnx
     model.eval()

From cd128684262c1c4d7027a35b9b488153c9af2eac Mon Sep 17 00:00:00 2001
From: Amit Klinger
Date: Thu, 12 Oct 2023 13:59:28 +0300
Subject: [PATCH 02/13] added dummy prune ratio argument for export_onnx
 (default 0)

---
 tools/pytorch2onnx.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py
index 6954044e3d..653c4ceb65 100644
--- a/tools/pytorch2onnx.py
+++ b/tools/pytorch2onnx.py
@@ -111,6 +111,7 @@ def parse_args():
     parser.add_argument('--shape', nargs=2, type=int, default=[1024, 1920])
     parser.add_argument('--out_name', default='fcn.onnx', type=str, help="Name for the onnx output")
     parser.add_argument('--soft_weights_loading',action='store_true', default=False)
+    parser.add_argument('--dummy_prune_ratio', type=float, default=0.0)
     parser.add_argument(
         '--cfg-options',
         nargs='+',
@@ -187,7 +188,8 @@ def main():
     if args.checkpoint:
         ckpt = torch.load(args.checkpoint, map_location='cpu')
         if args.soft_weights_loading:
-            ckpt = dummy_prune_ckpt(ckpt, 0.6)
+            if args.dummy_prune_ratio > 0.0:
+                ckpt = dummy_prune_ckpt(ckpt, args.dummy_prune_ratio)
             load_pretrained_weights_soft(model, ckpt)
         else:
             if 'state_dict' in ckpt:

From 67849b732d65fd0426b14bdf5dc6cbcd386b0c16 Mon Sep 17 00:00:00 2001
From: Amit Klinger
Date: Wed, 18 Oct 2023 11:55:13 +0300
Subject: [PATCH 03/13] Modified PostProcess [optimization]

---
 mmseg/models/decode_heads/full_pp_head.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/mmseg/models/decode_heads/full_pp_head.py b/mmseg/models/decode_heads/full_pp_head.py
index 305d47319d..d94ffaa305 100644
--- a/mmseg/models/decode_heads/full_pp_head.py
+++ b/mmseg/models/decode_heads/full_pp_head.py
@@ -52,9 +52,10 @@ def forward(self, x):
         x = torch.nn.functional.one_hot(x, num_classes=self.num_classes)
         x = torch.transpose(x, 1, 4)  # output is 1x10x240x736x1
         x = torch.squeeze(x, dim=-1)  # output is 1x10x240x736
+        x1, x2 = x[:, :-1, :, :], x[:, -1:, :, :]  # Explicit split for optimization
 
         # First output edge detector
-        out1 = x[:, :-1, :, :]  # output is 1x9x240x736. Assuming raindrop is last class
+        out1 = x1  # output is 1x9x240x736. Assuming raindrop is last class
         # out1 = torch.nn.functional.pad(out1, [1, 0, 0, 0]) # output is 1x9x240x737
         out1 = out1.to(torch.float32)
         out1 = torch.nn.functional.pad(out1, [0, 1, 0, 0], mode='constant', value=0.5)  # output is 1x9x240x737
         out1 = self.relu(self.dw(out1))  # output is 1x9x240x736
@@ -69,11 +70,12 @@ def forward(self, x):
         # argmax on channels. final output is 1x1x240x9
         out1 = torch.argmax(out1, dim=1, keepdim=True)
 
-        # second output is to reduce sum on final class. output is 4 integers of 1x1x1x1 so each one would be represented by 16 bit integer. Assuming raindrop is last class
-        out2, out3, out4, out5 = x[:, -1:, :, :184], x[:, -1:, :, 184:368], x[:, -1:, :, 368:552], x[:, -1:, :, 552:]
-        out2 = torch.sum(torch.sum(out2, dim=-1, keepdim=True), dim=-2, keepdim=True)
-        out3 = torch.sum(torch.sum(out3, dim=-1, keepdim=True), dim=-2, keepdim=True)
-        out4 = torch.sum(torch.sum(out4, dim=-1, keepdim=True), dim=-2, keepdim=True)
-        out5 = torch.sum(torch.sum(out5, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        # second output is to reduce sum on final class. output is 4 integers of 1x1x4 so each one would be represented by 16 bit integer. Assuming raindrop is last class
+        sum1, sum2, sum3, sum4 = x2[:, 0, :60, :], x2[:, 0, 60:120, :], x2[:, 0, 120:180, :], x2[:, 0, 180:, :]
+        sum1 = torch.sum(torch.sum(sum1, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        sum2 = torch.sum(torch.sum(sum2, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        sum3 = torch.sum(torch.sum(sum3, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        sum4 = torch.sum(torch.sum(sum4, dim=-1, keepdim=True), dim=-2, keepdim=True)
+        out2 = torch.cat((sum1, sum2, sum3, sum4), dim=2)
 
-        return out1, out2, out3, out4, out5
+        return out1, out2

From 3bde4db28a69e1b6de58cfb1744130ccfc3fd862 Mon Sep 17 00:00:00 2001
From: Amit Klinger
Date: Sun, 22 Oct 2023 17:37:49 +0300
Subject: [PATCH 04/13] Initial sparsity support. Seems working at a high
 level

---
 configs/fcn/fcn_hailo_10classes.py    |  7 ++-
 configs/fcn/fcn_hailo_prune.py        | 81 +++++++++++++++++++++++
 recipes/recipe_yolox_hailo_pruning.md | 59 +++++++++++++++++++
 sparsity/sparseml_hook.py             | 68 ++++++++++++++++++++++
 tools/train.py                        | 13 ++++-
 5 files changed, 224 insertions(+), 4 deletions(-)
 create mode 100644 configs/fcn/fcn_hailo_prune.py
 create mode 100644 recipes/recipe_yolox_hailo_pruning.md
 create mode 100644 sparsity/sparseml_hook.py

diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py
index 85946b4e32..9aada13fab 100644
--- a/configs/fcn/fcn_hailo_10classes.py
+++ b/configs/fcn/fcn_hailo_10classes.py
@@ -33,6 +33,9 @@
     checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440),
 )
 
+# custom hooks
+custom_hooks = [dict(type='SparseMLHook', interval=10, priority='ABOVE_NORMAL')]
+
 # tensorboard vis
 vis_backends = [dict(type='LocalVisBackend'),
                 dict(type='TensorboardVisBackend')]
@@ -72,4 +75,6 @@
     # model training and testing settings
     train_cfg=dict(),
     test_cfg=dict(mode='whole'),
-    infer_wo_softmax=True)
\ No newline at end of file
+    infer_wo_softmax=True)
+
+load_from='./fcn_hailo_10classes_sholev.pth'
diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py
new file mode 100644
index 0000000000..f2a2d24293
--- /dev/null
+++ b/configs/fcn/fcn_hailo_prune.py
@@ -0,0 +1,81 @@
+# model settings
+_base_ = [
+    '../_base_/datasets/cityscapes10classes.py', '../_base_/default_runtime.py',
+]
+
+# optimizer
+optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5)
+optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None)
+
+# learning policy
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.2, by_epoch=False, begin=0, end=7440),
+    dict(
+        type='CosineAnnealingLR', begin=7440, by_epoch=False, end=59520)
+]
+
+# runtime settings
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=74400, val_interval=1488)  # 119040
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# default hooks - logger & checkpoint configs
+default_hooks = dict(
+
+    # print log every 100 iterations.
+    logger=dict(type='LoggerHook', interval=100, log_metric_by_epoch=False),
+
+    # enable the parameter scheduler.
+    param_scheduler=dict(type='ParamSchedulerHook'),
+
+    # save checkpoint every 5 epochs.
+    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440),
+)
+
+# custom hooks
+custom_hooks = [dict(type='SparseMLHook', interval=10, priority='ABOVE_NORMAL')]
+
+# tensorboard vis
+vis_backends = [dict(type='LocalVisBackend'),
+                dict(type='TensorboardVisBackend')]
+
+# data preprocessing
+norm_cfg = dict(type='SyncBN', requires_grad=True)
+crop_size = (512, 1024)
+data_preprocessor = dict(
+    type='SegDataPreProcessor',
+    mean=[0.0, 0.0, 0.0],
+    std=[1.0, 1.0, 1.0],
+    bgr_to_rgb=True,
+    pad_val=0,
+    seg_pad_val=255,
+    size=crop_size)
+
+model = dict(
+    type='EncoderDecoder',
+    backbone=dict(
+        type='hailoFPN',
+        depth=0.33,
+        width=0.125,
+        bb_channels_list=[128, 256, 512, 1024],
+        bb_num_repeats_list=[9, 15, 21, 12],
+        neck_channels_list=[256, 128, 128, 256, 256, 512],
+        neck_num_repeats_list=[9, 12, 12, 9]),
+    decode_head=dict(
+        type='ConvHead',
+        in_channels=16,
+        channels=128,
+        num_convs=1,
+        num_classes=10,
+        norm_cfg=norm_cfg,
+        align_corners=True,
+        loss_decode=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
+    # model training and testing settings
+    train_cfg=dict(),
+    test_cfg=dict(mode='whole'),
+    infer_wo_softmax=True)
+
+resume = True
+load_from='./fcn_hailo_10classes_sholev.pth'
diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md
new file mode 100644
index 0000000000..2ed8d2fb5f
--- /dev/null
+++ b/recipes/recipe_yolox_hailo_pruning.md
@@ -0,0 +1,59 @@
+
+---
+
+version: 1.1.0
+
+# General Hyperparams
+num_epochs: 400
+init_lr: 0.00005
+final_lr: 0.000005
+weights_warmup_lr: 0
+biases_warmup_lr: 0
+
+# Pruning Hyperparams
+init_sparsity: 0.05
+pruning_start_epoch: 40
+pruning_end_epoch: 50
+pruning_update_frequency: 2.0
+
+#Modifiers
+training_modifiers:
+  - !EpochRangeModifier
+    start_epoch: 0
+    end_epoch: eval(num_epochs)
+
+  - !LearningRateFunctionModifier
+    start_epoch: 3
+    end_epoch: eval(num_epochs)
+    lr_func: linear
+    init_lr: eval(init_lr)
+    final_lr: eval(final_lr)
+
+
+pruning_modifiers:
+  - !GMPruningModifier
+    params:
+      - re:backbone.backbone.*.*.rbr_dense.conv.weight
+      - re:backbone.neck.*.*.rbr_dense.conv.weight
+    init_sparsity: eval(init_sparsity)
+    final_sparsity: eval(0.7)
+    start_epoch: eval(pruning_start_epoch)
+    end_epoch: eval(pruning_end_epoch)
+    update_frequency: eval(pruning_update_frequency)
+---
+
+  - !LearningRateFunctionModifier
+    start_epoch: 0
+    end_epoch: 3
+    lr_func: linear
+    init_lr: eval(weights_warmup_lr)
+    final_lr: eval(init_lr)
+    param_groups: [0, 1]
+
+  - !LearningRateFunctionModifier
+    start_epoch: 0
+    end_epoch: 3
+    lr_func: linear
+    init_lr: eval(biases_warmup_lr)
+    final_lr: eval(init_lr)
+    param_groups: [2]
\ No newline at end of file
diff --git a/sparsity/sparseml_hook.py b/sparsity/sparseml_hook.py
new file mode 100644
index 0000000000..9e85f757d7
--- /dev/null
+++ b/sparsity/sparseml_hook.py
@@ -0,0 +1,68 @@
+from mmseg.registry import RUNNERS, HOOKS
+from mmengine.hooks import Hook
+from sparseml.pytorch.optim import ScheduledModifierManager
+from sparseml.pytorch.utils import ModuleExporter
+from yolov5.utils.neuralmagic import maybe_create_sparsification_manager
+from yolov5.utils.torch_utils import de_parallel
+
+@HOOKS.register_module()
+class SparseMLHook(Hook):
+    def __init__(self, interval=10):
+        self.interval = interval
+
+    def before_train(self, runner) -> None:
+        print("before train\n before train\n before train\n before train")
+        # ckpt = runner.model.state_dict()
+        # ckpt["epoch"] = 0
+        # ckpt["ema"] = ckpt.get("ema", None)
+        # self.sparsification_manager = maybe_create_sparsification_manager(runner.model,
+        #                                                                   ckpt=ckpt,
+        #                                                                   train_recipe=runner.cfg.recipe,
+        #                                                                   recipe_args=runner.cfg.recipe_args,
+        #                                                                   device=runner.model.device, resumed=runner._resume)
+
+
+        # # if self.args.recipe is not None:  # SPARSEML
+        # start_epoch = 40  # self.start_epoch # 295
+        # self.scaler, scheduler, self.ema_model, epochs = self.sparsification_manager.initialize(
+        #     loggers=None,  # None / self.tblogger / logger
+        #     scaler=self.scaler,
+        #     optimizer=runner.optim_wrapper.optimizer,  # self.optimizer,
+        #     scheduler=runner.param_schedulers[-1],  # self.lr_scheduler,
+        #     ema=None  # self.ema_model,
+        #     start_epoch=start_epoch,
+        #     steps_per_epoch=len(self.train_dataloader),
+        #     epochs=50  # self.max_epoch,
+        #     compute_loss=None,  # None / some loss function
+        #     distillation_teacher=None,
+        #     resumed=True,
+        # )
+        self.manager = ScheduledModifierManager.from_yaml(runner.cfg.recipe)
+
+        optimizer = runner.optim_wrapper.optimizer
+        # optimizer = self.manager.modify(pl_module, optimizer, steps_per_epoch=trainer.estimated_stepping_batches, epoch=0)
+        optimizer = self.manager.modify(runner.model.module, optimizer, steps_per_epoch=1488, epoch=40)
+        runner.optim_wrapper.optimizer = optimizer
+
+    def after_train(self, runner) -> None:
+        self.manager.finalize(runner.model.module)
+
+    def after_train_iter(self, runner, batch_idx, data_batch, outputs):  #, batch_idx=0, data_batch=None, outputs=None):
+        # print(f"after_train_iter:: {batch_idx}")
+        if batch_idx % 1488 == 0:  # epoch
+            print(f"Epoch #{batch_idx % 1488} End")
+            self._calc_sparsity(runner.model.state_dict(), runner.logger)
+
+    def _calc_sparsity(self, model_dict, logger):
+        weights_layers_num, total_weights, total_zeros = 0, 0, 0
+        # import ipdb; ipdb.set_trace()
+        for k, v in model_dict.items():
+            if k.startswith('module.backbone.') and k.endswith('weight'):
+                weights_layers_num += 1
+                total_weights += v.numel()
+                total_zeros += (v.numel() - v.count_nonzero())
+                zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0
+                logger.info(f"[{weights_layers_num:>2}] {k:<58}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros")
+        logger.info(f"Model has {weights_layers_num} weight layers")
+        logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%")
+
diff --git a/tools/train.py b/tools/train.py
index 10fdaa1874..b5ab30daa5 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -4,12 +4,12 @@
 import os
 import os.path as osp
 
+from sparsity import sparseml_hook
+
 from mmengine.config import Config, DictAction
 from mmengine.logging import print_log
 from mmengine.runner import Runner
 
-from mmseg.registry import RUNNERS
-
 def parse_args():
     parser = argparse.ArgumentParser(description='Train a segmentor')
@@ -20,6 +20,11 @@ def parse_args():
         action='store_true',
         default=False,
         help='resume from the latest checkpoint in the work_dir automatically')
+    parser.add_argument('--recipe', type=str, default=None, help='Path to a sparsification recipe, '
+                        'see https://github.com/neuralmagic/sparseml for more information')
+    parser.add_argument("--recipe-args", type=str, default=None, help = 'A json string, csv key=value string, or dictionary '
+                        'containing arguments to override the root arguments '
+                        'within the recipe such as learning rate or num epochs')
     parser.add_argument(
         '--amp',
         action='store_true',
@@ -86,7 +91,9 @@ def main():
 
     # resume training
     cfg.resume = args.resume
-
+    cfg.recipe = args.recipe
+    cfg.recipe_args = args.recipe_args
+    print(f"{cfg.resume=}, {cfg.load_from}")
     # build the runner from config
     if 'runner_type' not in cfg:
         # build the default runner

From 3f94fc809a62a26813ca9bbb7bc72ff589ebbe8f Mon Sep 17 00:00:00 2001
From: Amit Klinger
Date: Tue, 24 Oct 2023 11:57:12 +0300
Subject: [PATCH 05/13] Reached ~1.8 degradation on 50% sparsity. Still
 optimizing.

---
 configs/fcn/fcn_hailo_10classes.py    |  3 ---
 configs/fcn/fcn_hailo_prune.py        | 12 ++++++------
 recipes/recipe_yolox_hailo_pruning.md |  7 ++++---
 sparsity/sparseml_hook.py             | 16 ++++++++++++----
 tools/test.py                         |  1 +
 5 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py
index 9aada13fab..357048480f 100644
--- a/configs/fcn/fcn_hailo_10classes.py
+++ b/configs/fcn/fcn_hailo_10classes.py
@@ -33,9 +33,6 @@
     checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440),
 )
 
-# custom hooks
-custom_hooks = [dict(type='SparseMLHook', interval=10, priority='ABOVE_NORMAL')]
-
 # tensorboard vis
 vis_backends = [dict(type='LocalVisBackend'),
                 dict(type='TensorboardVisBackend')]
diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py
index f2a2d24293..2184f0ec9c 100644
--- a/configs/fcn/fcn_hailo_prune.py
+++ b/configs/fcn/fcn_hailo_prune.py
@@ -16,7 +16,7 @@
 ]
 
 # runtime settings
-train_cfg = dict(type='IterBasedTrainLoop', max_iters=74400, val_interval=1488)  # 119040
+train_cfg = dict(type='IterBasedTrainLoop', max_iters=104160, val_interval=1488)  # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs)
 val_cfg = dict(type='ValLoop')
 test_cfg = dict(type='TestLoop')
 
@@ -24,21 +24,21 @@
 default_hooks = dict(
 
     # print log every 100 iterations.
-    logger=dict(type='LoggerHook', interval=100, log_metric_by_epoch=False),
+    logger=dict(type='LoggerHook', interval=200, log_metric_by_epoch=False),
 
     # enable the parameter scheduler.
     param_scheduler=dict(type='ParamSchedulerHook'),
 
-    # save checkpoint every 5 epochs.
-    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440),
+    # save checkpoint every 2 epochs.
+ checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2976), # 7440 (5 Epoches) ) # custom hooks -custom_hooks = [dict(type='SparseMLHook', interval=10, priority='ABOVE_NORMAL')] +custom_hooks = [dict(type='SparseMLHook', interval=10, priority='NORMAL')] # tensorboard vis vis_backends = [dict(type='LocalVisBackend'), - dict(type='TensorboardVisBackend')] + dict(type='TensorboardVisBackend', draw=True, interval=10, save_dir='tf_dir')] # , draw=True, interval=1 # data preprocessing norm_cfg = dict(type='SyncBN', requires_grad=True) diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md index 2ed8d2fb5f..15aaf5cff6 100644 --- a/recipes/recipe_yolox_hailo_pruning.md +++ b/recipes/recipe_yolox_hailo_pruning.md @@ -11,9 +11,10 @@ weights_warmup_lr: 0 biases_warmup_lr: 0 # Pruning Hyperparams -init_sparsity: 0.05 +init_sparsity: 0.02 +final_sparsity: 0.58 pruning_start_epoch: 40 -pruning_end_epoch: 50 +pruning_end_epoch: 70 pruning_update_frequency: 2.0 #Modifiers @@ -36,7 +37,7 @@ pruning_modifiers: - re:backbone.backbone.*.*.rbr_dense.conv.weight - re:backbone.neck.*.*.rbr_dense.conv.weight init_sparsity: eval(init_sparsity) - final_sparsity: eval(0.7) + final_sparsity: eval(final_sparsity) start_epoch: eval(pruning_start_epoch) end_epoch: eval(pruning_end_epoch) update_frequency: eval(pruning_update_frequency) diff --git a/sparsity/sparseml_hook.py b/sparsity/sparseml_hook.py index 9e85f757d7..047173ffb5 100644 --- a/sparsity/sparseml_hook.py +++ b/sparsity/sparseml_hook.py @@ -49,15 +49,23 @@ def after_train(self, runner) -> None: def after_train_iter(self, runner, batch_idx, data_batch, outputs): #, batch_idx=0, data_batch=None, outputs=None): # print(f"after_train_iter:: {batch_idx}") - if batch_idx % 1488 == 0: # epoch - print(f"Epoch #{batch_idx % 1488} End") + if batch_idx % (1488*2) == 0: # 2 Epochs + print(f"Epoch #{batch_idx // 1488} End") self._calc_sparsity(runner.model.state_dict(), runner.logger) + def after_test_epoch(self, runner, metrics): + runner.logger.info("Switching to deployment model") + # if repvgg style -> deploy + for module in runner.model.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + self._calc_sparsity(runner.model.state_dict(), runner.logger) + def _calc_sparsity(self, model_dict, logger): weights_layers_num, total_weights, total_zeros = 0, 0, 0 - # import ipdb; ipdb.set_trace() + prefix = next(iter(model_dict)).split('backbone.stage0')[0] for k, v in model_dict.items(): - if k.startswith('module.backbone.') and k.endswith('weight'): + if k.startswith(prefix) and k.endswith('weight'): weights_layers_num += 1 total_weights += v.numel() total_zeros += (v.numel() - v.count_nonzero()) diff --git a/tools/test.py b/tools/test.py index 058fdfc864..19fa17fd07 100644 --- a/tools/test.py +++ b/tools/test.py @@ -2,6 +2,7 @@ import argparse import os import os.path as osp +from sparsity import sparseml_hook from mmengine.config import Config, DictAction from mmengine.runner import Runner From e47be6cda205308f8e9ee75305066d9d5fc4ad01 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Wed, 25 Oct 2023 17:45:09 +0300 Subject: [PATCH 06/13] Added TFBoard supprt. LrScheduler supported from recipe. Reached p50 with 1% deg. 
Still needs to understand the convergence --- configs/fcn/fcn_hailo_10classes.py | 13 ++++++----- configs/fcn/fcn_hailo_prune.py | 29 ++++++++++------------- recipes/recipe_yolox_hailo_pruning.md | 30 +++++++++++++++--------- sparsity/sparseml_hook.py | 33 ++------------------------- 4 files changed, 40 insertions(+), 65 deletions(-) diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py index 357048480f..42a1a12233 100644 --- a/configs/fcn/fcn_hailo_10classes.py +++ b/configs/fcn/fcn_hailo_10classes.py @@ -12,7 +12,7 @@ dict( type='LinearLR', start_factor=0.2, by_epoch=False, begin=0, end=7440), dict( - type='CosineAnnealingLR', begin=7440, by_epoch=False, end=59520) + type='CosineAnnealingLR', begin=7440, end=59520, eta_min=0.00001, by_epoch=False) ] # runtime settings @@ -24,18 +24,19 @@ default_hooks = dict( # print log every 100 iterations. - logger=dict(type='LoggerHook', interval=100, log_metric_by_epoch=False), + logger=dict(type='LoggerHook', interval=200, log_metric_by_epoch=False), # enable the parameter scheduler. param_scheduler=dict(type='ParamSchedulerHook'), # save checkpoint every 5 epochs. - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440, save_best='mIoU', rule='greater', max_keep_ckpts=5), ) -# tensorboard vis -vis_backends = [dict(type='LocalVisBackend'), - dict(type='TensorboardVisBackend')] +# tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' +visualizer = dict(type='SegLocalVisualizer', + vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')], + name='visualizer') # data preprocessing norm_cfg = dict(type='SyncBN', requires_grad=True) diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py index 2184f0ec9c..31f5af109d 100644 --- a/configs/fcn/fcn_hailo_prune.py +++ b/configs/fcn/fcn_hailo_prune.py @@ -3,20 +3,16 @@ '../_base_/datasets/cityscapes10classes.py', '../_base_/default_runtime.py', ] +resume = True +load_from='./fcn_hailo_10classes_sholev.pth' + # optimizer optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5) optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) -# learning policy -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.2, by_epoch=False, begin=0, end=7440), - dict( - type='CosineAnnealingLR', begin=7440, by_epoch=False, end=59520) -] # runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=104160, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs) +train_cfg = dict(type='IterBasedTrainLoop', max_iters=89280, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') @@ -24,21 +20,23 @@ default_hooks = dict( # print log every 100 iterations. - logger=dict(type='LoggerHook', interval=200, log_metric_by_epoch=False), + logger=dict(type='LoggerHook', interval=500, log_metric_by_epoch=False), # enable the parameter scheduler. param_scheduler=dict(type='ParamSchedulerHook'), - # save checkpoint every 2 epochs. - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2976), # 7440 (5 Epoches) + # save checkpoint every 1 epoch. 
+ checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', max_keep_ckpts=5), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 ) +# learning policy: taken from the recipe # custom hooks custom_hooks = [dict(type='SparseMLHook', interval=10, priority='NORMAL')] -# tensorboard vis -vis_backends = [dict(type='LocalVisBackend'), - dict(type='TensorboardVisBackend', draw=True, interval=10, save_dir='tf_dir')] # , draw=True, interval=1 +# tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' +visualizer = dict(type='SegLocalVisualizer', + vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')], + name='visualizer') # data preprocessing norm_cfg = dict(type='SyncBN', requires_grad=True) @@ -76,6 +74,3 @@ train_cfg=dict(), test_cfg=dict(mode='whole'), infer_wo_softmax=True) - -resume = True -load_from='./fcn_hailo_10classes_sholev.pth' diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md index 15aaf5cff6..85042da004 100644 --- a/recipes/recipe_yolox_hailo_pruning.md +++ b/recipes/recipe_yolox_hailo_pruning.md @@ -4,14 +4,15 @@ version: 1.1.0 # General Hyperparams -num_epochs: 400 +start_epoch: 40 +num_epochs: 80 init_lr: 0.00005 -final_lr: 0.000005 +final_lr: 0.00005 weights_warmup_lr: 0 biases_warmup_lr: 0 # Pruning Hyperparams -init_sparsity: 0.02 +init_sparsity: 0.01 final_sparsity: 0.58 pruning_start_epoch: 40 pruning_end_epoch: 70 @@ -19,18 +20,13 @@ pruning_update_frequency: 2.0 #Modifiers training_modifiers: - - !EpochRangeModifier - start_epoch: 0 - end_epoch: eval(num_epochs) - - !LearningRateFunctionModifier - start_epoch: 3 + start_epoch: eval(start_epoch) end_epoch: eval(num_epochs) lr_func: linear init_lr: eval(init_lr) - final_lr: eval(final_lr) + final_lr: eval(init_lr) - pruning_modifiers: - !GMPruningModifier params: @@ -43,6 +39,18 @@ pruning_modifiers: update_frequency: eval(pruning_update_frequency) --- +training_modifiers: + - !EpochRangeModifier + start_epoch: 0 + end_epoch: eval(num_epochs) + + - !LearningRateFunctionModifier + start_epoch: 3 + end_epoch: eval(num_epochs) + lr_func: linear + init_lr: eval(init_lr) + final_lr: eval(final_lr) + - !LearningRateFunctionModifier start_epoch: 0 end_epoch: 3 @@ -50,7 +58,7 @@ pruning_modifiers: init_lr: eval(weights_warmup_lr) final_lr: eval(init_lr) param_groups: [0, 1] - + - !LearningRateFunctionModifier start_epoch: 0 end_epoch: 3 diff --git a/sparsity/sparseml_hook.py b/sparsity/sparseml_hook.py index 047173ffb5..fd51602806 100644 --- a/sparsity/sparseml_hook.py +++ b/sparsity/sparseml_hook.py @@ -1,9 +1,6 @@ from mmseg.registry import RUNNERS, HOOKS from mmengine.hooks import Hook from sparseml.pytorch.optim import ScheduledModifierManager -from sparseml.pytorch.utils import ModuleExporter -from yolov5.utils.neuralmagic import maybe_create_sparsification_manager -from yolov5.utils.torch_utils import de_parallel @HOOKS.register_module() class SparseMLHook(Hook): @@ -11,32 +8,6 @@ def __init__(self, interval=10): self.interval = interval def before_train(self, runner) -> None: - print("before train\n before train\n before train\n before train") - # ckpt = runner.model.state_dict() - # ckpt["epoch"] = 0 - # ckpt["ema"] = ckpt.get("ema", None) - # self.sparsification_manager = maybe_create_sparsification_manager(runner.model, - # ckpt=ckpt, - # train_recipe=runner.cfg.recipe, - # recipe_args=runner.cfg.recipe_args, - # device=runner.model.device, resumed=runner._resume) - - - # # if 
self.args.recipe is not None: # SPARSEML - # start_epoch = 40 # self.start_epoch # 295 - # self.scaler, scheduler, self.ema_model, epochs = self.sparsification_manager.initialize( - # loggers=None, # None / self.tblogger / logger - # scaler=self.scaler, - # optimizer=runner.optim_wrapper.optimizer, # self.optimizer, - # scheduler=runner.param_schedulers[-1], # self.lr_scheduler, - # ema=None # self.ema_model, - # start_epoch=start_epoch, - # steps_per_epoch=len(self.train_dataloader), - # epochs=50 # self.max_epoch, - # compute_loss=None, # None / some loss function - # distillation_teacher=None, - # resumed=True, - # ) self.manager = ScheduledModifierManager.from_yaml(runner.cfg.recipe) optimizer = runner.optim_wrapper.optimizer @@ -69,8 +40,8 @@ def _calc_sparsity(self, model_dict, logger): weights_layers_num += 1 total_weights += v.numel() total_zeros += (v.numel() - v.count_nonzero()) - zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 - logger.info(f"[{weights_layers_num:>2}] {k:<58}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") + # zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 + # logger.info(f"[{weights_layers_num:>2}] {k:<58}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") logger.info(f"Model has {weights_layers_num} weight layers") logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") From d93e01883ac6ef28f2e2f21e1e5bdb9a443be920 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Thu, 9 Nov 2023 15:08:22 +0200 Subject: [PATCH 07/13] latest config and recipe. updated export script with random prune --- configs/fcn/fcn_hailo_10classes.py | 6 ++-- configs/fcn/fcn_hailo_prune.py | 8 +++--- recipes/recipe_yolox_hailo_pruning.md | 16 +++++------ tools/pytorch2onnx.py | 41 +++++++++++++++++++++++---- 4 files changed, 49 insertions(+), 22 deletions(-) diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py index 42a1a12233..2c19faf865 100644 --- a/configs/fcn/fcn_hailo_10classes.py +++ b/configs/fcn/fcn_hailo_10classes.py @@ -12,11 +12,11 @@ dict( type='LinearLR', start_factor=0.2, by_epoch=False, begin=0, end=7440), dict( - type='CosineAnnealingLR', begin=7440, end=59520, eta_min=0.00001, by_epoch=False) + type='CosineAnnealingLR', begin=7440, end=74400, eta_min=0.00001, by_epoch=False) ] # runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=59520, val_interval=1488) +train_cfg = dict(type='IterBasedTrainLoop', max_iters=74400, val_interval=1488) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') @@ -74,5 +74,3 @@ train_cfg=dict(), test_cfg=dict(mode='whole'), infer_wo_softmax=True) - -load_from='./fcn_hailo_10classes_sholev.pth' diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py index 31f5af109d..a53145c297 100644 --- a/configs/fcn/fcn_hailo_prune.py +++ b/configs/fcn/fcn_hailo_prune.py @@ -4,15 +4,15 @@ ] resume = True -load_from='./fcn_hailo_10classes_sholev.pth' +load_from='./work_dirs/fcn_hailo_eta1e5/iter_68448.pth' # optimizer -optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5) +optimizer = dict(type='Adam', lr=0.0001, weight_decay=1e-5) optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) # runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=89280, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs) +train_cfg = dict(type='IterBasedTrainLoop', 
max_iters=173760, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') @@ -26,7 +26,7 @@ param_scheduler=dict(type='ParamSchedulerHook'), # save checkpoint every 1 epoch. - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', max_keep_ckpts=5), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2976, save_best='mIoU', rule='greater', max_keep_ckpts=5), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 ) # learning policy: taken from the recipe diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md index 85042da004..15bcabbb2d 100644 --- a/recipes/recipe_yolox_hailo_pruning.md +++ b/recipes/recipe_yolox_hailo_pruning.md @@ -4,19 +4,19 @@ version: 1.1.0 # General Hyperparams -start_epoch: 40 -num_epochs: 80 -init_lr: 0.00005 -final_lr: 0.00005 +start_epoch: 50 +num_epochs: 120 +init_lr: 0.00001 +final_lr: 0.00001 weights_warmup_lr: 0 biases_warmup_lr: 0 # Pruning Hyperparams init_sparsity: 0.01 -final_sparsity: 0.58 -pruning_start_epoch: 40 -pruning_end_epoch: 70 -pruning_update_frequency: 2.0 +final_sparsity: 0.68 +pruning_start_epoch: 60 +pruning_end_epoch: 110 +pruning_update_frequency: 5.0 #Modifiers training_modifiers: diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index 653c4ceb65..602121fb49 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -64,15 +64,43 @@ def load_pretrained_weights_soft(model, checkpoint): ) -def dummy_prune_ckpt(ckpt, prune_ratio=0.5): - pass +def dummy_prune_ckpt(ckpt, prune_ratio=0.5, random_prune=False): for k, v in ckpt['state_dict'].items(): if k.startswith('backbone.') and k.endswith('.rbr_dense.conv.weight'): - # Sparsify layer: - v = dummy_prune_layer(v, prune_ratio) + if random_prune: # Sparsify layer randomly: + v = random_prune_layer(v, prune_ratio) + else: # Sparsify layer according to magnitude: + v = dummy_prune_layer(v, prune_ratio) calc_sparsity(ckpt['state_dict']) return ckpt + +def random_prune_layer(layer, prune_ratio=0.5): + """ + Randomly prune (set to zero) a fraction of elements in a PyTorch tensor. + + Args: + layer (torch.Tensor): Input tensor of shape [B, C, H, W]. + prune_ratio (float): Fraction of elements to set to zero. + + Returns: + torch.Tensor: Pruned tensor with the same shape as the input. 
+ """ + # Determine the number of elements to prune + num_elements = layer.numel() + num_prune = int(prune_ratio * num_elements) + + # Create a mask with zeros and ones to select the elements to prune + mask = torch.ones(num_elements) + mask[:num_prune] = 0 + mask = mask[torch.randperm(num_elements)] # Shuffle the mask randomly + mask = mask.view(layer.shape) + + # Apply the mask to the input tensor to prune it + layer *= mask + return layer + + def dummy_prune_layer(layer, prune_ratio=0.5): # Flatten the tensor flattened_layer = layer.flatten() @@ -111,7 +139,8 @@ def parse_args(): parser.add_argument('--shape', nargs=2, type=int, default=[1024, 1920]) parser.add_argument('--out_name', default='fcn.onnx', type=str, help="Name for the onnx output") parser.add_argument('--soft_weights_loading',action='store_true', default=False) - parser.add_argument('--dummy_prune_ratio', type=float, default=0.0) + parser.add_argument('--dummy_prune_ratio', type=float, default=0.0, help="Applies dummy pruning with ratio") + parser.add_argument('--random_prune', action='store_true', default=False, help="Set method to prune as random (default: Minimum absolute value)") parser.add_argument( '--cfg-options', nargs='+', @@ -189,7 +218,7 @@ def main(): ckpt = torch.load(args.checkpoint, map_location='cpu') if args.soft_weights_loading: if args.dummy_prune_ratio > 0.0: - ckpt = dummy_prune_ckpt(ckpt, args.dummy_prune_ratio) + ckpt = dummy_prune_ckpt(ckpt, args.dummy_prune_ratio, args.random_prune) load_pretrained_weights_soft(model, ckpt) else: if 'state_dict' in ckpt: From aee6e831e3aee46fd242ac9cae33ed5110d4aeb8 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Thu, 7 Dec 2023 12:13:19 +0200 Subject: [PATCH 08/13] Cosmetics --- mmseg/models/decode_heads/full_pp_head.py | 2 +- tools/pytorch2onnx.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mmseg/models/decode_heads/full_pp_head.py b/mmseg/models/decode_heads/full_pp_head.py index d94ffaa305..0e3e61e799 100644 --- a/mmseg/models/decode_heads/full_pp_head.py +++ b/mmseg/models/decode_heads/full_pp_head.py @@ -49,7 +49,7 @@ def forward(self, x): x = torch.transpose(x, 2, 3) # torch.nn.functional.one_hot adds an extra dim at the end of the tensor so output is 1x1x240x736x10 - x = torch.nn.functional.one_hot(x, num_classes=self.num_classes) + x = torch.nn.functional.one_hot(x, num_classes=self.num_classes) x = torch.transpose(x, 1, 4) # output is 1x10x240x736x1 x = torch.squeeze(x, dim=-1) # output is 1x10x240x736 x1, x2 = x[:, :-1, :, :], x[:, -1:, :, :] # Explicit split for optimization diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index 602121fb49..c439a4ee05 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -135,7 +135,7 @@ def parse_args(): parser.add_argument('--work-dir', help='the dir to save logs and models') parser.add_argument('--checkpoint', help='checkpoint file', default=None) parser.add_argument('--no_simplify', action='store_false') - parser.add_argument('--no_postprocess', action='store_true', default=False) + parser.add_argument('--postprocess', action='store_true', default=False) parser.add_argument('--shape', nargs=2, type=int, default=[1024, 1920]) parser.add_argument('--out_name', default='fcn.onnx', type=str, help="Name for the onnx output") parser.add_argument('--soft_weights_loading',action='store_true', default=False) @@ -171,13 +171,14 @@ class ModelWithPostProc(torch.nn.Module): def __init__(self, model, args): super(ModelWithPostProc, self).__init__() self.model = model - 
self.post_proc_flag = not(args.no_postprocess) + self.post_proc_flag = args.postprocess self.shape = args.shape self.bilinear_resize = nn.Upsample(size=self.shape, mode='bilinear', align_corners=True) def forward(self, x): x = self.model(x) if self.post_proc_flag: + print("Adding Postprocess (Resize+ArgMax) to the model") x = self.bilinear_resize(x) if x.shape[1] > 1: x = x.argmax(dim=1, keepdim=True) From bd22c29de980dfca03565ae67c5467ecb1fd20b7 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Sat, 9 Dec 2023 11:48:43 +0200 Subject: [PATCH 09/13] Verified all flow. Updated default pruning ratio to 60 (before RepVGG). Next is cleanup --- recipes/recipe_yolox_hailo_pruning.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md index 15bcabbb2d..2b389524a5 100644 --- a/recipes/recipe_yolox_hailo_pruning.md +++ b/recipes/recipe_yolox_hailo_pruning.md @@ -13,7 +13,7 @@ biases_warmup_lr: 0 # Pruning Hyperparams init_sparsity: 0.01 -final_sparsity: 0.68 +final_sparsity: 0.60 pruning_start_epoch: 60 pruning_end_epoch: 110 pruning_update_frequency: 5.0 From 1d78c1bb4a3b08371c3f887958dfccaf6a380971 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Sat, 9 Dec 2023 21:01:16 +0200 Subject: [PATCH 10/13] Removed old Hailo HEAD + updated pytorch2onnx script --- configs/_base_/datasets/cityscapes.py | 2 +- configs/fcn/fcn_hailo_10_classes_pp.py | 75 --------------------- mmseg/models/decode_heads/__init__.py | 3 +- mmseg/models/decode_heads/full_pp_head.py | 81 ----------------------- tools/pytorch2onnx.py | 24 +++++-- 5 files changed, 19 insertions(+), 166 deletions(-) delete mode 100644 configs/fcn/fcn_hailo_10_classes_pp.py delete mode 100644 mmseg/models/decode_heads/full_pp_head.py diff --git a/configs/_base_/datasets/cityscapes.py b/configs/_base_/datasets/cityscapes.py index 7be4a9c561..30912cb7bc 100644 --- a/configs/_base_/datasets/cityscapes.py +++ b/configs/_base_/datasets/cityscapes.py @@ -1,6 +1,6 @@ # dataset settings dataset_type = 'CityscapesDataset' -data_root = '/data/data/cityscapes10classes/' +data_root = '/data/cityscapes10classes/' crop_size = (512, 1024) train_pipeline = [ dict(type='LoadImageFromFile'), diff --git a/configs/fcn/fcn_hailo_10_classes_pp.py b/configs/fcn/fcn_hailo_10_classes_pp.py deleted file mode 100644 index 63594d5d83..0000000000 --- a/configs/fcn/fcn_hailo_10_classes_pp.py +++ /dev/null @@ -1,75 +0,0 @@ -# model settings -_base_ = [ - '../_base_/datasets/cityscapes10classes.py', '../_base_/default_runtime.py', -] - -# optimizer -optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5) -optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) - -# learning policy -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.2, by_epoch=False, begin=0, end=7440), - dict( - type='CosineAnnealingLR', begin=7440, by_epoch=False, end=59520) -] - -# runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=59520, val_interval=1488) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# default hooks - logger & checkpoint configs -default_hooks = dict( - - # print log every 100 iterations. - logger=dict(type='LoggerHook', interval=100, log_metric_by_epoch=False), - - # enable the parameter scheduler. - param_scheduler=dict(type='ParamSchedulerHook'), - - # save checkpoint every 5 epochs. 
- checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440), -) - -# tensorboard vis -vis_backends = [dict(type='LocalVisBackend'), - dict(type='TensorboardVisBackend')] - -# data preprocessing -norm_cfg = dict(type='SyncBN', requires_grad=True) -crop_size = (512, 1024) -data_preprocessor = dict( - type='SegDataPreProcessor', - mean=[0.0, 0.0, 0.0], - std=[1.0, 1.0, 1.0], - bgr_to_rgb=True, - pad_val=0, - seg_pad_val=255, - size=crop_size) - -model = dict( - type='EncoderDecoder', - backbone=dict( - type='hailoFPN', - depth=0.33, - width=0.125, - bb_channels_list=[128, 256, 512, 1024], - bb_num_repeats_list=[9, 15, 21, 12], - neck_channels_list=[256, 128, 128, 256, 256, 512], - neck_num_repeats_list=[9, 12, 12, 9]), - decode_head=dict( - type='PostProcess', - in_channels=16, - channels=128, - num_convs=1, - num_classes=10, - norm_cfg=norm_cfg, - align_corners=True, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), - infer_wo_softmax=True) \ No newline at end of file diff --git a/mmseg/models/decode_heads/__init__.py b/mmseg/models/decode_heads/__init__.py index 2a94aa437a..0f43aad55d 100644 --- a/mmseg/models/decode_heads/__init__.py +++ b/mmseg/models/decode_heads/__init__.py @@ -35,10 +35,9 @@ from .setr_up_head import SETRUPHead from .stdc_head import STDCHead from .uper_head import UPerHead -from .full_pp_head import PostProcess __all__ = [ - 'FCNHead', 'PostProcess', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', + 'FCNHead', 'PSPHead', 'ASPPHead', 'PSAHead', 'NLHead', 'GCHead', 'CCHead', 'UPerHead', 'DepthwiseSeparableASPPHead', 'ANNHead', 'DAHead', 'OCRHead', 'EncHead', 'DepthwiseSeparableFCNHead', 'FPNHead', 'FCNGenHead', 'EMAHead', 'DNLHead', 'PointHead', 'APCHead', 'DMHead', 'LRASPPHead', 'SETRUPHead', diff --git a/mmseg/models/decode_heads/full_pp_head.py b/mmseg/models/decode_heads/full_pp_head.py deleted file mode 100644 index 0e3e61e799..0000000000 --- a/mmseg/models/decode_heads/full_pp_head.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
-import torch -import torch.nn as nn -import numpy as np - -from ..builder import HEADS -from .decode_head import BaseDecodeHead - - - -@HEADS.register_module() -class PostProcess(BaseDecodeHead): - def __init__(self, num_convs, **kwargs): - super(PostProcess, self).__init__(**kwargs) - - self.num_convs = num_convs - self.output_channels = self.channels - self.conv_layers = nn.ModuleList() - in_channels = self.in_channels - self.num_classes = kwargs['num_classes'] - for i in range(self.num_convs): - self.conv_layers.append(nn.Conv2d(in_channels, self.output_channels, kernel_size=3, padding=1)) - in_channels = self.output_channels - - self.dw = torch.nn.Conv2d(in_channels=self.num_classes - 1, out_channels=self.num_classes - 1, kernel_size=(1, 2), groups=self.num_classes - 1, bias=False) - w = np.ones((self.num_classes - 1,1,1,2), dtype=np.float32) - # w[:, 0, 0, 0] = -1 - w[:, :, :, 1] = -1 - self.dw.weight = torch.nn.Parameter(torch.Tensor(w)) - self.relu = torch.nn.ReLU() - self.argmax_star = True # Argmax* (Argmax from bottom) -> will not compile, so default is False - - def forward(self, x): - - # channels in -> channels out (several, default is 1 convs layers after backbone) - for conv_layer in self.conv_layers: - x = nn.functional.relu(conv_layer(x)) - - # #channels -> #classes - x = self.cls_seg(x) - - # input is (BxCxHxW): 1x10x92x120 output is 1x10x736x240 - x = torch.nn.functional.interpolate(x, size=(736, 240), mode='bilinear', align_corners=True) - - # argmax on channels. output is 1x1x736x240 - x = torch.argmax(x, dim=1, keepdim=True) - - # H<->W transpose. output is 1x1x240x736 - x = torch.transpose(x, 2, 3) - - # torch.nn.functional.one_hot adds an extra dim at the end of the tensor so output is 1x1x240x736x10 - x = torch.nn.functional.one_hot(x, num_classes=self.num_classes) - x = torch.transpose(x, 1, 4) # output is 1x10x240x736x1 - x = torch.squeeze(x, dim=-1) # output is 1x10x240x736 - x1, x2 = x[:, :-1, :, :], x[:, -1:, :, :] # Explicit split for optimization - - # First output edge detector - out1 = x1 # output is 1x9x240x736. Assuming raindrop is last class - # out1 = torch.nn.functional.pad(out1, [1, 0, 0, 0]) # output is 1x9x240x737 - out1 = out1.to(torch.float32) - out1 = torch.nn.functional.pad(out1, [0, 1, 0, 0], mode='constant', value=0.5) # output is 1x9x240x737 - out1 = self.relu(self.dw(out1)) # output is 1x9x240x736 - - # W<->C transpose. output is 1x736x240x9 - out1 = torch.transpose(out1, 1, 3) - - if self.argmax_star: - # argmax* support: Flip the 736 axis. output is 1x736x240x9 - out1 = torch.flip(out1, dims=(1,)) - # argmax on channels. final output is 1x1x240x9 - out1 = torch.argmax(out1, dim=1, keepdim=True) - - # second output is to reduce sum on final class. output is 4 integers of 1x1x4 so each one would be represented by 16 bit integer. 
Assuming raindrop is last class - sum1, sum2, sum3, sum4 = x2[:, 0, :60, :], x2[:, 0, 60:120, :], x2[:, 0, 120:180, :], x2[:, 0, 180:, :] - sum1 = torch.sum(torch.sum(sum1, dim=-1, keepdim=True), dim=-2, keepdim=True) - sum2 = torch.sum(torch.sum(sum2, dim=-1, keepdim=True), dim=-2, keepdim=True) - sum3 = torch.sum(torch.sum(sum3, dim=-1, keepdim=True), dim=-2, keepdim=True) - sum4 = torch.sum(torch.sum(sum4, dim=-1, keepdim=True), dim=-2, keepdim=True) - out2 = torch.cat((sum1, sum2, sum3, sum4), dim=2) - - return out1, out2 diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index c439a4ee05..2ffca37524 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -137,8 +137,9 @@ def parse_args(): parser.add_argument('--no_simplify', action='store_false') parser.add_argument('--postprocess', action='store_true', default=False) parser.add_argument('--shape', nargs=2, type=int, default=[1024, 1920]) + parser.add_argument('-o', '--opset', type=int, default=13) parser.add_argument('--out_name', default='fcn.onnx', type=str, help="Name for the onnx output") - parser.add_argument('--soft_weights_loading',action='store_true', default=False) + parser.add_argument('--soft_weights_loading', action='store_true', default=False) parser.add_argument('--dummy_prune_ratio', type=float, default=0.0, help="Applies dummy pruning with ratio") parser.add_argument('--random_prune', action='store_true', default=False, help="Set method to prune as random (default: Minimum absolute value)") parser.add_argument( @@ -178,7 +179,6 @@ def __init__(self, model, args): def forward(self, x): x = self.model(x) if self.post_proc_flag: - print("Adding Postprocess (Resize+ArgMax) to the model") x = self.bilinear_resize(x) if x.shape[1] > 1: x = x.argmax(dim=1, keepdim=True) @@ -236,21 +236,31 @@ def main(): # to onnx model.eval() + if args.postprocess: + print("Adding Postprocess (Resize+ArgMax) to the model") model_with_postprocess = ModelWithPostProc(model, args) model_with_postprocess.eval() + imgs = torch.zeros(1,3, args.shape[0], args.shape[1], dtype=torch.float32).to(device) outputs = model_with_postprocess(imgs) - torch.onnx.export(model_with_postprocess, imgs, args.out_name, input_names=['test_input'], output_names=['output'], training=torch.onnx.TrainingMode.PRESERVE, opset_version=13) - print('model saved at: ', args.out_name) + torch.onnx.export(model_with_postprocess, + imgs, args.out_name, + input_names=['test_input'], + output_names=['output'], + training=torch.onnx.TrainingMode.PRESERVE, + opset_version=args.opset) # if also simplify if args.no_simplify: model_onnx = onnx.load(args.out_name) model_simp, check = simplify(model_onnx) - onnx.save(model_simp, args.out_name[0:-5] + '_simplify.onnx') - print('model simplified saved at: ', args.out_name[0:-5] + '_simplify.onnx') + onnx.save(model_simp, args.out_name) + print('Simplified model saved at: ', args.out_name) + else: + print('Model saved at: ', args.out_name) if __name__ == '__main__': - parser = argparse.ArgumentParser(epilog='Example: CUDA_VISIBLE_DEVICES=0 python tools/pytorch2onnx.py configs/fcn/fcn8_r18_hailo.py --checkpoint work_dirs/fcn8_r18_hailo_iterbased/epoch_1.pth --out_name my_fcn_model.onnx --shape 600 800') + parser = argparse.ArgumentParser( + epilog='Example: CUDA_VISIBLE_DEVICES=0 python tools/pytorch2onnx.py configs/fcn/fcn_hailo_10classes.py --checkpoint work_dirs/fcn_hailo/iter_173760.pth --shape 736 960 --postprocess --soft_weights_loading --out_name fcn_hailo.onnx') main() From 32c9ba836e969ffeb8258e47cdd189aa428f0ccd 
Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Sat, 9 Dec 2023 21:46:23 +0200 Subject: [PATCH 11/13] Modified fcn_hailo_prune.py to save checkpoints after pruning is done + cosmetics --- configs/fcn/fcn_hailo_prune.py | 7 +++--- sparsity/sparseml_hook.py | 10 +++------ tools/pytorch2onnx.py | 40 +++++++++++++--------------------- 3 files changed, 22 insertions(+), 35 deletions(-) diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py index a53145c297..ab5f84be83 100644 --- a/configs/fcn/fcn_hailo_prune.py +++ b/configs/fcn/fcn_hailo_prune.py @@ -19,15 +19,16 @@ # default hooks - logger & checkpoint configs default_hooks = dict( - # print log every 100 iterations. + # print log every 500 iterations. logger=dict(type='LoggerHook', interval=500, log_metric_by_epoch=False), # enable the parameter scheduler. param_scheduler=dict(type='ParamSchedulerHook'), # save checkpoint every 1 epoch. - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=2976, save_best='mIoU', rule='greater', max_keep_ckpts=5), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 -) + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', + max_keep_ckpts=5, save_begin=163680), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 + ) # learning policy: taken from the recipe # custom hooks diff --git a/sparsity/sparseml_hook.py b/sparsity/sparseml_hook.py index fd51602806..59786a166c 100644 --- a/sparsity/sparseml_hook.py +++ b/sparsity/sparseml_hook.py @@ -11,17 +11,15 @@ def before_train(self, runner) -> None: self.manager = ScheduledModifierManager.from_yaml(runner.cfg.recipe) optimizer = runner.optim_wrapper.optimizer - # optimizer = self.manager.modify(pl_module, optimizer, steps_per_epoch=trainer.estimated_stepping_batches, epoch=0) optimizer = self.manager.modify(runner.model.module, optimizer, steps_per_epoch=1488, epoch=40) runner.optim_wrapper.optimizer = optimizer def after_train(self, runner) -> None: self.manager.finalize(runner.model.module) - def after_train_iter(self, runner, batch_idx, data_batch, outputs): #, batch_idx=0, data_batch=None, outputs=None): - # print(f"after_train_iter:: {batch_idx}") - if batch_idx % (1488*2) == 0: # 2 Epochs - print(f"Epoch #{batch_idx // 1488} End") + def after_train_iter(self, runner, batch_idx, data_batch, outputs): + if batch_idx % (1488 * 2) == 0: # 2 Epochs + runner.logger.info(f"Epoch #{batch_idx // 1488} End") self._calc_sparsity(runner.model.state_dict(), runner.logger) def after_test_epoch(self, runner, metrics): @@ -40,8 +38,6 @@ def _calc_sparsity(self, model_dict, logger): weights_layers_num += 1 total_weights += v.numel() total_zeros += (v.numel() - v.count_nonzero()) - # zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 - # logger.info(f"[{weights_layers_num:>2}] {k:<58}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") logger.info(f"Model has {weights_layers_num} weight layers") logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index 2ffca37524..a313ad57ec 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -1,28 +1,20 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import argparse -import logging import os import os.path as osp import torch -import torch.nn.functional as F import torch.nn as nn from mmengine.config import Config, DictAction -from mmengine.logging import print_log from mmengine.runner import Runner - from mmseg.registry import RUNNERS import onnx from onnxsim import simplify -from mmseg.models.utils import resize - -import torch.nn.functional as F from collections import OrderedDict -import warnings -def load_pretrained_weights_soft(model, checkpoint): +def load_pretrained_weights_soft(model, checkpoint, logger): if 'state_dict' in checkpoint: state_dict = checkpoint['state_dict'] @@ -49,15 +41,14 @@ def load_pretrained_weights_soft(model, checkpoint): model.load_state_dict(model_dict) if len(matched_layers) == 0: - warnings.warn( - 'The pretrained weights "{}" cannot be loaded, ' + logger.warning( + 'The pretrained weights cannot be loaded, ' 'please check the key names manually ' - '(** ignored and continue **)' ) else: - print('Successfully loaded pretrained weights') + logger.info('Successfully loaded pretrained weights') if len(discarded_layers) > 0: - print( + logger.warning( '** The following layers are discarded ' 'due to unmatched keys or layer size: {}'. format(discarded_layers) @@ -117,7 +108,7 @@ def dummy_prune_layer(layer, prune_ratio=0.5): return pruned_tensor -def calc_sparsity(model_dict): +def calc_sparsity(model_dict, logger): weights_layers_num, total_weights, total_zeros = 0, 0, 0 for k, v in model_dict.items(): if k.startswith('backbone.') and k.endswith('weight'): @@ -125,9 +116,9 @@ def calc_sparsity(model_dict): total_weights += v.numel() total_zeros += (v.numel() - v.count_nonzero()) zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 - print(f"[{weights_layers_num:>2}] {k:<51}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") - print(f"Model has {weights_layers_num} weight layers") - print(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") + logger.info(f"[{weights_layers_num:>2}] {k:<51}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") + logger.info(f"Model has {weights_layers_num} weight layers") + logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") def parse_args(): @@ -213,31 +204,30 @@ def main(): # if 'runner_type' is set in the cfg runner = RUNNERS.build(cfg) - # start training model = runner.model if args.checkpoint: ckpt = torch.load(args.checkpoint, map_location='cpu') if args.soft_weights_loading: if args.dummy_prune_ratio > 0.0: ckpt = dummy_prune_ckpt(ckpt, args.dummy_prune_ratio, args.random_prune) - load_pretrained_weights_soft(model, ckpt) + load_pretrained_weights_soft(model, ckpt, runner.logger) else: if 'state_dict' in ckpt: model.load_state_dict(ckpt['state_dict']) else: model.load_state_dict(ckpt) - print("Switching to deployment model") + runner.logger.info("Switching to deployment model") # if repvgg style -> deploy for module in model.modules(): if hasattr(module, 'switch_to_deploy'): module.switch_to_deploy() - calc_sparsity(model.state_dict()) + calc_sparsity(model.state_dict(), runner.logger) # to onnx model.eval() if args.postprocess: - print("Adding Postprocess (Resize+ArgMax) to the model") + runner.logger.info("Adding Postprocess (Resize+ArgMax) to the model") model_with_postprocess = ModelWithPostProc(model, args) model_with_postprocess.eval() @@ -256,9 +246,9 @@ def main(): model_onnx = onnx.load(args.out_name) model_simp, check 
= simplify(model_onnx) onnx.save(model_simp, args.out_name) - print('Simplified model saved at: ', args.out_name) + runner.logger.info(f"Simplified model saved at: {args.out_name}") else: - print('Model saved at: ', args.out_name) + runner.logger.info(f"Model saved at: {args.out_name}") if __name__ == '__main__': parser = argparse.ArgumentParser( From 8c9aaac85d4a1027c72c66657baa5c808a3f38b9 Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Sun, 17 Dec 2023 12:11:15 +0200 Subject: [PATCH 12/13] Working on new checkpoint hook --- configs/fcn/fcn_hailo_10classes.py | 4 +- configs/fcn/fcn_hailo_10classes_epoch.py | 80 ++++++++++++++++++++++++ configs/fcn/fcn_hailo_prune.py | 20 +++--- mmseg/engine/hooks/checkpoint_hook.py | 19 ++++++ mmseg/utils/misc.py | 60 +++++++++++++++++- recipes/recipe_yolox_hailo_pruning.md | 30 +-------- sparsity/sparseml_hook.py | 35 +++++------ tools/pytorch2onnx.py | 65 ++----------------- tools/test.py | 19 +++++- tools/train.py | 2 +- 10 files changed, 214 insertions(+), 120 deletions(-) create mode 100644 configs/fcn/fcn_hailo_10classes_epoch.py create mode 100644 mmseg/engine/hooks/checkpoint_hook.py diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py index 2c19faf865..663fc1356a 100644 --- a/configs/fcn/fcn_hailo_10classes.py +++ b/configs/fcn/fcn_hailo_10classes.py @@ -23,8 +23,8 @@ # default hooks - logger & checkpoint configs default_hooks = dict( - # print log every 100 iterations. - logger=dict(type='LoggerHook', interval=200, log_metric_by_epoch=False), + # print log every 400 iterations. + logger=dict(type='LoggerHook', interval=400, log_metric_by_epoch=False), # enable the parameter scheduler. param_scheduler=dict(type='ParamSchedulerHook'), diff --git a/configs/fcn/fcn_hailo_10classes_epoch.py b/configs/fcn/fcn_hailo_10classes_epoch.py new file mode 100644 index 0000000000..b4d212e6f3 --- /dev/null +++ b/configs/fcn/fcn_hailo_10classes_epoch.py @@ -0,0 +1,80 @@ +# model settings +_base_ = [ + '../_base_/datasets/cityscapes10classes.py', '../_base_/default_runtime.py', +] + +# optimizer +optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5) +optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) + +# learning policy +param_scheduler = [ + dict( + type='LinearLR', start_factor=0.2, begin=0, end=1), + dict( + type='CosineAnnealingLR', begin=1, end=5, eta_min=0.00001) +] + +# runtime settings +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +# default hooks - logger & checkpoint configs +default_hooks = dict( + + # print log every 100 iterations. + logger=dict(type='LoggerHook', interval=1), #, log_metric_by_epoch=False), + + # enable the parameter scheduler. + param_scheduler=dict(type='ParamSchedulerHook'), + + # save checkpoint every 5 epochs. 
+ checkpoint=dict(type='CheckpointHook', + interval=1, + save_best='mIoU', + rule='greater', + max_keep_ckpts=5), +) + +# tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' +visualizer = dict(type='SegLocalVisualizer', + vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')], + name='visualizer') + +# data preprocessing +norm_cfg = dict(type='SyncBN', requires_grad=True) +crop_size = (512, 1024) +data_preprocessor = dict( + type='SegDataPreProcessor', + mean=[0.0, 0.0, 0.0], + std=[1.0, 1.0, 1.0], + bgr_to_rgb=True, + pad_val=0, + seg_pad_val=255, + size=crop_size) + +model = dict( + type='EncoderDecoder', + backbone=dict( + type='hailoFPN', + depth=0.33, + width=0.125, + bb_channels_list=[128, 256, 512, 1024], + bb_num_repeats_list=[9, 15, 21, 12], + neck_channels_list=[256, 128, 128, 256, 256, 512], + neck_num_repeats_list=[9, 12, 12, 9]), + decode_head=dict( + type='ConvHead', + in_channels=16, + channels=128, + num_convs=1, + num_classes=10, + norm_cfg=norm_cfg, + align_corners=True, + loss_decode=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), + # model training and testing settings + train_cfg=dict(), + test_cfg=dict(mode='whole'), + infer_wo_softmax=True) diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py index ab5f84be83..97a6bd5ac6 100644 --- a/configs/fcn/fcn_hailo_prune.py +++ b/configs/fcn/fcn_hailo_prune.py @@ -4,7 +4,8 @@ ] resume = True -load_from='./work_dirs/fcn_hailo_eta1e5/iter_68448.pth' +# load_from='./work_dirs/fcn_hailo_eta1e5/iter_68448.pth' # best checkpoint path of full training (fcn_hailo_10classes). Start of pruning procedure +load_from='./work_dirs/fcn_hailo_eta1e5_eve/iter_74400.pth' # optimizer optimizer = dict(type='Adam', lr=0.0001, weight_decay=1e-5) @@ -12,7 +13,7 @@ # runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=173760, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 119040 (80 epochs) +train_cfg = dict(type='IterBasedTrainLoop', max_iters=178560, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 89280 (80 epochs), 173760 val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') @@ -24,15 +25,20 @@ # enable the parameter scheduler. param_scheduler=dict(type='ParamSchedulerHook'), + ) - # save checkpoint every 1 epoch. - checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', - max_keep_ckpts=5, save_begin=163680), # 2976 (2Epoches), 7440 (5 Epoches) , max_keep_ckpts=5 - ) + # # save checkpoint every 1 epoch. 
+ # checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', + # max_keep_ckpts=5, save_begin=163680), # 2976 (2Epoches), 7440 (5 Epoches) + # ) # learning policy: taken from the recipe # custom hooks -custom_hooks = [dict(type='SparseMLHook', interval=10, priority='NORMAL')] +sparseml_hook = dict(type='SparseMLHook', priority='NORMAL') +# sparseml_hook = dict(type='SparseMLHook', interval=10, priority='NORMAL') +ext_checkpoint_hook = dict(type='ExtCheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', + max_keep_ckpts=5, save_begin=163680) # 2976 (2Epoches), 7440 (5 Epoches), 80352 (54), 83328 (56), 163680 +custom_hooks = [sparseml_hook, ext_checkpoint_hook] # tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' visualizer = dict(type='SegLocalVisualizer', diff --git a/mmseg/engine/hooks/checkpoint_hook.py b/mmseg/engine/hooks/checkpoint_hook.py new file mode 100644 index 0000000000..b7b820759e --- /dev/null +++ b/mmseg/engine/hooks/checkpoint_hook.py @@ -0,0 +1,19 @@ +from mmengine.hooks import CheckpointHook +from mmseg.registry import HOOKS + + +@HOOKS.register_module() +class ExtCheckpointHook(CheckpointHook): + # def __init__(self): + # self.by_epoch = False + + def after_val_epoch(self, runner, metrics): + if runner.iter == self.save_begin: + runner.logger.info('Resetting best_score to 0.0') + runner.message_hub.update_info('best_score', 0.0) + runner.message_hub.pop_info('best_ckpt', None) + if (runner.iter + 1 >= self.save_begin): + runner.logger.info('ExtCheckpointHook ExtCheckpointHook ExtCheckpointHook') + runner.logger.info( + f'Saving checkpoint at iter {runner.iter}') + super().after_val_epoch(runner, metrics) diff --git a/mmseg/utils/misc.py b/mmseg/utils/misc.py index 0a561732e9..a9d890c055 100644 --- a/mmseg/utils/misc.py +++ b/mmseg/utils/misc.py @@ -4,7 +4,7 @@ import numpy as np import torch import torch.nn.functional as F - +from collections import OrderedDict from .typing_utils import SampleList @@ -116,3 +116,61 @@ def stack_batch(inputs: List[torch.Tensor], pad_shape=pad_img.shape[-2:])) return torch.stack(padded_inputs, dim=0), padded_samples + + +def load_pretrained_weights_soft(model, checkpoint, logger): + + if 'state_dict' in checkpoint: + state_dict = checkpoint['state_dict'] + elif 'model' in checkpoint: + state_dict = checkpoint['model'] + else: + state_dict = checkpoint + + model_dict = model.state_dict() + new_state_dict = OrderedDict() + matched_layers, discarded_layers = [], [] + + for k, v in state_dict.items(): + if k.startswith('module.'): + k = k[7:] # discard module. + + if k in model_dict and model_dict[k].size() == v.size(): + new_state_dict[k] = v + matched_layers.append(k) + else: + discarded_layers.append(k) + + model_dict.update(new_state_dict) + model.load_state_dict(model_dict) + + if len(matched_layers) == 0: + logger.warning( + 'The pretrained weights cannot be loaded, ' + 'please check the key names manually ' + ) + else: + logger.info('Successfully loaded pretrained weights') + if len(discarded_layers) > 0: + logger.warning( + '** The following layers are discarded ' + 'due to unmatched keys or layer size: {}'. 
+ format(discarded_layers) + ) + return + + +def calc_sparsity(model_dict, logger, verbose=False): + weights_layers_num, total_weights, total_zeros = 0, 0, 0 + prefix = next(iter(model_dict)).split('backbone.stage0')[0] + for k, v in model_dict.items(): + if k.startswith(prefix) and k.endswith('weight'): + weights_layers_num += 1 + total_weights += v.numel() + total_zeros += (v.numel() - v.count_nonzero()) + zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 + if verbose: + logger.info(f"[{weights_layers_num:>2}] {k:<51}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7}" + f" ({zeros_ratio:<4.1f}%) are zeros") + logger.info(f"Model has {weights_layers_num} weight layers") + logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") diff --git a/recipes/recipe_yolox_hailo_pruning.md b/recipes/recipe_yolox_hailo_pruning.md index 2b389524a5..bb2f649738 100644 --- a/recipes/recipe_yolox_hailo_pruning.md +++ b/recipes/recipe_yolox_hailo_pruning.md @@ -16,7 +16,7 @@ init_sparsity: 0.01 final_sparsity: 0.60 pruning_start_epoch: 60 pruning_end_epoch: 110 -pruning_update_frequency: 5.0 +pruning_update_frequency: 2.0 #Modifiers training_modifiers: @@ -38,31 +38,3 @@ pruning_modifiers: end_epoch: eval(pruning_end_epoch) update_frequency: eval(pruning_update_frequency) --- - -training_modifiers: - - !EpochRangeModifier - start_epoch: 0 - end_epoch: eval(num_epochs) - - - !LearningRateFunctionModifier - start_epoch: 3 - end_epoch: eval(num_epochs) - lr_func: linear - init_lr: eval(init_lr) - final_lr: eval(final_lr) - - - !LearningRateFunctionModifier - start_epoch: 0 - end_epoch: 3 - lr_func: linear - init_lr: eval(weights_warmup_lr) - final_lr: eval(init_lr) - param_groups: [0, 1] - - - !LearningRateFunctionModifier - start_epoch: 0 - end_epoch: 3 - lr_func: linear - init_lr: eval(biases_warmup_lr) - final_lr: eval(init_lr) - param_groups: [2] \ No newline at end of file diff --git a/sparsity/sparseml_hook.py b/sparsity/sparseml_hook.py index 59786a166c..6462c9f12d 100644 --- a/sparsity/sparseml_hook.py +++ b/sparsity/sparseml_hook.py @@ -1,26 +1,33 @@ -from mmseg.registry import RUNNERS, HOOKS +from mmseg.registry import HOOKS +from mmseg.utils.misc import calc_sparsity from mmengine.hooks import Hook from sparseml.pytorch.optim import ScheduledModifierManager + @HOOKS.register_module() class SparseMLHook(Hook): - def __init__(self, interval=10): - self.interval = interval + def __init__(self, steps_per_epoch=1488, start_epoch=50, prune_interval_epoch=2): + self.steps_per_epoch = steps_per_epoch + self.start_epoch = start_epoch + self.prune_interval_epoch = prune_interval_epoch def before_train(self, runner) -> None: self.manager = ScheduledModifierManager.from_yaml(runner.cfg.recipe) optimizer = runner.optim_wrapper.optimizer - optimizer = self.manager.modify(runner.model.module, optimizer, steps_per_epoch=1488, epoch=40) + optimizer = self.manager.modify(runner.model.module, + optimizer, + steps_per_epoch=self.steps_per_epoch, + epoch=self.start_epoch) runner.optim_wrapper.optimizer = optimizer def after_train(self, runner) -> None: self.manager.finalize(runner.model.module) def after_train_iter(self, runner, batch_idx, data_batch, outputs): - if batch_idx % (1488 * 2) == 0: # 2 Epochs - runner.logger.info(f"Epoch #{batch_idx // 1488} End") - self._calc_sparsity(runner.model.state_dict(), runner.logger) + if batch_idx % (self.steps_per_epoch * self.prune_interval_epoch) == 0: # 2 Epochs + calc_sparsity(runner.model.state_dict(), runner.logger) + 
runner.logger.info(f"Epoch #{batch_idx // self.steps_per_epoch} End") def after_test_epoch(self, runner, metrics): runner.logger.info("Switching to deployment model") @@ -28,16 +35,4 @@ def after_test_epoch(self, runner, metrics): for module in runner.model.modules(): if hasattr(module, 'switch_to_deploy'): module.switch_to_deploy() - self._calc_sparsity(runner.model.state_dict(), runner.logger) - - def _calc_sparsity(self, model_dict, logger): - weights_layers_num, total_weights, total_zeros = 0, 0, 0 - prefix = next(iter(model_dict)).split('backbone.stage0')[0] - for k, v in model_dict.items(): - if k.startswith(prefix) and k.endswith('weight'): - weights_layers_num += 1 - total_weights += v.numel() - total_zeros += (v.numel() - v.count_nonzero()) - logger.info(f"Model has {weights_layers_num} weight layers") - logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") - + calc_sparsity(runner.model.state_dict(), runner.logger, True) diff --git a/tools/pytorch2onnx.py b/tools/pytorch2onnx.py index a313ad57ec..e223ceda72 100644 --- a/tools/pytorch2onnx.py +++ b/tools/pytorch2onnx.py @@ -8,61 +8,20 @@ from mmengine.config import Config, DictAction from mmengine.runner import Runner from mmseg.registry import RUNNERS +from mmseg.utils.misc import calc_sparsity, load_pretrained_weights_soft import onnx from onnxsim import simplify -from collections import OrderedDict - -def load_pretrained_weights_soft(model, checkpoint, logger): - - if 'state_dict' in checkpoint: - state_dict = checkpoint['state_dict'] - elif 'model' in checkpoint: - state_dict = checkpoint['model'] - else: - state_dict = checkpoint - - model_dict = model.state_dict() - new_state_dict = OrderedDict() - matched_layers, discarded_layers = [], [] - - for k, v in state_dict.items(): - if k.startswith('module.'): - k = k[7:] # discard module. - - if k in model_dict and model_dict[k].size() == v.size(): - new_state_dict[k] = v - matched_layers.append(k) - else: - discarded_layers.append(k) - - model_dict.update(new_state_dict) - model.load_state_dict(model_dict) - - if len(matched_layers) == 0: - logger.warning( - 'The pretrained weights cannot be loaded, ' - 'please check the key names manually ' - ) - else: - logger.info('Successfully loaded pretrained weights') - if len(discarded_layers) > 0: - logger.warning( - '** The following layers are discarded ' - 'due to unmatched keys or layer size: {}'. 
- format(discarded_layers) - ) - def dummy_prune_ckpt(ckpt, prune_ratio=0.5, random_prune=False): + prefix = next(iter(ckpt['state_dict'])).split('backbone.stage0')[0] for k, v in ckpt['state_dict'].items(): - if k.startswith('backbone.') and k.endswith('.rbr_dense.conv.weight'): + if k.startswith(prefix) and k.endswith('.rbr_dense.conv.weight'): if random_prune: # Sparsify layer randomly: v = random_prune_layer(v, prune_ratio) else: # Sparsify layer according to magnitude: v = dummy_prune_layer(v, prune_ratio) - calc_sparsity(ckpt['state_dict']) return ckpt @@ -108,18 +67,6 @@ def dummy_prune_layer(layer, prune_ratio=0.5): return pruned_tensor -def calc_sparsity(model_dict, logger): - weights_layers_num, total_weights, total_zeros = 0, 0, 0 - for k, v in model_dict.items(): - if k.startswith('backbone.') and k.endswith('weight'): - weights_layers_num += 1 - total_weights += v.numel() - total_zeros += (v.numel() - v.count_nonzero()) - zeros_ratio = (v.numel() - v.count_nonzero()) / v.numel() * 100.0 - logger.info(f"[{weights_layers_num:>2}] {k:<51}:: {v.numel() - v.count_nonzero():<5} / {v.numel():<7} ({zeros_ratio:<4.1f}%) are zeros") - logger.info(f"Model has {weights_layers_num} weight layers") - logger.info(f"Overall Sparsity is roughly: {100 * total_zeros / total_weights:.1f}%") - def parse_args(): parser.add_argument('config', help='train config file path') @@ -216,13 +163,13 @@ def main(): model.load_state_dict(ckpt['state_dict']) else: model.load_state_dict(ckpt) - + runner.logger.info("Switching to deployment model") # if repvgg style -> deploy for module in model.modules(): if hasattr(module, 'switch_to_deploy'): module.switch_to_deploy() - calc_sparsity(model.state_dict(), runner.logger) + calc_sparsity(model.state_dict(), runner.logger, True) # to onnx model.eval() @@ -231,7 +178,7 @@ def main(): model_with_postprocess = ModelWithPostProc(model, args) model_with_postprocess.eval() - imgs = torch.zeros(1,3, args.shape[0], args.shape[1], dtype=torch.float32).to(device) + imgs = torch.zeros(1, 3, args.shape[0], args.shape[1], dtype=torch.float32).to(device) outputs = model_with_postprocess(imgs) torch.onnx.export(model_with_postprocess, diff --git a/tools/test.py b/tools/test.py index 19fa17fd07..787c5200ae 100644 --- a/tools/test.py +++ b/tools/test.py @@ -2,10 +2,14 @@ import argparse import os import os.path as osp +import torch +from copy import deepcopy from sparsity import sparseml_hook from mmengine.config import Config, DictAction from mmengine.runner import Runner +from mmseg.engine.hooks import checkpoint_hook +from mmseg.utils.misc import calc_sparsity, load_pretrained_weights_soft # TODO: support fuse_conv_bn, visualization, and format_only @@ -29,6 +33,10 @@ def parse_args(): help='directory where painted images will be saved. 
' 'If specified, it will be automatically saved ' 'to the work_dir/timestamp/show_dir') + parser.add_argument( + '--deploy', + action='store_true', + help='switch model to deployment mode and calculate sparsity ratio') parser.add_argument( '--wait-time', type=float, default=2, help='the interval of show (s)') parser.add_argument( @@ -115,7 +123,16 @@ def main(): # build the runner from config runner = Runner.from_cfg(cfg) - + if args.deploy: + ckpt = torch.load(args.checkpoint, map_location='cpu') + model_deploy = deepcopy(runner.model) + load_pretrained_weights_soft(model_deploy, ckpt, runner.logger) + runner.logger.info("Calculating sparsity ratio on deployment model") + # if repvgg style -> deploy + for module in model_deploy.modules(): + if hasattr(module, 'switch_to_deploy'): + module.switch_to_deploy() + calc_sparsity(model_deploy.state_dict(), runner.logger, True) # start testing runner.test() diff --git a/tools/train.py b/tools/train.py index b5ab30daa5..88634829f8 100644 --- a/tools/train.py +++ b/tools/train.py @@ -5,6 +5,7 @@ import os.path as osp from sparsity import sparseml_hook +from mmseg.engine.hooks import checkpoint_hook from mmengine.config import Config, DictAction from mmengine.logging import print_log @@ -93,7 +94,6 @@ def main(): cfg.resume = args.resume cfg.recipe = args.recipe cfg.recipe_args = args.recipe_args - print(f"{cfg.resume=}, {cfg.load_from}") # build the runner from config if 'runner_type' not in cfg: # build the default runner From e1627ade66389b38d24b406148265e44aef05e4f Mon Sep 17 00:00:00 2001 From: Amit Klinger Date: Tue, 19 Dec 2023 07:14:34 +0200 Subject: [PATCH 13/13] Best checkpoint save after pruning + cleanup0 --- configs/fcn/fcn_hailo_10classes.py | 3 +- configs/fcn/fcn_hailo_10classes_epoch.py | 80 ------------------------ configs/fcn/fcn_hailo_prune.py | 15 ++--- mmseg/engine/hooks/checkpoint_hook.py | 3 - 4 files changed, 7 insertions(+), 94 deletions(-) delete mode 100644 configs/fcn/fcn_hailo_10classes_epoch.py diff --git a/configs/fcn/fcn_hailo_10classes.py b/configs/fcn/fcn_hailo_10classes.py index 663fc1356a..9105395ae4 100644 --- a/configs/fcn/fcn_hailo_10classes.py +++ b/configs/fcn/fcn_hailo_10classes.py @@ -30,7 +30,8 @@ param_scheduler=dict(type='ParamSchedulerHook'), # save checkpoint every 5 epochs. 
- checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440, save_best='mIoU', rule='greater', max_keep_ckpts=5), + checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=7440, save_best='mIoU', rule='greater', + max_keep_ckpts=5), ) # tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' diff --git a/configs/fcn/fcn_hailo_10classes_epoch.py b/configs/fcn/fcn_hailo_10classes_epoch.py deleted file mode 100644 index b4d212e6f3..0000000000 --- a/configs/fcn/fcn_hailo_10classes_epoch.py +++ /dev/null @@ -1,80 +0,0 @@ -# model settings -_base_ = [ - '../_base_/datasets/cityscapes10classes.py', '../_base_/default_runtime.py', -] - -# optimizer -optimizer = dict(type='Adam', lr=0.001, weight_decay=1e-5) -optim_wrapper = dict(type='OptimWrapper', optimizer=optimizer, clip_grad=None) - -# learning policy -param_scheduler = [ - dict( - type='LinearLR', start_factor=0.2, begin=0, end=1), - dict( - type='CosineAnnealingLR', begin=1, end=5, eta_min=0.00001) -] - -# runtime settings -train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=5, val_interval=1) -val_cfg = dict(type='ValLoop') -test_cfg = dict(type='TestLoop') - -# default hooks - logger & checkpoint configs -default_hooks = dict( - - # print log every 100 iterations. - logger=dict(type='LoggerHook', interval=1), #, log_metric_by_epoch=False), - - # enable the parameter scheduler. - param_scheduler=dict(type='ParamSchedulerHook'), - - # save checkpoint every 5 epochs. - checkpoint=dict(type='CheckpointHook', - interval=1, - save_best='mIoU', - rule='greater', - max_keep_ckpts=5), -) - -# tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' -visualizer = dict(type='SegLocalVisualizer', - vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')], - name='visualizer') - -# data preprocessing -norm_cfg = dict(type='SyncBN', requires_grad=True) -crop_size = (512, 1024) -data_preprocessor = dict( - type='SegDataPreProcessor', - mean=[0.0, 0.0, 0.0], - std=[1.0, 1.0, 1.0], - bgr_to_rgb=True, - pad_val=0, - seg_pad_val=255, - size=crop_size) - -model = dict( - type='EncoderDecoder', - backbone=dict( - type='hailoFPN', - depth=0.33, - width=0.125, - bb_channels_list=[128, 256, 512, 1024], - bb_num_repeats_list=[9, 15, 21, 12], - neck_channels_list=[256, 128, 128, 256, 256, 512], - neck_num_repeats_list=[9, 12, 12, 9]), - decode_head=dict( - type='ConvHead', - in_channels=16, - channels=128, - num_convs=1, - num_classes=10, - norm_cfg=norm_cfg, - align_corners=True, - loss_decode=dict( - type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)), - # model training and testing settings - train_cfg=dict(), - test_cfg=dict(mode='whole'), - infer_wo_softmax=True) diff --git a/configs/fcn/fcn_hailo_prune.py b/configs/fcn/fcn_hailo_prune.py index 97a6bd5ac6..523b223fbc 100644 --- a/configs/fcn/fcn_hailo_prune.py +++ b/configs/fcn/fcn_hailo_prune.py @@ -4,8 +4,8 @@ ] resume = True -# load_from='./work_dirs/fcn_hailo_eta1e5/iter_68448.pth' # best checkpoint path of full training (fcn_hailo_10classes). Start of pruning procedure -load_from='./work_dirs/fcn_hailo_eta1e5_eve/iter_74400.pth' +# best checkpoint path of full training (fcn_hailo_10classes). 
Start of pruning procedure: +load_from = './work_dirs/fcn_hailo_eta1e5_eve/iter_74400.pth' # optimizer optimizer = dict(type='Adam', lr=0.0001, weight_decay=1e-5) @@ -13,7 +13,7 @@ # runtime settings -train_cfg = dict(type='IterBasedTrainLoop', max_iters=178560, val_interval=1488) # 74400 (50 epochs), 89280 (60 epochs), 104160 (70 epochs), 89280 (80 epochs), 173760 +train_cfg = dict(type='IterBasedTrainLoop', max_iters=178560, val_interval=1488) # 74400 (50 epochs), 178560 (120) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') @@ -27,17 +27,12 @@ param_scheduler=dict(type='ParamSchedulerHook'), ) - # # save checkpoint every 1 epoch. - # checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', - # max_keep_ckpts=5, save_begin=163680), # 2976 (2Epoches), 7440 (5 Epoches) - # ) - # learning policy: taken from the recipe # custom hooks sparseml_hook = dict(type='SparseMLHook', priority='NORMAL') -# sparseml_hook = dict(type='SparseMLHook', interval=10, priority='NORMAL') +# Saving best checkpoint starts after pruning hits final ratio ext_checkpoint_hook = dict(type='ExtCheckpointHook', by_epoch=False, interval=1488, save_best='mIoU', rule='greater', - max_keep_ckpts=5, save_begin=163680) # 2976 (2Epoches), 7440 (5 Epoches), 80352 (54), 83328 (56), 163680 + max_keep_ckpts=5, save_begin=163680) # 163680 (110 epochs) custom_hooks = [sparseml_hook, ext_checkpoint_hook] # tensorboard vis ('LocalVisBackend' might be redundant) save_dir='./tf_dir/' diff --git a/mmseg/engine/hooks/checkpoint_hook.py b/mmseg/engine/hooks/checkpoint_hook.py index b7b820759e..d752fd839e 100644 --- a/mmseg/engine/hooks/checkpoint_hook.py +++ b/mmseg/engine/hooks/checkpoint_hook.py @@ -4,8 +4,6 @@ @HOOKS.register_module() class ExtCheckpointHook(CheckpointHook): - # def __init__(self): - # self.by_epoch = False def after_val_epoch(self, runner, metrics): if runner.iter == self.save_begin: @@ -13,7 +11,6 @@ def after_val_epoch(self, runner, metrics): runner.message_hub.update_info('best_score', 0.0) runner.message_hub.pop_info('best_ckpt', None) if (runner.iter + 1 >= self.save_begin): - runner.logger.info('ExtCheckpointHook ExtCheckpointHook ExtCheckpointHook') runner.logger.info( f'Saving checkpoint at iter {runner.iter}') super().after_val_epoch(runner, metrics)
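
Note on the iteration counts above: the milestones scattered through fcn_hailo_prune.py, SparseMLHook and ExtCheckpointHook all derive from roughly 1488 training iterations per epoch (an assumption implied by the config comments, e.g. "74400 (50 epochs)"). A small illustrative helper, not part of the patches themselves, makes the arithmetic explicit:

# Illustrative sketch only: relate the epoch milestones mentioned in the config
# comments to the raw iteration counts used in fcn_hailo_prune.py.
STEPS_PER_EPOCH = 1488  # assumed Cityscapes schedule implied by the config comments

def epoch_to_iters(epoch: int, steps_per_epoch: int = STEPS_PER_EPOCH) -> int:
    return epoch * steps_per_epoch

assert epoch_to_iters(50) == 74400    # load_from checkpoint: end of the full (unpruned) training
assert epoch_to_iters(110) == 163680  # save_begin: pruning_end_epoch from the recipe
assert epoch_to_iters(120) == 178560  # max_iters of the pruning run
assert epoch_to_iters(2) == 2976      # sparsity-logging interval in SparseMLHook (1488 * 2)
assert epoch_to_iters(5) == 7440      # checkpoint interval in fcn_hailo_10classes.py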
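
tools/pytorch2onnx.py also gains a --random_prune flag that routes dummy_prune_ckpt through random_prune_layer instead of the magnitude-based dummy_prune_layer, but the body of random_prune_layer is not shown in this series. A minimal sketch of what such a helper could look like (an assumption; the actual implementation in the tree may differ):

import torch

def random_prune_layer(layer: torch.Tensor, prune_ratio: float = 0.5) -> torch.Tensor:
    """Zero a random fraction of the weights in `layer` (sketch, not the patched code)."""
    pruned = layer.clone().flatten()
    num_to_prune = int(prune_ratio * pruned.numel())
    # Pick `num_to_prune` distinct positions uniformly at random and zero them.
    indices = torch.randperm(pruned.numel())[:num_to_prune]
    pruned[indices] = 0
    return pruned.reshape(layer.shape)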
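
Because calc_sparsity and load_pretrained_weights_soft now live in mmseg/utils/misc.py, the sparsity of a saved checkpoint can be inspected without building a full runner. A hedged usage sketch (the checkpoint path below is a placeholder, not a file produced by these patches):

import logging
import torch
from mmseg.utils.misc import calc_sparsity

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('sparsity_check')

# Placeholder path; substitute the checkpoint produced by the pruning run.
ckpt = torch.load('./work_dirs/fcn_hailo_prune/iter_178560.pth', map_location='cpu')
state_dict = ckpt.get('state_dict', ckpt)

# verbose=True prints the per-layer zero ratios as well as the overall figure.
calc_sparsity(state_dict, logger, verbose=True)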