From 369f0f27b526d5c3819a10d552bbfeb3ed909323 Mon Sep 17 00:00:00 2001
From: zhaojing
Date: Thu, 14 Sep 2023 10:42:46 +0800
Subject: [PATCH] Training process for mlp models with varying layer sizes

---
 .../model_selection_psql/ms_mlp/train_mlp.py | 42 +++++++++++--------
 examples/model_selection_psql/msmlp/model.py | 13 ++++++
 2 files changed, 38 insertions(+), 17 deletions(-)

diff --git a/examples/model_selection_psql/ms_mlp/train_mlp.py b/examples/model_selection_psql/ms_mlp/train_mlp.py
index d7b21fb58..9f411f04e 100644
--- a/examples/model_selection_psql/ms_mlp/train_mlp.py
+++ b/examples/model_selection_psql/ms_mlp/train_mlp.py
@@ -42,17 +42,17 @@ def __call__(self, loss):
         return pn_p_g_list
 
     def call_with_returns(self, loss):
-        print ("call_with_returns loss.data: \n", loss.data)
+        # print ("call_with_returns loss.data: \n", loss.data)
         pn_p_g_list = []
         for p, g in autograd.backward(loss):
             if p.name is None:
                 p.name = id(p)
             self.apply(p.name, p, g)
             pn_p_g_list.append(p.name, p, g)
-            print ("call with returns")
-            print ("p.name: \n", p.name)
-            print ("p.data: \n", p.data)
-            print ("g.data: \n", g.data)
+            # print ("call with returns")
+            # print ("p.name: \n", p.name)
+            # print ("p.data: \n", p.data)
+            # print ("g.data: \n", g.data)
         return pn_p_g_list
 
 class MSSGD(MSOptimizer):
@@ -549,15 +549,23 @@ def run(global_rank,
 
     args = parser.parse_args()
 
-    mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
-    run(0,
-        1,
-        args.device_id,
-        args.max_epoch,
-        args.batch_size,
-        args.model,
-        args.data,
-        mssgd,
-        args.graph,
-        args.verbosity,
-        precision=args.precision)
+    DEFAULT_LAYER_CHOICES_4 = [8, 16, 24, 32]
+    for layer1 in DEFAULT_LAYER_CHOICES_4:
+        for layer2 in DEFAULT_LAYER_CHOICES_4:
+            for layer3 in DEFAULT_LAYER_CHOICES_4:
+                for layer4 in DEFAULT_LAYER_CHOICES_4:
+                    layer_hidden_list = [layer1, layer2+1, layer3+2, layer4+3]
+                    # print ("layer_hidden_list: \n", layer_hidden_list)
+                    mssgd = MSSGD(lr=args.lr, momentum=0.9, weight_decay=1e-5, dtype=singa_dtype[args.precision])
+                    run(0,
+                        1,
+                        args.device_id,
+                        layer_hidden_list,
+                        args.max_epoch,
+                        args.batch_size,
+                        args.model,
+                        args.data,
+                        mssgd,
+                        args.graph,
+                        args.verbosity,
+                        precision=args.precision)
diff --git a/examples/model_selection_psql/msmlp/model.py b/examples/model_selection_psql/msmlp/model.py
index be898b291..70bc2341d 100644
--- a/examples/model_selection_psql/msmlp/model.py
+++ b/examples/model_selection_psql/msmlp/model.py
@@ -95,14 +95,21 @@ def forward(self, inputs):
         return y
 
     def train_one_batch(self, x, y, synflow_flag, dist_option, spars):
+        # print ("in train_one_batch")
         out = self.forward(x)
+        # print ("train_one_batch x.data: \n", x.data)
+        # print ("train_one_batch y.data: \n", y.data)
+        # print ("train_one_batch out.data: \n", out.data)
         if synflow_flag:
             loss = self.sum_error(out)
+            # print ("sum_error")
         else:  # normal training
             loss = self.softmax_cross_entropy(out, y)
 
         if dist_option == 'plain':
+            # print ("before pn_p_g_list = self.optimizer(loss)")
             pn_p_g_list = self.optimizer(loss)
+            # print ("after pn_p_g_list = self.optimizer(loss)")
         elif dist_option == 'half':
             self.optimizer.backward_and_update_half(loss)
         elif dist_option == 'partialUpdate':
@@ -115,7 +122,13 @@ def train_one_batch(self, x, y, synflow_flag, dist_option, spars):
             self.optimizer.backward_and_sparse_update(loss,
                                                       topK=False,
                                                       spars=spars)
+        # print ("len(pn_p_g_list): \n", len(pn_p_g_list))
+        # print ("len(pn_p_g_list[0]): \n", len(pn_p_g_list[0]))
+        # print ("pn_p_g_list[0][0]: \n", pn_p_g_list[0][0])
+        # print ("pn_p_g_list[0][1].data: \n", pn_p_g_list[0][1].data)
+        # print ("pn_p_g_list[0][2].data: \n", pn_p_g_list[0][2].data)
         return pn_p_g_list, out, loss
+        # return pn_p_g_list[0], pn_p_g_list[1], pn_p_g_list[2], out, loss
 
     def set_optimizer(self, optimizer):
         self.optimizer = optimizer