From f077d270b7f00e61151703ec0b9e4913739dc99b Mon Sep 17 00:00:00 2001
From: VU Manh Tu
Date: Mon, 7 Dec 2020 08:48:50 +0100
Subject: [PATCH 1/2] Fixed unpacking of the packed sequence output of the LSTM

Since packed_RNN_out is zero-padded so that all sequences have the same
length, we can't simply take the last time step of RNN_out as the output
of the network. Instead, we use the second return value of
torch.nn.utils.rnn.pad_packed_sequence (the per-sequence lengths) to pick
the last valid element of each sequence to decode.
---
 ResNetCRNN_varylength/functions.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/ResNetCRNN_varylength/functions.py b/ResNetCRNN_varylength/functions.py
index 862cf2f..979fe75 100644
--- a/ResNetCRNN_varylength/functions.py
+++ b/ResNetCRNN_varylength/functions.py
@@ -264,16 +264,20 @@ def forward(self, x_RNN, x_lengths):
 
         """ h_n shape (n_layers, batch, hidden_size), h_c shape (n_layers, batch, hidden_size) """
         """ None represents zero initial hidden state. RNN_out has shape=(batch, time_step, output_size) """
-        RNN_out, _ = torch.nn.utils.rnn.pad_packed_sequence(packed_RNN_out, batch_first=True)
+        RNN_out, out_lengths = torch.nn.utils.rnn.pad_packed_sequence(packed_RNN_out, batch_first=True)
         RNN_out = RNN_out.contiguous()
+        last_out = []
+        for i, length in enumerate(out_lengths):
+            last_out.append(RNN_out[i][length - 1])
+        last_out = torch.stack(last_out)
         # RNN_out = RNN_out.view(-1, RNN_out.size(2))
 
         # reverse back to original sequence order
         _, unperm_idx = perm_idx.sort(0)
-        RNN_out = RNN_out[unperm_idx]
+        RNN_out = last_out[unperm_idx]
 
         # FC layers
-        x = self.fc1(RNN_out[:, -1, :])   # choose RNN_out at the last time step
+        x = self.fc1(RNN_out)   # choose RNN_out at the last time step
         x = F.relu(x)
         x = F.dropout(x, p=self.drop_p, training=self.training)
         x = self.fc2(x)

From 3a57edc41c689165830efb2ce7557c55c8cd1b05 Mon Sep 17 00:00:00 2001
From: VU Manh Tu
Date: Mon, 7 Dec 2020 08:51:03 +0100
Subject: [PATCH 2/2] Simple fix to make it runnable on macOS machines (for
 debugging) when no GPU is available

This does not affect the training flow on GPU machines.
---
 ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py b/ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py
index 94a7128..6c43c96 100644
--- a/ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py
+++ b/ResNetCRNN_varylength/UCF101_ResNetCRNN_varlen.py
@@ -150,7 +150,10 @@ def validation(model, device, optimizer, test_loader):
 device = torch.device("cuda" if use_cuda else "cpu")   # use CPU or GPU
 
 # Data loading parameters
-params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 8, 'pin_memory': True} if use_cuda else {}
+if use_cuda:
+    params = {'batch_size': batch_size, 'shuffle': True, 'num_workers': 8, 'pin_memory': True}
+else:
+    params = {'batch_size': 2, 'shuffle': True}
 
 # Testing
 # load UCF101 actions names
@@ -184,6 +187,8 @@ def validation(model, device, optimizer, test_loader):
 all_names = []
 all_length = []   # each video length
 for f in fnames:
+    if not os.path.isdir(os.path.join(data_path, f)):
+        continue
     loc1 = f.find('v_')
     loc2 = f.find('_g')
     actions.append(f[(loc1 + 2): loc2])
@@ -226,7 +231,7 @@ def validation(model, device, optimizer, test_loader):
     list(cnn_encoder.module.fc2.parameters()) + list(cnn_encoder.module.bn2.parameters()) + \
     list(cnn_encoder.module.fc3.parameters()) + list(rnn_decoder.parameters())
 
-elif torch.cuda.device_count() == 1:
+else:
     crnn_params = list(cnn_encoder.fc1.parameters()) + list(cnn_encoder.bn1.parameters()) + \
         list(cnn_encoder.fc2.parameters()) + list(cnn_encoder.bn2.parameters()) + \
         list(cnn_encoder.fc3.parameters()) + list(rnn_decoder.parameters())
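Note (not part of the patches): below is a minimal, self-contained sketch of the
last-valid-output selection that PATCH 1/2 introduces, using the lengths returned
by pad_packed_sequence, plus an equivalent vectorized gather. The tensor sizes,
toy LSTM, and variable names are illustrative assumptions, not code from the repo.

    # Sketch only: pick the last valid LSTM output of each padded sequence.
    import torch
    import torch.nn as nn

    batch, max_len, feat, hidden = 3, 5, 8, 16          # assumed toy sizes
    lengths = torch.tensor([5, 3, 2])                   # true length of each sequence (sorted desc)
    x = torch.randn(batch, max_len, feat)

    lstm = nn.LSTM(feat, hidden, batch_first=True)
    packed = nn.utils.rnn.pack_padded_sequence(x, lengths, batch_first=True)
    packed_out, _ = lstm(packed)
    out, out_lengths = nn.utils.rnn.pad_packed_sequence(packed_out, batch_first=True)

    # Loop version, mirroring the patch:
    last_out = torch.stack([out[i, l - 1] for i, l in enumerate(out_lengths)])

    # Equivalent vectorized gather along the time dimension:
    idx = (out_lengths - 1).view(-1, 1, 1).expand(-1, 1, out.size(2))
    last_out_vec = out.gather(1, idx).squeeze(1)
    assert torch.allclose(last_out, last_out_vec)

Either form gives a (batch, hidden) tensor of the outputs at each sequence's true
last time step, which is what the decoder's FC layers should consume instead of
the zero-padded final column.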