diff --git a/README.md b/README.md
index eaa1466..33de132 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ pip install dnc
 | num_layers | 1 | Number of layers of recurrent units in the controller |
 | bias | True | Bias |
 | batch_first | True | Whether data is fed batch first |
-| dropout | 0 | Dropout between layers in the controller (Not yet implemented) |
+| dropout | 0 | Dropout between layers in the controller |
 | bidirectional | False | If the controller is bidirectional (Not yet implemented) |
 | nr_cells | 5 | Number of memory cells |
 | read_heads | 2 | Number of read heads |
diff --git a/dnc/dnc.py b/dnc/dnc.py
index 18cc3be..d1e61b3 100644
--- a/dnc/dnc.py
+++ b/dnc/dnc.py
@@ -171,7 +171,7 @@ def _layer_forward(self, input, layer, hx=(None, None)):
       read_vectors[time] = read_vecs.view(-1, self.w * self.r)

       # get the final output for this time step
-      outs[time] = self.mem_out(T.cat([out, read_vectors[time]], 1))
+      outs[time] = self.dropout_layer(self.mem_out(T.cat([out, read_vectors[time]], 1)))

     return outs, read_vectors, (chx, mhx)

diff --git a/tasks/copy_task.py b/tasks/copy_task.py
index d488d40..37ca792 100644
--- a/tasks/copy_task.py
+++ b/tasks/copy_task.py
@@ -25,7 +25,8 @@
 parser = argparse.ArgumentParser(description='PyTorch Differentiable Neural Computer')
 parser.add_argument('-input_size', type=int, default= 6, help='dimension of input feature')
 parser.add_argument('-rnn_type', type=str, default='lstm', help='type of recurrent cells to use for the controller')
-parser.add_argument('-nhid', type=int, default=64, help='humber of hidden units of the inner nn')
+parser.add_argument('-nhid', type=int, default=64, help='number of hidden units of the inner nn')
+parser.add_argument('-dropout', type=float, default=0.3, help='controller dropout')

 parser.add_argument('-nlayer', type=int, default=2, help='number of layers')
 parser.add_argument('-lr', type=float, default=1e-2, help='initial learning rate')
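
With this change, `dropout` is no longer a placeholder option: it is applied to the memory-augmented output at each time step in `_layer_forward`, and `tasks/copy_task.py` exposes it as a flag. A minimal usage sketch is below; it follows the constructor parameters listed in the README table and the call pattern from the repo's README example, so exact argument names and the forward signature should be double-checked against the installed version.

```python
import torch as T
from dnc import DNC

# Sketch: build a DNC whose controller uses the newly implemented dropout.
# Values here are arbitrary illustrations, not recommended settings.
rnn = DNC(
    input_size=6,
    hidden_size=64,
    rnn_type='lstm',
    num_layers=2,
    nr_cells=5,
    read_heads=2,
    batch_first=True,
    dropout=0.3,  # applied to the per-timestep memory-augmented output
)

# One forward pass over a random batch of shape (batch, time, features);
# hidden state starts as (controller_hidden, memory, read_vectors) = (None, None, None).
x = T.randn(10, 20, 6)
output, (chx, mhx, read_vectors) = rnn(x, (None, None, None), reset_experience=True)
```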