diff --git a/.gitignore b/.gitignore
index 14b2842..59501da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,3 +18,4 @@ __pycache__/
 .cache/
 dist/
 dnc.egg-info/
+tasks/checkpoints/
diff --git a/README.md b/README.md
index ae27460..eaa1466 100644
--- a/README.md
+++ b/README.md
@@ -59,8 +59,9 @@ output, (controller_hidden, memory, read_vectors) = \
 
 The copy task, as descibed in the original paper, is included in the repo.
 
+From the project root:
 ```
-python ./copy_task.py -cuda 0
+python ./tasks/copy_task.py -cuda 0
 ```
 
 ## General noteworthy stuff
diff --git a/dnc/dnc.py b/dnc/dnc.py
index f303a61..18cc3be 100644
--- a/dnc/dnc.py
+++ b/dnc/dnc.py
@@ -69,20 +69,11 @@ def __init__(
     for layer in range(self.num_layers):
       # controllers for each layer
       if self.rnn_type.lower() == 'rnn':
-        if layer == 0:
           self.rnns.append(nn.RNNCell(self.layer0_input_size, self.output_size, bias=self.bias, nonlinearity=self.nonlinearity))
-        else:
-          self.rnns.append(nn.RNNCell(self.layern_input_size, self.output_size, bias=self.bias, nonlinearity=self.nonlinearity))
       elif self.rnn_type.lower() == 'gru':
-        if layer == 0:
           self.rnns.append(nn.GRUCell(self.layer0_input_size, self.output_size, bias=self.bias))
-        else:
-          self.rnns.append(nn.GRUCell(self.layern_input_size, self.output_size, bias=self.bias))
       elif self.rnn_type.lower() == 'lstm':
-        # if layer == 0:
         self.rnns.append(nn.LSTMCell(self.layer0_input_size, self.output_size, bias=self.bias))
-        # else:
-        #   self.rnns.append(nn.LSTMCell(self.layern_input_size, self.output_size, bias=self.bias))
 
       # memories for each layer
       if not self.share_memory:
@@ -170,7 +161,7 @@ def _layer_forward(self, input, layer, hx=(None, None)):
     # the interface vector
     ξ = chx[0] if self.rnn_type.lower() == 'lstm' else chx
     # the output
-    out = self.output_weights(chx[0])
+    out = self.output_weights(chx[0]) if self.rnn_type.lower() == 'lstm' else self.output_weights(chx)
 
     # pass through memory
     if self.share_memory:
diff --git a/setup.py b/setup.py
index d98a37b..f45f0d7 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@
     keywords='differentiable neural computer dnc memory network',
 
-    packages=find_packages(exclude=['contrib', 'docs', 'tests']),
+    packages=find_packages(exclude=['contrib', 'docs', 'tests', 'tasks']),
 
     install_requires=['torch', 'numpy'],
 
diff --git a/tasks/__init__.py b/tasks/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/dnc/copy_task.py b/tasks/copy_task.py
similarity index 96%
rename from dnc/copy_task.py
rename to tasks/copy_task.py
index ac3ccdd..d488d40 100644
--- a/dnc/copy_task.py
+++ b/tasks/copy_task.py
@@ -20,10 +20,11 @@
 from torch.nn.utils import clip_grad_norm
 
-from dnc import DNC
+from dnc.dnc import DNC
 
 parser = argparse.ArgumentParser(description='PyTorch Differentiable Neural Computer')
 parser.add_argument('-input_size', type=int, default= 6, help='dimension of input feature')
+parser.add_argument('-rnn_type', type=str, default='lstm', help='type of recurrent cells to use for the controller')
 parser.add_argument('-nhid', type=int, default=64, help='humber of hidden units of the inner nn')
 parser.add_argument('-nlayer', type=int, default=2, help='number of layers')
@@ -101,13 +102,6 @@ def criterion(predictions, targets):
 
   mem_size = args.mem_size
   read_heads = args.read_heads
-
-  # options, _ = getopt.getopt(sys.argv[1:], '', ['iterations='])
-
-  # for opt in options:
-  #   if opt[0] == '-iterations':
-  #     iterations = int(opt[1])
-
   rnn = DNC(
     input_size=args.input_size,
     hidden_size=args.nhid,
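
A quick post-patch sanity check, extending the README's invocation with the new flag — a minimal sketch, assuming the remaining copy-task defaults (`gru` and `rnn` are the other controller branches kept in `dnc/dnc.py`; `lstm` is the default):

```
python ./tasks/copy_task.py -cuda 0 -rnn_type gru
```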