diff --git a/breakout/DQN.py b/breakout/DQN.py index 35b360c..4ca994b 100644 --- a/breakout/DQN.py +++ b/breakout/DQN.py @@ -175,7 +175,7 @@ def __init__(self, "discount": other.get("discount", 0.99), "gamma": other.get("gamma", 0.95), - "convolutions": len(network["channels"]) - len(network.get("nodes", [])), + "convolutions": len(network["channels"]) - 1, "optimizer": optimizer["optimizer"](self.parameters(), lr=optimizer["lr"], **optimizer.get("hyperparameters", {})) @@ -250,12 +250,6 @@ def preprocess(self, state): ------- output : torch.Tensor """ - # state = (torch.tensor(state, - # dtype=torch.float32).view(self.shape["original"]) / - # torch.tensor(255, - # dtype=torch.float32))[:, :, self.shape["height"], self.shape["width"]] - # state = torch.nn.functional.max_pool2d(state, self.shape["max_pooling"]) - state = torch.tensor(state, dtype=torch.float32).view(self.shape["original"]) state = state[:, :, self.shape["height"], self.shape["width"]] / 255.0 @@ -396,7 +390,7 @@ def learn(self, network, clamp=None): # BACKPROPAGATION # -------------------------------------------------------------------------------------- - loss = torch.nn.functional.huber_loss(actual, optimal, reduction="mean") + loss = torch.nn.functional.mse_loss(actual, optimal) self.parameter["optimizer"].zero_grad() loss.backward() diff --git a/enduro/DQN.py b/enduro/DQN.py index 2e3e120..4fc777a 100644 --- a/enduro/DQN.py +++ b/enduro/DQN.py @@ -37,6 +37,8 @@ def __init__(self, Kernel size for each layer. channels : list, optional Number of channels for each hidden layer. + nodes : list of int, optional + Number of nodes in fully-connected layer(s). optimizer : dict Contains the optimizer for the model and its hyperparameters. The dictionary must contain the following keys: @@ -174,7 +176,7 @@ def __init__(self, "discount": other.get("discount", 0.99), "gamma": other.get("gamma", 0.95), - "convolutions": len(network["channels"]) - len(network.get("nodes", [])), + "convolutions": len(network["channels"]) - 1, "optimizer": optimizer["optimizer"](self.parameters(), lr=optimizer["lr"], **optimizer.get("hyperparameters", {})) diff --git a/tetris/DQN.py b/tetris/DQN.py index a8a469d..220a291 100644 --- a/tetris/DQN.py +++ b/tetris/DQN.py @@ -176,7 +176,7 @@ def __init__(self, "discount": other.get("discount", 0.99), "gamma": other.get("gamma", 0.95), - "convolutions": len(network["channels"]) - len(network.get("nodes", [])), + "convolutions": len(network["channels"]) - 1, "optimizer": optimizer["optimizer"](self.parameters(), lr=optimizer["lr"], **optimizer.get("hyperparameters", {}))