diff --git a/Experements/DGCNN.ipynb b/Experements/DGCNN.ipynb deleted file mode 100644 index e3c4c69..0000000 --- a/Experements/DGCNN.ipynb +++ /dev/null @@ -1,195 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "cbb1f3da", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "import torch.nn as nn" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e4699e27", - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.rand(5, 3, 30)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "934e5920", - "metadata": {}, - "outputs": [], - "source": [ - "from moduleZoo.graphs import GraphConv2d\n", - "from modelZoo.graphs import DGCNN" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0da36247", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "DGCNN(\n", - " (layers): ModuleList(\n", - " (0): GraphConv2d(\n", - " (conv): ConvNormActivation2d(\n", - " (0): Conv2d(6, 32, kernel_size=(1, 1), stride=(1, 1))\n", - " (1): SELU()\n", - " )\n", - " )\n", - " (1): GraphConv2d(\n", - " (conv): ConvNormActivation2d(\n", - " (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))\n", - " (1): SELU()\n", - " )\n", - " )\n", - " (2): GraphConv2d(\n", - " (conv): ConvNormActivation2d(\n", - " (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))\n", - " (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", - " (2): SELU()\n", - " )\n", - " )\n", - " (3): GraphConv2d(\n", - " (conv): ConvNormActivation2d(\n", - " (0): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))\n", - " (1): SELU()\n", - " )\n", - " )\n", - " (4): GraphConv2d(\n", - " (conv): ConvNormActivation2d(\n", - " (0): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))\n", - " (1): SELU()\n", - " )\n", - " )\n", - " )\n", - ")" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = DGCNN(30)\n", - "model" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "a4d825df", - "metadata": {}, - "outputs": [], - "source": [ - "out = model(x)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "dc04f1f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "torch.Size([5, 992, 30])" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "out.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "c8fb308f", - "metadata": {}, - "outputs": [], - "source": [ - "from torchviz import make_dot" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "faa45483", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "fig = make_dot(out, params=dict(list(model.named_parameters())))" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "5e9bccd8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'fig.pdf'" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fig.render(\"fig\", format=\"pdf\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9b78375c", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": 
"text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/Experements/fig b/Experements/fig deleted file mode 100644 index 372c521..0000000 --- a/Experements/fig +++ /dev/null @@ -1,208 +0,0 @@ -digraph { - graph [size="28.65,28.65"] - node [align=left fontname=monospace fontsize=10 height=0.2 ranksep=0.1 shape=box style=filled] - 140146209559152 [label=" - (5, 992, 30)" fillcolor=darkolivegreen1] - 140146142049712 [label=CatBackward0] - 140146142049856 -> 140146142049712 - 140146142049856 [label=MaxBackward0] - 140146142050000 -> 140146142049856 - 140146142050000 [label=EluBackward0] - 140146142050096 -> 140146142050000 - 140146142050096 [label=ConvolutionBackward0] - 140146142050192 -> 140146142050096 - 140146209583568 [label="layers.0.conv.0.weight - (32, 6, 1, 1)" fillcolor=lightblue] - 140146209583568 -> 140146142050192 - 140146142050192 [label=AccumulateGrad] - 140146142050144 -> 140146142050096 - 140146209583648 [label="layers.0.conv.0.bias - (32)" fillcolor=lightblue] - 140146209583648 -> 140146142050144 - 140146142050144 [label=AccumulateGrad] - 140146142049808 -> 140146142049712 - 140146142049808 [label=MaxBackward0] - 140146142049904 -> 140146142049808 - 140146142049904 [label=EluBackward0] - 140146142050288 -> 140146142049904 - 140146142050288 [label=ConvolutionBackward0] - 140146142050384 -> 140146142050288 - 140146142050384 [label=CloneBackward0] - 140146142050576 -> 140146142050384 - 140146142050576 [label=PermuteBackward0] - 140146142050672 -> 140146142050576 - 140146142050672 [label=CatBackward0] - 140146142050768 -> 140146142050672 - 140146142050768 [label=SubBackward0] - 140146142050912 -> 140146142050768 - 140146142050912 [label=ViewBackward0] - 140146142051008 -> 140146142050912 - 140146142051008 [label=IndexBackward0] - 140146142051104 -> 140146142051008 - 140146142051104 [label=SliceBackward0] - 140146142051200 -> 140146142051104 - 140146142051200 [label=ViewBackward0] - 140146142051296 -> 140146142051200 - 140146142051296 [label=CloneBackward0] - 140146142051344 -> 140146142051296 - 140146142051344 [label=TransposeBackward0] - 140146142049856 -> 140146142051344 - 140146142050720 -> 140146142050768 - 140146142050720 [label=RepeatBackward0] - 140146142051152 -> 140146142050720 - 140146142051152 [label=ViewBackward0] - 140146142051344 -> 140146142051152 - 140146142050720 -> 140146142050672 - 140146142050336 -> 140146142050288 - 140146209583808 [label="layers.1.conv.0.weight - (64, 64, 1, 1)" fillcolor=lightblue] - 140146209583808 -> 140146142050336 - 140146142050336 [label=AccumulateGrad] - 140146142049952 -> 140146142050288 - 140146209583888 [label="layers.1.conv.0.bias - (64)" fillcolor=lightblue] - 140146209583888 -> 140146142049952 - 140146142049952 [label=AccumulateGrad] - 140146142049664 -> 140146142049712 - 140146142049664 [label=MaxBackward0] - 140146142050432 -> 140146142049664 - 140146142050432 [label=EluBackward0] - 140146142050624 -> 140146142050432 - 140146142050624 [label=NativeBatchNormBackward0] - 140146142050864 -> 140146142050624 - 140146142050864 [label=ConvolutionBackward0] - 140146142050960 -> 140146142050864 - 140146142050960 [label=CloneBackward0] - 140146142051488 -> 140146142050960 - 140146142051488 [label=PermuteBackward0] - 140146142051584 -> 140146142051488 - 140146142051584 [label=CatBackward0] - 140146142051680 -> 140146142051584 - 140146142051680 [label=SubBackward0] - 140146142051824 -> 140146142051680 - 
140146142051824 [label=ViewBackward0] - 140146142051968 -> 140146142051824 - 140146142051968 [label=IndexBackward0] - 140146142052016 -> 140146142051968 - 140146142052016 [label=SliceBackward0] - 140146142052112 -> 140146142052016 - 140146142052112 [label=ViewBackward0] - 140146142052208 -> 140146142052112 - 140146142052208 [label=CloneBackward0] - 140146142052304 -> 140146142052208 - 140146142052304 [label=TransposeBackward0] - 140146142049808 -> 140146142052304 - 140146142051632 -> 140146142051680 - 140146142051632 [label=RepeatBackward0] - 140146142052064 -> 140146142051632 - 140146142052064 [label=ViewBackward0] - 140146142052304 -> 140146142052064 - 140146142051632 -> 140146142051584 - 140146142051248 -> 140146142050864 - 140146209584048 [label="layers.2.conv.0.weight - (128, 128, 1, 1)" fillcolor=lightblue] - 140146209584048 -> 140146142051248 - 140146142051248 [label=AccumulateGrad] - 140146142049760 -> 140146142050864 - 140146209584128 [label="layers.2.conv.0.bias - (128)" fillcolor=lightblue] - 140146209584128 -> 140146142049760 - 140146142049760 [label=AccumulateGrad] - 140146142050480 -> 140146142050624 - 140150156484832 [label="layers.2.conv.1.weight - (128)" fillcolor=lightblue] - 140150156484832 -> 140146142050480 - 140146142050480 [label=AccumulateGrad] - 140146142050048 -> 140146142050624 - 140150156068240 [label="layers.2.conv.1.bias - (128)" fillcolor=lightblue] - 140150156068240 -> 140146142050048 - 140146142050048 [label=AccumulateGrad] - 140146142049472 -> 140146142049712 - 140146142049472 [label=MaxBackward0] - 140146142051056 -> 140146142049472 - 140146142051056 [label=EluBackward0] - 140146142050816 -> 140146142051056 - 140146142050816 [label=ConvolutionBackward0] - 140146142051392 -> 140146142050816 - 140146142051392 [label=CloneBackward0] - 140146142052160 -> 140146142051392 - 140146142052160 [label=PermuteBackward0] - 140146142051728 -> 140146142052160 - 140146142051728 [label=CatBackward0] - 140146142051920 -> 140146142051728 - 140146142051920 [label=SubBackward0] - 140146142081280 -> 140146142051920 - 140146142081280 [label=ViewBackward0] - 140146142081376 -> 140146142081280 - 140146142081376 [label=IndexBackward0] - 140146142081472 -> 140146142081376 - 140146142081472 [label=SliceBackward0] - 140146142081568 -> 140146142081472 - 140146142081568 [label=ViewBackward0] - 140146142081664 -> 140146142081568 - 140146142081664 [label=CloneBackward0] - 140146142081760 -> 140146142081664 - 140146142081760 [label=TransposeBackward0] - 140146142049664 -> 140146142081760 - 140146142081136 -> 140146142051920 - 140146142081136 [label=RepeatBackward0] - 140146142081520 -> 140146142081136 - 140146142081520 [label=ViewBackward0] - 140146142081760 -> 140146142081520 - 140146142081136 -> 140146142051728 - 140146142051536 -> 140146142050816 - 140146209584608 [label="layers.3.conv.0.weight - (256, 256, 1, 1)" fillcolor=lightblue] - 140146209584608 -> 140146142051536 - 140146142051536 [label=AccumulateGrad] - 140146142050240 -> 140146142050816 - 140146209584688 [label="layers.3.conv.0.bias - (256)" fillcolor=lightblue] - 140146209584688 -> 140146142050240 - 140146142050240 [label=AccumulateGrad] - 140146142049616 -> 140146142049712 - 140146142049616 [label=MaxBackward0] - 140146142051776 -> 140146142049616 - 140146142051776 [label=EluBackward0] - 140146142051872 -> 140146142051776 - 140146142051872 [label=ConvolutionBackward0] - 140146142050528 -> 140146142051872 - 140146142050528 [label=CloneBackward0] - 140146142081328 -> 140146142050528 - 140146142081328 [label=PermuteBackward0] 
- 140146142081808 -> 140146142081328 - 140146142081808 [label=CatBackward0] - 140146142081904 -> 140146142081808 - 140146142081904 [label=SubBackward0] - 140146142082048 -> 140146142081904 - 140146142082048 [label=ViewBackward0] - 140146142082144 -> 140146142082048 - 140146142082144 [label=IndexBackward0] - 140146142082240 -> 140146142082144 - 140146142082240 [label=SliceBackward0] - 140146142082336 -> 140146142082240 - 140146142082336 [label=ViewBackward0] - 140146142082432 -> 140146142082336 - 140146142082432 [label=CloneBackward0] - 140146142082528 -> 140146142082432 - 140146142082528 [label=TransposeBackward0] - 140146142049472 -> 140146142082528 - 140146142081184 -> 140146142081904 - 140146142081184 [label=RepeatBackward0] - 140146142082288 -> 140146142081184 - 140146142082288 [label=ViewBackward0] - 140146142082528 -> 140146142082288 - 140146142081184 -> 140146142081808 - 140146142081232 -> 140146142051872 - 140146209584848 [label="layers.4.conv.0.weight - (512, 512, 1, 1)" fillcolor=lightblue] - 140146209584848 -> 140146142081232 - 140146142081232 [label=AccumulateGrad] - 140146142081088 -> 140146142051872 - 140146209584928 [label="layers.4.conv.0.bias - (512)" fillcolor=lightblue] - 140146209584928 -> 140146142081088 - 140146142081088 [label=AccumulateGrad] - 140146142049712 -> 140146209559152 -} diff --git a/Experements/fig.pdf b/Experements/fig.pdf deleted file mode 100644 index 3c80f24..0000000 Binary files a/Experements/fig.pdf and /dev/null differ diff --git a/__version__ b/__version__ index cfb3795..34b3221 100644 --- a/__version__ +++ b/__version__ @@ -1 +1 @@ -"1.1.6-alpha" +"1.2.0-alpha" diff --git a/build.sh b/build.sh new file mode 100644 index 0000000..a40f109 --- /dev/null +++ b/build.sh @@ -0,0 +1 @@ +$PYTHON setup.py install # Python command to install the script. 
diff --git a/examples/Attention Blocks.ipynb b/examples/Attention Blocks.ipynb deleted file mode 100644 index e72c2d8..0000000 --- a/examples/Attention Blocks.ipynb +++ /dev/null @@ -1,173 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 6, - "id": "24f23c4c", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "from moduleZoo.attention import MultiHeadSelfAttention2d, MultiHeadSelfAttention1d" - ] - }, - { - "cell_type": "markdown", - "id": "00d0411e", - "metadata": {}, - "source": [ - "## MultiHeadSelfAttention2d" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "045bb526", - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.rand(5, 128, 64, 64)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "2bc21642", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MultiHeadSelfAttention2d(\n", - " (query_conv): ConvNormActivation2d(\n", - " (0): Conv2d(128, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)\n", - " )\n", - " (key_conv): ConvNormActivation2d(\n", - " (0): Conv2d(128, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)\n", - " )\n", - " (value_conv): ConvNormActivation2d(\n", - " (0): Conv2d(128, 512, kernel_size=(3, 3), stride=(1, 1), padding=same)\n", - " )\n", - " (projection): ConvNormActivation2d(\n", - " (0): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n", - " )\n", - " (softmax): Softmax(dim=-1)\n", - ")" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = MultiHeadSelfAttention2d(128, n_heads=4, kernel_size=3)\n", - "model" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "765904df", - "metadata": {}, - "outputs": [], - "source": [ - "out = model(x)" - ] - }, - { - "cell_type": "markdown", - "id": "416819cc", - "metadata": {}, - "source": [ - "## MultiHeadSelfAttention1d" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "157ab4ad", - "metadata": {}, - "outputs": [], - "source": [ - "x = torch.rand(5, 128, 64)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "bb1fd249", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "MultiHeadSelfAttention1d(\n", - " (query_conv): ConvNormActivation1d(\n", - " (0): Conv1d(128, 512, kernel_size=(3,), stride=(1,), padding=same)\n", - " )\n", - " (key_conv): ConvNormActivation1d(\n", - " (0): Conv1d(128, 512, kernel_size=(3,), stride=(1,), padding=same)\n", - " )\n", - " (value_conv): ConvNormActivation1d(\n", - " (0): Conv1d(128, 512, kernel_size=(3,), stride=(1,), padding=same)\n", - " )\n", - " (projection): ConvNormActivation1d(\n", - " (0): Conv1d(128, 128, kernel_size=(1,), stride=(1,), bias=False)\n", - " )\n", - " (softmax): Softmax(dim=-1)\n", - ")" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model = MultiHeadSelfAttention1d(128, n_heads=4, kernel_size=3)\n", - "model" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "2936271e", - "metadata": {}, - "outputs": [], - "source": [ - "out = model(x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af5be938", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": 
".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/Graph Attention.ipynb b/examples/Graph Attention.ipynb deleted file mode 100644 index ae80bf0..0000000 --- a/examples/Graph Attention.ipynb +++ /dev/null @@ -1,1712 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "from torchsummary import summary\n", - "\n", - "from moduleZoo.graphs import MultiHeadSelfGraphAttentionLinear" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "class TargetModel(nn.Module):\n", - " def __init__(self, db = False) -> None:\n", - " super().__init__()\n", - " self.module = MultiHeadSelfGraphAttentionLinear(128, 256, n_heads=2, residual=True, dynamic_batching=db)\n", - " self.n_nodes = [1]*(256//2) + [2]*(256//4)\n", - "\n", - " def enable_dynamic_batching(self) -> None:\n", - " self.module.db = True\n", - "\n", - " def forward(self, x:torch.Tensor) -> torch.Tensor:\n", - " # print(f'{x.shape = }')\n", - " x = torch.cat(x.split(1, dim=0), dim=1).squeeze(dim=0)\n", - " return torch.cat(self.module(x, self.n_nodes*2).unsqueeze(dim=0).split(x.shape[0]//2, dim=1), dim=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "module1 = TargetModel(False)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "module1 = module1.to('cuda')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "data = torch.rand((2, 256, 128), device='cuda')\n", - "n_nodes = [1]*(256//2) + [2]*(256//4)\n", - "n_nodes = np.array(n_nodes*2)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "result1 = module1(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# torch.all((result1.detach().cpu() - result2.detach().cpu()).abs() < 1e-6)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "===============================================================================================\n", - "Layer (type:depth-idx) Output Shape Param #\n", - "===============================================================================================\n", - "├─MultiHeadSelfGraphAttentionLinear: 1-1 [-1, 2, 256] --\n", - "| └─Linear: 2-1 [-1, 512] 66,048\n", - "| └─Linear: 2-2 [-1, 512] 66,048\n", - "| └─Linear: 2-3 [-1, 512] 66,048\n", - "| └─Softmax: 2-4 [-1, 256, 256] --\n", - "| └─Linear: 2-5 [-1, 1, 256] 32,768\n", - "| └─Softmax: 2-6 [-1, 256, 256] --\n", - "| └─Linear: 2-7 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-8 [-1, 256, 256] --\n", - "| └─Linear: 2-9 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-10 [-1, 256, 256] --\n", - "| └─Linear: 2-11 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-12 [-1, 256, 256] --\n", - "| └─Linear: 2-13 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-14 [-1, 256, 256] --\n", - "| └─Linear: 2-15 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-16 [-1, 256, 256] --\n", - "| └─Linear: 2-17 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-18 [-1, 256, 
256] --\n", - "| └─Linear: 2-19 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-20 [-1, 256, 256] --\n", - "| └─Linear: 2-21 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-22 [-1, 256, 256] --\n", - "| └─Linear: 2-23 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-24 [-1, 256, 256] --\n", - "| └─Linear: 2-25 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-26 [-1, 256, 256] --\n", - "| └─Linear: 2-27 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-28 [-1, 256, 256] --\n", - "| └─Linear: 2-29 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-30 [-1, 256, 256] --\n", - "| └─Linear: 2-31 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-32 [-1, 256, 256] --\n", - "| └─Linear: 2-33 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-34 [-1, 256, 256] --\n", - "| └─Linear: 2-35 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-36 [-1, 256, 256] --\n", - "| └─Linear: 2-37 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-38 [-1, 256, 256] --\n", - "| └─Linear: 2-39 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-40 [-1, 256, 256] --\n", - "| └─Linear: 2-41 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-42 [-1, 256, 256] --\n", - "| └─Linear: 2-43 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-44 [-1, 256, 256] --\n", - "| └─Linear: 2-45 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-46 [-1, 256, 256] --\n", - "| └─Linear: 2-47 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-48 [-1, 256, 256] --\n", - "| └─Linear: 2-49 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-50 [-1, 256, 256] --\n", - "| └─Linear: 2-51 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-52 [-1, 256, 256] --\n", - "| └─Linear: 2-53 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-54 [-1, 256, 256] --\n", - "| └─Linear: 2-55 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-56 [-1, 256, 256] --\n", - "| └─Linear: 2-57 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-58 [-1, 256, 256] --\n", - "| └─Linear: 2-59 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-60 [-1, 256, 256] --\n", - "| └─Linear: 2-61 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-62 [-1, 256, 256] --\n", - "| └─Linear: 2-63 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-64 [-1, 256, 256] --\n", - "| └─Linear: 2-65 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-66 [-1, 256, 256] --\n", - "| └─Linear: 2-67 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-68 [-1, 256, 256] --\n", - "| └─Linear: 2-69 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-70 [-1, 256, 256] --\n", - "| └─Linear: 2-71 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-72 [-1, 256, 256] --\n", - "| └─Linear: 2-73 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-74 [-1, 256, 256] --\n", - "| └─Linear: 2-75 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-76 [-1, 256, 256] --\n", - "| └─Linear: 2-77 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-78 [-1, 256, 256] --\n", - "| └─Linear: 2-79 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-80 [-1, 256, 256] --\n", - "| └─Linear: 2-81 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-82 [-1, 256, 256] --\n", - "| └─Linear: 2-83 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-84 [-1, 256, 256] --\n", - "| └─Linear: 2-85 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-86 [-1, 256, 256] --\n", - "| └─Linear: 2-87 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-88 [-1, 256, 256] --\n", - "| └─Linear: 2-89 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-90 [-1, 256, 256] --\n", - "| └─Linear: 2-91 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-92 [-1, 256, 256] --\n", - "| └─Linear: 2-93 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-94 [-1, 256, 256] --\n", - "| └─Linear: 2-95 [-1, 1, 256] 
(recursive)\n", - "| └─Softmax: 2-96 [-1, 256, 256] --\n", - "| └─Linear: 2-97 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-98 [-1, 256, 256] --\n", - "| └─Linear: 2-99 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-100 [-1, 256, 256] --\n", - "| └─Linear: 2-101 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-102 [-1, 256, 256] --\n", - "| └─Linear: 2-103 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-104 [-1, 256, 256] --\n", - "| └─Linear: 2-105 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-106 [-1, 256, 256] --\n", - "| └─Linear: 2-107 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-108 [-1, 256, 256] --\n", - "| └─Linear: 2-109 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-110 [-1, 256, 256] --\n", - "| └─Linear: 2-111 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-112 [-1, 256, 256] --\n", - "| └─Linear: 2-113 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-114 [-1, 256, 256] --\n", - "| └─Linear: 2-115 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-116 [-1, 256, 256] --\n", - "| └─Linear: 2-117 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-118 [-1, 256, 256] --\n", - "| └─Linear: 2-119 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-120 [-1, 256, 256] --\n", - "| └─Linear: 2-121 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-122 [-1, 256, 256] --\n", - "| └─Linear: 2-123 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-124 [-1, 256, 256] --\n", - "| └─Linear: 2-125 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-126 [-1, 256, 256] --\n", - "| └─Linear: 2-127 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-128 [-1, 256, 256] --\n", - "| └─Linear: 2-129 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-130 [-1, 256, 256] --\n", - "| └─Linear: 2-131 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-132 [-1, 256, 256] --\n", - "| └─Linear: 2-133 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-134 [-1, 256, 256] --\n", - "| └─Linear: 2-135 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-136 [-1, 256, 256] --\n", - "| └─Linear: 2-137 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-138 [-1, 256, 256] --\n", - "| └─Linear: 2-139 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-140 [-1, 256, 256] --\n", - "| └─Linear: 2-141 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-142 [-1, 256, 256] --\n", - "| └─Linear: 2-143 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-144 [-1, 256, 256] --\n", - "| └─Linear: 2-145 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-146 [-1, 256, 256] --\n", - "| └─Linear: 2-147 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-148 [-1, 256, 256] --\n", - "| └─Linear: 2-149 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-150 [-1, 256, 256] --\n", - "| └─Linear: 2-151 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-152 [-1, 256, 256] --\n", - "| └─Linear: 2-153 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-154 [-1, 256, 256] --\n", - "| └─Linear: 2-155 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-156 [-1, 256, 256] --\n", - "| └─Linear: 2-157 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-158 [-1, 256, 256] --\n", - "| └─Linear: 2-159 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-160 [-1, 256, 256] --\n", - "| └─Linear: 2-161 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-162 [-1, 256, 256] --\n", - "| └─Linear: 2-163 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-164 [-1, 256, 256] --\n", - "| └─Linear: 2-165 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-166 [-1, 256, 256] --\n", - "| └─Linear: 2-167 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-168 [-1, 256, 256] --\n", - "| └─Linear: 2-169 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-170 [-1, 256, 256] --\n", - "| └─Linear: 2-171 
[-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-172 [-1, 256, 256] --\n", - "| └─Linear: 2-173 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-174 [-1, 256, 256] --\n", - "| └─Linear: 2-175 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-176 [-1, 256, 256] --\n", - "| └─Linear: 2-177 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-178 [-1, 256, 256] --\n", - "| └─Linear: 2-179 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-180 [-1, 256, 256] --\n", - "| └─Linear: 2-181 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-182 [-1, 256, 256] --\n", - "| └─Linear: 2-183 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-184 [-1, 256, 256] --\n", - "| └─Linear: 2-185 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-186 [-1, 256, 256] --\n", - "| └─Linear: 2-187 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-188 [-1, 256, 256] --\n", - "| └─Linear: 2-189 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-190 [-1, 256, 256] --\n", - "| └─Linear: 2-191 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-192 [-1, 256, 256] --\n", - "| └─Linear: 2-193 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-194 [-1, 256, 256] --\n", - "| └─Linear: 2-195 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-196 [-1, 256, 256] --\n", - "| └─Linear: 2-197 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-198 [-1, 256, 256] --\n", - "| └─Linear: 2-199 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-200 [-1, 256, 256] --\n", - "| └─Linear: 2-201 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-202 [-1, 256, 256] --\n", - "| └─Linear: 2-203 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-204 [-1, 256, 256] --\n", - "| └─Linear: 2-205 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-206 [-1, 256, 256] --\n", - "| └─Linear: 2-207 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-208 [-1, 256, 256] --\n", - "| └─Linear: 2-209 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-210 [-1, 256, 256] --\n", - "| └─Linear: 2-211 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-212 [-1, 256, 256] --\n", - "| └─Linear: 2-213 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-214 [-1, 256, 256] --\n", - "| └─Linear: 2-215 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-216 [-1, 256, 256] --\n", - "| └─Linear: 2-217 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-218 [-1, 256, 256] --\n", - "| └─Linear: 2-219 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-220 [-1, 256, 256] --\n", - "| └─Linear: 2-221 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-222 [-1, 256, 256] --\n", - "| └─Linear: 2-223 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-224 [-1, 256, 256] --\n", - "| └─Linear: 2-225 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-226 [-1, 256, 256] --\n", - "| └─Linear: 2-227 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-228 [-1, 256, 256] --\n", - "| └─Linear: 2-229 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-230 [-1, 256, 256] --\n", - "| └─Linear: 2-231 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-232 [-1, 256, 256] --\n", - "| └─Linear: 2-233 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-234 [-1, 256, 256] --\n", - "| └─Linear: 2-235 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-236 [-1, 256, 256] --\n", - "| └─Linear: 2-237 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-238 [-1, 256, 256] --\n", - "| └─Linear: 2-239 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-240 [-1, 256, 256] --\n", - "| └─Linear: 2-241 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-242 [-1, 256, 256] --\n", - "| └─Linear: 2-243 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-244 [-1, 256, 256] --\n", - "| └─Linear: 2-245 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-246 [-1, 256, 256] --\n", - 
"| └─Linear: 2-247 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-248 [-1, 256, 256] --\n", - "| └─Linear: 2-249 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-250 [-1, 256, 256] --\n", - "| └─Linear: 2-251 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-252 [-1, 256, 256] --\n", - "| └─Linear: 2-253 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-254 [-1, 256, 256] --\n", - "| └─Linear: 2-255 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-256 [-1, 256, 256] --\n", - "| └─Linear: 2-257 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-258 [-1, 256, 256] --\n", - "| └─Linear: 2-259 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-260 [-1, 256, 256] --\n", - "| └─Linear: 2-261 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-262 [-1, 256, 256] --\n", - "| └─Linear: 2-263 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-264 [-1, 256, 256] --\n", - "| └─Linear: 2-265 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-266 [-1, 256, 256] --\n", - "| └─Linear: 2-267 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-268 [-1, 256, 256] --\n", - "| └─Linear: 2-269 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-270 [-1, 256, 256] --\n", - "| └─Linear: 2-271 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-272 [-1, 256, 256] --\n", - "| └─Linear: 2-273 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-274 [-1, 256, 256] --\n", - "| └─Linear: 2-275 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-276 [-1, 256, 256] --\n", - "| └─Linear: 2-277 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-278 [-1, 256, 256] --\n", - "| └─Linear: 2-279 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-280 [-1, 256, 256] --\n", - "| └─Linear: 2-281 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-282 [-1, 256, 256] --\n", - "| └─Linear: 2-283 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-284 [-1, 256, 256] --\n", - "| └─Linear: 2-285 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-286 [-1, 256, 256] --\n", - "| └─Linear: 2-287 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-288 [-1, 256, 256] --\n", - "| └─Linear: 2-289 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-290 [-1, 256, 256] --\n", - "| └─Linear: 2-291 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-292 [-1, 256, 256] --\n", - "| └─Linear: 2-293 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-294 [-1, 256, 256] --\n", - "| └─Linear: 2-295 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-296 [-1, 256, 256] --\n", - "| └─Linear: 2-297 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-298 [-1, 256, 256] --\n", - "| └─Linear: 2-299 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-300 [-1, 256, 256] --\n", - "| └─Linear: 2-301 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-302 [-1, 256, 256] --\n", - "| └─Linear: 2-303 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-304 [-1, 256, 256] --\n", - "| └─Linear: 2-305 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-306 [-1, 256, 256] --\n", - "| └─Linear: 2-307 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-308 [-1, 256, 256] --\n", - "| └─Linear: 2-309 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-310 [-1, 256, 256] --\n", - "| └─Linear: 2-311 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-312 [-1, 256, 256] --\n", - "| └─Linear: 2-313 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-314 [-1, 256, 256] --\n", - "| └─Linear: 2-315 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-316 [-1, 256, 256] --\n", - "| └─Linear: 2-317 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-318 [-1, 256, 256] --\n", - "| └─Linear: 2-319 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-320 [-1, 256, 256] --\n", - "| └─Linear: 2-321 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-322 [-1, 
256, 256] --\n", - "| └─Linear: 2-323 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-324 [-1, 256, 256] --\n", - "| └─Linear: 2-325 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-326 [-1, 256, 256] --\n", - "| └─Linear: 2-327 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-328 [-1, 256, 256] --\n", - "| └─Linear: 2-329 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-330 [-1, 256, 256] --\n", - "| └─Linear: 2-331 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-332 [-1, 256, 256] --\n", - "| └─Linear: 2-333 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-334 [-1, 256, 256] --\n", - "| └─Linear: 2-335 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-336 [-1, 256, 256] --\n", - "| └─Linear: 2-337 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-338 [-1, 256, 256] --\n", - "| └─Linear: 2-339 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-340 [-1, 256, 256] --\n", - "| └─Linear: 2-341 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-342 [-1, 256, 256] --\n", - "| └─Linear: 2-343 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-344 [-1, 256, 256] --\n", - "| └─Linear: 2-345 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-346 [-1, 256, 256] --\n", - "| └─Linear: 2-347 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-348 [-1, 256, 256] --\n", - "| └─Linear: 2-349 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-350 [-1, 256, 256] --\n", - "| └─Linear: 2-351 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-352 [-1, 256, 256] --\n", - "| └─Linear: 2-353 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-354 [-1, 256, 256] --\n", - "| └─Linear: 2-355 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-356 [-1, 256, 256] --\n", - "| └─Linear: 2-357 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-358 [-1, 256, 256] --\n", - "| └─Linear: 2-359 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-360 [-1, 256, 256] --\n", - "| └─Linear: 2-361 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-362 [-1, 256, 256] --\n", - "| └─Linear: 2-363 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-364 [-1, 256, 256] --\n", - "| └─Linear: 2-365 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-366 [-1, 256, 256] --\n", - "| └─Linear: 2-367 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-368 [-1, 256, 256] --\n", - "| └─Linear: 2-369 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-370 [-1, 256, 256] --\n", - "| └─Linear: 2-371 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-372 [-1, 256, 256] --\n", - "| └─Linear: 2-373 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-374 [-1, 256, 256] --\n", - "| └─Linear: 2-375 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-376 [-1, 256, 256] --\n", - "| └─Linear: 2-377 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-378 [-1, 256, 256] --\n", - "| └─Linear: 2-379 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-380 [-1, 256, 256] --\n", - "| └─Linear: 2-381 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-382 [-1, 256, 256] --\n", - "| └─Linear: 2-383 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-384 [-1, 256, 256] --\n", - "| └─Linear: 2-385 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-386 [-1, 256, 256] --\n", - "| └─Linear: 2-387 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-388 [-1, 256, 256] --\n", - "| └─Linear: 2-389 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-390 [-1, 256, 256] --\n", - "| └─Linear: 2-391 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-392 [-1, 256, 256] --\n", - "| └─Linear: 2-393 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-394 [-1, 256, 256] --\n", - "| └─Linear: 2-395 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-396 [-1, 256, 256] --\n", - "| └─Linear: 2-397 [-1, 1, 256] (recursive)\n", - "| 
└─Softmax: 2-398 [-1, 256, 256] --\n", - "| └─Linear: 2-399 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-400 [-1, 256, 256] --\n", - "| └─Linear: 2-401 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-402 [-1, 256, 256] --\n", - "| └─Linear: 2-403 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-404 [-1, 256, 256] --\n", - "| └─Linear: 2-405 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-406 [-1, 256, 256] --\n", - "| └─Linear: 2-407 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-408 [-1, 256, 256] --\n", - "| └─Linear: 2-409 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-410 [-1, 256, 256] --\n", - "| └─Linear: 2-411 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-412 [-1, 256, 256] --\n", - "| └─Linear: 2-413 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-414 [-1, 256, 256] --\n", - "| └─Linear: 2-415 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-416 [-1, 256, 256] --\n", - "| └─Linear: 2-417 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-418 [-1, 256, 256] --\n", - "| └─Linear: 2-419 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-420 [-1, 256, 256] --\n", - "| └─Linear: 2-421 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-422 [-1, 256, 256] --\n", - "| └─Linear: 2-423 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-424 [-1, 256, 256] --\n", - "| └─Linear: 2-425 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-426 [-1, 256, 256] --\n", - "| └─Linear: 2-427 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-428 [-1, 256, 256] --\n", - "| └─Linear: 2-429 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-430 [-1, 256, 256] --\n", - "| └─Linear: 2-431 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-432 [-1, 256, 256] --\n", - "| └─Linear: 2-433 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-434 [-1, 256, 256] --\n", - "| └─Linear: 2-435 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-436 [-1, 256, 256] --\n", - "| └─Linear: 2-437 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-438 [-1, 256, 256] --\n", - "| └─Linear: 2-439 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-440 [-1, 256, 256] --\n", - "| └─Linear: 2-441 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-442 [-1, 256, 256] --\n", - "| └─Linear: 2-443 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-444 [-1, 256, 256] --\n", - "| └─Linear: 2-445 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-446 [-1, 256, 256] --\n", - "| └─Linear: 2-447 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-448 [-1, 256, 256] --\n", - "| └─Linear: 2-449 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-450 [-1, 256, 256] --\n", - "| └─Linear: 2-451 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-452 [-1, 256, 256] --\n", - "| └─Linear: 2-453 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-454 [-1, 256, 256] --\n", - "| └─Linear: 2-455 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-456 [-1, 256, 256] --\n", - "| └─Linear: 2-457 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-458 [-1, 256, 256] --\n", - "| └─Linear: 2-459 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-460 [-1, 256, 256] --\n", - "| └─Linear: 2-461 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-462 [-1, 256, 256] --\n", - "| └─Linear: 2-463 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-464 [-1, 256, 256] --\n", - "| └─Linear: 2-465 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-466 [-1, 256, 256] --\n", - "| └─Linear: 2-467 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-468 [-1, 256, 256] --\n", - "| └─Linear: 2-469 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-470 [-1, 256, 256] --\n", - "| └─Linear: 2-471 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-472 [-1, 256, 256] --\n", - "| └─Linear: 2-473 [-1, 1, 256] 
(recursive)\n", - "| └─Softmax: 2-474 [-1, 256, 256] --\n", - "| └─Linear: 2-475 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-476 [-1, 256, 256] --\n", - "| └─Linear: 2-477 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-478 [-1, 256, 256] --\n", - "| └─Linear: 2-479 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-480 [-1, 256, 256] --\n", - "| └─Linear: 2-481 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-482 [-1, 256, 256] --\n", - "| └─Linear: 2-483 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-484 [-1, 256, 256] --\n", - "| └─Linear: 2-485 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-486 [-1, 256, 256] --\n", - "| └─Linear: 2-487 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-488 [-1, 256, 256] --\n", - "| └─Linear: 2-489 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-490 [-1, 256, 256] --\n", - "| └─Linear: 2-491 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-492 [-1, 256, 256] --\n", - "| └─Linear: 2-493 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-494 [-1, 256, 256] --\n", - "| └─Linear: 2-495 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-496 [-1, 256, 256] --\n", - "| └─Linear: 2-497 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-498 [-1, 256, 256] --\n", - "| └─Linear: 2-499 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-500 [-1, 256, 256] --\n", - "| └─Linear: 2-501 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-502 [-1, 256, 256] --\n", - "| └─Linear: 2-503 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-504 [-1, 256, 256] --\n", - "| └─Linear: 2-505 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-506 [-1, 256, 256] --\n", - "| └─Linear: 2-507 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-508 [-1, 256, 256] --\n", - "| └─Linear: 2-509 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-510 [-1, 256, 256] --\n", - "| └─Linear: 2-511 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-512 [-1, 256, 256] --\n", - "| └─Linear: 2-513 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-514 [-1, 256, 256] --\n", - "| └─Linear: 2-515 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-516 [-1, 256, 256] --\n", - "| └─Linear: 2-517 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-518 [-1, 256, 256] --\n", - "| └─Linear: 2-519 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-520 [-1, 256, 256] --\n", - "| └─Linear: 2-521 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-522 [-1, 256, 256] --\n", - "| └─Linear: 2-523 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-524 [-1, 256, 256] --\n", - "| └─Linear: 2-525 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-526 [-1, 256, 256] --\n", - "| └─Linear: 2-527 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-528 [-1, 256, 256] --\n", - "| └─Linear: 2-529 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-530 [-1, 256, 256] --\n", - "| └─Linear: 2-531 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-532 [-1, 256, 256] --\n", - "| └─Linear: 2-533 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-534 [-1, 256, 256] --\n", - "| └─Linear: 2-535 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-536 [-1, 256, 256] --\n", - "| └─Linear: 2-537 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-538 [-1, 256, 256] --\n", - "| └─Linear: 2-539 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-540 [-1, 256, 256] --\n", - "| └─Linear: 2-541 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-542 [-1, 256, 256] --\n", - "| └─Linear: 2-543 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-544 [-1, 256, 256] --\n", - "| └─Linear: 2-545 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-546 [-1, 256, 256] --\n", - "| └─Linear: 2-547 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-548 [-1, 256, 256] --\n", - "| └─Linear: 
2-549 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-550 [-1, 256, 256] --\n", - "| └─Linear: 2-551 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-552 [-1, 256, 256] --\n", - "| └─Linear: 2-553 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-554 [-1, 256, 256] --\n", - "| └─Linear: 2-555 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-556 [-1, 256, 256] --\n", - "| └─Linear: 2-557 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-558 [-1, 256, 256] --\n", - "| └─Linear: 2-559 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-560 [-1, 256, 256] --\n", - "| └─Linear: 2-561 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-562 [-1, 256, 256] --\n", - "| └─Linear: 2-563 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-564 [-1, 256, 256] --\n", - "| └─Linear: 2-565 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-566 [-1, 256, 256] --\n", - "| └─Linear: 2-567 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-568 [-1, 256, 256] --\n", - "| └─Linear: 2-569 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-570 [-1, 256, 256] --\n", - "| └─Linear: 2-571 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-572 [-1, 256, 256] --\n", - "| └─Linear: 2-573 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-574 [-1, 256, 256] --\n", - "| └─Linear: 2-575 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-576 [-1, 256, 256] --\n", - "| └─Linear: 2-577 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-578 [-1, 256, 256] --\n", - "| └─Linear: 2-579 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-580 [-1, 256, 256] --\n", - "| └─Linear: 2-581 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-582 [-1, 256, 256] --\n", - "| └─Linear: 2-583 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-584 [-1, 256, 256] --\n", - "| └─Linear: 2-585 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-586 [-1, 256, 256] --\n", - "| └─Linear: 2-587 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-588 [-1, 256, 256] --\n", - "| └─Linear: 2-589 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-590 [-1, 256, 256] --\n", - "| └─Linear: 2-591 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-592 [-1, 256, 256] --\n", - "| └─Linear: 2-593 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-594 [-1, 256, 256] --\n", - "| └─Linear: 2-595 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-596 [-1, 256, 256] --\n", - "| └─Linear: 2-597 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-598 [-1, 256, 256] --\n", - "| └─Linear: 2-599 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-600 [-1, 256, 256] --\n", - "| └─Linear: 2-601 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-602 [-1, 256, 256] --\n", - "| └─Linear: 2-603 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-604 [-1, 256, 256] --\n", - "| └─Linear: 2-605 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-606 [-1, 256, 256] --\n", - "| └─Linear: 2-607 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-608 [-1, 256, 256] --\n", - "| └─Linear: 2-609 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-610 [-1, 256, 256] --\n", - "| └─Linear: 2-611 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-612 [-1, 256, 256] --\n", - "| └─Linear: 2-613 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-614 [-1, 256, 256] --\n", - "| └─Linear: 2-615 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-616 [-1, 256, 256] --\n", - "| └─Linear: 2-617 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-618 [-1, 256, 256] --\n", - "| └─Linear: 2-619 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-620 [-1, 256, 256] --\n", - "| └─Linear: 2-621 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-622 [-1, 256, 256] --\n", - "| └─Linear: 2-623 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-624 [-1, 256, 256] 
--\n", - "| └─Linear: 2-625 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-626 [-1, 256, 256] --\n", - "| └─Linear: 2-627 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-628 [-1, 256, 256] --\n", - "| └─Linear: 2-629 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-630 [-1, 256, 256] --\n", - "| └─Linear: 2-631 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-632 [-1, 256, 256] --\n", - "| └─Linear: 2-633 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-634 [-1, 256, 256] --\n", - "| └─Linear: 2-635 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-636 [-1, 256, 256] --\n", - "| └─Linear: 2-637 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-638 [-1, 256, 256] --\n", - "| └─Linear: 2-639 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-640 [-1, 256, 256] --\n", - "| └─Linear: 2-641 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-642 [-1, 256, 256] --\n", - "| └─Linear: 2-643 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-644 [-1, 256, 256] --\n", - "| └─Linear: 2-645 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-646 [-1, 256, 256] --\n", - "| └─Linear: 2-647 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-648 [-1, 256, 256] --\n", - "| └─Linear: 2-649 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-650 [-1, 256, 256] --\n", - "| └─Linear: 2-651 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-652 [-1, 256, 256] --\n", - "| └─Linear: 2-653 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-654 [-1, 256, 256] --\n", - "| └─Linear: 2-655 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-656 [-1, 256, 256] --\n", - "| └─Linear: 2-657 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-658 [-1, 256, 256] --\n", - "| └─Linear: 2-659 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-660 [-1, 256, 256] --\n", - "| └─Linear: 2-661 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-662 [-1, 256, 256] --\n", - "| └─Linear: 2-663 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-664 [-1, 256, 256] --\n", - "| └─Linear: 2-665 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-666 [-1, 256, 256] --\n", - "| └─Linear: 2-667 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-668 [-1, 256, 256] --\n", - "| └─Linear: 2-669 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-670 [-1, 256, 256] --\n", - "| └─Linear: 2-671 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-672 [-1, 256, 256] --\n", - "| └─Linear: 2-673 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-674 [-1, 256, 256] --\n", - "| └─Linear: 2-675 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-676 [-1, 256, 256] --\n", - "| └─Linear: 2-677 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-678 [-1, 256, 256] --\n", - "| └─Linear: 2-679 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-680 [-1, 256, 256] --\n", - "| └─Linear: 2-681 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-682 [-1, 256, 256] --\n", - "| └─Linear: 2-683 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-684 [-1, 256, 256] --\n", - "| └─Linear: 2-685 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-686 [-1, 256, 256] --\n", - "| └─Linear: 2-687 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-688 [-1, 256, 256] --\n", - "| └─Linear: 2-689 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-690 [-1, 256, 256] --\n", - "| └─Linear: 2-691 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-692 [-1, 256, 256] --\n", - "| └─Linear: 2-693 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-694 [-1, 256, 256] --\n", - "| └─Linear: 2-695 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-696 [-1, 256, 256] --\n", - "| └─Linear: 2-697 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-698 [-1, 256, 256] --\n", - "| └─Linear: 2-699 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 
2-700 [-1, 256, 256] --\n", - "| └─Linear: 2-701 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-702 [-1, 256, 256] --\n", - "| └─Linear: 2-703 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-704 [-1, 256, 256] --\n", - "| └─Linear: 2-705 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-706 [-1, 256, 256] --\n", - "| └─Linear: 2-707 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-708 [-1, 256, 256] --\n", - "| └─Linear: 2-709 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-710 [-1, 256, 256] --\n", - "| └─Linear: 2-711 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-712 [-1, 256, 256] --\n", - "| └─Linear: 2-713 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-714 [-1, 256, 256] --\n", - "| └─Linear: 2-715 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-716 [-1, 256, 256] --\n", - "| └─Linear: 2-717 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-718 [-1, 256, 256] --\n", - "| └─Linear: 2-719 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-720 [-1, 256, 256] --\n", - "| └─Linear: 2-721 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-722 [-1, 256, 256] --\n", - "| └─Linear: 2-723 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-724 [-1, 256, 256] --\n", - "| └─Linear: 2-725 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-726 [-1, 256, 256] --\n", - "| └─Linear: 2-727 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-728 [-1, 256, 256] --\n", - "| └─Linear: 2-729 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-730 [-1, 256, 256] --\n", - "| └─Linear: 2-731 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-732 [-1, 256, 256] --\n", - "| └─Linear: 2-733 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-734 [-1, 256, 256] --\n", - "| └─Linear: 2-735 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-736 [-1, 256, 256] --\n", - "| └─Linear: 2-737 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-738 [-1, 256, 256] --\n", - "| └─Linear: 2-739 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-740 [-1, 256, 256] --\n", - "| └─Linear: 2-741 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-742 [-1, 256, 256] --\n", - "| └─Linear: 2-743 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-744 [-1, 256, 256] --\n", - "| └─Linear: 2-745 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-746 [-1, 256, 256] --\n", - "| └─Linear: 2-747 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-748 [-1, 256, 256] --\n", - "| └─Linear: 2-749 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-750 [-1, 256, 256] --\n", - "| └─Linear: 2-751 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-752 [-1, 256, 256] --\n", - "| └─Linear: 2-753 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-754 [-1, 256, 256] --\n", - "| └─Linear: 2-755 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-756 [-1, 256, 256] --\n", - "| └─Linear: 2-757 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-758 [-1, 256, 256] --\n", - "| └─Linear: 2-759 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-760 [-1, 256, 256] --\n", - "| └─Linear: 2-761 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-762 [-1, 256, 256] --\n", - "| └─Linear: 2-763 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-764 [-1, 256, 256] --\n", - "| └─Linear: 2-765 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-766 [-1, 256, 256] --\n", - "| └─Linear: 2-767 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-768 [-1, 256, 256] --\n", - "| └─Linear: 2-769 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-770 [-1, 256, 256] --\n", - "| └─Linear: 2-771 [-1, 2, 256] (recursive)\n", - "===============================================================================================\n", - "Total params: 230,912\n", - "Trainable params: 230,912\n", - "Non-trainable 
params: 0\n", - "Total mult-adds (M): 13.01\n", - "===============================================================================================\n", - "Input size (MB): 0.12\n", - "Forward/backward pass size (MB): 0.01\n", - "Params size (MB): 0.88\n", - "Estimated Total Size (MB): 1.02\n", - "===============================================================================================\n" - ] - }, - { - "data": { - "text/plain": [ - "===============================================================================================\n", - "Layer (type:depth-idx) Output Shape Param #\n", - "===============================================================================================\n", - "├─MultiHeadSelfGraphAttentionLinear: 1-1 [-1, 2, 256] --\n", - "| └─Linear: 2-1 [-1, 512] 66,048\n", - "| └─Linear: 2-2 [-1, 512] 66,048\n", - "| └─Linear: 2-3 [-1, 512] 66,048\n", - "| └─Softmax: 2-4 [-1, 256, 256] --\n", - "| └─Linear: 2-5 [-1, 1, 256] 32,768\n", - "| └─Softmax: 2-6 [-1, 256, 256] --\n", - "| └─Linear: 2-7 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-8 [-1, 256, 256] --\n", - "| └─Linear: 2-9 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-10 [-1, 256, 256] --\n", - "| └─Linear: 2-11 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-12 [-1, 256, 256] --\n", - "| └─Linear: 2-13 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-14 [-1, 256, 256] --\n", - "| └─Linear: 2-15 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-16 [-1, 256, 256] --\n", - "| └─Linear: 2-17 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-18 [-1, 256, 256] --\n", - "| └─Linear: 2-19 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-20 [-1, 256, 256] --\n", - "| └─Linear: 2-21 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-22 [-1, 256, 256] --\n", - "| └─Linear: 2-23 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-24 [-1, 256, 256] --\n", - "| └─Linear: 2-25 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-26 [-1, 256, 256] --\n", - "| └─Linear: 2-27 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-28 [-1, 256, 256] --\n", - "| └─Linear: 2-29 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-30 [-1, 256, 256] --\n", - "| └─Linear: 2-31 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-32 [-1, 256, 256] --\n", - "| └─Linear: 2-33 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-34 [-1, 256, 256] --\n", - "| └─Linear: 2-35 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-36 [-1, 256, 256] --\n", - "| └─Linear: 2-37 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-38 [-1, 256, 256] --\n", - "| └─Linear: 2-39 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-40 [-1, 256, 256] --\n", - "| └─Linear: 2-41 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-42 [-1, 256, 256] --\n", - "| └─Linear: 2-43 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-44 [-1, 256, 256] --\n", - "| └─Linear: 2-45 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-46 [-1, 256, 256] --\n", - "| └─Linear: 2-47 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-48 [-1, 256, 256] --\n", - "| └─Linear: 2-49 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-50 [-1, 256, 256] --\n", - "| └─Linear: 2-51 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-52 [-1, 256, 256] --\n", - "| └─Linear: 2-53 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-54 [-1, 256, 256] --\n", - "| └─Linear: 2-55 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-56 [-1, 256, 256] --\n", - "| └─Linear: 2-57 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-58 [-1, 256, 256] --\n", - "| └─Linear: 2-59 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-60 [-1, 256, 256] --\n", - "| └─Linear: 2-61 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 
2-62 [-1, 256, 256] --\n", - "| └─Linear: 2-63 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-64 [-1, 256, 256] --\n", - "| └─Linear: 2-65 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-66 [-1, 256, 256] --\n", - "| └─Linear: 2-67 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-68 [-1, 256, 256] --\n", - "| └─Linear: 2-69 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-70 [-1, 256, 256] --\n", - "| └─Linear: 2-71 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-72 [-1, 256, 256] --\n", - "| └─Linear: 2-73 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-74 [-1, 256, 256] --\n", - "| └─Linear: 2-75 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-76 [-1, 256, 256] --\n", - "| └─Linear: 2-77 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-78 [-1, 256, 256] --\n", - "| └─Linear: 2-79 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-80 [-1, 256, 256] --\n", - "| └─Linear: 2-81 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-82 [-1, 256, 256] --\n", - "| └─Linear: 2-83 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-84 [-1, 256, 256] --\n", - "| └─Linear: 2-85 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-86 [-1, 256, 256] --\n", - "| └─Linear: 2-87 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-88 [-1, 256, 256] --\n", - "| └─Linear: 2-89 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-90 [-1, 256, 256] --\n", - "| └─Linear: 2-91 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-92 [-1, 256, 256] --\n", - "| └─Linear: 2-93 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-94 [-1, 256, 256] --\n", - "| └─Linear: 2-95 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-96 [-1, 256, 256] --\n", - "| └─Linear: 2-97 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-98 [-1, 256, 256] --\n", - "| └─Linear: 2-99 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-100 [-1, 256, 256] --\n", - "| └─Linear: 2-101 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-102 [-1, 256, 256] --\n", - "| └─Linear: 2-103 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-104 [-1, 256, 256] --\n", - "| └─Linear: 2-105 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-106 [-1, 256, 256] --\n", - "| └─Linear: 2-107 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-108 [-1, 256, 256] --\n", - "| └─Linear: 2-109 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-110 [-1, 256, 256] --\n", - "| └─Linear: 2-111 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-112 [-1, 256, 256] --\n", - "| └─Linear: 2-113 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-114 [-1, 256, 256] --\n", - "| └─Linear: 2-115 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-116 [-1, 256, 256] --\n", - "| └─Linear: 2-117 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-118 [-1, 256, 256] --\n", - "| └─Linear: 2-119 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-120 [-1, 256, 256] --\n", - "| └─Linear: 2-121 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-122 [-1, 256, 256] --\n", - "| └─Linear: 2-123 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-124 [-1, 256, 256] --\n", - "| └─Linear: 2-125 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-126 [-1, 256, 256] --\n", - "| └─Linear: 2-127 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-128 [-1, 256, 256] --\n", - "| └─Linear: 2-129 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-130 [-1, 256, 256] --\n", - "| └─Linear: 2-131 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-132 [-1, 256, 256] --\n", - "| └─Linear: 2-133 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-134 [-1, 256, 256] --\n", - "| └─Linear: 2-135 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-136 [-1, 256, 256] --\n", - "| └─Linear: 2-137 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-138 [-1, 256, 256] 
--\n", - "| └─Linear: 2-139 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-140 [-1, 256, 256] --\n", - "| └─Linear: 2-141 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-142 [-1, 256, 256] --\n", - "| └─Linear: 2-143 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-144 [-1, 256, 256] --\n", - "| └─Linear: 2-145 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-146 [-1, 256, 256] --\n", - "| └─Linear: 2-147 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-148 [-1, 256, 256] --\n", - "| └─Linear: 2-149 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-150 [-1, 256, 256] --\n", - "| └─Linear: 2-151 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-152 [-1, 256, 256] --\n", - "| └─Linear: 2-153 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-154 [-1, 256, 256] --\n", - "| └─Linear: 2-155 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-156 [-1, 256, 256] --\n", - "| └─Linear: 2-157 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-158 [-1, 256, 256] --\n", - "| └─Linear: 2-159 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-160 [-1, 256, 256] --\n", - "| └─Linear: 2-161 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-162 [-1, 256, 256] --\n", - "| └─Linear: 2-163 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-164 [-1, 256, 256] --\n", - "| └─Linear: 2-165 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-166 [-1, 256, 256] --\n", - "| └─Linear: 2-167 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-168 [-1, 256, 256] --\n", - "| └─Linear: 2-169 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-170 [-1, 256, 256] --\n", - "| └─Linear: 2-171 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-172 [-1, 256, 256] --\n", - "| └─Linear: 2-173 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-174 [-1, 256, 256] --\n", - "| └─Linear: 2-175 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-176 [-1, 256, 256] --\n", - "| └─Linear: 2-177 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-178 [-1, 256, 256] --\n", - "| └─Linear: 2-179 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-180 [-1, 256, 256] --\n", - "| └─Linear: 2-181 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-182 [-1, 256, 256] --\n", - "| └─Linear: 2-183 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-184 [-1, 256, 256] --\n", - "| └─Linear: 2-185 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-186 [-1, 256, 256] --\n", - "| └─Linear: 2-187 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-188 [-1, 256, 256] --\n", - "| └─Linear: 2-189 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-190 [-1, 256, 256] --\n", - "| └─Linear: 2-191 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-192 [-1, 256, 256] --\n", - "| └─Linear: 2-193 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-194 [-1, 256, 256] --\n", - "| └─Linear: 2-195 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-196 [-1, 256, 256] --\n", - "| └─Linear: 2-197 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-198 [-1, 256, 256] --\n", - "| └─Linear: 2-199 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-200 [-1, 256, 256] --\n", - "| └─Linear: 2-201 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-202 [-1, 256, 256] --\n", - "| └─Linear: 2-203 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-204 [-1, 256, 256] --\n", - "| └─Linear: 2-205 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-206 [-1, 256, 256] --\n", - "| └─Linear: 2-207 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-208 [-1, 256, 256] --\n", - "| └─Linear: 2-209 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-210 [-1, 256, 256] --\n", - "| └─Linear: 2-211 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-212 [-1, 256, 256] --\n", - "| └─Linear: 2-213 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 
2-214 [-1, 256, 256] --\n", - "| └─Linear: 2-215 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-216 [-1, 256, 256] --\n", - "| └─Linear: 2-217 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-218 [-1, 256, 256] --\n", - "| └─Linear: 2-219 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-220 [-1, 256, 256] --\n", - "| └─Linear: 2-221 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-222 [-1, 256, 256] --\n", - "| └─Linear: 2-223 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-224 [-1, 256, 256] --\n", - "| └─Linear: 2-225 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-226 [-1, 256, 256] --\n", - "| └─Linear: 2-227 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-228 [-1, 256, 256] --\n", - "| └─Linear: 2-229 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-230 [-1, 256, 256] --\n", - "| └─Linear: 2-231 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-232 [-1, 256, 256] --\n", - "| └─Linear: 2-233 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-234 [-1, 256, 256] --\n", - "| └─Linear: 2-235 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-236 [-1, 256, 256] --\n", - "| └─Linear: 2-237 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-238 [-1, 256, 256] --\n", - "| └─Linear: 2-239 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-240 [-1, 256, 256] --\n", - "| └─Linear: 2-241 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-242 [-1, 256, 256] --\n", - "| └─Linear: 2-243 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-244 [-1, 256, 256] --\n", - "| └─Linear: 2-245 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-246 [-1, 256, 256] --\n", - "| └─Linear: 2-247 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-248 [-1, 256, 256] --\n", - "| └─Linear: 2-249 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-250 [-1, 256, 256] --\n", - "| └─Linear: 2-251 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-252 [-1, 256, 256] --\n", - "| └─Linear: 2-253 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-254 [-1, 256, 256] --\n", - "| └─Linear: 2-255 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-256 [-1, 256, 256] --\n", - "| └─Linear: 2-257 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-258 [-1, 256, 256] --\n", - "| └─Linear: 2-259 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-260 [-1, 256, 256] --\n", - "| └─Linear: 2-261 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-262 [-1, 256, 256] --\n", - "| └─Linear: 2-263 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-264 [-1, 256, 256] --\n", - "| └─Linear: 2-265 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-266 [-1, 256, 256] --\n", - "| └─Linear: 2-267 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-268 [-1, 256, 256] --\n", - "| └─Linear: 2-269 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-270 [-1, 256, 256] --\n", - "| └─Linear: 2-271 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-272 [-1, 256, 256] --\n", - "| └─Linear: 2-273 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-274 [-1, 256, 256] --\n", - "| └─Linear: 2-275 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-276 [-1, 256, 256] --\n", - "| └─Linear: 2-277 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-278 [-1, 256, 256] --\n", - "| └─Linear: 2-279 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-280 [-1, 256, 256] --\n", - "| └─Linear: 2-281 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-282 [-1, 256, 256] --\n", - "| └─Linear: 2-283 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-284 [-1, 256, 256] --\n", - "| └─Linear: 2-285 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-286 [-1, 256, 256] --\n", - "| └─Linear: 2-287 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-288 [-1, 256, 256] --\n", - "| └─Linear: 2-289 [-1, 2, 256] 
(recursive)\n", - "| └─Softmax: 2-290 [-1, 256, 256] --\n", - "| └─Linear: 2-291 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-292 [-1, 256, 256] --\n", - "| └─Linear: 2-293 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-294 [-1, 256, 256] --\n", - "| └─Linear: 2-295 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-296 [-1, 256, 256] --\n", - "| └─Linear: 2-297 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-298 [-1, 256, 256] --\n", - "| └─Linear: 2-299 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-300 [-1, 256, 256] --\n", - "| └─Linear: 2-301 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-302 [-1, 256, 256] --\n", - "| └─Linear: 2-303 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-304 [-1, 256, 256] --\n", - "| └─Linear: 2-305 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-306 [-1, 256, 256] --\n", - "| └─Linear: 2-307 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-308 [-1, 256, 256] --\n", - "| └─Linear: 2-309 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-310 [-1, 256, 256] --\n", - "| └─Linear: 2-311 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-312 [-1, 256, 256] --\n", - "| └─Linear: 2-313 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-314 [-1, 256, 256] --\n", - "| └─Linear: 2-315 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-316 [-1, 256, 256] --\n", - "| └─Linear: 2-317 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-318 [-1, 256, 256] --\n", - "| └─Linear: 2-319 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-320 [-1, 256, 256] --\n", - "| └─Linear: 2-321 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-322 [-1, 256, 256] --\n", - "| └─Linear: 2-323 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-324 [-1, 256, 256] --\n", - "| └─Linear: 2-325 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-326 [-1, 256, 256] --\n", - "| └─Linear: 2-327 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-328 [-1, 256, 256] --\n", - "| └─Linear: 2-329 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-330 [-1, 256, 256] --\n", - "| └─Linear: 2-331 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-332 [-1, 256, 256] --\n", - "| └─Linear: 2-333 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-334 [-1, 256, 256] --\n", - "| └─Linear: 2-335 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-336 [-1, 256, 256] --\n", - "| └─Linear: 2-337 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-338 [-1, 256, 256] --\n", - "| └─Linear: 2-339 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-340 [-1, 256, 256] --\n", - "| └─Linear: 2-341 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-342 [-1, 256, 256] --\n", - "| └─Linear: 2-343 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-344 [-1, 256, 256] --\n", - "| └─Linear: 2-345 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-346 [-1, 256, 256] --\n", - "| └─Linear: 2-347 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-348 [-1, 256, 256] --\n", - "| └─Linear: 2-349 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-350 [-1, 256, 256] --\n", - "| └─Linear: 2-351 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-352 [-1, 256, 256] --\n", - "| └─Linear: 2-353 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-354 [-1, 256, 256] --\n", - "| └─Linear: 2-355 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-356 [-1, 256, 256] --\n", - "| └─Linear: 2-357 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-358 [-1, 256, 256] --\n", - "| └─Linear: 2-359 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-360 [-1, 256, 256] --\n", - "| └─Linear: 2-361 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-362 [-1, 256, 256] --\n", - "| └─Linear: 2-363 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-364 [-1, 256, 256] --\n", - "| └─Linear: 
2-365 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-366 [-1, 256, 256] --\n", - "| └─Linear: 2-367 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-368 [-1, 256, 256] --\n", - "| └─Linear: 2-369 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-370 [-1, 256, 256] --\n", - "| └─Linear: 2-371 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-372 [-1, 256, 256] --\n", - "| └─Linear: 2-373 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-374 [-1, 256, 256] --\n", - "| └─Linear: 2-375 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-376 [-1, 256, 256] --\n", - "| └─Linear: 2-377 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-378 [-1, 256, 256] --\n", - "| └─Linear: 2-379 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-380 [-1, 256, 256] --\n", - "| └─Linear: 2-381 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-382 [-1, 256, 256] --\n", - "| └─Linear: 2-383 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-384 [-1, 256, 256] --\n", - "| └─Linear: 2-385 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-386 [-1, 256, 256] --\n", - "| └─Linear: 2-387 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-388 [-1, 256, 256] --\n", - "| └─Linear: 2-389 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-390 [-1, 256, 256] --\n", - "| └─Linear: 2-391 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-392 [-1, 256, 256] --\n", - "| └─Linear: 2-393 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-394 [-1, 256, 256] --\n", - "| └─Linear: 2-395 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-396 [-1, 256, 256] --\n", - "| └─Linear: 2-397 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-398 [-1, 256, 256] --\n", - "| └─Linear: 2-399 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-400 [-1, 256, 256] --\n", - "| └─Linear: 2-401 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-402 [-1, 256, 256] --\n", - "| └─Linear: 2-403 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-404 [-1, 256, 256] --\n", - "| └─Linear: 2-405 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-406 [-1, 256, 256] --\n", - "| └─Linear: 2-407 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-408 [-1, 256, 256] --\n", - "| └─Linear: 2-409 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-410 [-1, 256, 256] --\n", - "| └─Linear: 2-411 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-412 [-1, 256, 256] --\n", - "| └─Linear: 2-413 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-414 [-1, 256, 256] --\n", - "| └─Linear: 2-415 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-416 [-1, 256, 256] --\n", - "| └─Linear: 2-417 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-418 [-1, 256, 256] --\n", - "| └─Linear: 2-419 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-420 [-1, 256, 256] --\n", - "| └─Linear: 2-421 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-422 [-1, 256, 256] --\n", - "| └─Linear: 2-423 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-424 [-1, 256, 256] --\n", - "| └─Linear: 2-425 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-426 [-1, 256, 256] --\n", - "| └─Linear: 2-427 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-428 [-1, 256, 256] --\n", - "| └─Linear: 2-429 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-430 [-1, 256, 256] --\n", - "| └─Linear: 2-431 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-432 [-1, 256, 256] --\n", - "| └─Linear: 2-433 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-434 [-1, 256, 256] --\n", - "| └─Linear: 2-435 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-436 [-1, 256, 256] --\n", - "| └─Linear: 2-437 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-438 [-1, 256, 256] --\n", - "| └─Linear: 2-439 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-440 [-1, 256, 256] 
--\n", - "| └─Linear: 2-441 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-442 [-1, 256, 256] --\n", - "| └─Linear: 2-443 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-444 [-1, 256, 256] --\n", - "| └─Linear: 2-445 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-446 [-1, 256, 256] --\n", - "| └─Linear: 2-447 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-448 [-1, 256, 256] --\n", - "| └─Linear: 2-449 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-450 [-1, 256, 256] --\n", - "| └─Linear: 2-451 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-452 [-1, 256, 256] --\n", - "| └─Linear: 2-453 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-454 [-1, 256, 256] --\n", - "| └─Linear: 2-455 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-456 [-1, 256, 256] --\n", - "| └─Linear: 2-457 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-458 [-1, 256, 256] --\n", - "| └─Linear: 2-459 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-460 [-1, 256, 256] --\n", - "| └─Linear: 2-461 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-462 [-1, 256, 256] --\n", - "| └─Linear: 2-463 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-464 [-1, 256, 256] --\n", - "| └─Linear: 2-465 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-466 [-1, 256, 256] --\n", - "| └─Linear: 2-467 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-468 [-1, 256, 256] --\n", - "| └─Linear: 2-469 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-470 [-1, 256, 256] --\n", - "| └─Linear: 2-471 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-472 [-1, 256, 256] --\n", - "| └─Linear: 2-473 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-474 [-1, 256, 256] --\n", - "| └─Linear: 2-475 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-476 [-1, 256, 256] --\n", - "| └─Linear: 2-477 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-478 [-1, 256, 256] --\n", - "| └─Linear: 2-479 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-480 [-1, 256, 256] --\n", - "| └─Linear: 2-481 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-482 [-1, 256, 256] --\n", - "| └─Linear: 2-483 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-484 [-1, 256, 256] --\n", - "| └─Linear: 2-485 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-486 [-1, 256, 256] --\n", - "| └─Linear: 2-487 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-488 [-1, 256, 256] --\n", - "| └─Linear: 2-489 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-490 [-1, 256, 256] --\n", - "| └─Linear: 2-491 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-492 [-1, 256, 256] --\n", - "| └─Linear: 2-493 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-494 [-1, 256, 256] --\n", - "| └─Linear: 2-495 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-496 [-1, 256, 256] --\n", - "| └─Linear: 2-497 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-498 [-1, 256, 256] --\n", - "| └─Linear: 2-499 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-500 [-1, 256, 256] --\n", - "| └─Linear: 2-501 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-502 [-1, 256, 256] --\n", - "| └─Linear: 2-503 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-504 [-1, 256, 256] --\n", - "| └─Linear: 2-505 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-506 [-1, 256, 256] --\n", - "| └─Linear: 2-507 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-508 [-1, 256, 256] --\n", - "| └─Linear: 2-509 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-510 [-1, 256, 256] --\n", - "| └─Linear: 2-511 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-512 [-1, 256, 256] --\n", - "| └─Linear: 2-513 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-514 [-1, 256, 256] --\n", - "| └─Linear: 2-515 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 
2-516 [-1, 256, 256] --\n", - "| └─Linear: 2-517 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-518 [-1, 256, 256] --\n", - "| └─Linear: 2-519 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-520 [-1, 256, 256] --\n", - "| └─Linear: 2-521 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-522 [-1, 256, 256] --\n", - "| └─Linear: 2-523 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-524 [-1, 256, 256] --\n", - "| └─Linear: 2-525 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-526 [-1, 256, 256] --\n", - "| └─Linear: 2-527 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-528 [-1, 256, 256] --\n", - "| └─Linear: 2-529 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-530 [-1, 256, 256] --\n", - "| └─Linear: 2-531 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-532 [-1, 256, 256] --\n", - "| └─Linear: 2-533 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-534 [-1, 256, 256] --\n", - "| └─Linear: 2-535 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-536 [-1, 256, 256] --\n", - "| └─Linear: 2-537 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-538 [-1, 256, 256] --\n", - "| └─Linear: 2-539 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-540 [-1, 256, 256] --\n", - "| └─Linear: 2-541 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-542 [-1, 256, 256] --\n", - "| └─Linear: 2-543 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-544 [-1, 256, 256] --\n", - "| └─Linear: 2-545 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-546 [-1, 256, 256] --\n", - "| └─Linear: 2-547 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-548 [-1, 256, 256] --\n", - "| └─Linear: 2-549 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-550 [-1, 256, 256] --\n", - "| └─Linear: 2-551 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-552 [-1, 256, 256] --\n", - "| └─Linear: 2-553 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-554 [-1, 256, 256] --\n", - "| └─Linear: 2-555 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-556 [-1, 256, 256] --\n", - "| └─Linear: 2-557 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-558 [-1, 256, 256] --\n", - "| └─Linear: 2-559 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-560 [-1, 256, 256] --\n", - "| └─Linear: 2-561 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-562 [-1, 256, 256] --\n", - "| └─Linear: 2-563 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-564 [-1, 256, 256] --\n", - "| └─Linear: 2-565 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-566 [-1, 256, 256] --\n", - "| └─Linear: 2-567 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-568 [-1, 256, 256] --\n", - "| └─Linear: 2-569 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-570 [-1, 256, 256] --\n", - "| └─Linear: 2-571 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-572 [-1, 256, 256] --\n", - "| └─Linear: 2-573 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-574 [-1, 256, 256] --\n", - "| └─Linear: 2-575 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-576 [-1, 256, 256] --\n", - "| └─Linear: 2-577 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-578 [-1, 256, 256] --\n", - "| └─Linear: 2-579 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-580 [-1, 256, 256] --\n", - "| └─Linear: 2-581 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-582 [-1, 256, 256] --\n", - "| └─Linear: 2-583 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-584 [-1, 256, 256] --\n", - "| └─Linear: 2-585 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-586 [-1, 256, 256] --\n", - "| └─Linear: 2-587 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-588 [-1, 256, 256] --\n", - "| └─Linear: 2-589 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-590 [-1, 256, 256] --\n", - "| └─Linear: 2-591 [-1, 1, 256] 
(recursive)\n", - "| └─Softmax: 2-592 [-1, 256, 256] --\n", - "| └─Linear: 2-593 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-594 [-1, 256, 256] --\n", - "| └─Linear: 2-595 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-596 [-1, 256, 256] --\n", - "| └─Linear: 2-597 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-598 [-1, 256, 256] --\n", - "| └─Linear: 2-599 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-600 [-1, 256, 256] --\n", - "| └─Linear: 2-601 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-602 [-1, 256, 256] --\n", - "| └─Linear: 2-603 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-604 [-1, 256, 256] --\n", - "| └─Linear: 2-605 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-606 [-1, 256, 256] --\n", - "| └─Linear: 2-607 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-608 [-1, 256, 256] --\n", - "| └─Linear: 2-609 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-610 [-1, 256, 256] --\n", - "| └─Linear: 2-611 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-612 [-1, 256, 256] --\n", - "| └─Linear: 2-613 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-614 [-1, 256, 256] --\n", - "| └─Linear: 2-615 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-616 [-1, 256, 256] --\n", - "| └─Linear: 2-617 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-618 [-1, 256, 256] --\n", - "| └─Linear: 2-619 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-620 [-1, 256, 256] --\n", - "| └─Linear: 2-621 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-622 [-1, 256, 256] --\n", - "| └─Linear: 2-623 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-624 [-1, 256, 256] --\n", - "| └─Linear: 2-625 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-626 [-1, 256, 256] --\n", - "| └─Linear: 2-627 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-628 [-1, 256, 256] --\n", - "| └─Linear: 2-629 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-630 [-1, 256, 256] --\n", - "| └─Linear: 2-631 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-632 [-1, 256, 256] --\n", - "| └─Linear: 2-633 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-634 [-1, 256, 256] --\n", - "| └─Linear: 2-635 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-636 [-1, 256, 256] --\n", - "| └─Linear: 2-637 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-638 [-1, 256, 256] --\n", - "| └─Linear: 2-639 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-640 [-1, 256, 256] --\n", - "| └─Linear: 2-641 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-642 [-1, 256, 256] --\n", - "| └─Linear: 2-643 [-1, 1, 256] (recursive)\n", - "| └─Softmax: 2-644 [-1, 256, 256] --\n", - "| └─Linear: 2-645 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-646 [-1, 256, 256] --\n", - "| └─Linear: 2-647 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-648 [-1, 256, 256] --\n", - "| └─Linear: 2-649 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-650 [-1, 256, 256] --\n", - "| └─Linear: 2-651 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-652 [-1, 256, 256] --\n", - "| └─Linear: 2-653 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-654 [-1, 256, 256] --\n", - "| └─Linear: 2-655 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-656 [-1, 256, 256] --\n", - "| └─Linear: 2-657 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-658 [-1, 256, 256] --\n", - "| └─Linear: 2-659 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-660 [-1, 256, 256] --\n", - "| └─Linear: 2-661 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-662 [-1, 256, 256] --\n", - "| └─Linear: 2-663 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-664 [-1, 256, 256] --\n", - "| └─Linear: 2-665 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-666 [-1, 256, 256] --\n", - "| └─Linear: 
2-667 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-668 [-1, 256, 256] --\n", - "| └─Linear: 2-669 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-670 [-1, 256, 256] --\n", - "| └─Linear: 2-671 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-672 [-1, 256, 256] --\n", - "| └─Linear: 2-673 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-674 [-1, 256, 256] --\n", - "| └─Linear: 2-675 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-676 [-1, 256, 256] --\n", - "| └─Linear: 2-677 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-678 [-1, 256, 256] --\n", - "| └─Linear: 2-679 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-680 [-1, 256, 256] --\n", - "| └─Linear: 2-681 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-682 [-1, 256, 256] --\n", - "| └─Linear: 2-683 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-684 [-1, 256, 256] --\n", - "| └─Linear: 2-685 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-686 [-1, 256, 256] --\n", - "| └─Linear: 2-687 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-688 [-1, 256, 256] --\n", - "| └─Linear: 2-689 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-690 [-1, 256, 256] --\n", - "| └─Linear: 2-691 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-692 [-1, 256, 256] --\n", - "| └─Linear: 2-693 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-694 [-1, 256, 256] --\n", - "| └─Linear: 2-695 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-696 [-1, 256, 256] --\n", - "| └─Linear: 2-697 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-698 [-1, 256, 256] --\n", - "| └─Linear: 2-699 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-700 [-1, 256, 256] --\n", - "| └─Linear: 2-701 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-702 [-1, 256, 256] --\n", - "| └─Linear: 2-703 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-704 [-1, 256, 256] --\n", - "| └─Linear: 2-705 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-706 [-1, 256, 256] --\n", - "| └─Linear: 2-707 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-708 [-1, 256, 256] --\n", - "| └─Linear: 2-709 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-710 [-1, 256, 256] --\n", - "| └─Linear: 2-711 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-712 [-1, 256, 256] --\n", - "| └─Linear: 2-713 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-714 [-1, 256, 256] --\n", - "| └─Linear: 2-715 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-716 [-1, 256, 256] --\n", - "| └─Linear: 2-717 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-718 [-1, 256, 256] --\n", - "| └─Linear: 2-719 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-720 [-1, 256, 256] --\n", - "| └─Linear: 2-721 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-722 [-1, 256, 256] --\n", - "| └─Linear: 2-723 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-724 [-1, 256, 256] --\n", - "| └─Linear: 2-725 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-726 [-1, 256, 256] --\n", - "| └─Linear: 2-727 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-728 [-1, 256, 256] --\n", - "| └─Linear: 2-729 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-730 [-1, 256, 256] --\n", - "| └─Linear: 2-731 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-732 [-1, 256, 256] --\n", - "| └─Linear: 2-733 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-734 [-1, 256, 256] --\n", - "| └─Linear: 2-735 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-736 [-1, 256, 256] --\n", - "| └─Linear: 2-737 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-738 [-1, 256, 256] --\n", - "| └─Linear: 2-739 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-740 [-1, 256, 256] --\n", - "| └─Linear: 2-741 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-742 [-1, 256, 256] 
--\n", - "| └─Linear: 2-743 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-744 [-1, 256, 256] --\n", - "| └─Linear: 2-745 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-746 [-1, 256, 256] --\n", - "| └─Linear: 2-747 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-748 [-1, 256, 256] --\n", - "| └─Linear: 2-749 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-750 [-1, 256, 256] --\n", - "| └─Linear: 2-751 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-752 [-1, 256, 256] --\n", - "| └─Linear: 2-753 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-754 [-1, 256, 256] --\n", - "| └─Linear: 2-755 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-756 [-1, 256, 256] --\n", - "| └─Linear: 2-757 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-758 [-1, 256, 256] --\n", - "| └─Linear: 2-759 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-760 [-1, 256, 256] --\n", - "| └─Linear: 2-761 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-762 [-1, 256, 256] --\n", - "| └─Linear: 2-763 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-764 [-1, 256, 256] --\n", - "| └─Linear: 2-765 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-766 [-1, 256, 256] --\n", - "| └─Linear: 2-767 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-768 [-1, 256, 256] --\n", - "| └─Linear: 2-769 [-1, 2, 256] (recursive)\n", - "| └─Softmax: 2-770 [-1, 256, 256] --\n", - "| └─Linear: 2-771 [-1, 2, 256] (recursive)\n", - "===============================================================================================\n", - "Total params: 230,912\n", - "Trainable params: 230,912\n", - "Non-trainable params: 0\n", - "Total mult-adds (M): 13.01\n", - "===============================================================================================\n", - "Input size (MB): 0.12\n", - "Forward/backward pass size (MB): 0.01\n", - "Params size (MB): 0.88\n", - "Estimated Total Size (MB): 1.02\n", - "===============================================================================================" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "summary(module1, [256, 128])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "clort", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/Graph Conv.ipynb b/examples/Graph Conv.ipynb deleted file mode 100644 index ddc0d4e..0000000 --- a/examples/Graph Conv.ipynb +++ /dev/null @@ -1,915 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import torch\n", - "import torch.nn as nn\n", - "from torchsummary import summary\n", - "from torchview import draw_graph\n", - "from torchviz import make_dot\n", - "\n", - "from moduleZoo.graphs import GraphConv" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "class TargetModel(nn.Module):\n", - " def __init__(self, db = False) -> None:\n", - " super().__init__()\n", - " self.module = GraphConv(128, 256, k=10, dynamic_batching=db, enable_offloading=True)\n", - "\n", - " def enable_dynamic_batching(self) -> 
None:\n", - " self.module.db = True\n", - "\n", - " def forward(self, x:torch.Tensor, n_nodes = None) -> torch.Tensor:\n", - " n_nodes = (np.ones((10, ), dtype=np.int16)*100).tolist() + (np.ones((10, ), dtype=np.int16)*50).tolist()*2\n", - " x = x.view((2*10*100, 128))\n", - " return self.module(x, n_nodes)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "module = TargetModel(True)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using static dynamic batching.\n", - "==========================================================================================\n", - "Layer (type:depth-idx) Output Shape Param #\n", - "==========================================================================================\n", - "├─GraphConv: 1-1 [-1, 256] --\n", - "| └─Linear: 2-1 [-1, 50, 10, 256] 65,792\n", - "| └─Linear: 2-2 [-1, 100, 10, 256] (recursive)\n", - "==========================================================================================\n", - "Total params: 65,792\n", - "Trainable params: 65,792\n", - "Non-trainable params: 0\n", - "Total mult-adds (M): 0.20\n", - "==========================================================================================\n", - "Input size (MB): 0.49\n", - "Forward/backward pass size (MB): 0.98\n", - "Params size (MB): 0.25\n", - "Estimated Total Size (MB): 1.72\n", - "==========================================================================================\n" - ] - }, - { - "data": { - "text/plain": [ - "==========================================================================================\n", - "Layer (type:depth-idx) Output Shape Param #\n", - "==========================================================================================\n", - "├─GraphConv: 1-1 [-1, 256] --\n", - "| └─Linear: 2-1 [-1, 50, 10, 256] 65,792\n", - "| └─Linear: 2-2 [-1, 100, 10, 256] (recursive)\n", - "==========================================================================================\n", - "Total params: 65,792\n", - "Trainable params: 65,792\n", - "Non-trainable params: 0\n", - "Total mult-adds (M): 0.20\n", - "==========================================================================================\n", - "Input size (MB): 0.49\n", - "Forward/backward pass size (MB): 0.98\n", - "Params size (MB): 0.25\n", - "Estimated Total Size (MB): 1.72\n", - "==========================================================================================" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# model_graph = draw_graph(module, input_size=(10*100, 128), expand_nested=False)\n", - "summary(module, (10*100, 128))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# model_graph.visual_graph.save(filename='graph-conv.svg')" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "module = module.to('cuda')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "data.shape = torch.Size([10, 100, 128])\n" - ] - } - ], - "source": [ - "data = torch.rand((10, 100, 128), device='cuda')\n", - "print(f'{data.shape = }')\n", - "# n_nodes = [1]*(256//2) + [2]*(256//4)\n", - "# n_nodes = np.array(n_nodes*2)" - ] - }, - { - "cell_type": 
"code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using static forward\n" - ] - } - ], - "source": [ - "result1 = module(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "%3\n", - "\n", - "\n", - "\n", - "140494380211136\n", - "\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494382202640\n", - "\n", - "MaxBackward0\n", - "\n", - "\n", - "\n", - "140494382202640->140494380211136\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382199904\n", - "\n", - "AddBackward0\n", - "\n", - "\n", - "\n", - "140494382199904->140494382202640\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382202208\n", - "\n", - "UnsafeViewBackward0\n", - "\n", - "\n", - "\n", - "140494382202208->140494382199904\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382200144\n", - "\n", - "MmBackward0\n", - "\n", - "\n", - "\n", - "140494382200144->140494382202208\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382197600\n", - "\n", - "TBackward0\n", - "\n", - "\n", - "\n", - "140494382197600->140494382200144\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382200912\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382200912->140494382197600\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494387193200\n", - "\n", - "module.0.weight\n", - " (256, 256)\n", - "\n", - "\n", - "\n", - "140494387193200->140494382200912\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382201440\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382201440->140494382199904\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380208816\n", - "\n", - "module.0.bias\n", - " (256)\n", - "\n", - "\n", - "\n", - "140494380208816->140494382201440\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "make_dot(result1, params=dict(module.named_parameters()))" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "module.module.db = True" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "data = data.view((10*100, 128))\n", - "n_nodes = np.ones((10, ), dtype=np.int16)*100" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using static dynamic batching.\n" - ] - } - ], - "source": [ - "result2 = module(data, n_nodes).view((10, 100, 256))" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "%3\n", - "\n", - "\n", - "\n", - "140494388910080\n", - "\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494382198992\n", - "\n", - "ViewBackward0\n", - "---------------------------\n", - "self_sym_sizes: (1000, 256)\n", - "\n", - "\n", - "\n", - "140494382198992->140494388910080\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382198320\n", - "\n", - "CopySlices\n", - "\n", - "\n", - "\n", - "140494382198320->140494382198992\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494394682048\n", - "\n", - " (1000, 256)\n", - "\n", - "\n", - "\n", - "140494382198320->140494394682048\n", - "\n", - "\n", - 
"\n", - "\n", - "\n", - "140494382202256\n", - "\n", - "ViewBackward0\n", - "------------------------------\n", - "self_sym_sizes: (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494382202256->140494382198320\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382199376\n", - "\n", - "MaxBackward0\n", - "----------------------------------\n", - "dim           :                  2\n", - "indices       :     [saved tensor]\n", - "keepdim       :              False\n", - "self_sym_sizes: (10, 100, 10, 256)\n", - "\n", - "\n", - "\n", - "140494382199376->140494382202256\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494389552816\n", - "\n", - "indices\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494382199376->140494389552816\n", - "\n", - "\n", - "\n", - "\n", - "140494382201920\n", - "\n", - "AddBackward0\n", - "------------\n", - "alpha: 1\n", - "\n", - "\n", - "\n", - "140494382201920->140494382199376\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382201872\n", - "\n", - "UnsafeViewBackward0\n", - "----------------------------\n", - "self_sym_sizes: (10000, 256)\n", - "\n", - "\n", - "\n", - "140494382201872->140494382201920\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382202208\n", - "\n", - "MmBackward0\n", - "--------------------------------\n", - "mat2            :           None\n", - "mat2_sym_sizes  :     (256, 256)\n", - "mat2_sym_strides:       (1, 256)\n", - "self            : [saved tensor]\n", - "self_sym_sizes  :   (10000, 256)\n", - "self_sym_strides:             ()\n", - "\n", - "\n", - "\n", - "140494382202208->140494382201872\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494399896000\n", - "\n", - "self\n", - " (10000, 256)\n", - "\n", - "\n", - "\n", - "140494382202208->140494399896000\n", - "\n", - "\n", - "\n", - "\n", - "140494382197936\n", - "\n", - "TBackward0\n", - "\n", - "\n", - "\n", - "140494382197936->140494382202208\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382201344\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382201344->140494382197936\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494387193200\n", - "\n", - " (256, 256)\n", - "\n", - "\n", - "\n", - "140494387193200->140494382201344\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382198224\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382198224->140494382201920\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380208816\n", - "\n", - " (256)\n", - "\n", - "\n", - "\n", - "140494380208816->140494382198224\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494394682048->140494388910080\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "make_dot(result2, show_attrs=True, show_saved=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], - "source": [ - "module.module.db = False" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using complete dynamic batching\n" - ] - } - ], - "source": [ - "result3 = module(data, n_nodes).view((10, 100, 256))" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "%3\n", - "\n", - "\n", - "\n", - "140494399166704\n", - "\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", 
- "140494381754272\n", - "\n", - "ViewBackward0\n", - "------------------------------\n", - "self_sym_sizes: (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494381754272->140494399166704\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494381756576\n", - "\n", - "MaxBackward0\n", - "----------------------------------\n", - "dim           :                  2\n", - "indices       :     [saved tensor]\n", - "keepdim       :              False\n", - "self_sym_sizes: (10, 100, 10, 256)\n", - "\n", - "\n", - "\n", - "140494381756576->140494381754272\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380212816\n", - "\n", - "indices\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494381756576->140494380212816\n", - "\n", - "\n", - "\n", - "\n", - "140494380209936\n", - "\n", - " (10, 100, 256)\n", - "\n", - "\n", - "\n", - "140494381756576->140494380209936\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494381759648\n", - "\n", - "AddBackward0\n", - "------------\n", - "alpha: 1\n", - "\n", - "\n", - "\n", - "140494381759648->140494381756576\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494381754848\n", - "\n", - "UnsafeViewBackward0\n", - "----------------------------\n", - "self_sym_sizes: (10000, 256)\n", - "\n", - "\n", - "\n", - "140494381754848->140494381759648\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380400800\n", - "\n", - "MmBackward0\n", - "--------------------------------\n", - "mat2            :           None\n", - "mat2_sym_sizes  :     (256, 256)\n", - "mat2_sym_strides:       (1, 256)\n", - "self            : [saved tensor]\n", - "self_sym_sizes  :   (10000, 256)\n", - "self_sym_strides:             ()\n", - "\n", - "\n", - "\n", - "140494380400800->140494381754848\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494390603552\n", - "\n", - "self\n", - " (10000, 256)\n", - "\n", - "\n", - "\n", - "140494380400800->140494390603552\n", - "\n", - "\n", - "\n", - "\n", - "140494380400896\n", - "\n", - "TBackward0\n", - "\n", - "\n", - "\n", - "140494380400896->140494380400800\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382201344\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382201344->140494380400896\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494387193200\n", - "\n", - " (256, 256)\n", - "\n", - "\n", - "\n", - "140494387193200->140494382201344\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494382198224\n", - "\n", - "AccumulateGrad\n", - "\n", - "\n", - "\n", - "140494382198224->140494381759648\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380208816\n", - "\n", - " (256)\n", - "\n", - "\n", - "\n", - "140494380208816->140494382198224\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "140494380209936->140494399166704\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "make_dot(result3, show_attrs=True, show_saved=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(True, device='cuda:0')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "torch.all(result3 == result2) and torch.all(result1 == result2)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(True, device='cuda:0')" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": 
"execute_result" - } - ], - "source": [ - "torch.all(torch.cat(data.view((10, 100, 128)).split(1, dim=0), dim=1).squeeze(dim=0) == data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "clort", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/Residual Blocks.ipynb b/examples/Residual Blocks.ipynb deleted file mode 100644 index 86a1c87..0000000 --- a/examples/Residual Blocks.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "54209ca3", - "metadata": {}, - "outputs": [], - "source": [ - "import torch\n", - "\n", - "from torchsummary import summary\n", - "\n", - "from moduleZoo.resblocks import Conv2DResidualBlock" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ebffc203", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Conv2DResidualBlock(\n", - " (conv1): Conv2DNormActivation(\n", - " (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))\n", - " (1): ReLU6()\n", - " )\n", - " (conv2): Conv2DNormActivation(\n", - " (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=same, bias=False)\n", - " )\n", - " (projection): Conv2DNormActivation(\n", - " (0): Conv2d(3, 24, kernel_size=(1, 1), stride=(2, 2), bias=False)\n", - " )\n", - ")" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "res_blc = Conv2DResidualBlock(3, 24, stride=2)\n", - "res_blc.cuda()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "b191db73", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "----------------------------------------------------------------\n", - " Layer (type) Output Shape Param #\n", - "================================================================\n", - " Conv2d-1 [-1, 24, 128, 128] 672\n", - " ReLU6-2 [-1, 24, 128, 128] 0\n", - " Conv2d-3 [-1, 24, 128, 128] 5,184\n", - " Conv2d-4 [-1, 24, 128, 128] 72\n", - "================================================================\n", - "Total params: 5,928\n", - "Trainable params: 5,928\n", - "Non-trainable params: 0\n", - "----------------------------------------------------------------\n", - "Input size (MB): 0.75\n", - "Forward/backward pass size (MB): 12.00\n", - "Params size (MB): 0.02\n", - "Estimated Total Size (MB): 12.77\n", - "----------------------------------------------------------------\n" - ] - } - ], - "source": [ - "summary(res_blc, (3, 256, 256))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "49c45938", - "metadata": {}, - "outputs": [], - "source": [ - "from moduleZoo.dense import LinearNormActivation\n", - "import torch.nn as nn\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "d6ffbe23", - "metadata": {}, - "outputs": [], - "source": [ - "module = LinearNormActivation(128, 256, norm_layer=nn.BatchNorm1d, activation_layer=nn.Tanh)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "fc9438b8", - "metadata": {}, - "outputs": [], - "source": [ - 
"data = torch.rand((100, 100, 128))\n", - "data2 = data.flatten(0, -2)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "1119a3f9", - "metadata": {}, - "outputs": [], - "source": [ - "res1 = module(data)\n", - "res2 = module(data2)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "f88f84e8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "tensor(True)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "torch.all(res1.flatten(0, -2) == res2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1f825ae5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/meta.yaml b/meta.yaml new file mode 100644 index 0000000..839469d --- /dev/null +++ b/meta.yaml @@ -0,0 +1,31 @@ +package: + name: "mzt" + version: 1.2.0 + +source: + git_rev: v1.2.0-Alpha + git_url: https://github.com/ShivamPR21/MZT.git + +requirements: + host: + - python + - setuptools + build: + - python + run: + - python + - numpy + - pytorch + - typing + - typing-extensions + +about: + home: https://github.com/ShivamPR21/MZT.git + license: AGPLv3+ + license_familY: AGPL + license_file: LICENSE + summary: "Package to host DeepLearning modules for pytorch ecosystem, to ease out model implementations and iterations." + +extra: + recipe-maintainers: + - ShivamPR21 diff --git a/modelZoo/__init__.py b/modelZoo/__init__.py deleted file mode 100644 index a76fe1d..0000000 --- a/modelZoo/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -""" Copyright (C) 2021 Shivam Pandey. - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . 
-""" - -from .graphs import DGCNN - -# from .mbnetv2 import MobileNetV2 -# from .shufflenet import ShuffleInvertedResidual - -__all__ = ('DGCNN',) diff --git a/modelZoo/graphs/__init__.py b/modelZoo/graphs/__init__.py deleted file mode 100644 index c917d3a..0000000 --- a/modelZoo/graphs/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .dgcnn import DGCNN - -__all__ = ('DGCNN',) diff --git a/modelZoo/graphs/dgcnn.py b/modelZoo/graphs/dgcnn.py deleted file mode 100644 index 9cb8470..0000000 --- a/modelZoo/graphs/dgcnn.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Callable, List, Optional, Tuple - -import torch -import torch.nn as nn -from moduleZoo.graphs import GraphConv2d - - -class DGCNN(nn.Module): - - def __init__(self, k: int, - embed_dim: int = 512, - cfg: Optional[List[Tuple[int, int, int, bool]]] = None, - activation_layer: Optional[Callable[..., nn.Module]] = None) -> None: - super().__init__() - - self.cfg = [(3, 32, 1, False), - (32, 64, 1, False), - (64, 128, 1, True), - (128, 256, 1, False), - (256, 512, 1, False)] if cfg is None else cfg - self.activation_layer = nn.SELU if activation_layer is None else activation_layer - - self.layers = nn.ModuleList() - cat_dim = 0 - for cfg in self.cfg: - self.layers.append(GraphConv2d(*cfg[:-1], k, - norm_layer=nn.BatchNorm2d if cfg[-1] else None, - activation_layer=self.activation_layer)) - cat_dim += cfg[1] - - self.final_conv = GraphConv2d(cat_dim, - embed_dim, - 1, - activation_layer=None) - - def forward(self, x: torch.Tensor): - # Assumed shape x: [B, d, n] - out_lst: List[torch.Tensor] = [] - for layer in self.layers: - x = layer(x) - out_lst.append(x) - - x = torch.cat(tuple(out_lst), dim=1) - - x = self.final_conv(x) - return x diff --git a/modelZoo/mbnetv2.py b/modelZoo/mbnetv2.py deleted file mode 100644 index df991f7..0000000 --- a/modelZoo/mbnetv2.py +++ /dev/null @@ -1,105 +0,0 @@ -''' -Copyright (C) 2021 Shivam Pandey - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . -''' - -from typing import Callable, List, Optional - -import torch.nn as nn -from torch import Tensor - -from ..moduleZoo import ConvInvertedResidualBlock2d, ConvNormActivation2d - -#TODO@ShivamPR21: #6 Adapt mobilenetv2 model according to new moduleZoo API -# Taken from Pytorch vision repo. -# def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: -# """ -# This function is taken from the original tf repo. -# It ensures that all layers have a channel number that is divisible by 8 -# It can be seen here: -# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py -# """ -# if min_value is None: -# min_value = divisor -# new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) -# # Make sure that round down does not go down by more than 10%. 
-# if new_v < 0.9 * v: -# new_v += divisor -# return new_v - -# class MobileNetV2(nn.Module): -# def __init__(self, -# in_activation_channel: int = 3, -# in_channel: int = 32, -# out_channel: int = 1280, -# width_multiplier: float = 1.0, -# round_nearest: int = 8, -# inverted_residual_setting: Optional[List[List[int]]] = None, -# block: Optional[Callable[..., nn.Module]] = None, -# norm_layer: Optional[Callable[..., nn.Module]] = None) -> None: -# super().__init__() - -# if block is None: -# block = InvertedResidual - -# if norm_layer is None: -# norm_layer = nn.BatchNorm2d - -# if inverted_residual_setting is None: -# inverted_residual_setting = [ -# # t, c, n, s -# [1, 16, 1, 1], -# [6, 24, 2, 2], -# [6, 32, 3, 2], -# [6, 64, 4, 2], -# [6, 96, 3, 1], -# [6, 160, 3, 2], -# [6, 320, 1, 1], -# ] - -# # only check the first element, assuming user knows t,c,n,s are required -# if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: -# raise ValueError( -# f"inverted_residual_setting should be non-empty or a 4-element list, got {inverted_residual_setting}" -# ) - -# # building first layer -# in_channel = _make_divisible(in_channel * width_multiplier, round_nearest) -# self.out_channel = _make_divisible(out_channel * max(1.0, width_multiplier), round_nearest) -# features: List[nn.Module] = [ -# Conv2DNormActivation(in_activation_channel, in_channel, stride=2, norm_layer=norm_layer, activation_layer=nn.ReLU6) -# ] -# # building inverted residual blocks -# for t, c, n, s in inverted_residual_setting: -# out_channel = _make_divisible(c * width_multiplier, round_nearest) -# for i in range(n): -# stride = s if i == 0 else 1 -# features.append(block(in_channel, out_channel, stride, expand_ratio=t, norm_layer=norm_layer)) -# in_channel = out_channel -# # building last several layers -# features.append( -# Conv2DNormActivation( -# in_channel, self.out_channel, kernel_size=1, norm_layer=norm_layer, activation_layer=nn.ReLU6 -# ) -# ) -# # make it nn.Sequential -# self.features = nn.Sequential(*features) - -# def _forward_impl(self, x: Tensor) -> Tensor: -# x = self.features(x) -# return x - -# def forward(self, x: Tensor) -> Tensor: -# return self._forward_impl(x) diff --git a/modelZoo/shufflenet.py b/modelZoo/shufflenet.py deleted file mode 100644 index 70a41be..0000000 --- a/modelZoo/shufflenet.py +++ /dev/null @@ -1,60 +0,0 @@ -''' -Copyright (C) 2021 Shivam Pandey - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU Affero General Public License as published -by the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU Affero General Public License for more details. - -You should have received a copy of the GNU Affero General Public License -along with this program. If not, see . 
-''' - -from typing import List - -import torch.nn as nn -from torch import Tensor - -from ..moduleZoo.resblocks import ConvInvertedResidualBlock2d - -#TODO@ShivamPR21: #5 Adapt model based on changed moduleZoo API -# class ShuffleNet(nn.Module): -# def __init__(self, -# in_channel: int, -# settings: List[List[int]] = None -# ) -> None: -# super().__init__() - -# if settings is None: -# settings = [ -# #k, s, r, exp -# [3, 2, 1, 1, 2], -# [3, 1, 3, 1, 2], -# [3, 2, 1, 1, 2], -# [3, 1, 7, 1, 2], -# [3, 2, 1, 1, 2], -# [3, 1, 3, 1, 2]] - -# features : List[nn.Module] = [] -# for k, s, r, e, g in settings: -# for _ in range(r): -# features.append( -# ShuffleInvertedResidual(in_channel, in_channel, e, -# g, k, s, nn.BatchNorm2d, nn.ReLU6) -# ) -# if s == 2: -# in_channel *= 2 - -# self.features = nn.Sequential(*features) - -# def _forward_impl(self, x: Tensor) -> Tensor: -# x = self.features(x) -# return x - -# def forward(self, x: Tensor) -> Tensor: -# return self._forward_impl(x) diff --git a/setup.py b/setup.py index 56c9fc3..ba1d292 100644 --- a/setup.py +++ b/setup.py @@ -12,14 +12,14 @@ def read(fname): setup( name = "mzt", - version = "1.1.6", + version = "1.2.0", author = "Shivam Pandey", author_email = "pandeyshivam2017robotics@gmail.com", description = ("Package to host DeepLearning modules for pytorch ecosystem," " to ease out model implementations."), license = "AGPLv3+", keywords = "DeepLearning Pytorch Modules", - url = "https://github.com/ShivamPR21/ModuleZooTorch.git", + url = "https://github.com/ShivamPR21/MZT.git", packages=['moduleZoo', 'moduleZoo.convolution', 'moduleZoo.resblocks', @@ -27,8 +27,6 @@ def read(fname): 'moduleZoo.dense', 'moduleZoo.graphs', 'moduleZoo.graphs.attention', - 'modelZoo', - 'modelZoo.graphs', 'mzLosses', 'mzExtras'], long_description=read('README.md'), diff --git a/test/import_tests.py b/test/import_tests.py index c61a438..e69de29 100644 --- a/test/import_tests.py +++ b/test/import_tests.py @@ -1 +0,0 @@ -from moduleZoo.attention import SelfAttention1d
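The notebook deleted in this patch exercises a property worth keeping in mind when these modules are reused: a layer that acts only on the last feature dimension should return the same values whether it receives a [100, 100, 128] tensor or the same data flattened to [10000, 128]. A minimal sketch of that check as a standalone helper follows; it is illustrative only, and the name module stands for any such layer (the concrete module built in the deleted notebook is not part of this hunk).

import torch
import torch.nn as nn

def leading_dims_invariant(module: nn.Module, x: torch.Tensor) -> bool:
    # Run the module on the full tensor and on a view with all leading
    # dimensions flattened, then compare element-wise, mirroring the
    # torch.all(res1.flatten(0, -2) == res2) check in the deleted cells.
    full = module(x)                   # e.g. x: [100, 100, 128]
    flat = module(x.flatten(0, -2))    # same data seen as [10000, 128]
    return bool(torch.all(full.flatten(0, -2) == flat))

# Expected to hold for layers that operate point-wise over the last dimension,
# e.g. leading_dims_invariant(nn.Linear(128, 64), torch.rand(100, 100, 128)).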
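Downstream code that still imports the dynamic graph model removed here (modelZoo is also dropped from the package list in setup.py) will break against mzt 1.2.0. For reference, a rough usage sketch of the deleted modelZoo.graphs.DGCNN, inferred only from the dgcnn.py source shown in this diff; the tensor sizes and the value of k are illustrative assumptions, and the class now has to come from an older release or be vendored.

import torch
from modelZoo.graphs import DGCNN   # only importable from mzt releases before 1.2.0

# The default cfg in the deleted file expects point features shaped [B, d, n]
# with d = 3 input channels; batch size, point count and k are made up here.
x = torch.rand(2, 3, 64)

model = DGCNN(k=20)    # k is forwarded to every GraphConv2d layer (presumably the neighbourhood size)
embedding = model(x)   # concatenated edge features projected to embed_dim (default 512) channels per point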