diff --git a/Makefile b/Makefile index 414eac0..eead3d9 100644 --- a/Makefile +++ b/Makefile @@ -3,3 +3,7 @@ lint: test: python -m unittest -f tests/*.py + +generate_weight_hashes: + python scripts/generate_weight_hashes.py \ + --input weights/ diff --git a/README.md b/README.md index 4ab51b9..f34365a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ +EfficientNetV2 models rewritten in Keras functional API. + +### Changelog: +* 10 Sept. 2021 - Added XL model variant. + * Changed layer naming convention. + * Re-exported weights. + + # Table of contens 1. [Introduction](https://github.com/sebastian-sz/efficientnet-v2-keras#introduction) 2. [Quickstart](https://github.com/sebastian-sz/efficientnet-v2-keras#quickstart) diff --git a/docs/efficientnet_v2_original_outputs.ipynb b/docs/efficientnet_v2_original_outputs.ipynb new file mode 100644 index 0000000..a03e371 --- /dev/null +++ b/docs/efficientnet_v2_original_outputs.ipynb @@ -0,0 +1,790 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "efficientnet-v2-original-outputs.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "3UgGs16yIEVI", + "outputId": "9089656d-7b7e-4ba7-bd6c-5265d5784f75" + }, + "source": [ + "# Clone repo and install dependencies\n", + "\n", + "!git clone https://github.com/google/automl.git\n", + "!pip install tensorflow_addons" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'automl'...\n", + "remote: Enumerating objects: 3996, done.\u001b[K\n", + "remote: Counting objects: 100% (383/383), done.\u001b[K\n", + "remote: Compressing objects: 100% (197/197), done.\u001b[K\n", + "remote: Total 3996 (delta 226), reused 297 (delta 184), 
pack-reused 3613\u001b[K\n", + "Receiving objects: 100% (3996/3996), 25.16 MiB | 19.46 MiB/s, done.\n", + "Resolving deltas: 100% (2978/2978), done.\n", + "Collecting tensorflow_addons\n", + " Downloading tensorflow_addons-0.14.0-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)\n", + "\u001b[K |████████████████████████████████| 1.1 MB 5.4 MB/s \n", + "\u001b[?25hRequirement already satisfied: typeguard>=2.7 in /usr/local/lib/python3.7/dist-packages (from tensorflow_addons) (2.7.1)\n", + "Installing collected packages: tensorflow-addons\n", + "Successfully installed tensorflow-addons-0.14.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "4OmvuKTrINFN", + "outputId": "e2e369bb-5ded-466a-9c65-78a90bd9e3bb" + }, + "source": [ + "%cd automl/efficientnetv2/" + ], + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/automl/efficientnetv2\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "C5smQA00IOt5" + }, + "source": [ + "import os\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "\n", + "from effnetv2_model import EffNetV2Model\n", + "import preprocessing\n" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OMBX4mLVIZHQ", + "outputId": "99385056-13da-48d3-e0ea-bc4b33eef50e" + }, + "source": [ + "# Load data\n", + "\n", + "image_file = 'panda.jpg'\n", + "!wget https://upload.wikimedia.org/wikipedia/commons/f/fe/Giant_Panda_in_Beijing_Zoo_1.JPG -O {image_file}" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "--2021-09-10 08:57:01-- https://upload.wikimedia.org/wikipedia/commons/f/fe/Giant_Panda_in_Beijing_Zoo_1.JPG\n", + "Resolving upload.wikimedia.org (upload.wikimedia.org)... 
208.80.153.240, 2620:0:860:ed1a::2:b\n", + "Connecting to upload.wikimedia.org (upload.wikimedia.org)|208.80.153.240|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 116068 (113K) [image/jpeg]\n", + "Saving to: ‘panda.jpg’\n", + "\n", + "\rpanda.jpg 0%[ ] 0 --.-KB/s \rpanda.jpg 100%[===================>] 113.35K --.-KB/s in 0.07s \n", + "\n", + "2021-09-10 08:57:01 (1.62 MB/s) - ‘panda.jpg’ saved [116068/116068]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "_NyEMfTbIQNa" + }, + "source": [ + "MODELS = [\n", + " \"efficientnetv2-b0\",\n", + " \"efficientnetv2-b1\",\n", + " \"efficientnetv2-b2\",\n", + " \"efficientnetv2-b3\",\n", + " \"efficientnetv2-s\",\n", + " \"efficientnetv2-m\",\n", + " \"efficientnetv2-l\",\n", + "]" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1D49WvlzIR1a", + "outputId": "7d1b7dbe-e24b-44f2-8072-410fbc53edd4" + }, + "source": [ + "# Run for Imagenet-1k pretrained logits:\n", + "\n", + "for name in MODELS:\n", + " tf.keras.backend.clear_session()\n", + " \n", + " !curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/{name}.tgz | tar xzf -\n", + "\n", + " model = EffNetV2Model(name)\n", + "\n", + " # Initialize variables:\n", + " _ = model(tf.ones([1, model.cfg.eval.isize, model.cfg.eval.isize, 3]), training=False)\n", + " \n", + " # Load weights:\n", + " ckpt_path = os.path.join(os.getcwd(), name)\n", + " if tf.io.gfile.isdir(ckpt_path):\n", + " ckpt_path = tf.train.latest_checkpoint(ckpt_path)\n", + " model.load_weights(ckpt_path)\n", + "\n", + " print(f\"Model variant: {name}\")\n", + " print(f\"Train image size: {model.cfg.train.isize}\")\n", + " print(f\"Eval image size: {model.cfg.eval.isize}\")\n", + " print()\n", + "\n", + " image = tf.image.decode_jpeg(tf.io.read_file(image_file))\n", + " input_tensor = 
preprocessing.preprocess_image(\n", + " image,\n", + " image_size=model.cfg.eval.isize,\n", + " is_training=False\n", + " )\n", + " input_tensor = tf.expand_dims(input_tensor, axis=0)\n", + "\n", + " logits = model(input_tensor, training=False)\n", + "\n", + " np.save(\n", + " f\"/content/{name}_{model.cfg.eval.isize}_original_logits.npy\", \n", + " logits.numpy()\n", + " ) " + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 100M 100 100M 0 0 54.4M 0 0:00:01 0:00:01 --:--:-- 54.4M\n", + "Model variant: efficientnetv2-b0\n", + "Train image size: 192\n", + "Eval image size: 224\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 114M 100 114M 0 0 48.0M 0 0:00:02 0:00:02 --:--:-- 48.0M\n", + "Model variant: efficientnetv2-b1\n", + "Train image size: 192\n", + "Eval image size: 240\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 142M 100 142M 0 0 50.6M 0 0:00:02 0:00:02 --:--:-- 50.5M\n", + "Model variant: efficientnetv2-b2\n", + "Train image size: 208\n", + "Eval image size: 260\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 202M 100 202M 0 0 50.9M 0 0:00:03 0:00:03 --:--:-- 50.9M\n", + "Model variant: efficientnetv2-b3\n", + "Train image size: 240\n", + "Eval image size: 300\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 302M 100 302M 0 0 48.6M 0 0:00:06 0:00:06 --:--:-- 51.3M\n", + "Model variant: efficientnetv2-s\n", + "Train image size: 300\n", + "Eval image size: 384\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload 
Upload Total Spent Left Speed\n", + "100 762M 100 762M 0 0 45.2M 0 0:00:16 0:00:16 --:--:-- 41.4M\n", + "Model variant: efficientnetv2-m\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "100 1659M 100 1659M 0 0 56.1M 0 0:00:29 0:00:29 --:--:-- 58.3M\n", + "Model variant: efficientnetv2-l\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "07wgKevlItYu", + "outputId": "cd36409c-b78e-4588-8fbc-1be3ef308b83" + }, + "source": [ + "# Run for Imagenet-1k pretrained features:\n", + "for name in MODELS:\n", + " tf.keras.backend.clear_session()\n", + "\n", + " # Do not redownload weights if present:\n", + " ![ -e $name ] || curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/{name}.tgz | tar xzf -\n", + "\n", + " model = EffNetV2Model(name, include_top=False)\n", + "\n", + " # Initialize variables:\n", + " _ = model(tf.ones([1, model.cfg.eval.isize, model.cfg.eval.isize, 3]), training=False)\n", + " \n", + " # Load weights:\n", + " ckpt_path = os.path.join(os.getcwd(), name)\n", + " if tf.io.gfile.isdir(ckpt_path):\n", + " ckpt_path = tf.train.latest_checkpoint(ckpt_path)\n", + " model.load_weights(ckpt_path)\n", + "\n", + " print(f\"Model variant: {name}\")\n", + " print(f\"Train image size: {model.cfg.train.isize}\")\n", + " print(f\"Eval image size: {model.cfg.eval.isize}\")\n", + " print()\n", + "\n", + " image = tf.image.decode_jpeg(tf.io.read_file(image_file))\n", + " input_tensor = preprocessing.preprocess_image(\n", + " image,\n", + " image_size=model.cfg.eval.isize,\n", + " is_training=False\n", + " )\n", + " input_tensor = tf.expand_dims(input_tensor, axis=0)\n", + "\n", + " features = model(input_tensor, training=False)\n", + "\n", + " 
np.save(\n", + " f\"/content/{name}_{model.cfg.eval.isize}_original_features.npy\", \n", + " features.numpy()\n", + " ) " + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Model variant: efficientnetv2-b0\n", + "Train image size: 192\n", + "Eval image size: 224\n", + "\n", + "Model variant: efficientnetv2-b1\n", + "Train image size: 192\n", + "Eval image size: 240\n", + "\n", + "Model variant: efficientnetv2-b2\n", + "Train image size: 208\n", + "Eval image size: 260\n", + "\n", + "Model variant: efficientnetv2-b3\n", + "Train image size: 240\n", + "Eval image size: 300\n", + "\n", + "Model variant: efficientnetv2-s\n", + "Train image size: 300\n", + "Eval image size: 384\n", + "\n", + "Model variant: efficientnetv2-m\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n", + "Model variant: efficientnetv2-l\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "BO-DPHMPJQL6" + }, + "source": [ + "# Collect logits for imagenet-21k pretrained variants:\n", + "MODELS = [\n", + " \"efficientnetv2-s\",\n", + " \"efficientnetv2-m\",\n", + " \"efficientnetv2-l\",\n", + " \"efficientnetv2-xl\",\n", + "]" + ], + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lCDHHSKPJvCO", + "outputId": "21183aac-1c30-4275-fc1b-546a99bcc6ff" + }, + "source": [ + "for name in MODELS:\n", + " tf.keras.backend.clear_session()\n", + " !curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/{name}-21k-ft1k.tgz | tar xzvf -\n", + "\n", + " model = EffNetV2Model(name)\n", + " _ = model(tf.ones([1, model.cfg.eval.isize, model.cfg.eval.isize, 3]), training=False)\n", + " \n", + " ckpt_path = os.path.join(os.getcwd(), f\"{name}-21k-ft1k\")\n", + " if tf.io.gfile.isdir(ckpt_path):\n", + " ckpt_path = 
tf.train.latest_checkpoint(ckpt_path)\n", + " \n", + " model.load_weights(ckpt_path)\n", + "\n", + " print(f\"Model variant: {name}\")\n", + " print(f\"Train image size: {model.cfg.train.isize}\")\n", + " print(f\"Eval image size: {model.cfg.eval.isize}\")\n", + " print()\n", + "\n", + " image = tf.image.decode_jpeg(tf.io.read_file(image_file))\n", + " input_tensor = preprocessing.preprocess_image(\n", + " image,\n", + " image_size=model.cfg.eval.isize,\n", + " is_training=False\n", + " )\n", + " input_tensor = tf.expand_dims(input_tensor, axis=0)\n", + "\n", + " logits = model(input_tensor, training=False)\n", + "\n", + " np.save(\n", + " f\"/content/{name}_{model.cfg.eval.isize}_21k-ft1k_original_logits.npy\", \n", + " logits.numpy()\n", + " ) " + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + "\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0efficientnetv2-s-21k-ft1k/\n", + "efficientnetv2-s-21k-ft1k/model.meta\n", + " 34 303M 34 105M 0 0 49.9M 0 0:00:06 0:00:02 0:00:04 49.9Mefficientnetv2-s-21k-ft1k/model.index\n", + "efficientnetv2-s-21k-ft1k/checkpoint\n", + "efficientnetv2-s-21k-ft1k/model.data-00000-of-00001\n", + "100 303M 100 303M 0 0 54.4M 0 0:00:05 0:00:05 --:--:-- 56.6M\n", + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/training/tracking/util.py:1361: NameBasedSaverStatus.__init__ (from tensorflow.python.training.tracking.util) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. 
Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:tensorflow:From /usr/local/lib/python3.7/dist-packages/tensorflow/python/training/tracking/util.py:1361: NameBasedSaverStatus.__init__ (from tensorflow.python.training.tracking.util) is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model variant: efficientnetv2-s\n", + "Train image size: 300\n", + "Eval image size: 384\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0efficientnetv2-m-21k-ft1k/\n", + " 0 76efficientnetv2-m-21k-ft1k/model.meta\n", + " 43 762M 43 330M 0 0 53.9M 0 0:00:14 0:00:06 0:00:08 54.6Mefficientnetv2-m-21k-ft1k/model.index\n", + "efficientnetv2-m-21k-ft1k/checkpoint\n", + "efficientnetv2-m-21k-ft1k/model.data-00000-of-00001\n", + "100 762M 100 762M 0 0 55.5M 0 0:00:13 0:00:13 --:--:-- 54.4M\n", + "Model variant: efficientnetv2-m\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0efficientnetv2-l-21k-ft1k/\n", + 
"efficientnetv2-l-21k-ft1k/model.meta\n", + " 48 1632M 48 789M 0 0 48.5M 0 0:00:33 0:00:16 0:00:17 50.0Mefficientnetv2-l-21k-ft1k/model.index\n", + "efficientnetv2-l-21k-ft1k/checkpoint\n", + "efficientnetv2-l-21k-ft1k/model.data-00000-of-00001\n", + "100 1632M 100 1632M 0 0 50.5M 0 0:00:32 0:00:32 --:--:-- 44.9M\n", + "Model variant: efficientnetv2-l\n", + "Train image size: 384\n", + "Eval image size: 480\n", + "\n", + " % Total % Received % Xferd Average Speed Time Time Time Current\n", + " Dload Upload Total Spent Left Speed\n", + " 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0efficientnetv2-xl-21k-ft1k/\n", + "efficientnetv2-xl-21k-ft1k/model.meta\n", + " 48 2895M 48 1411M 0 0 49.6M 0 0:00:58 0:00:28 0:00:30 41.6Mefficientnetv2-xl-21k-ft1k/model.index\n", + "efficientnetv2-xl-21k-ft1k/checkpoint\n", + "efficientnetv2-xl-21k-ft1k/model.data-00000-of-00001\n", + "100 2895M 100 2895M 0 0 49.7M 0 0:00:58 0:00:58 --:--:-- 49.3M\n", + "Model variant: efficientnetv2-xl\n", + "Train image size: 384\n", + "Eval image size: 512\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "HStwxESo7_pP" + }, + "source": [ + "# Get extracted feature shape for more sanity checks regarding shape:" + ], + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "PmSLpWvL8FbK" + }, + "source": [ + "# Add print(outputs.shape)\n", + "# to line 474 in effnetv2_models.py" + ], + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "l8MtgRVR8YTW" + }, + "source": [ + "# RESTART RUNTIME" + ], + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "j7zryHw_8fOm", + "outputId": "ec8b55ee-9e05-4db7-c501-ef7351d612b0" + }, + "source": [ + "%cd automl/efficientnetv2/" + ], + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + 
"/content/automl/efficientnetv2\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1hjaq7pR8gTQ" + }, + "source": [ + "import os\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "\n", + "from effnetv2_model import EffNetV2Model\n", + "import preprocessing\n" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "id": "7E1Q1_nh8mEO" + }, + "source": [ + "MODELS = [\n", + " \"efficientnetv2-b0\",\n", + " \"efficientnetv2-b1\",\n", + " \"efficientnetv2-b2\",\n", + " \"efficientnetv2-b3\",\n", + " \"efficientnetv2-s\",\n", + " \"efficientnetv2-m\",\n", + " \"efficientnetv2-l\",\n", + " \"efficientnetv2-xl\",\n", + "]" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "etPQdUlH8IKi", + "outputId": "8955d686-1596-4031-fdbe-e02d23f586fc" + }, + "source": [ + "for name in MODELS:\n", + " tf.keras.backend.clear_session()\n", + " \n", + "\n", + " print(f\"Model variant: {name}\")\n", + " model = EffNetV2Model(name, include_top=False)\n", + " _ = model(tf.ones([1, model.cfg.eval.isize, model.cfg.eval.isize, 3]), training=False)" + ], + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Model variant: efficientnetv2-b0\n", + "(1, 7, 7, 1280)\n", + "Model variant: efficientnetv2-b1\n", + "(1, 8, 8, 1280)\n", + "Model variant: efficientnetv2-b2\n", + "(1, 9, 9, 1408)\n", + "Model variant: efficientnetv2-b3\n", + "(1, 10, 10, 1536)\n", + "Model variant: efficientnetv2-s\n", + "(1, 12, 12, 1280)\n", + "Model variant: efficientnetv2-m\n", + "(1, 15, 15, 1280)\n", + "Model variant: efficientnetv2-l\n", + "(1, 15, 15, 1280)\n", + "Model variant: efficientnetv2-xl\n", + "(1, 16, 16, 1280)\n" + ] + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "B71mPZ9NKaVk", + "outputId": 
"543d58d3-d41a-4d89-da46-8c4d361b234d" + }, + "source": [ + "%cd /content/" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xJL2xU1qKxCC", + "outputId": "f3ff9c30-03bb-4838-9f55-d2cf490291d3" + }, + "source": [ + "!tar czvf efficientnet-v2-outputs.tar.gz *.npy" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "text": [ + "efficientnetv2-b0_224_original_features.npy\n", + "efficientnetv2-b0_224_original_logits.npy\n", + "efficientnetv2-b1_240_original_features.npy\n", + "efficientnetv2-b1_240_original_logits.npy\n", + "efficientnetv2-b2_260_original_features.npy\n", + "efficientnetv2-b2_260_original_logits.npy\n", + "efficientnetv2-b3_300_original_features.npy\n", + "efficientnetv2-b3_300_original_logits.npy\n", + "efficientnetv2-l_480_21k-ft1k_original_features.npy\n", + "efficientnetv2-l_480_21k-ft1k_original_logits.npy\n", + "efficientnetv2-l_480_original_features.npy\n", + "efficientnetv2-l_480_original_logits.npy\n", + "efficientnetv2-m_480_21k-ft1k_original_features.npy\n", + "efficientnetv2-m_480_21k-ft1k_original_logits.npy\n", + "efficientnetv2-m_480_original_features.npy\n", + "efficientnetv2-m_480_original_logits.npy\n", + "efficientnetv2-s_384_21k-ft1k_original_features.npy\n", + "efficientnetv2-s_384_21k-ft1k_original_logits.npy\n", + "efficientnetv2-s_384_original_features.npy\n", + "efficientnetv2-s_384_original_logits.npy\n" + ], + "name": "stdout" + } + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "te_ufFu1K3Co" + }, + "source": [ + "from google.colab import files" + ], + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 17 + }, + "id": "qYdIT5vDK6Yp", + "outputId": 
"2dd5dfb3-07bc-4063-cfed-ffe9010dc805" + }, + "source": [ + "files.download(\"efficientnet-v2-outputs.tar.gz\")" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "\n", + " async function download(id, filename, size) {\n", + " if (!google.colab.kernel.accessAllowed) {\n", + " return;\n", + " }\n", + " const div = document.createElement('div');\n", + " const label = document.createElement('label');\n", + " label.textContent = `Downloading \"${filename}\": `;\n", + " div.appendChild(label);\n", + " const progress = document.createElement('progress');\n", + " progress.max = size;\n", + " div.appendChild(progress);\n", + " document.body.appendChild(div);\n", + "\n", + " const buffers = [];\n", + " let downloaded = 0;\n", + "\n", + " const channel = await google.colab.kernel.comms.open(id);\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + "\n", + " for await (const message of channel.messages) {\n", + " // Send a message to notify the kernel that we're ready.\n", + " channel.send({})\n", + " if (message.buffers) {\n", + " for (const buffer of message.buffers) {\n", + " buffers.push(buffer);\n", + " downloaded += buffer.byteLength;\n", + " progress.value = downloaded;\n", + " }\n", + " }\n", + " }\n", + " const blob = new Blob(buffers, {type: 'application/binary'});\n", + " const a = document.createElement('a');\n", + " a.href = window.URL.createObjectURL(blob);\n", + " a.download = filename;\n", + " div.appendChild(a);\n", + " a.click();\n", + " div.remove();\n", + " }\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + }, + { + "output_type": "display_data", + "data": { + "application/javascript": [ + "download(\"download_2b72a664-c1e3-4a9e-ac98-005f1df17d36\", \"efficientnet-v2-outputs.tar.gz\", 87216)" + ], + "text/plain": [ + "" + ] + }, + "metadata": { + "tags": [] + } + } + ] + }, + { + "cell_type": "code", + 
"metadata": { + "id": "6x16phivK90u" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/efficientnet_v2/__init__.py b/efficientnet_v2/__init__.py index c4505fa..3c54c86 100644 --- a/efficientnet_v2/__init__.py +++ b/efficientnet_v2/__init__.py @@ -8,6 +8,7 @@ EfficientNetV2L, EfficientNetV2M, EfficientNetV2S, + EfficientNetV2XL, ) __all__ = [ @@ -18,4 +19,5 @@ "EfficientNetV2S", "EfficientNetV2M", "EfficientNetV2L", + "EfficientNetV2XL", ] diff --git a/efficientnet_v2/blocks_args.py b/efficientnet_v2/blocks_args.py index bccf4e1..dfd285a 100644 --- a/efficientnet_v2/blocks_args.py +++ b/efficientnet_v2/blocks_args.py @@ -459,4 +459,76 @@ "conv_type": 0, }, ], + "efficientnetv2-xl": [ + { + "conv_type": 1, + "expand_ratio": 1, + "input_filters": 32, + "output_filters": 32, + "kernel_size": 3, + "num_repeat": 4, + "se_ratio": 0, + "strides": 1, + }, + { + "conv_type": 1, + "expand_ratio": 4, + "input_filters": 32, + "output_filters": 64, + "kernel_size": 3, + "num_repeat": 8, + "se_ratio": 0, + "strides": 2, + }, + { + "conv_type": 1, + "expand_ratio": 4, + "input_filters": 64, + "output_filters": 96, + "kernel_size": 3, + "num_repeat": 8, + "se_ratio": 0, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 4, + "input_filters": 96, + "output_filters": 192, + "kernel_size": 3, + "num_repeat": 16, + "se_ratio": 0.25, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 6, + "input_filters": 192, + "output_filters": 256, + "kernel_size": 3, + "num_repeat": 24, + "se_ratio": 0.25, + "strides": 1, + }, + { + "conv_type": 0, + "expand_ratio": 6, + "input_filters": 256, + "output_filters": 512, + "kernel_size": 3, + "num_repeat": 32, + "se_ratio": 0.25, + "strides": 2, + }, + { + "conv_type": 0, + "expand_ratio": 6, + "input_filters": 512, + "output_filters": 640, + "kernel_size": 3, + "num_repeat": 8, + "se_ratio": 0.25, + "strides": 1, + }, + ], } diff --git a/efficientnet_v2/efficientnet_v2.py 
b/efficientnet_v2/efficientnet_v2.py index 0e5ce01..16120e5 100644 --- a/efficientnet_v2/efficientnet_v2.py +++ b/efficientnet_v2/efficientnet_v2.py @@ -1,6 +1,7 @@ """Code for EfficientNetV2 models.""" import copy import math +import sys from typing import Any, Callable, Dict, List, Tuple, Union import tensorflow as tf @@ -12,31 +13,33 @@ from efficientnet_v2.blocks_args import BLOCKS_ARGS BASE_WEIGHTS_URL = ( - "https://github.com/sebastian-sz/efficientnet-v2-keras/releases/download/v1.0/" + "https://github.com/sebastian-sz/efficientnet-v2-keras/releases/download/v2.0/" ) + WEIGHT_HASHES = { - "efficientnetv2-b0.h5": "c6b770b1c8cf213eb1399e9fbedf1871", - "efficientnetv2-b1.h5": "79059d1067a7779887d3859706ef8480", - "efficientnetv2-b2.h5": "b6c5c911b3cd7c8863d2aeb55b8ee1ee", - "efficientnetv2-b3.h5": "e6bc1b2f04140a8eb1bf03d66343ea3a", - "efficientnetv2-s.h5": "f0b49bdc045de8889f35234618edb59f", - "efficientnetv2-m.h5": "9fb1ef92f80797b31fee575d1c0a24fe", - "efficientnetv2-l.h5": "1e5d90cc5102212ba38cd7194c8d97d7", - "efficientnetv2-b0_notop.h5": "8648ed1dd0b260705d02d29f8c651e91", - "efficientnetv2-b1_notop.h5": "b859b006bc3fdbcad68be88c757d1b0a", - "efficientnetv2-b2_notop.h5": "869924ed4837062b6a75f241b87c5afc", - "efficientnetv2-b3_notop.h5": "090dd36d2024381bbbad4f8e4edcc30e", - "efficientnetv2-s_notop.h5": "36cd089046169b7a1a2b3654ec2fa2a8", - "efficientnetv2-m_notop.h5": "87a2dcf21014c367218c8495197fb35c", - "efficientnetv2-l_notop.h5": "71f80290f1ae93e71c9ddd11e05ba721", - "efficientnetv2-s-21k-ft1k.h5": "73d4916795840bb6cc3f1cd109e6858c", - "efficientnetv2-m-21k-ft1k.h5": "7e4671a02dfe2673902f48c371bdbfd1", - "efficientnetv2-l-21k-ft1k.h5": "2ad5eaaf1d1a48b3d7b544f306eaca51", - "efficientnetv2-s-21k-ft1k_notop.h5": "534a11a6a4517d67b4d6dc021e642716", - "efficientnetv2-m-21k-ft1k_notop.h5": "805410db76a6c7ada3202c4b61c40fc4", - "efficientnetv2-l-21k-ft1k_notop.h5": "7a1233fdfe370c2a2e33a1b0af33f000", + "efficientnetv2-b0.h5": 
"040bd13d0e1120f3d3ff64dcb1b311da", + "efficientnetv2-b0_notop.h5": "0ee6a45fb049baaaf5dd710e50828382", + "efficientnetv2-b1.h5": "2e640a47676a72aab97fbcd5cdc5aee5", + "efficientnetv2-b1_notop.h5": "650f09a0e2d4282201b5187ac2709721", + "efficientnetv2-b2.h5": "ff25e799dd33de560322a2f0bfba1b53", + "efficientnetv2-b2_notop.h5": "4236cc709ddb4616c81c877b3f92457f", + "efficientnetv2-b3.h5": "7a9f26b46c88c64a428ca998fa31e9d4", + "efficientnetv2-b3_notop.h5": "cb807fb01931c554fd00ae79d5b9cf4d", + "efficientnetv2-l-21k-ft1k.h5": "78e5ffa224184f1481252a115a5f003d", + "efficientnetv2-l-21k-ft1k_notop.h5": "5a4795a11ae52a7d8626c9e20ba275a5", + "efficientnetv2-l.h5": "25db7bfb451abc977bcc4140c91c4e9e", + "efficientnetv2-l_notop.h5": "451021c40955e974b7627b9e588211a1", + "efficientnetv2-m-21k-ft1k.h5": "8f6f7ca84d948da4b93f4b9053c19413", + "efficientnetv2-m-21k-ft1k_notop.h5": "f670a1cb04aeed321c554c21f219f895", + "efficientnetv2-m.h5": "4766229c2bd41aa09c7271e3c3a5403d", + "efficientnetv2-m_notop.h5": "4bb03763f7be9b3829a3e640c358de17", + "efficientnetv2-s-21k-ft1k.h5": "62a850f1b111c4872277c18d64b928d4", + "efficientnetv2-s-21k-ft1k_notop.h5": "85d8dcc7a63523abea94469b833be01e", + "efficientnetv2-s.h5": "6cb2135fe05dbd9ced79348b8b76f05f", + "efficientnetv2-s_notop.h5": "551df41bf4f0951006926610e93c17c1", + "efficientnetv2-xl-21k-ft1k.h5": "f48b9f1c12effdf9d70a33d81eb9f5ca", + "efficientnetv2-xl-21k-ft1k_notop.h5": "a0cbe206c87e8fafe7434451e5ac79a9", } - CONV_KERNEL_INITIALIZER = { "class_name": "VarianceScaling", "config": {"scale": 2.0, "mode": "fan_out", "distribution": "truncated_normal"}, @@ -341,11 +344,20 @@ def EfficientNetV2( 'If using `weights` as `"imagenet"` or `"imagenet++"` with `include_top`' " as true, `classes` should be 1000" ) - if weights == "imagenet++" and model_name.split("-")[-1] not in {"s", "m", "l"}: + if weights == "imagenet++" and model_name.split("-")[-1] not in { + "s", + "m", + "l", + "xl", + }: raise ValueError( "Weights pretrained on 21k and 
fine tuned on 1k are only" "available for s-m-l model variants." ) + if model_name.split("-")[-1] == "xl" and weights == "imagenet": + raise ValueError( + "This variant has not been released. For XL only imagenet++ is available." + ) # Determine proper input shape input_shape = imagenet_utils.obtain_input_shape( @@ -424,7 +436,7 @@ def round_repeats(repeats): activation=activation, bn_momentum=bn_momentum, survival_probability=drop_connect_rate * b / blocks, - name="block{}{}_".format(i + 1, chr(j + 97)), + name=f"block{i+1}-{j+1:02d}_", **args, ) b += 1 @@ -686,3 +698,38 @@ def EfficientNetV2B3( classifier_activation=classifier_activation, **kwargs, ) + + +def EfficientNetV2XL( + include_top=True, + weights="imagenet", + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + classifier_activation="softmax", + **kwargs, +): + """Create EfficientNetV2 XL variant.""" + # This model is so big that its creation exceeds default recursion limit + current_limit = sys.getrecursionlimit() + target_limit = 2000 + if current_limit < target_limit: + sys.setrecursionlimit(target_limit) + + model = EfficientNetV2( + width_coefficient=1.0, + depth_coefficient=1.0, + default_size=512, + model_name="efficientnetv2-xl", + include_top=include_top, + weights=weights, + input_tensor=input_tensor, + input_shape=input_shape, + pooling=pooling, + classes=classes, + classifier_activation=classifier_activation, + **kwargs, + ) + + return model diff --git a/scripts/convert_all.sh b/scripts/convert_all.sh index eb49b43..54eb371 100644 --- a/scripts/convert_all.sh +++ b/scripts/convert_all.sh @@ -103,6 +103,11 @@ python scripts/convert_weights.py \ --ckpt weights/original_weights/efficientnetv2-l-21k-ft1k \ --output weights/efficientnetv2-l-21k-ft1k.h5 +python scripts/convert_weights.py \ + --model xl \ + --ckpt weights/original_weights/efficientnetv2-xl-21k-ft1k \ + --output weights/efficientnetv2-xl-21k-ft1k.h5 + # Notop python scripts/convert_weights.py \ --model s \ @@ 
-121,3 +126,9 @@ python scripts/convert_weights.py \ --ckpt weights/original_weights/efficientnetv2-l-21k-ft1k \ --output weights/efficientnetv2-l-21k-ft1k_notop.h5 \ --include_top=False + +python scripts/convert_weights.py \ + --model xl \ + --ckpt weights/original_weights/efficientnetv2-xl-21k-ft1k \ + --output weights/efficientnetv2-xl-21k-ft1k_notop.h5 \ + --include_top=False diff --git a/scripts/convert_weights.py b/scripts/convert_weights.py index 8abf8d7..fa3d13b 100644 --- a/scripts/convert_weights.py +++ b/scripts/convert_weights.py @@ -14,6 +14,7 @@ EfficientNetV2L, EfficientNetV2M, EfficientNetV2S, + EfficientNetV2XL, ) FLAGS = flags.FLAGS @@ -49,6 +50,7 @@ def main(argv): "s": (EfficientNetV2S, (384, 384)), "m": (EfficientNetV2M, (480, 480)), "l": (EfficientNetV2L, (480, 480)), + "xl": (EfficientNetV2XL, (512, 512)), } model_fn, input_shape = arg_to_model_and_shape[FLAGS.model] @@ -190,6 +192,8 @@ def _get_blocks_layer_map( tf_block_name = var_name.split("/")[1] keras_block_name = tf_to_keras_block_name_map[tf_block_name] + keras_prefix = None + # 1st blocks only have project layer: if keras_block_name[5] == "1": if layer_type == "conv2d": @@ -234,6 +238,9 @@ def _get_blocks_layer_map( elif layer_type == "tpu_batch_normalization_2": keras_prefix = f"{keras_block_name}_project_bn/" + if keras_prefix is None: + raise ValueError("Variable name could not be matched.") + return {var_name: f"{keras_prefix}{last_name}:0"} diff --git a/scripts/download_all.sh b/scripts/download_all.sh index 7115a95..d8e0289 100644 --- a/scripts/download_all.sh +++ b/scripts/download_all.sh @@ -1,10 +1,11 @@ +curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b0.tgz | tar xz -C weights/original_weights/ +curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b1.tgz | tar xz -C weights/original_weights/ +curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b2.tgz | tar xz 
-C weights/original_weights/ +curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b3.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-s.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-m.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-l.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-s-21k-ft1k.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-m-21k-ft1k.tgz | tar xz -C weights/original_weights/ curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-l-21k-ft1k.tgz | tar xz -C weights/original_weights/ -curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b0.tgz | tar xz -C weights/original_weights/ -curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b1.tgz | tar xz -C weights/original_weights/ -curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b2.tgz | tar xz -C weights/original_weights/ -curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-b3.tgz | tar xz -C weights/original_weights/ +curl https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/v2/efficientnetv2-xl-21k-ft1k.tgz | tar xz -C weights/original_weights/ diff --git a/scripts/generate_weight_hashes.py b/scripts/generate_weight_hashes.py new file mode 100644 index 0000000..bd904e8 --- /dev/null +++ b/scripts/generate_weight_hashes.py @@ -0,0 +1,40 @@ +import hashlib +import json +import os +import pathlib +from argparse import ArgumentParser + +from tensorflow.keras.utils 
import Progbar + + +def parse_args(): + """Parse CLI argument.""" + parser = ArgumentParser() + parser.add_argument("--input", "-i", help="Path to directory with .h5 files.") + return parser.parse_args() + + +def main(): + """Generate md5 file hash for each .h5 file in a given directory.""" + args = parse_args() + + weight_files = sorted([x for x in os.listdir(args.input) if x.endswith(".h5")]) + progbar = Progbar(target=len(weight_files)) + + summary = {} + + for file in weight_files: + path = os.path.join(args.input, file) + file_hash = _md5_hash(path) + summary.update({file: file_hash}) + progbar.add(1) + + print(json.dumps(summary, indent=4)) + + +def _md5_hash(path): + return hashlib.md5(pathlib.Path(path).read_bytes()).hexdigest() + + +if __name__ == "__main__": + main() diff --git a/scripts/sanity_check_conversion.py b/scripts/sanity_check_conversion.py index 62bec2c..41561f6 100644 --- a/scripts/sanity_check_conversion.py +++ b/scripts/sanity_check_conversion.py @@ -15,6 +15,7 @@ EfficientNetV2L, EfficientNetV2M, EfficientNetV2S, + EfficientNetV2XL, ) from root_dir import ROOT_DIR @@ -123,6 +124,17 @@ "efficientnetv2-l_480_original_logits_21k-ft1k.npy", ), }, + { + "testcase_name": "xl-21k-ft1k", + "model_fn": EfficientNetV2XL, + "input_shape": (512, 512), + "weights_path": os.path.join(ROOT_DIR, "weights/efficientnetv2-xl-21k-ft1k.h5"), + "original_outputs": os.path.join( + ROOT_DIR, + "tests/assets/original_outputs/" + "efficientnetv2-xl_512_original_logits_21k-ft1k.npy", + ), + }, ] FEATURE_EXTRACTION_TEST_PARAMS = [ @@ -175,6 +187,15 @@ "weights_path": os.path.join(ROOT_DIR, "weights/efficientnetv2-l_notop.h5"), "expected_feature_shape": (1, 15, 15, 1280), }, + { + "testcase_name": "xl-fe", + "model_fn": EfficientNetV2XL, + "input_shape": (512, 512), + "weights_path": os.path.join( + ROOT_DIR, "weights/efficientnetv2-xl-21k-ft1k_notop.h5" + ), + "expected_feature_shape": (1, 16, 16, 1280), + }, ] diff --git
a/tests/assets/original_outputs/efficientnetv2-xl_512_original_logits_21k-ft1k.npy b/tests/assets/original_outputs/efficientnetv2-xl_512_original_logits_21k-ft1k.npy new file mode 100644 index 0000000..0b12b85 Binary files /dev/null and b/tests/assets/original_outputs/efficientnetv2-xl_512_original_logits_21k-ft1k.npy differ diff --git a/tests/test_efficientnet_v2.py b/tests/test_efficientnet_v2.py index f88bd26..d3f949e 100644 --- a/tests/test_efficientnet_v2.py +++ b/tests/test_efficientnet_v2.py @@ -13,6 +13,7 @@ EfficientNetV2L, EfficientNetV2M, EfficientNetV2S, + EfficientNetV2XL, ) TEST_PARAMS = [ @@ -23,6 +24,7 @@ {"testcase_name": "s", "model_fn": EfficientNetV2S, "input_shape": (384, 384)}, {"testcase_name": "m", "model_fn": EfficientNetV2M, "input_shape": (480, 480)}, {"testcase_name": "l", "model_fn": EfficientNetV2L, "input_shape": (480, 480)}, + {"testcase_name": "xl", "model_fn": EfficientNetV2XL, "input_shape": (512, 512)}, ] @@ -57,6 +59,9 @@ def tearDown(self) -> None: if os.path.exists(self.model_path): os.remove(self.model_path) + def setUp(self): + tf.keras.backend.clear_session() + if __name__ == "__main__": absltest.main() diff --git a/tests/test_onnx_conversion.py b/tests/test_onnx_conversion.py index 797b279..8bd2d62 100644 --- a/tests/test_onnx_conversion.py +++ b/tests/test_onnx_conversion.py @@ -21,6 +21,7 @@ "s": 6.5, "m": 9.5, "l": 14, + "xl": 22, } @@ -41,8 +42,10 @@ def tearDown(self) -> None: def test_model_onnx_conversion( self, model_fn: Callable, input_shape: Tuple[int, int] ): + # Load imagenet++ for XL variant + weights_arg = "imagenet++" if input_shape == (512, 512) else "imagenet" model = model_fn( - weights="imagenet", + weights=weights_arg, input_shape=(*input_shape, 3), classifier_activation=None, ) diff --git a/tests/test_output_consistency.py b/tests/test_output_consistency.py index e5f123b..b29d8e8 100644 --- a/tests/test_output_consistency.py +++ b/tests/test_output_consistency.py @@ -13,6 +13,7 @@ EfficientNetV2L, 
EfficientNetV2M, EfficientNetV2S, + EfficientNetV2XL, ) from root_dir import ROOT_DIR @@ -124,6 +125,17 @@ "efficientnetv2-l_480_original_logits_21k-ft1k.npy", ), }, + { + "testcase_name": "xl-21k-ft1k", + "model_fn": EfficientNetV2XL, + "input_shape": (512, 512), + "weights_arg": "imagenet++", + "original_outputs": os.path.join( + ROOT_DIR, + "tests/assets/original_outputs/" + "efficientnetv2-xl_512_original_logits_21k-ft1k.npy", + ), + }, ] diff --git a/tests/test_tflite_conversion.py b/tests/test_tflite_conversion.py index 0ea9d05..37c5004 100644 --- a/tests/test_tflite_conversion.py +++ b/tests/test_tflite_conversion.py @@ -19,6 +19,7 @@ "s": 5, "m": 7, "l": 11, + "xl": 19, } @@ -37,8 +38,10 @@ def test_tflite_conversion(self, model_fn: Callable, input_shape: Tuple[int, int tf.keras.backend.clear_session() # Comparison will fail with random weights as we are comparing - # very low floats: - model = model_fn(weights="imagenet", input_shape=(*input_shape, 3)) + # very low floats. + # Load XL variant with imagenet++ weights as only these are available. + weights_arg = "imagenet++" if input_shape == (512, 512) else "imagenet" + model = model_fn(weights=weights_arg, input_shape=(*input_shape, 3)) # Skip test if not enough RAM: model_variant = model.name.split("-")[-1]