From 7bdb6f9c109ec655bd61b69e334681474a5c763e Mon Sep 17 00:00:00 2001 From: x4nth055 Date: Sun, 12 Feb 2023 11:50:46 +0100 Subject: [PATCH] add image segmentation using transformers tutorial --- README.md | 1 + ...eSegmentationTransformers_PythonCode.ipynb | 1522 +++++++++++++++++ .../image-segmentation-transformers/README.md | 1 + .../image_segmentation_transformers.py | 190 ++ .../requirements.txt | 6 + 5 files changed, 1720 insertions(+) create mode 100644 machine-learning/image-segmentation-transformers/ImageSegmentationTransformers_PythonCode.ipynb create mode 100644 machine-learning/image-segmentation-transformers/README.md create mode 100644 machine-learning/image-segmentation-transformers/image_segmentation_transformers.py create mode 100644 machine-learning/image-segmentation-transformers/requirements.txt diff --git a/README.md b/README.md index 98f92f9f..3bb8c9c9 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy - [Gender Detection using OpenCV in Python](https://www.thepythoncode.com/article/gender-detection-using-opencv-in-python). ([code](machine-learning/face-gender-detection)) - [Age and Gender Detection using OpenCV in Python](https://www.thepythoncode.com/article/gender-and-age-detection-using-opencv-python). ([code](machine-learning/age-and-gender-detection)) - [Satellite Image Classification using TensorFlow in Python](https://www.thepythoncode.com/article/satellite-image-classification-using-tensorflow-python). ([code](machine-learning/satellite-image-classification)) + - [How to Perform Image Segmentation using Transformers in Python](https://www.thepythoncode.com/article/image-segmentation-using-huggingface-transformers-python). ([code](machine-learning/image-segmentation-transformers)) - [Building a Speech Emotion Recognizer using Scikit-learn](https://www.thepythoncode.com/article/building-a-speech-emotion-recognizer-using-sklearn). ([code](machine-learning/speech-emotion-recognition)) - [How to Convert Speech to Text in Python](https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python). ([code](machine-learning/speech-recognition)) - [Top 8 Python Libraries For Data Scientists and Machine Learning Engineers](https://www.thepythoncode.com/article/top-python-libraries-for-data-scientists). 
diff --git a/machine-learning/image-segmentation-transformers/ImageSegmentationTransformers_PythonCode.ipynb b/machine-learning/image-segmentation-transformers/ImageSegmentationTransformers_PythonCode.ipynb new file mode 100644 index 00000000..6a538d89 --- /dev/null +++ b/machine-learning/image-segmentation-transformers/ImageSegmentationTransformers_PythonCode.ipynb @@ -0,0 +1,1522 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "M1D2lpUcGw5h" + }, + "source": [ + "# Set up environment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MI-dYn9_7dLR", + "outputId": "8551fc6d-9f07-477b-e133-1bd2ad91bf52" + }, + "outputs": [], + "source": [ + "!pip install transformers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qT7r3OZBIw7T" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "# !pip3 install transformers\n", + "clear_output()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HfZ_GJZwJmFB" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import torch\n", + "import torch.nn.functional as F\n", + "from torchvision import transforms\n", + "from transformers import pipeline, SegformerImageProcessor, SegformerForSemanticSegmentation\n", + "import requests\n", + "from PIL import Image\n", + "import urllib.parse as parse\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iE1u564q7yPB" + }, + "outputs": [], + "source": [ + "# a function to determine whether a string is a URL or not\n", + "def is_url(string):\n", + " try:\n", + " result = parse.urlparse(string)\n", + " return all([result.scheme, result.netloc, result.path])\n", + " except:\n", + " return False\n", + "\n", + "# a function to load an image\n", + "def load_image(image_path):\n", + " \"\"\"Helper function to load images from their URLs or paths.\"\"\"\n", + " if is_url(image_path):\n", + " return Image.open(requests.get(image_path, stream=True).raw)\n", + " elif os.path.exists(image_path):\n", + " return Image.open(image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y1SmhZoYOrvy" + }, + "source": [ + "# Load Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "29ZtvebWB9_b" + }, + "outputs": [], + "source": [ + "img_path = \"https://shorthaircatbreeds.com/wp-content/uploads/2020/06/Urban-cat-crossing-a-road-300x180.jpg\"\n", + "image = load_image(img_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "id": "OJklTNORvBXR", + "outputId": "0eb0d627-31cd-42bb-c8a6-a1e2a48aa119" + }, + "outputs": [], + "source": [ + "image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "87AL5K_BFYYR", + "outputId": "f20f1494-8c1a-4bb4-88e1-c246fa67500c" + }, + "outputs": [], + "source": [ + "# convert PIL Image to pytorch tensors\n", + "transform = transforms.ToTensor()\n", + "image_tensor = image.convert(\"RGB\")\n", + "image_tensor = transform(image_tensor)\n", + "image_tensor.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "moDfKkvUOuRo" + }, + "source": [ + "# Helper functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": 
{ + "id": "grSPZVnO3lNf" + }, + "outputs": [], + "source": [ + "def color_palette():\n", + " \"\"\"Color palette to map each class to its corresponding color.\"\"\"\n", + " return [[0, 128, 128],\n", + " [255, 170, 0],\n", + " [161, 19, 46],\n", + " [118, 171, 47],\n", + " [255, 255, 0],\n", + " [84, 170, 127],\n", + " [170, 84, 127],\n", + " [33, 138, 200],\n", + " [255, 84, 0],\n", + " [255, 140, 208]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L2DFsquBZyum" + }, + "outputs": [], + "source": [ + "def overlay_segments(image, seg_mask):\n", + " \"\"\"Return different segments predicted by the model overlaid on image.\"\"\"\n", + " H, W = seg_mask.shape\n", + " image_mask = np.zeros((H, W, 3), dtype=np.uint8)\n", + " colors = np.array(color_palette())\n", + "\n", + " # convert to a pytorch tensor if seg_mask is not one already\n", + " seg_mask = seg_mask if torch.is_tensor(seg_mask) else torch.tensor(seg_mask)\n", + " unique_labels = torch.unique(seg_mask)\n", + "\n", + " # map each segment label to a unique color\n", + " for i, label in enumerate(unique_labels):\n", + " image_mask[seg_mask == label.item(), :] = colors[i]\n", + "\n", + " image = np.array(image)\n", + " # weight of the original image in the final overlaid image\n", + " img_weight = 0.5 \n", + "\n", + " # overlay input image and the generated segment mask\n", + " img = img_weight * np.array(image) * 255 + (1 - img_weight) * image_mask\n", + "\n", + " return img.astype(np.uint8)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pxbZmVr2FBY7" + }, + "outputs": [], + "source": [ + "def replace_label(mask, label):\n", + " \"\"\"Replace the 255 values in the segment mask with the given label.\"\"\"\n", + " mask = np.array(mask)\n", + " mask[mask == 255] = label\n", + " return mask" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7LL5Mt9FG4FW" + }, + "source": [ + "# Image segmentation using Hugging Face Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 202, + "referenced_widgets": [ + "727fe3b012cb41ca9915de7877f41000", + "f933278fef544c2eba9f23a8e4c44ab0", + "b19fb1fc738a441d9b4051f1523dfe35", + "426b5ccc8526443589dac6d9535250b8", + "68c265c4342a41538e06444b4d3fcdfa", + "f74fb33b9b624ec48e468819f842f33b", + "00d8e22338254bd08321ca9d45acfa48", + "bf841c0b892c49b08f3a959e21107491", + "f816dbb4c1fa4f2887159b73a35b4dce", + "424b9cd5ff92404da08cf585d0fd1b5e", + "4464b8197aa8455b8693ef65fadac239", + "fa98530ac49245889046ab71969e6052", + "397fd67f8acc477781754a049ae215ab", + "4876eac9b98d4f74bb78c0e41c8a443c", + "d49071172b274c3bb9bf70724b0467f4", + "f7279e6ea98d4e1cb4870664c260c10a", + "a6f96bc4fa1b4566a29a6ed7cd65f10a", + "30f2e53a9c1e47a6a3403d2a86fdf4b9", + "8dccab6a3b034408b6ff2d11d08d5d76", + "9f37ec0be4734cca9c4cd3aa92e8c7ee", + "57876779d2bf45d2ab4063ed6b0efa42", + "2330b62b6c784f65ba2dabe6cdfbd4ea", + "ff8e193d67494a4f88f4b9e2deedcbb2", + "91afa17c14d544179e614d0d49effafc", + "2645a760797841f0a5db81f98b4be5bf", + "b084a5a86c4a4bfcb8b4622bea6b7093", + "32f5b18a9b5442169cb00d73135f94d1", + "c546bf524eb34a1e9c7d9f4437e359fc", + "8f3ca8d642424f8094b37bf294126197", + "9fb792eabce24f08acaccbb68d95346b", + "4d27fbddf49a4a789bd1c5cf9ab15f50", + "ac119c7197dd4ac8af81519726bc507a", + "5ab141e6b2274e128598a014ba8d901a" + ] + }, + "id": "C3c1JHC6FuEU", + "outputId": "129e69e9-5d0c-46b3-ab24-ad7c90f86b7b" + }, + "outputs": [], + "source": [ + "# load the 
entire image segmentation pipeline\n", + "img_segmentation_pipeline = pipeline('image-segmentation', \n", + " model=\"nvidia/segformer-b5-finetuned-ade-640-640\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "C8tbGPylZywm", + "outputId": "8f32fffe-3281-4d89-b90f-00e47b7edb4f" + }, + "outputs": [], + "source": [ + "output = img_segmentation_pipeline(image)\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "id": "3GO1nJBQUn1g", + "outputId": "c9d21459-f65e-4b3a-caed-52dadc514b9c" + }, + "outputs": [], + "source": [ + "output[0]['mask']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "id": "CEL1NkYP8P0J", + "outputId": "6b50e3e8-9e8c-4f94-8730-74987610db68" + }, + "outputs": [], + "source": [ + "output[2]['mask']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "voaoIa8kh1Yk" + }, + "outputs": [], + "source": [ + "# merge the masks of all predicted segments into a single segmentation mask\n", + "W, H = image.size\n", + "segmentation_mask = np.zeros((H, W), dtype=np.uint8)\n", + "\n", + "for i in range(len(output)):\n", + " segmentation_mask += replace_label(output[i]['mask'], i)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 197 + }, + "id": "0XLcHLqN7JnE", + "outputId": "afb65063-c882-490c-d04e-c3be05481001" + }, + "outputs": [], + "source": [ + "# overlay the predicted segmentation masks on the original image\n", + "segmented_img = overlay_segments(image_tensor.permute(1, 2, 0), segmentation_mask)\n", + "\n", + "# convert to PIL Image\n", + "Image.fromarray(segmented_img)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MRtEM9s5G-Jm" + }, + "source": [ + "# Image segmentation using custom Hugging Face models" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o0IbIPmm-VNp" + }, + "outputs": [], + "source": [ + "# load the feature extractor (to preprocess images) and the model (to get outputs)\n", + "feature_extractor = SegformerImageProcessor.from_pretrained(\"nvidia/segformer-b5-finetuned-ade-640-640\")\n", + "model = SegformerForSemanticSegmentation.from_pretrained(\"nvidia/segformer-b5-finetuned-ade-640-640\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2mVAr5UX8xLp" + }, + "outputs": [], + "source": [ + "def to_tensor(image):\n", + " \"\"\"Convert PIL Image to pytorch tensor.\"\"\"\n", + " transform = transforms.ToTensor()\n", + " image_tensor = image.convert(\"RGB\")\n", + " image_tensor = transform(image_tensor)\n", + " return image_tensor\n", + "\n", + "# a function that takes an image and returns the segmented image\n", + "def get_segmented_image(model, feature_extractor, image_path):\n", + " \"\"\"Return the input image with the predicted segments overlaid on it.\"\"\"\n", + " # load the image\n", + " image = load_image(image_path)\n", + " # preprocess input\n", + " inputs = feature_extractor(images=image, return_tensors=\"pt\")\n", + " # convert to pytorch tensor\n", + " image_tensor = to_tensor(image)\n", + " # pass the processed input to the model\n", + " outputs = model(**inputs)\n", + " print(\"outputs.logits.shape:\", 
outputs.logits.shape)\n", + " # interpolate output logits to the same shape as the input image\n", + " upsampled_logits = F.interpolate(\n", + " outputs.logits, # tensor to be interpolated\n", + " size=image_tensor.shape[1:], # output size we want\n", + " mode='bilinear', # do bilinear interpolation\n", + " align_corners=False)\n", + "\n", + " # get the class with max probabilities\n", + " segmentation_mask = upsampled_logits.argmax(dim=1)[0]\n", + " print(f\"{segmentation_mask.shape=}\")\n", + " # get the segmented image\n", + " segmented_img = overlay_segments(image_tensor.permute(1, 2, 0), segmentation_mask)\n", + " # convert to PIL Image\n", + " return Image.fromarray(segmented_img)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 232 + }, + "id": "ADSXrFO4ADon", + "outputId": "e5435612-ccdf-4f97-996b-b6ed196ec24a" + }, + "outputs": [], + "source": [ + "get_segmented_image(model, feature_extractor, \"https://shorthaircatbreeds.com/wp-content/uploads/2020/06/Urban-cat-crossing-a-road-300x180.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 532 + }, + "id": "f0v_SEfjAGER", + "outputId": "c88ec523-8c72-4efd-c8bc-60816fb54b96" + }, + "outputs": [], + "source": [ + "get_segmented_image(model, feature_extractor, \"http://images.cocodataset.org/test-stuff2017/000000000001.jpg\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CDd7PM8wHz-Y" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.12" + }, + "vscode": { + "interpreter": { + "hash": "f89a88aed07bbcd763ac68893150ace71e487877d8c6527a76855322f20001c6" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "00d8e22338254bd08321ca9d45acfa48": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2330b62b6c784f65ba2dabe6cdfbd4ea": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "2645a760797841f0a5db81f98b4be5bf": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_9fb792eabce24f08acaccbb68d95346b", + "max": 271, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4d27fbddf49a4a789bd1c5cf9ab15f50", + "value": 271 + } + }, + "30f2e53a9c1e47a6a3403d2a86fdf4b9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "32f5b18a9b5442169cb00d73135f94d1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "397fd67f8acc477781754a049ae215ab": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_a6f96bc4fa1b4566a29a6ed7cd65f10a", + "placeholder": "​", + "style": "IPY_MODEL_30f2e53a9c1e47a6a3403d2a86fdf4b9", + "value": "Downloading (…)"pytorch_model.bin";: 100%" + } + }, + "424b9cd5ff92404da08cf585d0fd1b5e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, 
+ "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "426b5ccc8526443589dac6d9535250b8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_424b9cd5ff92404da08cf585d0fd1b5e", + "placeholder": "​", + "style": "IPY_MODEL_4464b8197aa8455b8693ef65fadac239", + "value": " 6.89k/6.89k [00:00<00:00, 220kB/s]" + } + }, + "4464b8197aa8455b8693ef65fadac239": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "4876eac9b98d4f74bb78c0e41c8a443c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8dccab6a3b034408b6ff2d11d08d5d76", + "max": 339299397, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_9f37ec0be4734cca9c4cd3aa92e8c7ee", + "value": 339299397 + } + }, + "4d27fbddf49a4a789bd1c5cf9ab15f50": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "57876779d2bf45d2ab4063ed6b0efa42": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + 
"margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5ab141e6b2274e128598a014ba8d901a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "68c265c4342a41538e06444b4d3fcdfa": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "727fe3b012cb41ca9915de7877f41000": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_f933278fef544c2eba9f23a8e4c44ab0", + "IPY_MODEL_b19fb1fc738a441d9b4051f1523dfe35", + "IPY_MODEL_426b5ccc8526443589dac6d9535250b8" + ], + "layout": "IPY_MODEL_68c265c4342a41538e06444b4d3fcdfa" + } + }, + "8dccab6a3b034408b6ff2d11d08d5d76": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8f3ca8d642424f8094b37bf294126197": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "91afa17c14d544179e614d0d49effafc": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c546bf524eb34a1e9c7d9f4437e359fc", + "placeholder": "​", + "style": "IPY_MODEL_8f3ca8d642424f8094b37bf294126197", + "value": "Downloading (…)rocessor_config.json: 100%" + } + }, + "9f37ec0be4734cca9c4cd3aa92e8c7ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "9fb792eabce24f08acaccbb68d95346b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "a6f96bc4fa1b4566a29a6ed7cd65f10a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": 
"LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ac119c7197dd4ac8af81519726bc507a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b084a5a86c4a4bfcb8b4622bea6b7093": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ac119c7197dd4ac8af81519726bc507a", + "placeholder": "​", + "style": "IPY_MODEL_5ab141e6b2274e128598a014ba8d901a", + "value": " 271/271 [00:00<00:00, 5.11kB/s]" + } + }, + "b19fb1fc738a441d9b4051f1523dfe35": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_bf841c0b892c49b08f3a959e21107491", + "max": 6886, + "min": 0, + "orientation": "horizontal", 
+ "style": "IPY_MODEL_f816dbb4c1fa4f2887159b73a35b4dce", + "value": 6886 + } + }, + "bf841c0b892c49b08f3a959e21107491": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c546bf524eb34a1e9c7d9f4437e359fc": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d49071172b274c3bb9bf70724b0467f4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_57876779d2bf45d2ab4063ed6b0efa42", + "placeholder": "​", + "style": "IPY_MODEL_2330b62b6c784f65ba2dabe6cdfbd4ea", + "value": " 339M/339M [00:04<00:00, 85.7MB/s]" + } + }, + "f7279e6ea98d4e1cb4870664c260c10a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f74fb33b9b624ec48e468819f842f33b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f816dbb4c1fa4f2887159b73a35b4dce": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "f933278fef544c2eba9f23a8e4c44ab0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f74fb33b9b624ec48e468819f842f33b", + "placeholder": "​", + "style": "IPY_MODEL_00d8e22338254bd08321ca9d45acfa48", + "value": "Downloading (…)lve/main/config.json: 100%" + } + }, + "fa98530ac49245889046ab71969e6052": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + 
"_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_397fd67f8acc477781754a049ae215ab", + "IPY_MODEL_4876eac9b98d4f74bb78c0e41c8a443c", + "IPY_MODEL_d49071172b274c3bb9bf70724b0467f4" + ], + "layout": "IPY_MODEL_f7279e6ea98d4e1cb4870664c260c10a" + } + }, + "ff8e193d67494a4f88f4b9e2deedcbb2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_91afa17c14d544179e614d0d49effafc", + "IPY_MODEL_2645a760797841f0a5db81f98b4be5bf", + "IPY_MODEL_b084a5a86c4a4bfcb8b4622bea6b7093" + ], + "layout": "IPY_MODEL_32f5b18a9b5442169cb00d73135f94d1" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/machine-learning/image-segmentation-transformers/README.md b/machine-learning/image-segmentation-transformers/README.md new file mode 100644 index 00000000..04376921 --- /dev/null +++ b/machine-learning/image-segmentation-transformers/README.md @@ -0,0 +1 @@ +# [How to Perform Image Segmentation using Transformers in Python](https://www.thepythoncode.com/article/image-segmentation-using-huggingface-transformers-python) \ No newline at end of file diff --git a/machine-learning/image-segmentation-transformers/image_segmentation_transformers.py b/machine-learning/image-segmentation-transformers/image_segmentation_transformers.py new file mode 100644 index 00000000..28772220 --- /dev/null +++ b/machine-learning/image-segmentation-transformers/image_segmentation_transformers.py @@ -0,0 +1,190 @@ +# %% [markdown] +# # Set up environment + +# %% +!pip install transformers + +# %% +from IPython.display import clear_output +# !pip3 install transformers +clear_output() + +# %% +import numpy as np +import torch +import torch.nn.functional as F +from torchvision import transforms +from transformers import pipeline, SegformerImageProcessor, SegformerForSemanticSegmentation +import requests +from PIL import Image +import urllib.parse as parse +import os + +# %% +# a function to determine whether a string is a URL or not +def is_url(string): + try: + result = parse.urlparse(string) + return all([result.scheme, result.netloc, result.path]) + except: + return False + +# a function to load an image +def load_image(image_path): + """Helper function to load images from their URLs or paths.""" + if is_url(image_path): + return Image.open(requests.get(image_path, stream=True).raw) + elif os.path.exists(image_path): + return Image.open(image_path) + +# %% [markdown] +# # Load Image + +# %% +img_path = "https://shorthaircatbreeds.com/wp-content/uploads/2020/06/Urban-cat-crossing-a-road-300x180.jpg" +image = load_image(img_path) + +# %% +image + +# %% +# convert PIL Image to pytorch tensors +transform = transforms.ToTensor() +image_tensor = image.convert("RGB") +image_tensor = transform(image_tensor) +image_tensor.shape + +# %% [markdown] +# # Helper functions + +# %% +def color_palette(): + """Color palette to map each class to its corresponding color.""" + return [[0, 128, 128], + [255, 170, 
0], + [161, 19, 46], + [118, 171, 47], + [255, 255, 0], + [84, 170, 127], + [170, 84, 127], + [33, 138, 200], + [255, 84, 0], + [255, 140, 208]] + +# %% +def overlay_segments(image, seg_mask): + """Return different segments predicted by the model overlaid on image.""" + H, W = seg_mask.shape + image_mask = np.zeros((H, W, 3), dtype=np.uint8) + colors = np.array(color_palette()) + + # convert to a pytorch tensor if seg_mask is not one already + seg_mask = seg_mask if torch.is_tensor(seg_mask) else torch.tensor(seg_mask) + unique_labels = torch.unique(seg_mask) + + # map each segment label to a unique color + for i, label in enumerate(unique_labels): + image_mask[seg_mask == label.item(), :] = colors[i] + + image = np.array(image) + # weight of the original image in the final overlaid image + img_weight = 0.5 + + # overlay input image and the generated segment mask + img = img_weight * np.array(image) * 255 + (1 - img_weight) * image_mask + + return img.astype(np.uint8) + +# %% +def replace_label(mask, label): + """Replace the 255 values in the segment mask with the given label.""" + mask = np.array(mask) + mask[mask == 255] = label + return mask + +# %% [markdown] +# # Image segmentation using Hugging Face Pipeline + +# %% +# load the entire image segmentation pipeline +img_segmentation_pipeline = pipeline('image-segmentation', + model="nvidia/segformer-b5-finetuned-ade-640-640") + +# %% +output = img_segmentation_pipeline(image) +output + +# %% +output[0]['mask'] + +# %% +output[2]['mask'] + +# %% +# merge the masks of all predicted segments into a single segmentation mask +W, H = image.size +segmentation_mask = np.zeros((H, W), dtype=np.uint8) + +for i in range(len(output)): + segmentation_mask += replace_label(output[i]['mask'], i) + +# %% +# overlay the predicted segmentation masks on the original image +segmented_img = overlay_segments(image_tensor.permute(1, 2, 0), segmentation_mask) + +# convert to PIL Image +Image.fromarray(segmented_img) + +# %% [markdown] +# # Image segmentation using custom Hugging Face models + +# %% +# load the feature extractor (to preprocess images) and the model (to get outputs) +feature_extractor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640") +model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b5-finetuned-ade-640-640") + +# %% +def to_tensor(image): + """Convert PIL Image to pytorch tensor.""" + transform = transforms.ToTensor() + image_tensor = image.convert("RGB") + image_tensor = transform(image_tensor) + return image_tensor + +# a function that takes an image and returns the segmented image +def get_segmented_image(model, feature_extractor, image_path): + """Return the input image with the predicted segments overlaid on it.""" + # load the image + image = load_image(image_path) + # preprocess input + inputs = feature_extractor(images=image, return_tensors="pt") + # convert to pytorch tensor + image_tensor = to_tensor(image) + # pass the processed input to the model + outputs = model(**inputs) + print("outputs.logits.shape:", outputs.logits.shape) + # interpolate output logits to the same shape as the input image + upsampled_logits = F.interpolate( + outputs.logits, # tensor to be interpolated + size=image_tensor.shape[1:], # output size we want + mode='bilinear', # do bilinear interpolation + align_corners=False) + + # get the class with max probabilities + segmentation_mask = upsampled_logits.argmax(dim=1)[0] + print(f"{segmentation_mask.shape=}") + # get the segmented image + segmented_img = 
overlay_segments(image_tensor.permute(1, 2, 0), segmentation_mask) + # convert to PIL Image + return Image.fromarray(segmented_img) + +# %% +get_segmented_image(model, feature_extractor, "https://shorthaircatbreeds.com/wp-content/uploads/2020/06/Urban-cat-crossing-a-road-300x180.jpg") + +# %% +get_segmented_image(model, feature_extractor, "http://images.cocodataset.org/test-stuff2017/000000000001.jpg") + +# %% + + + diff --git a/machine-learning/image-segmentation-transformers/requirements.txt b/machine-learning/image-segmentation-transformers/requirements.txt new file mode 100644 index 00000000..0c1d9a5d --- /dev/null +++ b/machine-learning/image-segmentation-transformers/requirements.txt @@ -0,0 +1,6 @@ +requests +Pillow +numpy +torch +torchvision +transformers \ No newline at end of file