-
-
Notifications
You must be signed in to change notification settings - Fork 216
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #643 from Anshg07/Ansh
Facial Landmark Detection Using Python's MediaPipe Library
- Loading branch information
Showing
10 changed files
with
575 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import Libraries\n", | ||
"import cv2\n", | ||
"import time\n", | ||
"import mediapipe as mp\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Grabbing the Holistic Model from Mediapipe and\n", | ||
"# Initializing the Model\n", | ||
"mp_holistic = mp.solutions.holistic\n", | ||
"holistic_model = mp_holistic.Holistic(\n", | ||
"\tmin_detection_confidence=0.5,\n", | ||
"\tmin_tracking_confidence=0.5\n", | ||
")\n", | ||
"\n", | ||
"# Initializing the drawing utils for drawing the facial landmarks on image\n", | ||
"mp_drawing = mp.solutions.drawing_utils\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"c:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\google\\protobuf\\symbol_database.py:55: UserWarning: SymbolDatabase.GetPrototype() is deprecated. Please use message_factory.GetMessageClass() instead. SymbolDatabase.GetPrototype() will be removed soon.\n", | ||
" warnings.warn('SymbolDatabase.GetPrototype() is deprecated. Please '\n" | ||
] | ||
}, | ||
{ | ||
"ename": "KeyboardInterrupt", | ||
"evalue": "", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[1;32mIn[4], line 22\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# Making predictions using holistic model\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# To improve performance, optionally mark the image as not writeable to\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# pass by reference.\u001b[39;00m\n\u001b[0;32m 21\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mholistic_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 23\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Converting back the RGB image to BGR\u001b[39;00m\n", | ||
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solutions\\holistic.py:160\u001b[0m, in \u001b[0;36mHolistic.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 137\u001b[0m \u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.\u001b[39;00m\n\u001b[0;32m 138\u001b[0m \n\u001b[0;32m 139\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[38;5;124;03m \"enable_segmentation\" is set to true.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 160\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m landmark \u001b[38;5;129;01min\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks\u001b[38;5;241m.\u001b[39mlandmark: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n", | ||
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solution_base.py:340\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 335\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 336\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 337\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 338\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 340\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 341\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 342\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", | ||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: " | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# (0) in VideoCapture is used to connect to your computer's default camera\n", | ||
"capture = cv2.VideoCapture(0)\n", | ||
"\n", | ||
"# Initializing current time and previous time for calculating the FPS\n", | ||
"previousTime = 0\n", | ||
"currentTime = 0\n", | ||
"\n", | ||
"while capture.isOpened():\n", | ||
"\t# capture frame by frame\n", | ||
"\tret, frame = capture.read()\n", | ||
"\n", | ||
"\t# resizing the frame for better view\n", | ||
"\tframe = cv2.resize(frame, (800, 600))\n", | ||
"\n", | ||
"\t# Converting the from BGR to RGB\n", | ||
"\timage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", | ||
"\n", | ||
"\t# Making predictions using holistic model\n", | ||
"\t# To improve performance, optionally mark the image as not writeable to\n", | ||
"\t# pass by reference.\n", | ||
"\timage.flags.writeable = False\n", | ||
"\tresults = holistic_model.process(image)\n", | ||
"\timage.flags.writeable = True\n", | ||
"\n", | ||
"\t# Converting back the RGB image to BGR\n", | ||
"\timage = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n", | ||
"\n", | ||
"\t# Drawing the Facial Landmarks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage,\n", | ||
"\tresults.face_landmarks,\n", | ||
"\tmp_holistic.FACEMESH_CONTOURS,\n", | ||
"\tmp_drawing.DrawingSpec(\n", | ||
"\t\tcolor=(255,0,255),\n", | ||
"\t\tthickness=1,\n", | ||
"\t\tcircle_radius=1\n", | ||
"\t),\n", | ||
"\tmp_drawing.DrawingSpec(\n", | ||
"\t\tcolor=(0,255,255),\n", | ||
"\t\tthickness=1,\n", | ||
"\t\tcircle_radius=1\n", | ||
"\t)\n", | ||
"\t)\n", | ||
"\n", | ||
"\t# Drawing Right hand Land Marks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage, \n", | ||
"\tresults.right_hand_landmarks, \n", | ||
"\tmp_holistic.HAND_CONNECTIONS\n", | ||
"\t)\n", | ||
"\n", | ||
"\t# Drawing Left hand Land Marks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage, \n", | ||
"\tresults.left_hand_landmarks, \n", | ||
"\tmp_holistic.HAND_CONNECTIONS\n", | ||
"\t)\n", | ||
"\t\n", | ||
"\t# Calculating the FPS\n", | ||
"\tcurrentTime = time.time()\n", | ||
"\tfps = 1 / (currentTime-previousTime)\n", | ||
"\tpreviousTime = currentTime\n", | ||
"\t\n", | ||
"\t# Displaying FPS on the image\n", | ||
"\tcv2.putText(image, str(int(fps))+\" FPS\", (10, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)\n", | ||
"\n", | ||
"\t# Display the resulting image\n", | ||
"\tcv2.imshow(\"Facial and Hand Landmarks\", image)\n", | ||
"\n", | ||
"\t# Enter key 'q' to break the loop\n", | ||
"\tif cv2.waitKey(5) & 0xFF == ord('q'):\n", | ||
"\t\tbreak\n", | ||
"\n", | ||
"# When all the process is done\n", | ||
"# Release the capture and destroy all windows\n", | ||
"capture.release()\n", | ||
"cv2.destroyAllWindows()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Facial Landmark Detection | ||
|
||
## Project Overview | ||
Facial Landmark Detection is a sophisticated application that utilizes cutting-edge computer vision technologies to detect and visualize facial landmarks in real-time. Built using the Streamlit framework, this application employs the MediaPipe library, renowned for its robust facial recognition capabilities. This project aims to provide an interactive interface where users can see the detected facial landmarks overlaid on their face using a live video feed from their webcam. | ||
|
||
## Features | ||
- **Real-time Detection**: Utilizes the webcam to detect facial landmarks in real-time, providing immediate visual feedback. | ||
- **High Accuracy**: Leverages MediaPipe's FaceMesh model, which is known for its high accuracy in detecting multiple facial landmarks. | ||
- **User-Friendly Interface**: Built with Streamlit, the application offers a clean and navigable interface that is easy for users to operate. | ||
|
||
## Screenshots | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/3706a046-2f71-47e3-b7cb-caed404b5906) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/46c409d6-bcd9-4d17-8924-6cd79e8a782d) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/b9036098-274d-431b-b40f-ca02278a6b76) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/e954ce1c-d8b3-4b74-be11-3fff37e95420) | ||
|
||
## Installation | ||
Before running this application, ensure you have Python installed on your computer. Follow these steps to set up the environment: | ||
|
||
1. **Clone the Repository**: First, clone this repository to your local machine using Git commands: | ||
```bash | ||
git clone <repository-url> | ||
``` | ||
2. **Install Dependencies**: Navigate to the cloned directory and install the necessary Python libraries using pip: | ||
```bash | ||
pip install streamlit opencv-python numpy mediapipe Pillow streamlit-webrtc | ||
``` | ||
|
||
## Usage | ||
To use the application, follow these steps: | ||
|
||
1. **Start the Application**: In your terminal, navigate to the project directory and execute the following command: | ||
```bash | ||
streamlit run app.py | ||
``` | ||
2. **Access the Application**: The application will automatically open in your default web browser. If it does not, you can manually access it by visiting `http://localhost:8501` in your browser. | ||
3. **Interact with the App**: Enable your webcam when prompted and observe the facial landmark detection in real-time. The application will display the facial landmarks as overlays on your live video feed. | ||
|
||
## Contributing | ||
We encourage contributions from the community, whether it's fixing bugs, improving the documentation, or suggesting new features. Here's how you can contribute: | ||
|
||
1. **Fork the Repository**: Fork the project to your GitHub account. | ||
2. **Create a Feature Branch**: Create a new branch for each feature or improvement. | ||
3. **Send a Pull Request**: After you've completed your changes, send a pull request from your feature branch. Please provide a clear description of the problem and solution, including any relevant issues. | ||
## Acknowledgments | ||
- **MediaPipe**: For providing the powerful FaceMesh technology. | ||
- **Streamlit**: For the intuitive framework that powers the application's frontend. |
Binary file added
BIN
+1.8 MB
Facial Landmark Detection/images/pexels-david-garrison-1128051-2128807.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+1.35 MB
Facial Landmark Detection/images/pexels-italo-melo-881954-2379005.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
## Facial Landmark Detection | ||
|
||
### Goal 🎯 | ||
The main goal of this project is to provide a real-time facial landmark detection system. The application uses advanced computer vision techniques to identify and overlay facial landmarks on the user's face during a live video feed or on uploaded images. This can be useful for various applications including augmented reality, facial recognition, and more. | ||
|
||
### Model(s) used for the Web App 🧮 | ||
The backend of this web application leverages the MediaPipe FaceMesh model for facial landmark detection. MediaPipe offers state-of-the-art speed and accuracy in detecting multiple facial landmarks across different face types. The application is built in Python using the Streamlit framework, facilitating a seamless integration of the ML model with the web interface. | ||
|
||
### Video Demonstration 🎥 | ||
|
||
|
||
https://github.com/Anshg07/ML-Crate/assets/96684989/1a0117e1-91a8-4f13-a3d4-452301547896 | ||
|
||
|
||
### Signature ✒️ | ||
Ansh Gupta | ||
|
||
- LinkedIn: [Ansh Gupta](https://www.linkedin.com/in/ansh-ml/) | ||
- X (Twitter): [anshgupta001](https://twitter.com/anshgupta001) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# STEP 1 | ||
import streamlit as st | ||
import cv2 | ||
import numpy as np | ||
from PIL import Image | ||
import mediapipe as mp | ||
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase | ||
|
||
# Module-level MediaPipe setup, shared by the live-video transformer and the
# still-photo detector below.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Single-face FaceMesh model; refined landmarks add iris/lip detail.
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Thin lines / small dots so the tesselation does not obscure the face.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
|
||
# STEP 2 | ||
class VideoTransformer(VideoTransformerBase):
    """Per-frame transformer for streamlit-webrtc: overlays FaceMesh landmarks
    on each frame of the live webcam stream."""

    def transform(self, frame):
        # Decode the incoming frame, then move to RGB for FaceMesh.
        bgr = frame.to_ndarray(format="bgr24")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # Run landmark detection on the RGB frame.
        results = face_mesh.process(rgb)

        # Draw the tesselation for every detected face (at most one, per the
        # module-level FaceMesh configuration).
        for landmarks in (results.multi_face_landmarks or []):
            mp_drawing.draw_landmarks(
                rgb,
                landmarks,
                mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_spec,
            )

        # The WebRTC pipeline expects BGR frames back.
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
||
# STEP 3
def detect_facial_landmarks(image):
    """Detect facial landmarks on a still image and return an annotated copy.

    Args:
        image: A PIL image as produced by ``Image.open`` (any mode).

    Returns:
        An RGB ``numpy.ndarray`` with the FaceMesh tesselation drawn on it,
        suitable for direct display via ``st.image``.
    """
    # Normalize to 3-channel RGB first: PIL may hand us RGBA (PNG with alpha)
    # or palette images, which would break downstream processing.
    rgb = np.array(image.convert("RGB"))

    # BUGFIX: the previous version converted to BGR before calling
    # ``face_mesh.process``; MediaPipe solutions expect RGB input, so the
    # model was seeing channel-swapped frames. Process the RGB array directly.
    results = face_mesh.process(rgb)

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            mp_drawing.draw_landmarks(
                rgb, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_spec)

    # Already RGB — no back-conversion needed before returning.
    return rgb
|
||
# STEP 4 | ||
def main(): | ||
st.title('Face Mask and Landmark Detection App') | ||
|
||
option = st.sidebar.selectbox('Choose the App Mode:', ['Documentation','Live Video', 'Photo']) | ||
|
||
if option == 'Photo': | ||
image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg']) | ||
if image_file is not None: | ||
image = Image.open(image_file) | ||
st.image(image, caption='Uploaded Image', use_column_width=True) | ||
if st.button('Detect Landmarks'): | ||
result_img = detect_facial_landmarks(image) | ||
st.image(result_img, use_column_width=True) | ||
|
||
elif option == 'Live Video': | ||
st.header("Live Facial Landmark Detection") | ||
st.write("This will use your webcam to detect facial landmarks.") | ||
webrtc_streamer(key="example", video_transformer_factory=VideoTransformer) | ||
|
||
elif option == 'Documentation': | ||
st.header('Documentation') | ||
st.subheader('Facial Landmark Detection App Documentation') | ||
st.markdown(''' | ||
#### Overview | ||
This application utilizes MediaPipe and Streamlit to perform real-time facial landmark detection. Users can see their facial landmarks overlaid on their video feed in real-time. | ||
Options can be chosen from the provided SideBar. | ||
#### How to Install and Run the Application | ||
1. **Install Required Libraries**: You need to have Python installed on your system. Install the required Python libraries using pip: | ||
``` | ||
pip install streamlit cv2 numpy Pillow mediapipe streamlit_webrtc | ||
``` | ||
2. **Run the Application**: Navigate to the directory containing the `app.py` file and run the following command: | ||
``` | ||
streamlit run app.py | ||
``` | ||
3. **Access the Application**: Open your web browser and go to `http://localhost:8501`. The application should be running and ready to use. | ||
#### How It Works | ||
- **Video Streaming**: Once the application is running, it will access your webcam. Make sure you permit the browser to use your webcam. | ||
- **Facial Landmark Detection**: The application processes each video frame to detect facial landmarks using MediaPipe's FaceMesh model. Detected landmarks are then drawn directly on the video feed, providing a visual representation of the face structure in real-time. | ||
#### Use Cases | ||
This tool can be used for various purposes, including: | ||
- Augmented reality development. | ||
- Facial recognition projects. | ||
- Studies and applications in human-computer interaction. | ||
''') | ||
|
||
if __name__ == "__main__": | ||
main() |