Facial Landmark Detection Using Python's MediaPipe Library #643

Merged · 8 commits · Jun 15, 2024
169 changes: 169 additions & 0 deletions Facial Landmark Detection/Model/code_v2.ipynb
@@ -0,0 +1,169 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Import Libraries\n",
"import cv2\n",
"import time\n",
"import mediapipe as mp\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Grabbing the Holistic Model from Mediapipe and\n",
"# Initializing the Model\n",
"mp_holistic = mp.solutions.holistic\n",
"holistic_model = mp_holistic.Holistic(\n",
"\tmin_detection_confidence=0.5,\n",
"\tmin_tracking_confidence=0.5\n",
")\n",
"\n",
"# Initializing the drawing utils for drawing the facial landmarks on image\n",
"mp_drawing = mp.solutions.drawing_utils\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\google\\protobuf\\symbol_database.py:55: UserWarning: SymbolDatabase.GetPrototype() is deprecated. Please use message_factory.GetMessageClass() instead. SymbolDatabase.GetPrototype() will be removed soon.\n",
" warnings.warn('SymbolDatabase.GetPrototype() is deprecated. Please '\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[4], line 22\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# Making predictions using holistic model\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# To improve performance, optionally mark the image as not writeable to\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# pass by reference.\u001b[39;00m\n\u001b[0;32m 21\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mholistic_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 23\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Converting back the RGB image to BGR\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solutions\\holistic.py:160\u001b[0m, in \u001b[0;36mHolistic.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 137\u001b[0m \u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.\u001b[39;00m\n\u001b[0;32m 138\u001b[0m \n\u001b[0;32m 139\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[38;5;124;03m \"enable_segmentation\" is set to true.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 160\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m landmark \u001b[38;5;129;01min\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks\u001b[38;5;241m.\u001b[39mlandmark: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n",
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solution_base.py:340\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 335\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 336\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 337\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 338\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 340\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 341\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 342\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [
"# (0) in VideoCapture is used to connect to your computer's default camera\n",
"capture = cv2.VideoCapture(0)\n",
"\n",
"# Initializing current time and previous time for calculating the FPS\n",
"previousTime = 0\n",
"currentTime = 0\n",
"\n",
"while capture.isOpened():\n",
"\t# Capture frame by frame; stop if the camera returns no frame\n",
"\tret, frame = capture.read()\n",
"\tif not ret:\n",
"\t\tbreak\n",
"\n",
"\t# Resizing the frame for better view\n",
"\tframe = cv2.resize(frame, (800, 600))\n",
"\n",
"\t# Converting the frame from BGR to RGB\n",
"\timage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n",
"\n",
"\t# Making predictions using holistic model\n",
"\t# To improve performance, optionally mark the image as not writeable to\n",
"\t# pass by reference.\n",
"\timage.flags.writeable = False\n",
"\tresults = holistic_model.process(image)\n",
"\timage.flags.writeable = True\n",
"\n",
"\t# Converting back the RGB image to BGR\n",
"\timage = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n",
"\n",
"\t# Drawing the Facial Landmarks\n",
"\tmp_drawing.draw_landmarks(\n",
"\timage,\n",
"\tresults.face_landmarks,\n",
"\tmp_holistic.FACEMESH_CONTOURS,\n",
"\tmp_drawing.DrawingSpec(\n",
"\t\tcolor=(255,0,255),\n",
"\t\tthickness=1,\n",
"\t\tcircle_radius=1\n",
"\t),\n",
"\tmp_drawing.DrawingSpec(\n",
"\t\tcolor=(0,255,255),\n",
"\t\tthickness=1,\n",
"\t\tcircle_radius=1\n",
"\t)\n",
"\t)\n",
"\n",
"\t# Drawing Right hand Land Marks\n",
"\tmp_drawing.draw_landmarks(\n",
"\timage, \n",
"\tresults.right_hand_landmarks, \n",
"\tmp_holistic.HAND_CONNECTIONS\n",
"\t)\n",
"\n",
"\t# Drawing Left hand Land Marks\n",
"\tmp_drawing.draw_landmarks(\n",
"\timage, \n",
"\tresults.left_hand_landmarks, \n",
"\tmp_holistic.HAND_CONNECTIONS\n",
"\t)\n",
"\t\n",
"\t# Calculating the FPS\n",
"\tcurrentTime = time.time()\n",
"\tfps = 1 / (currentTime-previousTime)\n",
"\tpreviousTime = currentTime\n",
"\t\n",
"\t# Displaying FPS on the image\n",
"\tcv2.putText(image, str(int(fps))+\" FPS\", (10, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)\n",
"\n",
"\t# Display the resulting image\n",
"\tcv2.imshow(\"Facial and Hand Landmarks\", image)\n",
"\n",
"\t# Enter key 'q' to break the loop\n",
"\tif cv2.waitKey(5) & 0xFF == ord('q'):\n",
"\t\tbreak\n",
"\n",
"# When all the process is done\n",
"# Release the capture and destroy all windows\n",
"capture.release()\n",
"cv2.destroyAllWindows()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
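
The notebook above only draws the detected landmarks. If the raw coordinates are needed for downstream processing, the sketch below shows one way to read them from a Holistic result; it assumes `results` and `image` come from the capture loop above, and the small print limit is purely illustrative.

```python
# Minimal sketch: convert normalized Holistic face landmarks to pixel coordinates.
# Assumes `results` and `image` are produced inside the capture loop above.
if results.face_landmarks:
    height, width, _ = image.shape
    for idx, landmark in enumerate(results.face_landmarks.landmark):
        # Landmarks are normalized to [0, 1]; scale them to the frame size
        x_px = int(landmark.x * width)
        y_px = int(landmark.y * height)
        if idx < 5:  # print only the first few points to keep the output readable
            print(f"landmark {idx}: ({x_px}, {y_px})")
```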
236 changes: 236 additions & 0 deletions Facial Landmark Detection/Model/source_code.ipynb

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions Facial Landmark Detection/README.md
@@ -0,0 +1,48 @@
# Facial Landmark Detection

## Project Overview
Facial Landmark Detection is an application that detects and visualizes facial landmarks in real time. Built with the Streamlit framework, it uses the MediaPipe library's face-tracking models to provide an interactive interface where users can see the detected landmarks overlaid on their face in a live webcam feed.

## Features
- **Real-time Detection**: Utilizes the webcam to detect facial landmarks in real-time, providing immediate visual feedback.
- **High Accuracy**: Leverages MediaPipe's FaceMesh model, which is known for its high accuracy in detecting multiple facial landmarks.
- **User-Friendly Interface**: Built with Streamlit, the application offers a clean and navigable interface that is easy for users to operate.

## Screenshots
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/3706a046-2f71-47e3-b7cb-caed404b5906)
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/46c409d6-bcd9-4d17-8924-6cd79e8a782d)
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/b9036098-274d-431b-b40f-ca02278a6b76)
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/e954ce1c-d8b3-4b74-be11-3fff37e95420)

## Installation
Before running this application, ensure you have Python installed on your computer. Follow these steps to set up the environment:

1. **Clone the Repository**: First, clone this repository to your local machine using Git commands:
```bash
git clone <repository-url>
```
2. **Install Dependencies**: Navigate to the cloned directory and install the necessary Python libraries using pip:
```bash
pip install streamlit opencv-python numpy mediapipe Pillow streamlit-webrtc
```
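3. **Verify the Installation (optional)**: A quick import check (a minimal sketch; it only confirms the packages import and prints their reported versions) can be run before launching the app:
```python
# Sanity check: these imports should succeed if the installation worked
import cv2
import mediapipe as mp
import streamlit as st

print("OpenCV:", cv2.__version__)
print("MediaPipe:", mp.__version__)
print("Streamlit:", st.__version__)
```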

## Usage
To use the application, follow these steps:

1. **Start the Application**: In your terminal, navigate to the project directory and execute the following command:
```bash
streamlit run app.py
```
2. **Access the Application**: The application will automatically open in your default web browser. If it does not, you can manually access it by visiting `http://localhost:8501` in your browser.
3. **Interact with the App**: Enable your webcam when prompted and observe the facial landmark detection in real-time. The application will display the facial landmarks as overlays on your live video feed.

## Contributing
We encourage contributions from the community, whether it's fixing bugs, improving the documentation, or suggesting new features. Here's how you can contribute:

1. **Fork the Repository**: Fork the project to your GitHub account.
2. **Create a Feature Branch**: Create a new branch for each feature or improvement.
3. **Send a Pull Request**: After you've completed your changes, send a pull request from your feature branch. Please provide a clear description of the problem and solution, including any relevant issues.

## Acknowledgments
- **MediaPipe**: For providing the powerful FaceMesh technology.
- **Streamlit**: For the intuitive framework that powers the application's frontend.
Binary file added Facial Landmark Detection/requirements.txt
Binary file not shown.
19 changes: 19 additions & 0 deletions Facial Landmark Detection/webapp/README.md
@@ -0,0 +1,19 @@
## Facial Landmark Detection

### Goal 🎯
The main goal of this project is to provide a real-time facial landmark detection system. The application uses advanced computer vision techniques to identify and overlay facial landmarks on the user's face during a live video feed or on uploaded images. This can be useful for various applications including augmented reality, facial recognition, and more.

### Model(s) used for the Web App 🧮
The backend of this web application leverages the MediaPipe FaceMesh model for facial landmark detection. MediaPipe offers state-of-the-art speed and accuracy in detecting multiple facial landmarks across different face types. The application is built in Python using the Streamlit framework, facilitating a seamless integration of the ML model with the web interface.
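
For reference, a minimal sketch of how the FaceMesh backend is typically invoked on a single image is shown below; the input file name `face.jpg`, the output file name, and the drawing style are illustrative assumptions rather than part of this web app.

```python
import cv2
import mediapipe as mp

mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Read an image and convert BGR (OpenCV's default) to RGB (what FaceMesh expects)
image_bgr = cv2.imread("face.jpg")  # hypothetical input file
image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# static_image_mode=True treats the input as an independent photo, not a video stream
with mp_face_mesh.FaceMesh(static_image_mode=True,
                           max_num_faces=1,
                           refine_landmarks=True,
                           min_detection_confidence=0.5) as face_mesh:
    results = face_mesh.process(image_rgb)

# Draw the detected landmark mesh onto the original BGR image and save it
if results.multi_face_landmarks:
    for face_landmarks in results.multi_face_landmarks:
        mp_drawing.draw_landmarks(
            image_bgr, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
            landmark_drawing_spec=None,
            connection_drawing_spec=mp_drawing.DrawingSpec(thickness=1, circle_radius=1))
    cv2.imwrite("face_landmarks.jpg", image_bgr)
```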

### Video Demonstration 🎥


https://github.com/Anshg07/ML-Crate/assets/96684989/1a0117e1-91a8-4f13-a3d4-452301547896


### Signature ✒️
Ansh Gupta

- LinkedIn: [Ansh Gupta](https://www.linkedin.com/in/ansh-ml/)
- X (Twitter): [anshgupta001](https://twitter.com/anshgupta001)
103 changes: 103 additions & 0 deletions Facial Landmark Detection/webapp/app.py
@@ -0,0 +1,103 @@
# STEP 1: Import dependencies
import streamlit as st
import cv2
import numpy as np
from PIL import Image
import mediapipe as mp
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase

# Initialize MediaPipe FaceMesh
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5)
mp_drawing = mp.solutions.drawing_utils
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)

# STEP 2: Video transformer that draws facial landmarks on live webcam frames
class VideoTransformer(VideoTransformerBase):
    def transform(self, frame):
        image = frame.to_ndarray(format="bgr24")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Perform facial landmark detection
        results = face_mesh.process(image)

        # Draw facial landmarks
        if results.multi_face_landmarks:
            for face_landmarks in results.multi_face_landmarks:
                mp_drawing.draw_landmarks(
                    image, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
                    landmark_drawing_spec=None,
                    connection_drawing_spec=drawing_spec)

        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

# STEP 3: Landmark detection for uploaded still images
def detect_facial_landmarks(image):
    # PIL images are already RGB, which is the color order FaceMesh expects
    image = np.array(image)
    results = face_mesh.process(image)

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            mp_drawing.draw_landmarks(
                image, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_spec)

    return image

# STEP 4: Streamlit user interface
def main():
    st.title('Facial Landmark Detection App')

    option = st.sidebar.selectbox('Choose the App Mode:', ['Documentation', 'Live Video', 'Photo'])

    if option == 'Photo':
        image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg'])
        if image_file is not None:
            image = Image.open(image_file)
            st.image(image, caption='Uploaded Image', use_column_width=True)
            if st.button('Detect Landmarks'):
                result_img = detect_facial_landmarks(image)
                st.image(result_img, use_column_width=True)

    elif option == 'Live Video':
        st.header("Live Facial Landmark Detection")
        st.write("This will use your webcam to detect facial landmarks.")
        webrtc_streamer(key="example", video_transformer_factory=VideoTransformer)

    elif option == 'Documentation':
        st.header('Documentation')
        st.subheader('Facial Landmark Detection App Documentation')
        st.markdown('''
#### Overview
This application uses MediaPipe and Streamlit to perform real-time facial landmark detection. Users can see their facial landmarks overlaid on their video feed in real time.
Options can be chosen from the sidebar.
#### How to Install and Run the Application
1. **Install Required Libraries**: You need to have Python installed on your system. Install the required Python libraries using pip:
```
pip install streamlit opencv-python numpy Pillow mediapipe streamlit-webrtc
```
2. **Run the Application**: Navigate to the directory containing the `app.py` file and run the following command:
```
streamlit run app.py
```
3. **Access the Application**: Open your web browser and go to `http://localhost:8501`. The application should be running and ready to use.

#### How It Works
- **Video Streaming**: Once the application is running, it will access your webcam. Make sure you permit the browser to use your webcam.
- **Facial Landmark Detection**: The application processes each video frame to detect facial landmarks using MediaPipe's FaceMesh model. Detected landmarks are then drawn directly on the video feed, providing a visual representation of the face structure in real time.

#### Use Cases
This tool can be used for various purposes, including:
- Augmented reality development.
- Facial recognition projects.
- Studies and applications in human-computer interaction.
''')

if __name__ == "__main__":
    main()