-
-
Notifications
You must be signed in to change notification settings - Fork 216
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #643 from Anshg07/Ansh
Facial Landmark Detection Using Python's MediaPipe Library
- Loading branch information
Showing
10 changed files
with
575 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Import Libraries\n", | ||
"import cv2\n", | ||
"import time\n", | ||
"import mediapipe as mp\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Grabbing the Holistic Model from Mediapipe and\n", | ||
"# Initializing the Model\n", | ||
"mp_holistic = mp.solutions.holistic\n", | ||
"holistic_model = mp_holistic.Holistic(\n", | ||
"\tmin_detection_confidence=0.5,\n", | ||
"\tmin_tracking_confidence=0.5\n", | ||
")\n", | ||
"\n", | ||
"# Initializing the drawing utils for drawing the facial landmarks on image\n", | ||
"mp_drawing = mp.solutions.drawing_utils\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"c:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\google\\protobuf\\symbol_database.py:55: UserWarning: SymbolDatabase.GetPrototype() is deprecated. Please use message_factory.GetMessageClass() instead. SymbolDatabase.GetPrototype() will be removed soon.\n", | ||
" warnings.warn('SymbolDatabase.GetPrototype() is deprecated. Please '\n" | ||
] | ||
}, | ||
{ | ||
"ename": "KeyboardInterrupt", | ||
"evalue": "", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[1;32mIn[4], line 22\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# Making predictions using holistic model\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# To improve performance, optionally mark the image as not writeable to\u001b[39;00m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;66;03m# pass by reference.\u001b[39;00m\n\u001b[0;32m 21\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m---> 22\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mholistic_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43mimage\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 23\u001b[0m image\u001b[38;5;241m.\u001b[39mflags\u001b[38;5;241m.\u001b[39mwriteable \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;66;03m# Converting back the RGB image to BGR\u001b[39;00m\n", | ||
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solutions\\holistic.py:160\u001b[0m, in \u001b[0;36mHolistic.process\u001b[1;34m(self, image)\u001b[0m\n\u001b[0;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess\u001b[39m(\u001b[38;5;28mself\u001b[39m, image: np\u001b[38;5;241m.\u001b[39mndarray) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NamedTuple:\n\u001b[0;32m 137\u001b[0m \u001b[38;5;124;03m\"\"\"Processes an RGB image and returns the pose landmarks, left and right hand landmarks, and face landmarks on the most prominent person detected.\u001b[39;00m\n\u001b[0;32m 138\u001b[0m \n\u001b[0;32m 139\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[38;5;124;03m \"enable_segmentation\" is set to true.\u001b[39;00m\n\u001b[0;32m 158\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 160\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess\u001b[49m\u001b[43m(\u001b[49m\u001b[43minput_data\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mimage\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mimage\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 161\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n\u001b[0;32m 162\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m landmark \u001b[38;5;129;01min\u001b[39;00m results\u001b[38;5;241m.\u001b[39mpose_landmarks\u001b[38;5;241m.\u001b[39mlandmark: \u001b[38;5;66;03m# pytype: disable=attribute-error\u001b[39;00m\n", | ||
"File \u001b[1;32mc:\\Users\\ANSH GUPTA\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\mediapipe\\python\\solution_base.py:340\u001b[0m, in \u001b[0;36mSolutionBase.process\u001b[1;34m(self, input_data)\u001b[0m\n\u001b[0;32m 334\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 335\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_graph\u001b[38;5;241m.\u001b[39madd_packet_to_input_stream(\n\u001b[0;32m 336\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream_name,\n\u001b[0;32m 337\u001b[0m packet\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_packet(input_stream_type,\n\u001b[0;32m 338\u001b[0m data)\u001b[38;5;241m.\u001b[39mat(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_simulated_timestamp))\n\u001b[1;32m--> 340\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_graph\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait_until_idle\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 341\u001b[0m \u001b[38;5;66;03m# Create a NamedTuple object where the field names are mapping to the graph\u001b[39;00m\n\u001b[0;32m 342\u001b[0m \u001b[38;5;66;03m# output stream names.\u001b[39;00m\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_output_stream_type_info \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", | ||
"\u001b[1;31mKeyboardInterrupt\u001b[0m: " | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# (0) in VideoCapture is used to connect to your computer's default camera\n", | ||
"capture = cv2.VideoCapture(0)\n", | ||
"\n", | ||
"# Initializing current time and previous time for calculating the FPS\n", | ||
"previousTime = 0\n", | ||
"currentTime = 0\n", | ||
"\n", | ||
"while capture.isOpened():\n", | ||
"\t# capture frame by frame\n", | ||
"\tret, frame = capture.read()\n", | ||
"\n", | ||
"\t# resizing the frame for better view\n", | ||
"\tframe = cv2.resize(frame, (800, 600))\n", | ||
"\n", | ||
"\t# Converting the from BGR to RGB\n", | ||
"\timage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n", | ||
"\n", | ||
"\t# Making predictions using holistic model\n", | ||
"\t# To improve performance, optionally mark the image as not writeable to\n", | ||
"\t# pass by reference.\n", | ||
"\timage.flags.writeable = False\n", | ||
"\tresults = holistic_model.process(image)\n", | ||
"\timage.flags.writeable = True\n", | ||
"\n", | ||
"\t# Converting back the RGB image to BGR\n", | ||
"\timage = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)\n", | ||
"\n", | ||
"\t# Drawing the Facial Landmarks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage,\n", | ||
"\tresults.face_landmarks,\n", | ||
"\tmp_holistic.FACEMESH_CONTOURS,\n", | ||
"\tmp_drawing.DrawingSpec(\n", | ||
"\t\tcolor=(255,0,255),\n", | ||
"\t\tthickness=1,\n", | ||
"\t\tcircle_radius=1\n", | ||
"\t),\n", | ||
"\tmp_drawing.DrawingSpec(\n", | ||
"\t\tcolor=(0,255,255),\n", | ||
"\t\tthickness=1,\n", | ||
"\t\tcircle_radius=1\n", | ||
"\t)\n", | ||
"\t)\n", | ||
"\n", | ||
"\t# Drawing Right hand Land Marks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage, \n", | ||
"\tresults.right_hand_landmarks, \n", | ||
"\tmp_holistic.HAND_CONNECTIONS\n", | ||
"\t)\n", | ||
"\n", | ||
"\t# Drawing Left hand Land Marks\n", | ||
"\tmp_drawing.draw_landmarks(\n", | ||
"\timage, \n", | ||
"\tresults.left_hand_landmarks, \n", | ||
"\tmp_holistic.HAND_CONNECTIONS\n", | ||
"\t)\n", | ||
"\t\n", | ||
"\t# Calculating the FPS\n", | ||
"\tcurrentTime = time.time()\n", | ||
"\tfps = 1 / (currentTime-previousTime)\n", | ||
"\tpreviousTime = currentTime\n", | ||
"\t\n", | ||
"\t# Displaying FPS on the image\n", | ||
"\tcv2.putText(image, str(int(fps))+\" FPS\", (10, 70), cv2.FONT_HERSHEY_COMPLEX, 1, (0,255,0), 2)\n", | ||
"\n", | ||
"\t# Display the resulting image\n", | ||
"\tcv2.imshow(\"Facial and Hand Landmarks\", image)\n", | ||
"\n", | ||
"\t# Enter key 'q' to break the loop\n", | ||
"\tif cv2.waitKey(5) & 0xFF == ord('q'):\n", | ||
"\t\tbreak\n", | ||
"\n", | ||
"# When all the process is done\n", | ||
"# Release the capture and destroy all windows\n", | ||
"capture.release()\n", | ||
"cv2.destroyAllWindows()\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.4" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# Facial Landmark Detection | ||
|
||
## Project Overview | ||
Facial Landmark Detection is a sophisticated application that utilizes cutting-edge computer vision technologies to detect and visualize facial landmarks in real-time. Built using the Streamlit framework, this application employs the MediaPipe library, renowned for its robust facial recognition capabilities. This project aims to provide an interactive interface where users can see the detected facial landmarks overlaid on their face using a live video feed from their webcam. | ||
|
||
## Features | ||
- **Real-time Detection**: Utilizes the webcam to detect facial landmarks in real-time, providing immediate visual feedback. | ||
- **High Accuracy**: Leverages MediaPipe's FaceMesh model, which is known for its high accuracy in detecting multiple facial landmarks. | ||
- **User-Friendly Interface**: Built with Streamlit, the application offers a clean and navigable interface that is easy for users to operate. | ||
|
||
## Screenshots | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/3706a046-2f71-47e3-b7cb-caed404b5906) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/46c409d6-bcd9-4d17-8924-6cd79e8a782d) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/b9036098-274d-431b-b40f-ca02278a6b76) | ||
![image](https://github.com/Anshg07/ML-Crate/assets/96684989/e954ce1c-d8b3-4b74-be11-3fff37e95420) | ||
|
||
## Installation | ||
Before running this application, ensure you have Python installed on your computer. Follow these steps to set up the environment: | ||
|
||
1. **Clone the Repository**: First, clone this repository to your local machine using Git commands: | ||
```bash | ||
git clone <repository-url> | ||
``` | ||
2. **Install Dependencies**: Navigate to the cloned directory and install the necessary Python libraries using pip: | ||
```bash | ||
pip install streamlit opencv-python numpy mediapipe Pillow streamlit-webrtc | ||
``` | ||
|
||
## Usage | ||
To use the application, follow these steps: | ||
|
||
1. **Start the Application**: In your terminal, navigate to the project directory and execute the following command: | ||
```bash | ||
streamlit run app.py | ||
``` | ||
2. **Access the Application**: The application will automatically open in your default web browser. If it does not, you can manually access it by visiting `http://localhost:8501` in your browser. | ||
3. **Interact with the App**: Enable your webcam when prompted and observe the facial landmark detection in real-time. The application will display the facial landmarks as overlays on your live video feed. | ||
|
||
## Contributing | ||
We encourage contributions from the community, whether it's fixing bugs, improving the documentation, or suggesting new features. Here's how you can contribute: | ||
|
||
1. **Fork the Repository**: Fork the project to your GitHub account. | ||
2. **Create a Feature Branch**: Create a new branch for each feature or improvement. | ||
3. **Send a Pull Request**: After you've completed your changes, send a pull request from your feature branch. Please provide a clear description of the problem and solution, including any relevant issues. | ||
## Acknowledgments | ||
- **MediaPipe**: For providing the powerful FaceMesh technology. | ||
- **Streamlit**: For the intuitive framework that powers the application's frontend. |
Binary file added
BIN
+1.8 MB
Facial Landmark Detection/images/pexels-david-garrison-1128051-2128807.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added
BIN
+1.35 MB
Facial Landmark Detection/images/pexels-italo-melo-881954-2379005.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
## Facial Landmark Detection | ||
|
||
### Goal 🎯 | ||
The main goal of this project is to provide a real-time facial landmark detection system. The application uses advanced computer vision techniques to identify and overlay facial landmarks on the user's face during a live video feed or on uploaded images. This can be useful for various applications including augmented reality, facial recognition, and more. | ||
|
||
### Model(s) used for the Web App 🧮 | ||
The backend of this web application leverages the MediaPipe FaceMesh model for facial landmark detection. MediaPipe offers state-of-the-art speed and accuracy in detecting multiple facial landmarks across different face types. The application is built in Python using the Streamlit framework, facilitating a seamless integration of the ML model with the web interface. | ||
|
||
### Video Demonstration 🎥 | ||
|
||
|
||
https://github.com/Anshg07/ML-Crate/assets/96684989/1a0117e1-91a8-4f13-a3d4-452301547896 | ||
|
||
|
||
### Signature ✒️ | ||
Ansh Gupta | ||
|
||
- LinkedIn: [Ansh Gupta](https://www.linkedin.com/in/ansh-ml/) | ||
- X (Twitter): [anshgupta001](https://twitter.com/anshgupta001) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# STEP 1 | ||
import streamlit as st | ||
import cv2 | ||
import numpy as np | ||
from PIL import Image | ||
import mediapipe as mp | ||
from streamlit_webrtc import webrtc_streamer, VideoTransformerBase | ||
|
||
# Module-level MediaPipe setup, shared by the live-video transformer and the
# still-photo detector below.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils

# Single-face FaceMesh model; refined landmarks add iris/lip detail.
face_mesh = mp_face_mesh.FaceMesh(
    max_num_faces=1,
    refine_landmarks=True,
    min_detection_confidence=0.5,
    min_tracking_confidence=0.5,
)

# Thin lines / small dots so the tesselation does not obscure the face.
drawing_spec = mp_drawing.DrawingSpec(thickness=1, circle_radius=1)
|
||
# STEP 2 | ||
class VideoTransformer(VideoTransformerBase):
    """Per-frame transformer for streamlit-webrtc: overlays FaceMesh landmarks
    on each frame of the live webcam stream."""

    def transform(self, frame):
        # Decode the incoming frame, then move to RGB for FaceMesh.
        bgr = frame.to_ndarray(format="bgr24")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # Run landmark detection on the RGB frame.
        results = face_mesh.process(rgb)

        # Draw the tesselation for every detected face (at most one, per the
        # module-level FaceMesh configuration).
        for landmarks in (results.multi_face_landmarks or []):
            mp_drawing.draw_landmarks(
                rgb,
                landmarks,
                mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_spec,
            )

        # The WebRTC pipeline expects BGR frames back.
        return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
|
||
# STEP 3
def detect_facial_landmarks(image):
    """Detect facial landmarks on a still image and return an annotated copy.

    Args:
        image: A PIL image as produced by ``Image.open`` (any mode).

    Returns:
        An RGB ``numpy.ndarray`` with the FaceMesh tesselation drawn on it,
        suitable for direct display via ``st.image``.
    """
    # Normalize to 3-channel RGB first: PIL may hand us RGBA (PNG with alpha)
    # or palette images, which would break downstream processing.
    rgb = np.array(image.convert("RGB"))

    # BUGFIX: the previous version converted to BGR before calling
    # ``face_mesh.process``; MediaPipe solutions expect RGB input, so the
    # model was seeing channel-swapped frames. Process the RGB array directly.
    results = face_mesh.process(rgb)

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            mp_drawing.draw_landmarks(
                rgb, face_landmarks, mp_face_mesh.FACEMESH_TESSELATION,
                landmark_drawing_spec=None,
                connection_drawing_spec=drawing_spec)

    # Already RGB — no back-conversion needed before returning.
    return rgb
|
||
# STEP 4 | ||
def main(): | ||
st.title('Face Mask and Landmark Detection App') | ||
|
||
option = st.sidebar.selectbox('Choose the App Mode:', ['Documentation','Live Video', 'Photo']) | ||
|
||
if option == 'Photo': | ||
image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg']) | ||
if image_file is not None: | ||
image = Image.open(image_file) | ||
st.image(image, caption='Uploaded Image', use_column_width=True) | ||
if st.button('Detect Landmarks'): | ||
result_img = detect_facial_landmarks(image) | ||
st.image(result_img, use_column_width=True) | ||
|
||
elif option == 'Live Video': | ||
st.header("Live Facial Landmark Detection") | ||
st.write("This will use your webcam to detect facial landmarks.") | ||
webrtc_streamer(key="example", video_transformer_factory=VideoTransformer) | ||
|
||
elif option == 'Documentation': | ||
st.header('Documentation') | ||
st.subheader('Facial Landmark Detection App Documentation') | ||
st.markdown(''' | ||
#### Overview | ||
This application utilizes MediaPipe and Streamlit to perform real-time facial landmark detection. Users can see their facial landmarks overlaid on their video feed in real-time. | ||
Options can be chosen from the provided SideBar. | ||
#### How to Install and Run the Application | ||
1. **Install Required Libraries**: You need to have Python installed on your system. Install the required Python libraries using pip: | ||
``` | ||
pip install streamlit cv2 numpy Pillow mediapipe streamlit_webrtc | ||
``` | ||
2. **Run the Application**: Navigate to the directory containing the `app.py` file and run the following command: | ||
``` | ||
streamlit run app.py | ||
``` | ||
3. **Access the Application**: Open your web browser and go to `http://localhost:8501`. The application should be running and ready to use. | ||
#### How It Works | ||
- **Video Streaming**: Once the application is running, it will access your webcam. Make sure you permit the browser to use your webcam. | ||
- **Facial Landmark Detection**: The application processes each video frame to detect facial landmarks using MediaPipe's FaceMesh model. Detected landmarks are then drawn directly on the video feed, providing a visual representation of the face structure in real-time. | ||
#### Use Cases | ||
This tool can be used for various purposes, including: | ||
- Augmented reality development. | ||
- Facial recognition projects. | ||
- Studies and applications in human-computer interaction. | ||
''') | ||
|
||
if __name__ == "__main__": | ||
main() |