SAMLabels2Yolov8TargetModel.py
import os
import supervision as sv
import cv2
import shutil
# Name of the product folder under Data
product_folder = 'Carne'
# Get current working directory
HOME = os.getcwd()
# Get the path of the data
DATA = os.path.join(HOME, 'Data')
IMAGES = os.path.join(DATA, product_folder, 'Images')
# List the images and print the image count
image_paths = sv.list_files_with_extensions(
    directory=IMAGES,
    extensions=["png", "jpg", "bmp"])
print('image count:', len(image_paths))
# Plot a sample of the images
SAMPLE_SIZE = 6
SAMPLE_GRID_SIZE = (2, 3)
SAMPLE_PLOT_SIZE = (4, 6)
titles = [
    image_path.stem
    for image_path
    in image_paths[:SAMPLE_SIZE]]
images = [
    cv2.imread(str(image_path))
    for image_path
    in image_paths[:SAMPLE_SIZE]]
sv.plot_images_grid(
    images=images,
    titles=titles,
    grid_size=SAMPLE_GRID_SIZE,
    size=SAMPLE_PLOT_SIZE)
# Ontology - an Ontology defines how your Base Model is prompted, what your Dataset will describe, and what your Target Model will predict.
# A simple Ontology is the CaptionOntology which prompts a Base Model with text captions and maps them to class names.
# Other Ontologies may, for instance, use a CLIP vector or example images instead of a text caption.
from autodistill.detection import CaptionOntology
ontology = CaptionOntology({
    "informative label": "etiqueta",
    "plastic tray": "bandeja",
    "raw meat steaks": "carne"
})
# Base Model - A Base Model is a large foundation model that knows a lot about a lot. Base models are often multimodal and can perform many tasks.
# They're large, slow, and expensive.
# Examples of Base Models are GroundedSAM and GPT-4's upcoming multimodal variant. We use a Base Model (along with unlabeled input data and an Ontology) to create a Dataset.
DATASET_DIR_PATH = f"{DATA}/dataset"
# Delete the dataset folder if it already exists
if os.path.exists(DATASET_DIR_PATH):
shutil.rmtree(DATASET_DIR_PATH)
from autodistill_grounded_sam import GroundedSAM
base_model = GroundedSAM(ontology=ontology)
dataset = base_model.label(
    input_folder=IMAGES,
    extension=".png",
    output_folder=DATASET_DIR_PATH)
# Dataset - a Dataset is a set of auto-labeled data that can be used to train a Target Model. It is the output generated by a Base Model.
ANNOTATIONS_DIRECTORY_PATH = f"{DATASET_DIR_PATH}/train/labels"
IMAGES_DIRECTORY_PATH = f"{DATASET_DIR_PATH}/train/images"
DATA_YAML_PATH = f"{DATASET_DIR_PATH}/data.yaml"
dataset = sv.DetectionDataset.from_yolo(
    images_directory_path=IMAGES_DIRECTORY_PATH,
    annotations_directory_path=ANNOTATIONS_DIRECTORY_PATH,
    data_yaml_path=DATA_YAML_PATH)
print('labeled image count:', len(dataset))
image_names = list(dataset.images.keys())[:SAMPLE_SIZE]
mask_annotator = sv.MaskAnnotator()
box_annotator = sv.BoxAnnotator()
images = []
for image_name in image_names:
    image = dataset.images[image_name]
    annotations = dataset.annotations[image_name]
    labels = [
        dataset.classes[class_id]
        for class_id
        in annotations.class_id]
    annotated_image = mask_annotator.annotate(
        scene=image.copy(),
        detections=annotations)
    annotated_image = box_annotator.annotate(
        scene=annotated_image,
        detections=annotations,
        labels=labels)
    images.append(annotated_image)
sv.plot_images_grid(
    images=images,
    titles=image_names,
    grid_size=SAMPLE_GRID_SIZE,
    size=SAMPLE_PLOT_SIZE)
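# Target Model - a Target Model is a small, fast supervised model trained on the Dataset produced
# by the Base Model; YOLOv8 is one example. The lines below are a minimal sketch of that final
# step (not part of the script above). They assume the autodistill_yolov8 package is installed and
# that its YOLOv8 wrapper exposes a train() method taking the data.yaml path, as in the autodistill
# documentation; the exact API may differ between versions, and epochs=50 is only an illustrative value.
from autodistill_yolov8 import YOLOv8
target_model = YOLOv8("yolov8n.pt")
target_model.train(DATA_YAML_PATH, epochs=50)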