SAMLabels2Yolov8TargetModel.py
import os
import supervision as sv
import cv2
import shutil
# Name of the product folder under Data
product_folder = 'Carne'
# Get current working directory
HOME = os.getcwd()
# Get the path of the data
DATA = os.path.join(HOME, 'Data')
IMAGES = os.path.join(DATA, product_folder, 'Images')
# List the images and print the image count
image_paths = sv.list_files_with_extensions(
    directory=IMAGES,
    extensions=["png", "jpg", "bmp"])
print('image count:', len(image_paths))
# Plot a sample of the images
SAMPLE_SIZE = 6
SAMPLE_GRID_SIZE = (2, 3)
SAMPLE_PLOT_SIZE = (4, 6)
titles = [
    image_path.stem
    for image_path
    in image_paths[:SAMPLE_SIZE]]
images = [
    cv2.imread(str(image_path))
    for image_path
    in image_paths[:SAMPLE_SIZE]]
sv.plot_images_grid(
    images=images,
    titles=titles,
    grid_size=SAMPLE_GRID_SIZE,
    size=SAMPLE_PLOT_SIZE)
# Ontology - an Ontology defines how your Base Model is prompted, what your Dataset will describe, and what your Target Model will predict.
# A simple Ontology is the CaptionOntology which prompts a Base Model with text captions and maps them to class names.
# Other Ontologies may, for instance, use a CLIP vector or example images instead of a text caption.
from autodistill.detection import CaptionOntology
ontology = CaptionOntology({
    "informative label": "etiqueta",
    "plastic tray": "bandeja",
    "raw meat steaks": "carne"
})
# Base Model - A Base Model is a large foundation model that knows a lot about a lot. Base models are often multimodal and can perform many tasks.
# They're large, slow, and expensive.
# Examples of Base Models are GroundedSAM and GPT-4's upcoming multimodal variant. We use a Base Model (along with unlabeled input data and an Ontology) to create a Dataset.
DATASET_DIR_PATH = f"{DATA}/dataset"
# Delete the dataset folder if it already exists
if os.path.exists(DATASET_DIR_PATH):
shutil.rmtree(DATASET_DIR_PATH)
from autodistill_grounded_sam import GroundedSAM
base_model = GroundedSAM(ontology=ontology)
dataset = base_model.label(
    input_folder=IMAGES,
    extension=".png",
    output_folder=DATASET_DIR_PATH)
# Dataset - a Dataset is a set of auto-labeled data that can be used to train a Target Model. It is the output generated by a Base Model.
ANNOTATIONS_DIRECTORY_PATH = f"{DATASET_DIR_PATH}/train/labels"
IMAGES_DIRECTORY_PATH = f"{DATASET_DIR_PATH}/train/images"
DATA_YAML_PATH = f"{DATASET_DIR_PATH}/data.yaml"
dataset = sv.DetectionDataset.from_yolo(
    images_directory_path=IMAGES_DIRECTORY_PATH,
    annotations_directory_path=ANNOTATIONS_DIRECTORY_PATH,
    data_yaml_path=DATA_YAML_PATH)
print('labeled image count:', len(dataset))
image_names = list(dataset.images.keys())[:SAMPLE_SIZE]
mask_annotator = sv.MaskAnnotator()
box_annotator = sv.BoxAnnotator()
images = []
for image_name in image_names:
    image = dataset.images[image_name]
    annotations = dataset.annotations[image_name]
    labels = [
        dataset.classes[class_id]
        for class_id
        in annotations.class_id]
    annotated_image = mask_annotator.annotate(
        scene=image.copy(),
        detections=annotations)
    annotated_image = box_annotator.annotate(
        scene=annotated_image,
        detections=annotations,
        labels=labels)
    images.append(annotated_image)
sv.plot_images_grid(
    images=images,
    titles=image_names,
    grid_size=SAMPLE_GRID_SIZE,
    size=SAMPLE_PLOT_SIZE)
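# Target Model - a Target Model is a small, fast supervised model trained on the Dataset produced
# by the Base Model; YOLOv8 is one example. The lines below are a minimal sketch of that final
# step (not part of the script above). They assume the autodistill_yolov8 package is installed and
# that its YOLOv8 wrapper exposes a train() method taking the data.yaml path, as in the autodistill
# documentation; the exact API may differ between versions, and epochs=50 is only an illustrative value.
from autodistill_yolov8 import YOLOv8
target_model = YOLOv8("yolov8n.pt")
target_model.train(DATA_YAML_PATH, epochs=50)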