# demo.py

import numpy as np
import cv2
import matplotlib.pyplot as plt
from regionspot.modeling.regionspot import build_regionspot_model
from regionspot import RegionSpot_Predictor
# Function to show masks on an image
def show_mask(mask, ax, random_color=False):
    """Draw ``mask`` on ``ax`` as a semi-transparent RGBA overlay.

    The last two dimensions of ``mask`` are taken as (height, width). The
    mask values scale one RGBA colour, so zero entries come out fully
    transparent and entries equal to 1 get the colour with alpha 0.6.

    Args:
        mask: Array exposing ``.shape`` and ``.reshape`` that holds exactly
            height * width elements.
        ax: Object with an ``imshow`` method (e.g. a matplotlib Axes).
        random_color: If true, the RGB components are drawn with
            ``np.random.random``; otherwise a fixed blue is used.
    """
    rgba = (
        np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
        if random_color
        else np.array([30/255, 144/255, 255/255, 0.6])
    )
    height, width = mask.shape[-2:]
    # Broadcast (H, W, 1) against (1, 1, 4) to obtain an (H, W, 4) image.
    overlay = mask.reshape(height, width, 1) * rgba.reshape(1, 1, -1)
    ax.imshow(overlay)

# Function to show points on an image
def show_points(coords, labels, ax, marker_size=375):
    """Scatter prompt points on ``ax`` as star markers, coloured by label.

    Rows of ``coords`` whose label equals 1 are drawn in green, rows whose
    label equals 0 in red. Column 0 of ``coords`` is used as x and column 1
    as y.

    Args:
        coords: 2-D array indexable by a boolean mask built from ``labels``.
        labels: Array compared element-wise against 1 and 0.
        ax: Object with a ``scatter`` method (e.g. a matplotlib Axes).
        marker_size: Value forwarded to ``scatter`` as ``s``.
    """
    star_style = dict(marker='*', s=marker_size, edgecolor='white', linewidth=1.25)
    # Select both groups before drawing anything, then plot positives first.
    positives = coords[labels == 1]
    negatives = coords[labels == 0]
    for group, colour in ((positives, 'green'), (negatives, 'red')):
        xs, ys = group[:, 0], group[:, 1]
        ax.scatter(xs, ys, color=colour, **star_style)

# Function to show bounding boxes on an image
def show_box(box, ax):
    """Add ``box`` to ``ax`` as an unfilled rectangle with a green outline.

    ``box`` is indexed as ``[x0, y0, x1, y1]``: the rectangle is anchored at
    ``(x0, y0)`` and its width and height are ``x1 - x0`` and ``y1 - y0``.

    Args:
        box: Indexable with at least four numeric entries.
        ax: Object with an ``add_patch`` method (e.g. a matplotlib Axes).
    """
    x_start, y_start = box[0], box[1]
    width = box[2] - x_start
    height = box[3] - y_start
    outline = plt.Rectangle(
        (x_start, y_start),
        width,
        height,
        edgecolor='green',
        facecolor='none',
        lw=2,
    )
    ax.add_patch(outline)

# Read image and set up model
image_path = 'assets/image.jpg'
image = cv2.imread(image_path)
# cv2.imread reports a missing or undecodable file by returning None instead
# of raising, so fail here with a clear message rather than inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image (missing or unreadable): {image_path!r}")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert image to RGB format

# Multiple boxes, one row per box; show_box reads each row as [x0, y0, x1, y1].
box_prompt = np.array([
    [64, 926, 804, 1978],
    [1237, 490, 1615, 771.],
    [1510, 64, 1670, 167],
])
ckpt_path = '/path/to/model_weights.pth'  # placeholder: point this at the real checkpoint
clip_type = 'CLIP_400M_Large_336'
clip_input_size = 336
# Class names the predicted class indices are mapped back to when labelling.
# NOTE(review): "Flaxseeds seeds" looks like a typo; it is left unchanged
# because these strings are handed to the model builder — confirm before editing.
custom_vocabulary = ["Smoothie bowl", "Banana", "Strawberry", "Chia seeds", "Shredded coconut", "Wooden spoons", "Grapefruit", "Goji berries", "Flaxseeds seeds"]

# Build and initialize the model
# `msg` is returned alongside the model and is not used further in this script.
model, msg = build_regionspot_model(
    is_training=False,
    image_size=clip_input_size,
    clip_type=clip_type,
    pretrain_ckpt=ckpt_path,
    custom_vocabulary=custom_vocabulary,
)

# Create predictor and set image
# NOTE(review): model.cuda() presumably requires a CUDA-capable device — confirm.
predictor = RegionSpot_Predictor(model.cuda())
predictor.set_image(image, clip_input_size=clip_input_size)

# Prediction based on box prompt
masks, mask_iou_score, class_score, class_index = predictor.predict(
    point_coords=None,
    point_labels=None,
    box=box_prompt,
    multimask_output=False,
)

# Extract class name and display image with masks and box
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(image)
for idx, box in enumerate(box_prompt):
    show_mask(masks[idx], ax)
    show_box(box, ax)
    # class_index[idx] selects this box's entry in custom_vocabulary.
    class_name = custom_vocabulary[int(class_index[idx])]
    # Label is anchored at the box's (x0, y0) corner, shifted by 10 in y.
    ax.text(box[0], box[1] - 10, class_name, color='white', fontsize=14, bbox=dict(facecolor='green', edgecolor='green', alpha=0.6))

ax.axis('off')
plt.show()
# Save through the figure handle so the same figure that was shown is written.
fig.savefig('result.png')
plt.close(fig)