Skip to content

Commit

Permalink
feat: assign unknowns via vss
Browse files Browse the repository at this point in the history
  • Loading branch information
danellecline committed Oct 22, 2024
1 parent 67c4202 commit 947a8a3
Showing 1 changed file with 18 additions and 5 deletions.
23 changes: 18 additions & 5 deletions sdcat/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def run_vss(image_t: tuple[str, np.array], vss_url: str, project: str, vss_thres
:param vss_url: url for vss service
:param project: project name in vss
:param top_k: number of top VSS results to use for prediction; 1, 3, 5 etc.
:return:
:return: best prediction and score
"""
url_vss = f"{vss_url}/{top_k}/{project}"
debug(f"URL: {url_vss} threshold: {vss_threshold}")
Expand All @@ -96,13 +96,13 @@ def run_vss(image_t: tuple[str, np.array], vss_url: str, project: str, vss_thres

if response.status_code != 200:
err(f"Error processing images: {response.text}")
return None, None
return "", 0.0

predictions = response.json()["predictions"]
debug(f"Predictions: {predictions}")

if len(predictions) == 0:
return None, None
return "", 0.0

scores = response.json()["scores"][0]
# Scores are 1 - score, so we need to invert them
Expand All @@ -112,7 +112,7 @@ def run_vss(image_t: tuple[str, np.array], vss_url: str, project: str, vss_thres

if best_pred is None:
err(f"No majority prediction for {image_t[0]}")
return None, None
return "", 0.0

return best_pred, best_score

Expand Down Expand Up @@ -514,14 +514,27 @@ def cluster_vits(
# Run the VSS service to assign the cluster to a class
image_t = read_image(exemplar['image_path'])
best_prediction, best_score = run_vss(image_t, vss_url=vss_url, vss_threshold=.1, project='901103-biodiversity', top_k=1)
if best_prediction is None:
if len(best_prediction) == 0:
warn(f'No predictions found for {exemplar["image_path"]}')
continue
# Assign the class to the cluster in df_dets
info(f'Assigning {cluster_id} to class {best_prediction} with score {best_score}')
df_dets.loc[df_dets['cluster'] == cluster_id, 'class'] = best_prediction
df_dets.loc[df_dets['cluster'] == cluster_id, 'score'] = best_score

# Try to assign everything not in a cluster to a class
unknowns = df_dets[df_dets['cluster'] == -1]
for idx, row in unknowns.iterrows():
image_t = read_image(row['crop_path'])
best_prediction, best_score = run_vss(image_t, vss_url=vss_url, vss_threshold=.1, project='901103-biodiversity', top_k=1)
if len(best_prediction) == 0:
warn(f'No predictions found for {row["crop_path"]}')
continue
# Assign the class to this unclustered detection (matched by crop_path) in df_dets
info(f'Assigning {row["crop_path"]} to class {best_prediction} with score {best_score}')
df_dets.loc[df_dets['crop_path'] == row['crop_path'], 'class'] = best_prediction
df_dets.loc[df_dets['crop_path'] == row['crop_path'], 'score'] = best_score

# Save the exemplar embeddings with the model type
exemplar_df['model'] = model
exemplar_df.to_csv(output_path / f'{prefix}_exemplars.csv', index=False)
Expand Down

0 comments on commit 947a8a3

Please sign in to comment.