Skip to content

Commit

Permalink
fix: improve add-logo-to-ann CLI command
Browse files — browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Oct 19, 2023
1 parent a93844d commit 81311a0
Showing 1 changed file with 18 additions and 9 deletions.
27 changes: 18 additions & 9 deletions robotoff/cli/main.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -477,6 +477,13 @@ def add_logo_to_ann(
sleep_time: float = typer.Option(
0.0, help="Time to sleep between each query (in s)"
),
existing_ids_path: Optional[Path] = typer.Argument(
None,
file_okay=True,
dir_okay=False,
help="Path of the plain text file containing logo IDs (one ID per line). If not provided, "
"existing IDs will be fetched from Elasticsearch.",
),
) -> None:
"""Index all missing logos in Elasticsearch ANN index."""
import logging
Expand All @@ -490,26 +497,28 @@ def add_logo_to_ann(
from robotoff.elasticsearch import get_es_client
from robotoff.logos import add_logos_to_ann, get_stored_logo_ids
from robotoff.models import LogoEmbedding, db
from robotoff.utils import get_logger
from robotoff.utils import get_logger, text_file_iter

Check warning on line 500 in robotoff/cli/main.py

View check run for this annotation

Codecov / codecov/patch

robotoff/cli/main.py#L500

Added line #L500 was not covered by tests

logger = get_logger()
logging.getLogger("elastic_transport.transport").setLevel(logging.WARNING)

es_client = get_es_client()
seen = get_stored_logo_ids(es_client)
if existing_ids_path is not None and existing_ids_path.is_file():
seen = set(int(x) for x in text_file_iter(existing_ids_path))

Check warning on line 507 in robotoff/cli/main.py

View check run for this annotation

Codecov / codecov/patch

robotoff/cli/main.py#L506-L507

Added lines #L506-L507 were not covered by tests
else:
seen = get_stored_logo_ids(es_client)

Check warning on line 509 in robotoff/cli/main.py

View check run for this annotation

Codecov / codecov/patch

robotoff/cli/main.py#L509

Added line #L509 was not covered by tests

added = 0

with db.connection_context():
logger.info("Fetching logo embedding to index...")
query = LogoEmbedding.select().objects()
logo_embedding_iter = tqdm.tqdm(
(
logo_embedding
for logo_embedding in ServerSide(query)
if logo_embedding.logo_id not in seen
),
desc="logo",
logo_embedding_iter = (

Check warning on line 516 in robotoff/cli/main.py

View check run for this annotation

Codecov / codecov/patch

robotoff/cli/main.py#L516

Added line #L516 was not covered by tests
logo_embedding
for logo_embedding in tqdm.tqdm(ServerSide(query), desc="logo")
if logo_embedding.logo_id not in seen
)

for logo_embedding_batch in chunked(logo_embedding_iter, 500):
try:
add_logos_to_ann(es_client, logo_embedding_batch, server_type)
Expand Down

0 comments on commit 81311a0

Please sign in to comment.