Skip to content

Commit

Permalink
add completions download
Browse files Browse the repository at this point in the history
  • Loading branch information
wenzhe-log10 committed Mar 5, 2024
1 parent b9b7a05 commit 5bfc3dc
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 3 deletions.
3 changes: 2 additions & 1 deletion log10/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import click

from log10.completions.completions import get_completion, list_completions
from log10.completions.completions import download_completions, get_completion, list_completions
from log10.feedback.feedback import create_feedback, list_feedback
from log10.feedback.feedback_task import create_feedback_task, list_feedback_task

Expand Down Expand Up @@ -37,6 +37,7 @@ def feedback_task():
# Attach the `completions` command object to the root `cli`, then register
# its subcommands under the names used on the command line.
cli.add_command(completions)
completions.add_command(list_completions, "list")
completions.add_command(get_completion, "get")
completions.add_command(download_completions, "download")

# Attach the `feedback` command object and register its "create" subcommand.
cli.add_command(feedback)
feedback.add_command(create_feedback, "create")
Expand Down
115 changes: 113 additions & 2 deletions log10/completions/completions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from datetime import datetime, timezone

import tqdm
import click
import httpx
import rich
Expand Down Expand Up @@ -136,9 +137,9 @@ def list_completions(limit, offset, timeout, tags, from_date, to_date):
res = client.get(url=url, timeout=httpx_timeout)
res.raise_for_status()
except Exception as e:
click.echo(f"Error: {e}")
rich.print(f"Error: {e}")
if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json():
click.echo(e.response.json()["error"])
rich.print(e.response.json()["error"])
return

completions = res.json()
Expand Down Expand Up @@ -202,3 +203,113 @@ def get_completion(id):
"""
res = _get_completion(id)
rich.print(res.json())


def _parse_count_option(value, option_name):
    """Convert a raw ``--limit``/``--offset`` value into a non-negative int.

    The options default to an empty string, so anything supplied on the
    command line arrives as ``str`` and must be converted before arithmetic.

    Returns:
        ``None`` when the option was left empty, otherwise the parsed integer.

    Raises:
        click.UsageError: if the value is not an integer or is negative.
    """
    if value is None or str(value).strip() == "":
        return None
    try:
        parsed = int(value)
    except (TypeError, ValueError):
        raise click.UsageError(f"{option_name} must be an integer, got {value!r}") from None
    if parsed < 0:
        raise click.UsageError(f"{option_name} must not be negative, got {parsed}")
    return parsed


def _report_request_error(exc):
    """Print a failed request's error and, if present, the server's "error" field."""
    rich.print(f"Error: {exc}")
    response = getattr(exc, "response", None)
    if response is None or not hasattr(response, "json"):
        return
    try:
        payload = response.json()
    except ValueError:
        # The error body is not JSON (e.g. an HTML gateway page); nothing more to show.
        return
    if isinstance(payload, dict) and "error" in payload:
        rich.print(payload["error"])


@click.command()
@click.option("--limit", default="", help="Number of completions to fetch")
@click.option("--offset", default="", help="Offset for the completions")
@click.option("--timeout", default=10, help="Timeout for the http request")
@click.option("--tags", default="", help="Filter completions by tag")
@click.option(
    "--from", "from_date", type=click.DateTime(), help="Start date of the range (inclusive). Format: YYYY-MM-DD"
)
@click.option("--to", "to_date", type=click.DateTime(), help="End date of the range (inclusive). Format: YYYY-MM-DD")
@click.option("--compact", is_flag=True, help="Download the compact output only")
@click.option("--output", default="completions.jsonl", help="Output file")
def download_completions(limit, offset, timeout, tags, from_date, to_date, output, compact):
    """
    Download completions to a jsonl file

    Records are appended to the output file, one JSON object per line.
    """
    # Validate all user input before touching the network so usage errors surface immediately.
    start = _parse_count_option(offset, "--offset") or 0
    requested = _parse_count_option(limit, "--limit")

    if (from_date is None) != (to_date is None):  # Check if only one date is provided
        raise click.UsageError("Both --from and --to must be set together.")
    if from_date and to_date and from_date >= to_date:
        raise click.UsageError(f"from_date {from_date} must be earlier than to_date {to_date}")

    base_url = _log10_config.url
    token = _log10_config.token
    org_id = _log10_config.org_id
    completion_url_prefix = f"{base_url}/api/completions?organization_id={org_id}&sort=created_at&desc=true&ids="

    with httpx.Client() as client:
        client.headers = {
            "x-log10-token": token,
            "x-log10-organization-id": org_id,
            "Content-Type": "application/json",
        }

        # Resolve tag names to ids; names for which _get_tag_id returns a falsy value are dropped.
        tag_ids_str = ""
        if tags:
            resolved_ids = (_get_tag_id(tag) for tag in tags.split(","))
            tag_ids_str = ",".join(tag_id for tag_id in resolved_ids if tag_id)
            rich.print(f"Filter with tags: {tags}")

        date_range = {}
        if from_date and to_date:
            # NOTE(review): the hour is forced to 08 before the value is labelled "Z";
            # presumably a fixed timezone shift expected by the API — confirm.
            parsed_from_date = from_date.replace(hour=8).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
            parsed_to_date = to_date.replace(hour=8).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
            date_range = {"from": parsed_from_date, "to": parsed_to_date}
            rich.print(f"Filter with created date: {date_range['from'][:10]} to {date_range['to'][:10]}")

        # The filter part of the query is identical for every request, so build it once.
        filter_query = f"&tagFilter={tag_ids_str}&createdFilter={json.dumps(date_range)}"

        # Probe with limit=1 only to learn how many completions match the filters.
        init_url = f"{completion_url_prefix}&offset=&limit=1{filter_query}"
        try:
            res = client.get(url=init_url, timeout=timeout)
            res.raise_for_status()
        except Exception as e:  # CLI boundary: report the failure and stop instead of dumping a traceback
            _report_request_error(e)
            return

        total_completions = res.json()["total"]
        rich.print(f"Download total completions: {total_completions}")
        # prompt the user to confirm the download, only [y]es will continue
        if not click.confirm("Do you want to continue?"):
            return

        # --limit is a count relative to --offset, and never reaches past what the server has.
        if requested is None:
            stop = total_completions
        else:
            stop = min(start + requested, total_completions)

        batch_size = 10

        # Open the output once and drive a single progress bar counted in completions.
        with open(output, "a", encoding="utf-8") as f, tqdm.tqdm(total=max(stop - start, 0)) as pbar:
            for batch_start in range(start, stop, batch_size):
                # Shrink the final batch so no more than the requested number of records is fetched.
                batch_limit = min(batch_size, stop - batch_start)
                download_url = f"{completion_url_prefix}&offset={batch_start}&limit={batch_limit}{filter_query}"

                try:
                    res = client.get(url=download_url, timeout=timeout)
                    res.raise_for_status()
                except Exception as e:  # CLI boundary, same handling as the probe request
                    _report_request_error(e)
                    return

                batch_data = res.json()["data"]
                if not batch_data:
                    # The server ran out of records earlier than the reported total.
                    break

                for summary in batch_data:
                    if compact:
                        record = summary
                    else:
                        # Non-compact mode fetches each completion individually by id.
                        record = _get_completion(summary["id"]).json()
                    f.write(json.dumps(record) + "\n")
                    pbar.update(1)

0 comments on commit 5bfc3dc

Please sign in to comment.