diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7956e481..036f6062 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -63,7 +63,10 @@ jobs: - name: Install dependencies run: poetry install --all-extras - - name: Run dispatch tests + - name: Run cli tests + run: poetry run pytest -vv tests/test_cli.py + + - name: Run dispatch llm tests if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request' }} run: | echo "This is a dispatch event" @@ -113,13 +116,13 @@ jobs: if $empty_inputs; then echo "All variables are empty" - poetry run pytest -vv tests/ + poetry run pytest -vv tests/ --ignore=tests/test_cli.py poetry run pytest --llm_provider=anthropic -vv tests/test_magentic.py fi - - name: Run scheduled tests + - name: Run scheduled llm tests if: ${{ github.event_name == 'schedule' }} run: | echo "This is a schedule event" - poetry run pytest -vv tests/ + poetry run pytest -vv tests/ --ignore=tests/test_cli.py poetry run pytest --openai_model=gpt-4o -m chat -vv tests/test_openai.py diff --git a/log10/__main__.py b/log10/__main__.py index 6365121b..07c9387e 100644 --- a/log10/__main__.py +++ b/log10/__main__.py @@ -1,68 +1,5 @@ -import click +from log10.cli.cli_commands import cli -from log10.completions.completions import benchmark_models, download_completions, get_completion, list_completions -from log10.feedback.autofeedback import auto_feedback_icl, get_autofeedback_cli -from log10.feedback.feedback import create_feedback, download_feedback, get_feedback, list_feedback -from log10.feedback.feedback_task import create_feedback_task, get_feedback_task, list_feedback_task - - -@click.group() -def cli(): - pass - - -@click.group() -def completions(): - """ - Manage logs from completions i.e. logs from users - """ - pass - - -@click.group(name="feedback") -def feedback(): - """ - Manage feedback for completions i.e. capturing feedback from users - """ - pass - - -@click.group(name="auto_feedback") -def auto_feedback(): - """ - Manage auto feedback for completions i.e. capturing feedback from users - """ - pass - - -@click.group() -def feedback_task(): - """ - Manage tasks for feedback i.e. 
instructions and schema for feedback - """ - pass - - -cli.add_command(completions) -completions.add_command(list_completions, "list") -completions.add_command(get_completion, "get") -completions.add_command(download_completions, "download") -completions.add_command(benchmark_models, "benchmark_models") - -cli.add_command(feedback) -feedback.add_command(create_feedback, "create") -feedback.add_command(list_feedback, "list") -feedback.add_command(get_feedback, "get") -feedback.add_command(download_feedback, "download") -feedback.add_command(auto_feedback_icl, "predict") -feedback.add_command(auto_feedback, "autofeedback") -# Subcommands for auto_feedback under feedback command -auto_feedback.add_command(get_autofeedback_cli, "get") - -cli.add_command(feedback_task) -feedback_task.add_command(create_feedback_task, "create") -feedback_task.add_command(list_feedback_task, "list") -feedback_task.add_command(get_feedback_task, "get") if __name__ == "__main__": cli() diff --git a/log10/cli/autofeedback.py b/log10/cli/autofeedback.py new file mode 100644 index 00000000..a1ecf364 --- /dev/null +++ b/log10/cli/autofeedback.py @@ -0,0 +1,47 @@ +import json + +import click +import rich +from rich.console import Console + +from log10.feedback.autofeedback import AutoFeedbackICL, get_autofeedback + + +@click.command() +@click.option("--task_id", help="Feedback task ID") +@click.option("--content", help="Completion content") +@click.option("--file", "-f", help="File containing completion content") +@click.option("--completion_id", help="Completion ID") +@click.option("--num_samples", default=5, help="Number of samples to use for few-shot learning") +def auto_feedback_icl(task_id: str, content: str, file: str, completion_id: str, num_samples: int): + """ + Generate feedback with existing human feedback based on in context learning + """ + options_count = sum([1 for option in [content, file, completion_id] if option]) + if options_count > 1: + click.echo("Only one of --content, --file, or --completion_id should be provided.") + return + + console = Console() + auto_feedback_icl = AutoFeedbackICL(task_id, num_samples=num_samples) + if completion_id: + results = auto_feedback_icl.predict(completion_id=completion_id) + console.print_json(results) + return + + if file: + with open(file, "r") as f: + content = f.read() + results = auto_feedback_icl.predict(text=content) + console.print_json(results) + + +@click.command() +@click.option("--completion-id", required=True, help="Completion ID") +def get_autofeedback_cli(completion_id: str): + """ + Get an auto feedback by completion id + """ + res = get_autofeedback(completion_id) + if res: + rich.print_json(json.dumps(res["data"], indent=4)) diff --git a/log10/cli/cli_commands.py b/log10/cli/cli_commands.py new file mode 100644 index 00000000..0396841b --- /dev/null +++ b/log10/cli/cli_commands.py @@ -0,0 +1,74 @@ +try: + import click + import pandas # noqa: F401 + import rich # noqa: F401 + import tabulate # noqa: F401 +except ImportError: + print( + "To use log10 cli you must install optional modules. Please install them with `pip install 'log10-io[cli]'`." 
+ ) + exit(1) + +from log10.cli.autofeedback import auto_feedback_icl, get_autofeedback_cli +from log10.cli.completions import benchmark_models, download_completions, get_completion, list_completions +from log10.cli.feedback import create_feedback, download_feedback, get_feedback, list_feedback +from log10.cli.feedback_task import create_feedback_task, get_feedback_task, list_feedback_task + + +@click.group() +def cli(): + pass + + +@click.group() +def completions(): + """ + Manage logs from completions i.e. logs from users + """ + pass + + +@click.group(name="feedback") +def feedback(): + """ + Manage feedback for completions i.e. capturing feedback from users + """ + pass + + +@click.group(name="auto_feedback") +def auto_feedback(): + """ + Manage auto feedback for completions i.e. capturing feedback from users + """ + pass + + +@click.group() +def feedback_task(): + """ + Manage tasks for feedback i.e. instructions and schema for feedback + """ + pass + + +cli.add_command(completions) +completions.add_command(list_completions, "list") +completions.add_command(get_completion, "get") +completions.add_command(download_completions, "download") +completions.add_command(benchmark_models, "benchmark_models") + +cli.add_command(feedback) +feedback.add_command(create_feedback, "create") +feedback.add_command(list_feedback, "list") +feedback.add_command(get_feedback, "get") +feedback.add_command(download_feedback, "download") +feedback.add_command(auto_feedback_icl, "predict") +feedback.add_command(auto_feedback, "autofeedback") +# Subcommands for auto_feedback under feedback command +auto_feedback.add_command(get_autofeedback_cli, "get") + +cli.add_command(feedback_task) +feedback_task.add_command(create_feedback_task, "create") +feedback_task.add_command(list_feedback_task, "list") +feedback_task.add_command(get_feedback_task, "get") diff --git a/log10/cli/completions.py b/log10/cli/completions.py new file mode 100644 index 00000000..885447ba --- /dev/null +++ b/log10/cli/completions.py @@ -0,0 +1,408 @@ +import json + +import click +import pandas as pd +import rich +import tqdm +from rich.console import Console +from rich.table import Table + +from log10._httpx_utils import _get_time_diff, _try_get +from log10.cli_utils import generate_markdown_report, generate_results_table +from log10.completions.completions import ( + _check_model_support, + _compare, + _get_completion, + _get_completions_url, + _write_completions, +) +from log10.llm import Log10Config +from log10.prompt_analyzer import PromptAnalyzer, convert_suggestion_to_markdown, display_prompt_analyzer_suggestions + + +_log10_config = Log10Config() + + +def _render_completions_table(completions_data, total_completions): + data_for_table = [] + for completion in completions_data: + prompt, response = "", "" + if completion.get("kind") == "completion": + prompt = completion.get("request", {}).get("prompt", "") + response_choices = completion.get("response", {}).get("choices", []) + if response_choices: + response = response_choices[0].get("text", "") + elif completion.get("kind") == "chat": + request_messages = completion.get("request", {}).get("messages", []) + prompt = request_messages[0].get("content", "") if request_messages else "" + + response_choices = completion.get("response", {}).get("choices", []) + if response_choices: + # Handle 'message' and 'function_call' within the first choice safely + first_choice = response_choices[0] + if "message" in first_choice: + message = first_choice["message"] + response = ( + 
+                        # prefer message content; fall back to the last tool call's arguments
+                        message.get("content")
+                        or (
+                            message.get("tool_calls", [])[-1].get("function", {}).get("arguments", "")
+                            if message.get("tool_calls")
+                            else ""
+                        )
+                    )
+                elif "function_call" in first_choice:
+                    response = json.dumps(first_choice.get("function_call", {}))
+        else:
+            rich.print(f"Unknown completion kind: {completion['kind']} for id: {completion['id']}")
+
+        data_for_table.append(
+            {
+                "id": completion["id"],
+                "status": "success" if completion["status"] == "finished" else completion["status"],
+                "created_at": _get_time_diff(completion["created_at"]),
+                "prompt": prompt,
+                "completion": response,
+                "tags": [t["name"] for t in completion["tagResolved"]],
+            }
+        )
+    # render data_for_table with rich table
+    table = Table(show_header=True, header_style="bold magenta")
+
+    table.add_column("ID", style="dim")
+    table.add_column("Status")
+    table.add_column("Created At")
+    table.add_column("Prompt", overflow="fold")
+    table.add_column("Completion", overflow="fold")
+    table.add_column("Tags", justify="right")
+
+    max_len = 40
+    for item in data_for_table:
+        tags = ", ".join(item["tags"]) if item["tags"] else ""
+        if isinstance(item["prompt"], list):
+            item["prompt"] = " ".join(item["prompt"])
+        short_prompt = item["prompt"][:max_len] + "..." if len(item["prompt"]) > max_len else item["prompt"]
+        completion = item.get("completion", "")
+        short_completion = completion[:max_len] + "..." if len(completion) > max_len else completion
+        table.add_row(item["id"], item["status"], item["created_at"], short_prompt, short_completion, tags)
+
+    console = Console()
+    console.print(table)
+    console.print(f"{total_completions=}")
+
+
+def _render_comparison_table(model_response_raw_data):
+    rich.print(f"completion_id: {model_response_raw_data['completion_id']}")
+    rich.print("original_request:")
+    rich.print_json(json.dumps(model_response_raw_data["original_request"], indent=4))
+
+    table = rich.table.Table(show_header=True, header_style="bold magenta", box=rich.box.ROUNDED, show_lines=True)
+    table.add_column("Model")
+    table.add_column("Content")
+    table.add_column("Total Token Usage (Input/Output)")
+    table.add_column("Duration (ms)")
+
+    for model, data in model_response_raw_data.items():
+        # only display model data
+        if model not in ["completion_id", "original_request"]:
+            usage = data["usage"]
+            formatted_usage = f"{usage['total_tokens']} ({usage['prompt_tokens']}/{usage['completion_tokens']})"
+            table.add_row(model, data["content"], formatted_usage, str(data["duration"]))
+    rich.print(table)
+
+
+def _create_dataframe_from_comparison_data(model_response_raw_data):
+    completion_id = model_response_raw_data["completion_id"]
+    original_request = model_response_raw_data["original_request"]
+    rows = []
+    for model, model_data in model_response_raw_data.items():
+        # only display model data
+        if model not in ["completion_id", "original_request"]:
+            content = model_data["content"]
+            usage = model_data["usage"]
+            prompt_tokens = usage["prompt_tokens"]
+            completion_tokens = usage["completion_tokens"]
+            total_tokens = usage["total_tokens"]
+            duration = model_data["duration"]
+            prompt_messages = json.dumps(original_request["messages"])
+            rows.append(
+                [
+                    completion_id,
+                    prompt_messages,
+                    model,
+                    content,
+                    prompt_tokens,
+                    completion_tokens,
+                    total_tokens,
+                    duration,
+                ]
+            )
+
+    df = pd.DataFrame(
+        rows,
+        columns=[
+            "Completion ID",
+            "Prompt Messages",
+            "Model",
+            "Content",
+            "Prompt Tokens",
+            "Completion Tokens",
+            "Total Tokens",
+            "Duration (ms)",
+        ],
+    )
+
+    return df
+
+
+@click.command()
+@click.option("--limit", default=25, help="Specify the maximum number of completions to retrieve.") +@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.") +@click.option( + "--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete." +) +@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.") +@click.option( + "--from", + "from_date", + type=click.DateTime(), + help="Define the start date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", +) +@click.option( + "--to", + "to_date", + type=click.DateTime(), + help="Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", +) +def list_completions(limit, offset, timeout, tags, from_date, to_date): + """ + List completions + """ + base_url = _log10_config.url + org_id = _log10_config.org_id + + url = _get_completions_url(limit, offset, tags, from_date, to_date, base_url, org_id) + # Fetch completions + res = _try_get(url, timeout) + + completions = res.json() + total_completions = completions["total"] + completions = completions["data"] + + _render_completions_table(completions, total_completions) + + +@click.command() +@click.option("--id", prompt="Enter completion id", help="Completion ID") +def get_completion(id): + """ + Get a completion by id + """ + res = _get_completion(id) + rich.print_json(json.dumps(res.json()["data"], indent=4)) + + +@click.command() +@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.") +@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.") +@click.option( + "--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete." +) +@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.") +@click.option( + "--from", + "from_date", + type=click.DateTime(), + help="Define the start date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", +) +@click.option( + "--to", + "to_date", + type=click.DateTime(), + help="Set the end date for fetching completions (inclusive). 
Use the format: YYYY-MM-DD.",
+)
+@click.option("--compact", is_flag=True, help="Enable to download only the compact version of the output.")
+@click.option("--file", "-f", default="completions.jsonl", help="Specify the filename and path for the output file.")
+def download_completions(limit, offset, timeout, tags, from_date, to_date, compact, file):
+    """
+    Download completions to a jsonl file
+    """
+    base_url = _log10_config.url
+    org_id = _log10_config.org_id
+
+    init_url = _get_completions_url(1, 0, tags, from_date, to_date, base_url, org_id)
+    res = _try_get(init_url)
+    if res.status_code != 200:
+        rich.print(f"Error: {res.json()}")
+        return
+
+    total_completions = res.json()["total"]
+    offset = int(offset) if offset else 0
+    limit = int(limit) if limit else total_completions
+    rich.print(f"Download total completions: {limit}/{total_completions}")
+    if not click.confirm("Do you want to continue?"):
+        return
+
+    # download completions
+    pbar = tqdm.tqdm(total=limit)
+    batch_size = 10
+    end = offset + limit if offset + limit < total_completions else total_completions
+    for batch in range(offset, end, batch_size):
+        current_batch_size = batch_size if batch + batch_size < end else end - batch
+        download_url = _get_completions_url(
+            current_batch_size, batch, tags, from_date, to_date, base_url, org_id, printout=False
+        )
+        res = _try_get(download_url, timeout)
+        _write_completions(res, file, compact)
+        pbar.update(current_batch_size)
+
+
+@click.command()
+@click.option("--ids", default="", help="Completion IDs. Separate multiple ids with commas.")
+@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.")
+@click.option("--limit", help="Specify the maximum number of completions to retrieve filtered by tags.")
+@click.option(
+    "--offset", help="Set the starting point (offset) from where to begin fetching completions filtered by tags."
+)
+@click.option("--models", default="", help="Comma separated list of models to compare")
+@click.option("--temperature", default=0.2, help="Temperature")
+@click.option("--max_tokens", default=512, help="Max tokens")
+@click.option("--top_p", default=1.0, help="Top p")
+@click.option("--analyze_prompt", is_flag=True, help="Run prompt analyzer on the messages.")
+@click.option("--file", "-f", help="Specify the filename for the report in markdown format.")
+def benchmark_models(ids, tags, limit, offset, models, temperature, max_tokens, top_p, file, analyze_prompt):
+    """
+    Compare completions using different models and generate report
+    """
+    if ids and tags:
+        raise click.UsageError("--ids and --tags cannot be set together.")
+    if (limit or offset) and not tags:
+        raise click.UsageError("--limit and --offset can only be used with --tags.")
+    if tags:
+        if not limit:
+            limit = 5
+        if not offset:
+            offset = 0
+
+    if not models:
+        raise click.UsageError("--models must be set to compare.")
+    else:
+        for model in [m for m in models.split(",") if m]:
+            if not _check_model_support(model):
+                raise click.UsageError(f"Model {model} is not supported.")
+
+    # get completions ids
+    completion_ids = []
+    if ids:
+        completion_ids = [id for id in ids.split(",") if id]
+    elif tags:
+        base_url = _log10_config.url
+        org_id = _log10_config.org_id
+        url = _get_completions_url(limit, offset, tags, None, None, base_url, org_id)
+        res = _try_get(url)
+        completions = res.json()["data"]
+        completion_ids = [completion["id"] for completion in completions]
+        if not completion_ids:
+            raise SystemExit(f"No completions found for tags: {tags}")
+
+    compare_models = [m for m in models.split(",") if m]
+
+    data = []
+    skipped_completion_ids = []
+    for id in completion_ids:
+        # get message from id
+        completion_data = _get_completion(id).json()["data"]
+
+        # skip completion if status is not finished or kind is not chat
+        if completion_data["status"] != "finished" or completion_data["kind"] != "chat":
+            rich.print(f"Skip completion {id}.
Status is not finished or kind is not chat.") + skipped_completion_ids.append(id) + continue + + original_model_request = completion_data["request"] + original_model_response = completion_data["response"] + original_model = original_model_response["model"] + benchmark_data = { + "completion_id": id, + "original_request": original_model_request, + f"{original_model} (original model)": { + "content": original_model_response["choices"][0]["message"]["content"], + "usage": original_model_response["usage"], + "duration": completion_data["duration"], + }, + } + messages = original_model_request["messages"] + compare_models_data = _compare(compare_models, messages, temperature, max_tokens, top_p) + benchmark_data.update(compare_models_data) + data.append(benchmark_data) + + prompt_analysis_data = {} + if analyze_prompt: + rich.print("Analyzing prompts") + for item in data: + completion_id = item["completion_id"] + prompt_messages = item["original_request"]["messages"] + all_messages = "\n\n".join([m["content"] for m in prompt_messages]) + analyzer = PromptAnalyzer() + suggestions = analyzer.analyze(all_messages) + prompt_analysis_data[completion_id] = suggestions + + # create an empty dataframe + all_df = pd.DataFrame( + columns=[ + "Completion ID", + "Prompt Messages", + "Model", + "Content", + "Prompt Tokens", + "Completion Tokens", + "Total Tokens", + "Duration (ms)", + ] + ) + + # + # Display or save the results + # + if not file: + # display in terminal using rich + for ret in data: + _render_comparison_table(ret) + if analyze_prompt: + completion_id = ret["completion_id"] + suggestions = prompt_analysis_data[completion_id] + rich.print(f"Prompt Analysis for completion_id: {completion_id}") + display_prompt_analyzer_suggestions(suggestions) + else: + # generate markdown report and save to file + for ret in data: + df = _create_dataframe_from_comparison_data(ret) + all_df = pd.concat([all_df, df]) + pivot_df = all_df.pivot(index="Completion ID", columns="Model", values="Content") + pivot_df["Prompt Messages"] = all_df.groupby("Completion ID")["Prompt Messages"].first() + # Reorder the columns + cols = pivot_df.columns.tolist() + cols = [cols[-1]] + cols[:-1] + pivot_df = pivot_df[cols] + + pivot_table = generate_results_table(pivot_df, section_name="model comparison") + all_results_table = generate_results_table(all_df, section_name="All Results") + + prompt_analysis_markdown = "" + if analyze_prompt: + prompt_analysis_markdown = "## Prompt Analysis\n\n" + for completion_id, suggestions in prompt_analysis_data.items(): + prompt_messages = all_df[all_df["Completion ID"] == completion_id]["Prompt Messages"].values[0] + prompt_analysis_markdown += ( + f"### Prompt Analysis for completion_id: {completion_id}\n\n{prompt_messages}\n\n" + ) + prompt_analysis_markdown += convert_suggestion_to_markdown(suggestions) + + # generate the list of skipped completions ids + skipped_completion_markdown = "" + if skipped_completion_ids: + skipped_completion_ids_str = ", ".join(skipped_completion_ids) + skipped_completion_markdown += "## Skipped Completion IDs\n\n" + skipped_completion_markdown += f"Skipped completions: {skipped_completion_ids_str}\n\n" + + generate_markdown_report( + file, [pivot_table, prompt_analysis_markdown, all_results_table, skipped_completion_markdown] + ) + rich.print(f"Report saved to {file}") diff --git a/log10/cli/feedback.py b/log10/cli/feedback.py new file mode 100644 index 00000000..f8398b77 --- /dev/null +++ b/log10/cli/feedback.py @@ -0,0 +1,129 @@ +import json + +import click 
+from rich.console import Console +from rich.table import Table +from tqdm import tqdm + +from log10.feedback.feedback import Feedback, _get_feedback_list + + +@click.command() +@click.option("--task_id", prompt="Enter task id", help="Task ID") +@click.option("--values", prompt="Enter task values", help="Feedback in JSON format") +@click.option( + "--completion_tags_selector", + prompt="Enter completion tags selector", + help="Completion tags selector", +) +@click.option("--comment", help="Comment", default="") +def create_feedback(task_id, values, completion_tags_selector, comment): + """ + Add feedback to a group of completions associated with a task + """ + click.echo("Creating feedback") + tags = completion_tags_selector.split(",") + values = json.loads(values) + feedback = Feedback().create(task_id=task_id, values=values, completion_tags_selector=tags, comment=comment) + click.echo(feedback.json()) + + +@click.command() +@click.option( + "--offset", default=0, type=int, help="The starting index from which to begin the feedback fetch. Defaults to 0." +) +@click.option( + "--limit", default=25, type=int, help="The maximum number of feedback items to retrieve. Defaults to 25." +) +@click.option( + "--task_id", + default="", + type=str, + help="The specific Task ID to filter feedback. If not provided, feedback for all tasks will be fetched.", +) +def list_feedback(offset, limit, task_id): + """ + List feedback based on the provided criteria. This command allows fetching feedback for a specific task or across all tasks, + with control over the starting point and the number of items to retrieve. + """ + feedback_data = _get_feedback_list(offset, limit, task_id) + data_for_table = [] + for feedback in feedback_data: + data_for_table.append( + { + "id": feedback["id"], + "task_name": feedback["task_name"], + "feedback": json.dumps(feedback["json_values"], ensure_ascii=False), + "matched_completion_ids": ",".join(feedback["matched_completion_ids"]), + } + ) + table = Table(title="Feedback") + table.add_column("ID") + table.add_column("Task Name") + table.add_column("Feedback") + table.add_column("Completion ID") + + for item in data_for_table: + table.add_row(item["id"], item["task_name"], item["feedback"], item["matched_completion_ids"]) + console = Console() + console.print(table) + console.print(f"Total feedback: {len(feedback_data)}") + + +@click.command() +@click.option("--id", required=True, help="Get feedback by ID") +def get_feedback(id): + """ + Get feedback based on provided ID. + """ + try: + res = Feedback().get(id) + except Exception as e: + click.echo(f"Error fetching feedback {e}") + if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): + click.echo(e.response.json()["error"]) + return + console = Console() + feedback = json.dumps(res.json(), indent=4) + console.print_json(feedback) + + +@click.command() +@click.option( + "--offset", + default=0, + help="The starting index from which to begin the feedback fetch. Leave empty to start from the beginning.", +) +@click.option( + "--limit", default="", help="The maximum number of feedback items to retrieve. Leave empty to retrieve all." +) +@click.option( + "--task_id", + default="", + type=str, + help="The specific Task ID to filter feedback. If not provided, feedback for all tasks will be fetched.", +) +@click.option( + "--file", + "-f", + type=str, + required=False, + help="Path to the file where the feedback will be saved. The feedback data is saved in JSON Lines (jsonl) format. 
If not specified, feedback will be printed to stdout.", +) +def download_feedback(offset, limit, task_id, file): + """ + Download feedback based on the provided criteria. This command allows fetching feedback for a specific task or across all tasks, + with control over the starting point and the number of items to retrieve. + """ + feedback_data = _get_feedback_list(offset, limit, task_id) + + console = Console() + if not file: + for feedback in feedback_data: + console.print_json(json.dumps(feedback, indent=4)) + return + + with open(file, "w") as f: + console.print(f"Saving feedback to {file}") + for feedback in tqdm(feedback_data): + f.write(json.dumps(feedback) + "\n") diff --git a/log10/cli/feedback_task.py b/log10/cli/feedback_task.py new file mode 100644 index 00000000..056730d3 --- /dev/null +++ b/log10/cli/feedback_task.py @@ -0,0 +1,80 @@ +import json + +import click +from rich.console import Console +from rich.table import Table + +from log10._httpx_utils import _get_time_diff +from log10.feedback.feedback_task import FeedbackTask + + +# create a cli interface for FeebackTask.create function +@click.command() +@click.option("--name", prompt="Enter feedback task name", help="Name of the task") +@click.option("--task_schema", prompt="Enter feedback task schema", help="Task schema") +@click.option("--instruction", help="Task instruction", default="") +@click.option( + "--completion_tags_selector", + help="Completion tags selector", +) +def create_feedback_task(name, task_schema, instruction, completion_tags_selector=None): + click.echo("Creating feedback task") + tags = [] + + if completion_tags_selector: + tags = completion_tags_selector.split(",") + + task_schema = json.loads(task_schema) + task = FeedbackTask().create( + name=name, task_schema=task_schema, completion_tags_selector=tags, instruction=instruction + ) + click.echo(f"Use this task_id to add feedback: {task.json()['id']}") + + +@click.command() +@click.option("--limit", default=25, help="Number of feedback tasks to fetch") +@click.option("--offset", default=0, help="Offset for the feedback tasks") +def list_feedback_task(limit, offset): + res = FeedbackTask().list(limit=limit, offset=offset) + feedback_tasks = res.json() + + data_for_table = [] + + for task in feedback_tasks["data"]: + data_for_table.append( + { + "id": task["id"], + "created_at": _get_time_diff(task["created_at"]), + "name": task["name"], + "required": task["json_schema"]["required"], + "instruction": task["instruction"], + } + ) + + table = Table(title="Feedback Tasks") + table.add_column("ID", style="dim") + table.add_column("Created At") + table.add_column("Name") + table.add_column("Required") + table.add_column("Instruction") + for item in data_for_table: + required = ", ".join(item["required"]) if item["required"] else "" + table.add_row(item["id"], item["created_at"], item["name"], required, item["instruction"]) + + console = Console() + console.print(table) + + +@click.command() +@click.option("--id", help="Get feedback task by ID") +def get_feedback_task(id): + try: + res = FeedbackTask().get(id) + except Exception as e: + click.echo(f"Error fetching feedback task {e}") + if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): + click.echo(e.response.json()["error"]) + return + task = json.dumps(res.json()) + console = Console() + console.print_json(task) diff --git a/log10/completions/completions.py b/log10/completions/completions.py index c0d46492..ed479dd4 100644 --- a/log10/completions/completions.py 
+++ b/log10/completions/completions.py @@ -3,16 +3,9 @@ import click import httpx -import pandas as pd -import rich -import tqdm -from rich.console import Console -from rich.table import Table - -from log10._httpx_utils import _get_time_diff, _try_get -from log10.cli_utils import generate_markdown_report, generate_results_table + +from log10._httpx_utils import _try_get from log10.llm import Log10Config -from log10.prompt_analyzer import PromptAnalyzer, convert_suggestion_to_markdown, display_prompt_analyzer_suggestions _log10_config = Log10Config() @@ -55,11 +48,11 @@ def _get_tag_ids(tags): def _get_completions_url(limit, offset, tags, from_date, to_date, base_url, org_id, printout=True): tag_ids_str = _get_tag_ids(tags) if tags else "" if tag_ids_str and printout: - rich.print(f"Filter with tags: {tags}") + print(f"Filter with tags: {tags}") date_range = _get_valid_date_range(from_date, to_date) if date_range and printout: - rich.print(f"Filter with created date: {date_range['from'][:10]} to {date_range['to'][:10]}") + print(f"Filter with created date: {date_range['from'][:10]} to {date_range['to'][:10]}") url = f"{base_url}/api/completions?organization_id={org_id}&offset={offset}&limit={limit}&tagFilter={tag_ids_str}&createdFilter={json.dumps(date_range)}&sort=created_at&desc=true&ids=" return url @@ -82,113 +75,6 @@ def _get_valid_date_range(from_date, to_date): return date_range -def _render_completions_table(completions_data, total_completions): - data_for_table = [] - for completion in completions_data: - prompt, response = "", "" - if completion.get("kind") == "completion": - prompt = completion.get("request", {}).get("prompt", "") - response_choices = completion.get("response", {}).get("choices", []) - if response_choices: - response = response_choices[0].get("text", "") - elif completion.get("kind") == "chat": - request_messages = completion.get("request", {}).get("messages", []) - prompt = request_messages[0].get("content", "") if request_messages else "" - - response_choices = completion.get("response", {}).get("choices", []) - if response_choices: - # Handle 'message' and 'function_call' within the first choice safely - first_choice = response_choices[0] - if "message" in first_choice: - response = first_choice["message"].get("content", "") - elif "function_call" in first_choice: - response = json.dumps(first_choice.get("function_call", {})) - else: - rich.print(f"Unknown completion kind: {completion['kind']} for id: {completion['id']}") - - data_for_table.append( - { - "id": completion["id"], - "status": "success" if completion["status"] == "finished" else completion["status"], - "created_at": _get_time_diff(completion["created_at"]), - "prompt": prompt, - "completion": response, - "tags": [t["name"] for t in completion["tagResolved"]], - } - ) - # render data_for_table with rich table - table = Table(show_header=True, header_style="bold magenta") - - table.add_column("ID", style="dim") - table.add_column("Status") - table.add_column("Created At") - table.add_column("Prompt", overflow="fold") - table.add_column("Completion", overflow="fold") - table.add_column("Tags", justify="right") - - max_len = 40 - for item in data_for_table: - tags = ", ".join(item["tags"]) if item["tags"] else "" - if isinstance(item["prompt"], list): - item["prompt"] = " ".join(item["prompt"]) - short_prompt = item["prompt"][:max_len] + "..." if len(item["prompt"]) > max_len else item["prompt"] - short_completion = ( - item["completion"][:max_len] + "..." 
if len(item["completion"]) > max_len else item["completion"] - ) - table.add_row(item["id"], item["status"], item["created_at"], short_prompt, short_completion, tags) - - console = Console() - console.print(table) - console.print(f"{total_completions=}") - - -@click.command() -@click.option("--limit", default=25, help="Specify the maximum number of completions to retrieve.") -@click.option("--offset", default=0, help="Set the starting point (offset) from where to begin fetching completions.") -@click.option( - "--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete." -) -@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.") -@click.option( - "--from", - "from_date", - type=click.DateTime(), - help="Define the start date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", -) -@click.option( - "--to", - "to_date", - type=click.DateTime(), - help="Set the end date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", -) -def list_completions(limit, offset, timeout, tags, from_date, to_date): - """ - List completions - """ - base_url = _log10_config.url - org_id = _log10_config.org_id - - url = _get_completions_url(limit, offset, tags, from_date, to_date, base_url, org_id) - # Fetch completions - res = _try_get(url, timeout) - - completions = res.json() - total_completions = completions["total"] - completions = completions["data"] - - _render_completions_table(completions, total_completions) - - -@click.command() -@click.option("--id", prompt="Enter completion id", help="Completion ID") -def get_completion(id): - """ - Get a completion by id - """ - res = _get_completion(id) - rich.print_json(json.dumps(res.json()["data"], indent=4)) - - def _write_completions(res, output_file, compact_mode): """Processes completions and appends them to the output file.""" with open(output_file, "a") as file: @@ -202,61 +88,6 @@ def _write_completions(res, output_file, compact_mode): file.write(json.dumps(completion) + "\n") -@click.command() -@click.option("--limit", default="", help="Specify the maximum number of completions to retrieve.") -@click.option("--offset", default="", help="Set the starting point (offset) from where to begin fetching completions.") -@click.option( - "--timeout", default=10, help="Set the maximum time (in seconds) allowed for the HTTP request to complete." -) -@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.") -@click.option( - "--from", - "from_date", - type=click.DateTime(), - help="Define the start date for fetching completions (inclusive). Use the format: YYYY-MM-DD.", -) -@click.option( - "--to", - "to_date", - type=click.DateTime(), - help="Set the end date for fetching completions (inclusive). 
Use the format: YYYY-MM-DD.", -) -@click.option("--compact", is_flag=True, help="Enable to download only the compact version of the output.") -@click.option("--file", "-f", default="completions.jsonl", help="Specify the filename and path for the output file.") -def download_completions(limit, offset, timeout, tags, from_date, to_date, compact, file): - """ - Download completions to a jsonl file - """ - base_url = _log10_config.url - org_id = _log10_config.org_id - - init_url = _get_completions_url(1, 0, tags, from_date, to_date, base_url, org_id) - res = _try_get(init_url) - if res.status_code != 200: - rich.print(f"Error: {res.json()}") - return - - total_completions = res.json()["total"] - offset = int(offset) if offset else 0 - limit = int(limit) if limit else total_completions - rich.print(f"Download total completions: {limit}/{total_completions}") - if not click.confirm("Do you want to continue?"): - return - - # dowlnoad completions - pbar = tqdm.tqdm(total=limit) - batch_size = 10 - end = offset + limit if offset + limit < total_completions else total_completions - for batch in range(offset, end, batch_size): - current_batch_size = batch_size if batch + batch_size < end else end - batch - download_url = _get_completions_url( - current_batch_size, batch, tags, from_date, to_date, base_url, org_id, printout=False - ) - res = _try_get(download_url, timeout) - _write_completions(res, file, compact) - pbar.update(current_batch_size) - - def _get_llm_repsone( model: str, messages: list[dict], @@ -317,75 +148,11 @@ def _get_llm_repsone( return ret -def _render_comparison_table(model_response_raw_data): - rich.print(f"completion_id: {model_response_raw_data['completion_id']}") - rich.print("original_request:") - rich.print_json(json.dumps(model_response_raw_data["original_request"], indent=4)) - - table = rich.table.Table(show_header=True, header_style="bold magenta", box=rich.box.ROUNDED, show_lines=True) - table.add_column("Model") - table.add_column("Content") - table.add_column("Total Token Usage (Input/Output)") - table.add_column("Duration (ms)") - - for model, data in model_response_raw_data.items(): - # only display model data - if model not in ["completion_id", "original_request"]: - usage = data["usage"] - formatted_usage = f"{usage['total_tokens']} ({usage['prompt_tokens']}/{usage['completion_tokens']})" - table.add_row(model, data["content"], formatted_usage, str(data["duration"])) - rich.print(table) - - -def _create_dataframe_from_comparison_data(model_response_raw_data): - completion_id = model_response_raw_data["completion_id"] - original_request = model_response_raw_data["original_request"] - rows = [] - for model, model_data in model_response_raw_data.items(): - # only display model data - if model not in ["completion_id", "original_request"]: - content = model_data["content"] - usage = model_data["usage"] - prompt_tokens = usage["prompt_tokens"] - completion_tokens = usage["completion_tokens"] - total_tokens = usage["total_tokens"] - duration = model_data["duration"] - prompt_messages = json.dumps(original_request["messages"]) - rows.append( - [ - completion_id, - prompt_messages, - model, - content, - prompt_tokens, - completion_tokens, - total_tokens, - duration, - ] - ) - - df = pd.DataFrame( - rows, - columns=[ - "Completion ID", - "Prompt Messages", - "Model", - "Content", - "Prompt Tokens", - "Completion Tokens", - "Total Tokens", - "Duration (ms)", - ], - ) - - return df - - def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_tokens: float 
= 256, top_p: float = 1.0): ret = {} if models: for model in models: - rich.print(f"Running {model}") + print(f"Running {model}") response = _get_llm_repsone( model, messages, @@ -434,157 +201,3 @@ def _compare(models: list[str], messages: dict, temperature: float = 0.2, max_to def _check_model_support(model: str) -> bool: return model in _SUPPORTED_MODELS - - -@click.command() -@click.option("--ids", default="", help="Completion IDs. Separate multiple ids with commas.") -@click.option("--tags", default="", help="Filter completions by specific tags. Separate multiple tags with commas.") -@click.option("--limit", help="Specify the maximum number of completions to retrieve filtered by tags.") -@click.option( - "--offset", help="Set the starting point (offset) from where to begin fetching completions filtered by tags." -) -@click.option("--models", default="", help="Comma separated list of models to compare") -@click.option("--temperature", default=0.2, help="Temperature") -@click.option("--max_tokens", default=512, help="Max tokens") -@click.option("--top_p", default=1.0, help="Top p") -@click.option("--analyze_prompt", is_flag=True, help="Run prompt analyzer on the messages.") -@click.option("--file", "-f", help="Specify the filename for the report in markdown format.") -def benchmark_models(ids, tags, limit, offset, models, temperature, max_tokens, top_p, file, analyze_prompt): - """ - Compare completions using different models and generate report - """ - if ids and tags: - raise click.UsageError("--ids and --tags cannot be set together.") - if (limit or offset) and not tags: - raise click.UsageError("--limit and --offset can only be used with --tags.") - if tags: - if not limit: - limit = 5 - if not offset: - offset = 0 - - if not models: - raise click.UsageError("--models must be set to compare.") - else: - for model in [m for m in models.split(",") if m]: - if not _check_model_support(model): - raise click.UsageError(f"Model {model} is not supported.") - - # get completions ids - completion_ids = [] - if ids: - completion_ids = [id for id in ids.split(",") if id] - elif tags: - base_url = _log10_config.url - org_id = _log10_config.org_id - url = _get_completions_url(limit, offset, tags, None, None, base_url, org_id) - res = _try_get(url) - completions = res.json()["data"] - completion_ids = [completion["id"] for completion in completions] - if not completion_ids: - SystemExit(f"No completions found for tags: {tags}") - - compare_models = [m for m in models.split(",") if m] - - data = [] - skipped_completion_ids = [] - for id in completion_ids: - # get message from id - completion_data = _get_completion(id).json()["data"] - - # skip completion if status is not finished or kind is not chat - if completion_data["status"] != "finished" or completion_data["kind"] != "chat": - rich.print(f"Skip completion {id}. 
Status is not finished or kind is not chat.") - skipped_completion_ids.append(id) - continue - - original_model_request = completion_data["request"] - original_model_response = completion_data["response"] - original_model = original_model_response["model"] - benchmark_data = { - "completion_id": id, - "original_request": original_model_request, - f"{original_model} (original model)": { - "content": original_model_response["choices"][0]["message"]["content"], - "usage": original_model_response["usage"], - "duration": completion_data["duration"], - }, - } - messages = original_model_request["messages"] - compare_models_data = _compare(compare_models, messages, temperature, max_tokens, top_p) - benchmark_data.update(compare_models_data) - data.append(benchmark_data) - - prompt_analysis_data = {} - if analyze_prompt: - rich.print("Analyzing prompts") - for item in data: - completion_id = item["completion_id"] - prompt_messages = item["original_request"]["messages"] - all_messages = "\n\n".join([m["content"] for m in prompt_messages]) - analyzer = PromptAnalyzer() - suggestions = analyzer.analyze(all_messages) - prompt_analysis_data[completion_id] = suggestions - - # create an empty dataframe - all_df = pd.DataFrame( - columns=[ - "Completion ID", - "Prompt Messages", - "Model", - "Content", - "Prompt Tokens", - "Completion Tokens", - "Total Tokens", - "Duration (ms)", - ] - ) - - # - # Display or save the results - # - if not file: - # display in terminal using rich - for ret in data: - _render_comparison_table(ret) - if analyze_prompt: - completion_id = ret["completion_id"] - suggestions = prompt_analysis_data[completion_id] - rich.print(f"Prompt Analysis for completion_id: {completion_id}") - display_prompt_analyzer_suggestions(suggestions) - else: - # generate markdown report and save to file - for ret in data: - df = _create_dataframe_from_comparison_data(ret) - all_df = pd.concat([all_df, df]) - pivot_df = all_df.pivot(index="Completion ID", columns="Model", values="Content") - pivot_df["Prompt Messages"] = all_df.groupby("Completion ID")["Prompt Messages"].first() - # Reorder the columns - cols = pivot_df.columns.tolist() - cols = [cols[-1]] + cols[:-1] - pivot_df = pivot_df[cols] - - pivot_table = generate_results_table(pivot_df, section_name="model comparison") - all_results_table = generate_results_table(all_df, section_name="All Results") - - prompt_analysis_markdown = "" - if analyze_prompt: - prompt_analysis_markdown = "## Prompt Analysis\n\n" - for completion_id, suggestions in prompt_analysis_data.items(): - prompt_messages = all_df[all_df["Completion ID"] == completion_id]["Prompt Messages"].values[0] - prompt_analysis_markdown += ( - f"### Prompt Analysis for completion_id: {completion_id}\n\n{prompt_messages}\n\n" - ) - prompt_analysis_markdown += convert_suggestion_to_markdown(suggestions) - - # generate the list of skipped completions ids - skipped_completion_markdown = "" - if skipped_completion_ids: - skipped_completion_ids_str = ", ".join(skipped_completion_ids) - skipped_completion_markdown += "## Skipped Completion IDs\n\n" - skipped_completion_markdown += f"Skipped completions: {skipped_completion_ids_str}\n\n" - - generate_markdown_report( - file, [pivot_table, prompt_analysis_markdown, all_results_table, skipped_completion_markdown] - ) - rich.print(f"Report saved to {file}") diff --git a/log10/feedback/autofeedback.py b/log10/feedback/autofeedback.py index e435bd13..d6e0e353 100644 --- a/log10/feedback/autofeedback.py +++ b/log10/feedback/autofeedback.py @@ -3,10 
+3,7 @@ import random from types import FunctionType -import click import httpx -import rich -from rich.console import Console from log10._httpx_utils import _try_post_graphql_request from log10.completions.completions import _get_completion @@ -130,43 +127,3 @@ def get_autofeedback(completion_id: str) -> httpx.Response: return response.json() else: response.raise_for_status() - - -@click.command() -@click.option("--task_id", help="Feedback task ID") -@click.option("--content", help="Completion content") -@click.option("--file", "-f", help="File containing completion content") -@click.option("--completion_id", help="Completion ID") -@click.option("--num_samples", default=5, help="Number of samples to use for few-shot learning") -def auto_feedback_icl(task_id: str, content: str, file: str, completion_id: str, num_samples: int): - """ - Generate feedback with existing human feedback based on in context learning - """ - options_count = sum([1 for option in [content, file, completion_id] if option]) - if options_count > 1: - click.echo("Only one of --content, --file, or --completion_id should be provided.") - return - - console = Console() - auto_feedback_icl = AutoFeedbackICL(task_id, num_samples=num_samples) - if completion_id: - results = auto_feedback_icl.predict(completion_id=completion_id) - console.print_json(results) - return - - if file: - with open(file, "r") as f: - content = f.read() - results = auto_feedback_icl.predict(text=content) - console.print_json(results) - - -@click.command() -@click.option("--completion-id", required=True, help="Completion ID") -def get_autofeedback_cli(completion_id: str): - """ - Get an auto feedback by completion id - """ - res = get_autofeedback(completion_id) - if res: - rich.print_json(json.dumps(res["data"], indent=4)) diff --git a/log10/feedback/feedback.py b/log10/feedback/feedback.py index 08c15c90..1f2b557e 100644 --- a/log10/feedback/feedback.py +++ b/log10/feedback/feedback.py @@ -1,11 +1,6 @@ -import json import logging -import click import httpx -from rich.console import Console -from rich.table import Table -from tqdm import tqdm from log10._httpx_utils import _try_get from log10.llm import Log10Config @@ -86,26 +81,6 @@ def get(self, id: str) -> httpx.Response: return res -@click.command() -@click.option("--task_id", prompt="Enter task id", help="Task ID") -@click.option("--values", prompt="Enter task values", help="Feedback in JSON format") -@click.option( - "--completion_tags_selector", - prompt="Enter completion tags selector", - help="Completion tags selector", -) -@click.option("--comment", help="Comment", default="") -def create_feedback(task_id, values, completion_tags_selector, comment): - """ - Add feedback to a group of completions associated with a task - """ - click.echo("Creating feedback") - tags = completion_tags_selector.split(",") - values = json.loads(values) - feedback = Feedback().create(task_id=task_id, values=values, completion_tags_selector=tags, comment=comment) - click.echo(feedback.json()) - - def _get_feedback_list(offset, limit, task_id): total_fetched = 0 feedback_data = [] @@ -130,110 +105,9 @@ def _get_feedback_list(offset, limit, task_id): if total_fetched >= limit or total_fetched >= total_feedback: break except Exception as e: - click.echo(f"Error fetching feedback {e}") + logger.error(f"Error fetching feedback {e}") if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): - click.echo(e.response.json()["error"]) + logger.error(e.response.json()["error"]) return [] 
return feedback_data - - -@click.command() -@click.option( - "--offset", default=0, type=int, help="The starting index from which to begin the feedback fetch. Defaults to 0." -) -@click.option( - "--limit", default=25, type=int, help="The maximum number of feedback items to retrieve. Defaults to 25." -) -@click.option( - "--task_id", - default="", - type=str, - help="The specific Task ID to filter feedback. If not provided, feedback for all tasks will be fetched.", -) -def list_feedback(offset, limit, task_id): - """ - List feedback based on the provided criteria. This command allows fetching feedback for a specific task or across all tasks, - with control over the starting point and the number of items to retrieve. - """ - feedback_data = _get_feedback_list(offset, limit, task_id) - data_for_table = [] - for feedback in feedback_data: - data_for_table.append( - { - "id": feedback["id"], - "task_name": feedback["task_name"], - "feedback": json.dumps(feedback["json_values"], ensure_ascii=False), - "matched_completion_ids": ",".join(feedback["matched_completion_ids"]), - } - ) - table = Table(title="Feedback") - table.add_column("ID") - table.add_column("Task Name") - table.add_column("Feedback") - table.add_column("Completion ID") - - for item in data_for_table: - table.add_row(item["id"], item["task_name"], item["feedback"], item["matched_completion_ids"]) - console = Console() - console.print(table) - console.print(f"Total feedback: {len(feedback_data)}") - - -@click.command() -@click.option("--id", required=True, help="Get feedback by ID") -def get_feedback(id): - """ - Get feedback based on provided ID. - """ - try: - res = Feedback().get(id) - except Exception as e: - click.echo(f"Error fetching feedback {e}") - if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): - click.echo(e.response.json()["error"]) - return - console = Console() - feedback = json.dumps(res.json(), indent=4) - console.print_json(feedback) - - -@click.command() -@click.option( - "--offset", - default=0, - help="The starting index from which to begin the feedback fetch. Leave empty to start from the beginning.", -) -@click.option( - "--limit", default="", help="The maximum number of feedback items to retrieve. Leave empty to retrieve all." -) -@click.option( - "--task_id", - default="", - type=str, - help="The specific Task ID to filter feedback. If not provided, feedback for all tasks will be fetched.", -) -@click.option( - "--file", - "-f", - type=str, - required=False, - help="Path to the file where the feedback will be saved. The feedback data is saved in JSON Lines (jsonl) format. If not specified, feedback will be printed to stdout.", -) -def download_feedback(offset, limit, task_id, file): - """ - Download feedback based on the provided criteria. This command allows fetching feedback for a specific task or across all tasks, - with control over the starting point and the number of items to retrieve. 
- """ - feedback_data = _get_feedback_list(offset, limit, task_id) - - console = Console() - if not file: - for feedback in feedback_data: - console.print_json(json.dumps(feedback, indent=4)) - return - - with open(file, "w") as f: - console.print(f"Saving feedback to {file}") - for feedback in tqdm(feedback_data): - f.write(json.dumps(feedback) + "\n") diff --git a/log10/feedback/feedback_task.py b/log10/feedback/feedback_task.py index 53c91049..9c75c3b8 100644 --- a/log10/feedback/feedback_task.py +++ b/log10/feedback/feedback_task.py @@ -1,13 +1,9 @@ -import json import logging -import click import httpx from dotenv import load_dotenv -from rich.console import Console -from rich.table import Table -from log10._httpx_utils import _get_time_diff, _try_get +from log10._httpx_utils import _try_get from log10.llm import Log10Config @@ -74,75 +70,3 @@ def get(self, id: str) -> httpx.Response: if res.status_code != 200: raise Exception(f"Error fetching feedback task {res.json()}") return res - - -# create a cli interface for FeebackTask.create function -@click.command() -@click.option("--name", prompt="Enter feedback task name", help="Name of the task") -@click.option("--task_schema", prompt="Enter feedback task schema", help="Task schema") -@click.option("--instruction", help="Task instruction", default="") -@click.option( - "--completion_tags_selector", - help="Completion tags selector", -) -def create_feedback_task(name, task_schema, instruction, completion_tags_selector=None): - click.echo("Creating feedback task") - tags = [] - - if completion_tags_selector: - tags = completion_tags_selector.split(",") - - task_schema = json.loads(task_schema) - task = FeedbackTask().create( - name=name, task_schema=task_schema, completion_tags_selector=tags, instruction=instruction - ) - click.echo(f"Use this task_id to add feedback: {task.json()['id']}") - - -@click.command() -@click.option("--limit", default=25, help="Number of feedback tasks to fetch") -@click.option("--offset", default=0, help="Offset for the feedback tasks") -def list_feedback_task(limit, offset): - res = FeedbackTask().list(limit=limit, offset=offset) - feedback_tasks = res.json() - - data_for_table = [] - - for task in feedback_tasks["data"]: - data_for_table.append( - { - "id": task["id"], - "created_at": _get_time_diff(task["created_at"]), - "name": task["name"], - "required": task["json_schema"]["required"], - "instruction": task["instruction"], - } - ) - - table = Table(title="Feedback Tasks") - table.add_column("ID", style="dim") - table.add_column("Created At") - table.add_column("Name") - table.add_column("Required") - table.add_column("Instruction") - for item in data_for_table: - required = ", ".join(item["required"]) if item["required"] else "" - table.add_row(item["id"], item["created_at"], item["name"], required, item["instruction"]) - - console = Console() - console.print(table) - - -@click.command() -@click.option("--id", help="Get feedback task by ID") -def get_feedback_task(id): - try: - res = FeedbackTask().get(id) - except Exception as e: - click.echo(f"Error fetching feedback task {e}") - if hasattr(e, "response") and hasattr(e.response, "json") and "error" in e.response.json(): - click.echo(e.response.json()["error"]) - return - task = json.dumps(res.json()) - console = Console() - console.print_json(task) diff --git a/poetry.lock b/poetry.lock index 474aaa2a..be74e05b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2178,7 +2178,7 @@ files = [ name = "numpy" version = "1.26.4" description = "Fundamental package 
for array computing in Python" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, @@ -2317,7 +2317,7 @@ files = [ name = "pandas" version = "2.2.2" description = "Powerful data structures for data analysis, time series, and statistics" -optional = false +optional = true python-versions = ">=3.9" files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, @@ -2701,7 +2701,7 @@ testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] name = "python-dateutil" version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" -optional = false +optional = true python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, @@ -2729,7 +2729,7 @@ cli = ["click (>=5.0)"] name = "pytz" version = "2024.1" description = "World timezone definitions, modern and historical" -optional = false +optional = true python-versions = "*" files = [ {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, @@ -3222,7 +3222,7 @@ test = ["pytest", "pytest-cov"] name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, @@ -3659,7 +3659,7 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2024.1" description = "Provider of IANA time zone data" -optional = false +optional = true python-versions = ">=2" files = [ {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, @@ -3915,6 +3915,7 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [extras] autofeedback-icl = ["magentic"] +cli = ["click", "pandas", "rich", "tabulate"] gemini = ["google-cloud-aiplatform"] google-generativeai = ["google-generativeai"] lamini = ["lamini"] @@ -3927,4 +3928,4 @@ together = ["together"] [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "2190ee6487f544438498170965a7719667b908a07ec2e43aaa21e7b9a34e53ad" +content-hash = "5dc9a83d0bf79d0fd7c4ee631af6381db64820612e76907ea635b9ee3b69d6ac" diff --git a/pyproject.toml b/pyproject.toml index 435be36c..a024fc55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,6 @@ anthropic = "<1" requests = "^2.31.0" python-dotenv = "^1.0.0" backoff = "^2.2.1" -pandas = ">=2" langchain = {version = "<0.2.0", optional = true} magentic = {version = ">=0.17.0", optional = true, markers = "python_version >= '3.10'"} litellm = {version = "^1.34.18", optional = true} @@ -52,6 +51,10 @@ together = {version = "^0.2.7", optional = true} mosaicml-cli = {version = "^0.5.30", optional = true} google-cloud-bigquery = {version = "^3.11.4", optional = true} google-generativeai = {version = "^0.6.0", optional = true} +click = {version = "^8.1.7", optional = true} +rich = {version = "^13.7.1", optional = true} +tabulate = {version = "^0.9.0", optional = true} +pandas = {version = ">=2", optional = true} [tool.poetry.extras] autofeedback_icl = ["magentic"] @@ -63,6 
+66,7 @@ together = ["together"] mosaicml = ["mosaicml-cli"] google-generativeai = ["google-generativeai"] lamini = ["lamini"] +cli = ["click", "rich", "tabulate", "pandas"] [tool.ruff] # Never enforce `E501` (line length violations). diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..3758ead8 --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,75 @@ +import pytest +from click.testing import CliRunner + +from log10.cli.cli_commands import cli + + +completion_id = "fe3c10f0-df31-4a42-b224-233adfe1eb7f" +feedback_id = "58b8d9b7-1d6a-4b7d-952e-bc97a649dc94" +feedback_task_id = "890bda39-2232-4cde-ba95-7c501afc4b95" + + +@pytest.fixture +def runner(): + return CliRunner() + + +def test_list_completions(runner): + result = runner.invoke(cli, ["completions", "list"]) + print(result.output) + assert result.exit_code == 0 + assert "total_completions=" in result.output + + +def test_get_completion(runner): + result = runner.invoke(cli, ["completions", "get", "--id", completion_id]) + assert result.exit_code == 0 + assert completion_id in result.output + + +def test_download_completions(runner): + result = runner.invoke(cli, ["completions", "download", "--limit", "1", "--tags", "log10/summary-grading"]) + assert result.exit_code == 0 + assert "Download total completions: 1/" in result.output + + +def test_benchmark_models(runner): + tag = "test_tag_c" + model = "gpt-3.5-turbo" + result = runner.invoke(cli, ["completions", "benchmark_models", "--models", model, "--limit", "1", "--tags", tag]) + assert result.exit_code == 0 + assert f"Filter with tags: {tag}" in result.output + assert f"Running {model}" in result.output + + +def test_list_feedback(runner): + result = runner.invoke(cli, ["feedback", "list"]) + assert result.exit_code == 0 + assert "Total feedback:" in result.output + + +def test_get_feedback(runner): + result = runner.invoke(cli, ["feedback", "get", "--id", feedback_id]) + assert result.exit_code == 0 + assert feedback_id in result.output + + +def test_download_feedback(runner): + result = runner.invoke(cli, ["feedback", "download", "--limit", "1"]) + assert result.exit_code == 0 + + +def test_get_autofeedback(runner): + result = runner.invoke(cli, ["feedback", "autofeedback", "get", "--completion-id", completion_id]) + assert result.exit_code == 0 + assert completion_id in result.output + + +def test_list_feedback_task(runner): + result = runner.invoke(cli, ["feedback-task", "list"]) + assert result.exit_code == 0 + + +def test_get_feedback_task(runner): + result = runner.invoke(cli, ["feedback-task", "get", "--id", feedback_task_id]) + assert result.exit_code == 0
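The relocated commands are still wired into a single `cli` group (see `log10/cli/cli_commands.py` above), which is what both `python -m log10` and the new tests drive. A minimal sketch of exercising that group directly with Click's `CliRunner`, assuming the optional extras are installed via `pip install 'log10-io[cli]'` as the ImportError message above suggests; the completion id is simply the placeholder value reused from `tests/test_cli.py`:

from click.testing import CliRunner

from log10.cli.cli_commands import cli

runner = CliRunner()

# List the 5 most recent completions (same as `python -m log10 completions list --limit 5`).
result = runner.invoke(cli, ["completions", "list", "--limit", "5"])
print(result.output)

# Fetch one completion by id; exit_code 0 means the command ran cleanly.
result = runner.invoke(cli, ["completions", "get", "--id", "fe3c10f0-df31-4a42-b224-233adfe1eb7f"])
assert result.exit_code == 0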