diff --git a/notebooks/__init__.py b/notebooks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/notebooks/evaluation/rag_experiments.py b/notebooks/evaluation/rag_experiments.py new file mode 100644 index 000000000..d64f7fe4e --- /dev/null +++ b/notebooks/evaluation/rag_experiments.py @@ -0,0 +1,510 @@ +import json +import sys +from dataclasses import asdict +from operator import itemgetter +from pathlib import Path +from typing import Annotated + +import click +import jsonlines +import pandas as pd +import seaborn as sns +from deepeval import evaluate +from deepeval.dataset import EvaluationDataset +from deepeval.metrics import ( + AnswerRelevancyMetric, + ContextualPrecisionMetric, + ContextualRecallMetric, + ContextualRelevancyMetric, + FaithfulnessMetric, + HallucinationMetric, +) +from dotenv import find_dotenv, load_dotenv +from elasticsearch import Elasticsearch +from elasticsearch.helpers import bulk, scan +from fastapi import Depends +from langchain.globals import set_verbose +from langchain.schema import StrOutputParser +from langchain_community.chat_models import ChatLiteLLM +from langchain_core.retrievers import BaseRetriever +from langchain_core.runnables import ConfigurableField, Runnable, RunnableLambda, RunnablePassthrough +from langchain_core.vectorstores import VectorStoreRetriever +from scipy import stats +from tiktoken import Encoding + +# Temp hack - there is an issue with the importing of redbox-core +sys.path.append(str(Path(__file__).parents[2])) + +from core_api.src import dependencies +from core_api.src.dependencies import get_tokeniser +from core_api.src.format import format_documents +from core_api.src.retriever import ParameterisedElasticsearchRetriever +from core_api.src.runnables import make_chat_prompt_from_messages_runnable +from redbox.models import ChatRoute, Settings +from redbox.models.chain import ChainInput +from redbox.models.file import UUID +from redbox.models.settings import ElasticLocalSettings + +set_verbose(False) + + +_ = load_dotenv(find_dotenv()) + + +class GetExperimentResults: + """ + Class to handle experiment results retrieval and processing. + + Attributes: + data_version (str): Data version to be used in experiment + benchmark (bool): Benchmark stated or not + V_EMBEDDINGS (str): Embeddings version + V_ROOT (path): Path to where the root of experiment data lies + V_SYNTHETIC (path): Path to the synthetic data + V_RESULTS (path): Path to the results from data processing + MODEL (str): Embedding model + ES_CLIENT (func): Elastic search client from settings + INDEX (str): Index for the json lines file + experiment_name (str): Name of the experiment + """ + + def __init__(self): + self.data_version = None + self.benchmark = None + self.V_EMBEDDINGS = "" + self.V_ROOT = None + self.V_SYNTHETIC = None + self.V_RESULTS = None + self.MODEL = None + self.ES_CLIENT = None + self.INDEX = None + self.experiment_name = None + self.retrieval_system_prompt = None + self.retrieval_question_prompt = None + self.eval_results = None + self.ENV = Settings(minio_host="localhost", elastic=ElasticLocalSettings(host="localhost")) + self.LLM = ChatLiteLLM( + model="gpt-4o", + streaming=True, + ) + self.FILE_UUIDS = None + self.USER_UUID = UUID("aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa") + self.experiment_file_name = None + self.experiment_parameters = None + + def set_data_version(self, data_version): + """ + This function sets the necessary environment variables depending on your data version. 
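For orientation, a minimal sketch of the folder layout and Elasticsearch index naming that `set_data_version` derives from a data version. The data version and embedding model name below are assumed example values, not taken from this patch; in the real script they come from the `--data_version` CLI option and `Settings().embedding_model`.

```python
from pathlib import Path

# Assumed example values (hypothetical), for illustration only.
data_version = "0.2.0"
embedding_model = "all-mpnet-base-v2"

evaluation_dir = Path("notebooks/evaluation")
v_root = evaluation_dir / f"data/{data_version}"
paths = {
    "synthetic": v_root / "synthetic",    # QA sets, e.g. ragas_synthetic_data.csv
    "results": v_root / "results",        # evaluation outputs and plots
    "embeddings": v_root / "embeddings",  # pre-computed chunk embeddings (<model>.jsonl)
}

# The index name combines the data version and embedding model, lower-cased.
index_name = f"{data_version}-{embedding_model}".lower()
print(index_name)  # 0.2.0-all-mpnet-base-v2
```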
+ It assumes you have a versioned evaluation folder in your repository e.g. notebooks/evaluation/data/0.2.0 + This should be copied from the Redbox shared Google Drive. + This folder contains the raw files, QA sets, embeddings etc. + """ + self.data_version = data_version + root = Path(__file__).parents[2] + evaluation_dir = root / "notebooks/evaluation" + self.V_ROOT = evaluation_dir / f"data/{self.data_version}" + self.V_SYNTHETIC = self.V_ROOT / "synthetic" + self.V_RESULTS = self.V_ROOT / "results" + self.V_EMBEDDINGS = self.V_ROOT / "embeddings" + self.MODEL = self.ENV.embedding_model + self.INDEX = f"{self.data_version}-{self.MODEL}".lower() + self.ES_CLIENT = self.ENV.elasticsearch_client() + + def load_chunks_from_jsonl_to_index(self) -> set: + """ + This function takes the versioned embeddings (e.g. from notebooks/evaluation/data/0.2.0/embeddings) + and loads them to ElasticSearch. + """ + file_uuids = set() + file_path = self.V_EMBEDDINGS / f"{self.MODEL}.jsonl" + + with jsonlines.open(file_path, mode="r") as reader: + for chunk_raw in reader: + chunk = json.loads(chunk_raw) + self.ES_CLIENT.index( + index=self.INDEX, + id=chunk["uuid"], + body=chunk, + ) + + file_uuids.add(chunk["parent_file_uuid"]) + self.FILE_UUIDS = file_uuids + return file_uuids + + def clear_index(self) -> None: + """ + This function clears the indexes from ElasticSearch. + """ + if not self.ES_CLIENT.indices.exists(index=self.INDEX): + return None + + documents = list(scan(self.ES_CLIENT, index=self.INDEX, query={"query": {"match_all": {}}})) + bulk_data = [{"_op_type": "delete", "_index": doc["_index"], "_id": doc["_id"]} for doc in documents] + if bulk_data: + return bulk(self.ES_CLIENT, bulk_data, request_timeout=300) + return None + + def get_parameterised_retriever( + self, es: Annotated[Elasticsearch, Depends(dependencies.get_elasticsearch_client)] + ) -> BaseRetriever: + """ + Creates an Elasticsearch retriever runnable. + Runnable takes input of a dict keyed to question, file_uuids and user_uuid. + Runnable returns a list of Chunks. + """ + default_params = { + "size": self.ENV.ai.rag_k, + "num_candidates": self.ENV.ai.rag_num_candidates, + "match_boost": 1, + "knn_boost": 1, + "similarity_threshold": 0, + } + + return ParameterisedElasticsearchRetriever( + es_client=es, + index_name=self.INDEX, + params=default_params, + embedding_model=dependencies.get_embedding_model(self.ENV), + ).configurable_fields( + params=ConfigurableField( + id="params", + name="Retriever parameters", + description="A dictionary of parameters to use for the retriever.", + ) + ) + + def build_retrieval_chain( + self, + llm: Annotated[ChatLiteLLM, Depends(dependencies.get_llm)], + retriever: Annotated[VectorStoreRetriever, Depends(dependencies.get_parameterised_retriever)], + tokeniser: Annotated[Encoding, Depends(dependencies.get_tokeniser)], + env: Annotated[Settings, Depends(dependencies.get_env)], + ) -> Runnable: + """ + This is an adaptation of core_api.src.build_chains.build_retrieval_chain. + Function experiements with different retrieval_system_prompt and retrieval_question_prompt. 
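Because the retriever exposes its `params` as a `ConfigurableField` with id "params", an experiment can override retrieval settings per invocation instead of rebuilding the chain. A toy, self-contained sketch of that mechanism follows; `FakeParamsRetriever` is an invented stand-in, not the redbox `ParameterisedElasticsearchRetriever`, and it needs no running Elasticsearch.

```python
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import ConfigurableField


class FakeParamsRetriever(BaseRetriever):
    """Invented stand-in used only to illustrate the configurable-field pattern."""

    params: dict

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> list[Document]:
        # Return as many fake chunks as params["size"] asks for.
        return [Document(page_content=f"chunk {i} for {query!r}") for i in range(self.params["size"])]


retriever = FakeParamsRetriever(params={"size": 2}).configurable_fields(
    params=ConfigurableField(id="params", name="Retriever parameters")
)

print(len(retriever.invoke("test question")))  # 2, using the default params
print(len(retriever.invoke("test question", config={"configurable": {"params": {"size": 5}}})))  # 5
```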
+ """ + return ( + RunnablePassthrough.assign(documents=retriever) + | RunnablePassthrough.assign( + formatted_documents=(RunnablePassthrough() | itemgetter("documents") | format_documents) + ) + | { + "response": make_chat_prompt_from_messages_runnable( + system_prompt=str(self.retrieval_system_prompt), + question_prompt=str(self.retrieval_question_prompt), + input_token_budget=env.ai.context_window_size - env.llm_max_tokens, + tokeniser=tokeniser, + ) + | llm + | StrOutputParser(), + "source_documents": itemgetter("documents"), + "route_name": RunnableLambda(lambda _: ChatRoute.search.value), + } + ) + + def get_rag_results( + self, + question, + ) -> dict: + """ + Get Redbox response for a given question. + """ + retriever = self.get_parameterised_retriever(es=self.ES_CLIENT) + + chain = self.build_retrieval_chain(llm=self.LLM, retriever=retriever, tokeniser=get_tokeniser(), env=self.ENV) + + response = chain.invoke( + input=ChainInput( + question=question, + chat_history=[{"text": "", "role": "user"}], + file_uuids=list(self.FILE_UUIDS), + user_uuid=self.USER_UUID, + ).model_dump() + ) + + filtered_chunks = [] + + for chunk in response["source_documents"]: + dict_chunk = dict(chunk) + filtered_chunk = { + "page_content": dict_chunk["page_content"], + "page_number": dict_chunk["metadata"]["page_number"], + "parent_file_uuid": dict_chunk["metadata"]["parent_file_uuid"], + } + filtered_chunks.append(filtered_chunk) + + return {"output_text": response["response"], "source_documents": filtered_chunks} + + def write_rag_results(self) -> None: + """ + Format and write Redbox responses to evaluation dataset. + """ + + synthetic_df = pd.read_csv(f"{self.V_SYNTHETIC}/ragas_synthetic_data.csv") + inputs = synthetic_df["input"].tolist() + + df_function = synthetic_df.copy() + + actual_output = [] + retrieval_context = [] + + for question in inputs: + data = self.get_rag_results(question=question) + actual_output.append(data["output_text"]) + retrieval_context.append(data["source_documents"]) + + df_function["actual_output"] = actual_output + df_function["retrieval_context"] = retrieval_context + + df_function_clean = df_function.dropna() + df_function_clean.to_csv( + f"{self.V_SYNTHETIC}/{self.experiment_name}_complete_ragas_synthetic_data.csv", index=False + ) + + def do_evaluation(self) -> None: + """ + Calculate evaluation metrics for a synthetic RAGAS dataset, aggregate results + and write as CSV. 
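A hedged, one-row illustration of the `<experiment>_complete_ragas_synthetic_data.csv` shape this evaluation step consumes. The column names mirror the `add_test_cases_from_csv_file` call that follows; the text values and output file name are invented.

```python
import pandas as pd

row = {
    "input": "What does the policy say about remote working?",                  # synthetic question
    "expected_output": "Staff may work remotely up to three days a week.",      # ground-truth answer
    # context and retrieval_context are ';'-delimited lists of chunks, matching the
    # context_col_delimiter / retrieval_context_col_delimiter arguments used below.
    "context": "chunk about remote working;chunk about approval process",
    "actual_output": "The policy allows up to three days of remote work per week.",  # Redbox answer
    "retrieval_context": "retrieved chunk 1;retrieved chunk 2",
}
pd.DataFrame([row]).to_csv("example_complete_ragas_synthetic_data.csv", index=False)
```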
+ """ + + dataset = EvaluationDataset() + dataset.add_test_cases_from_csv_file( + file_path=f"{self.V_SYNTHETIC}/{self.experiment_name}_complete_ragas_synthetic_data.csv", + input_col_name="input", + actual_output_col_name="actual_output", + expected_output_col_name="expected_output", + context_col_name="context", + context_col_delimiter=";", + retrieval_context_col_name="retrieval_context", + retrieval_context_col_delimiter=";", + ) + + # Instantiate retrieval metrics + contextual_precision = ContextualPrecisionMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + contextual_recall = ContextualRecallMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + contextual_relevancy = ContextualRelevancyMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + # Instantiate generation metrics + answer_relevancy = AnswerRelevancyMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + faithfulness = FaithfulnessMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + hallucination = HallucinationMetric( + threshold=0.5, # default is 0.5 + model="gpt-4o", + include_reason=True, + ) + + self.eval_results = evaluate( + test_cases=dataset, + metrics=[ + contextual_precision, + contextual_recall, + contextual_relevancy, + answer_relevancy, + faithfulness, + hallucination, + ], + ) + + return self.eval_results + + def write_evaluation_results(self) -> None: + """ + This function writes the evaluation results to a csv, identifiable by experiment_name. + """ + metric_type = { + "metric_name": [ + "Contextual Precision", + "Contextual Recall", + "Contextual Relevancy", + "Answer Relevancy", + "Faithfulness", + "Hallucination", + ], + "metric_type": ["retrieval", "retrieval", "retrieval", "generation", "generation", "generation"], + } + + evaluation = ( + pd.DataFrame.from_records(asdict(result) for result in self.eval_results) + .explode("metrics_metadata") + .reset_index(drop=True) + .assign( + metric_name=lambda df: df.metrics_metadata.apply(getattr, args=["metric"]), + score=lambda df: df.metrics_metadata.apply(getattr, args=["score"]), + reason=lambda df: df.metrics_metadata.apply(getattr, args=["reason"]), + ) + .merge(pd.DataFrame(metric_type), on="metric_name") + .drop(columns=["success", "metrics_metadata"]) + ) + + evaluation.to_csv(f"{self.V_RESULTS}/{self.experiment_name}_val_results.csv", index=False) + evaluation.head() + + def load_experiment_param_data( + self, + experiment_file_name=None, + benchmark=None, + ): + """ + This function loads an csv of experiments to try unless benchmark is specified; + in this case it will take the core_api retrieval_system_prompt and retrieval_question_prompt. + """ + + if benchmark: + self.benchmark = benchmark + self.experiment_file_name = "benchmark" + benchmark_df = pd.DataFrame() + benchmark_df["experiment_name"] = ["benchmark"] + benchmark_df["retrieval_system_prompt"] = [self.ENV.ai.retrieval_system_prompt] + benchmark_df["retrieval_question_prompt"] = [self.ENV.ai.retrieval_question_prompt] + self.experiment_parameters = benchmark_df + else: + self.experiment_file_name = experiment_file_name + self.experiment_parameters = pd.read_csv( + f"notebooks/evaluation/data/experiment_parameters/{self.experiment_file_name}.csv" + ) + + def loop_through_experiements(self): + """ + This function calls the other functions to run and write the different experiments. 
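For reference, a hypothetical experiment-parameters CSV of the kind `load_experiment_param_data` reads from `notebooks/evaluation/data/experiment_parameters/`. The file name, experiment names, and prompt wording are invented; only the three columns shown are required, and each row drives one pass of the experiment loop below.

```python
from pathlib import Path

# Hypothetical file, e.g. notebooks/evaluation/data/experiment_parameters/prompt_sweep.csv
csv_text = """experiment_name,retrieval_system_prompt,retrieval_question_prompt
terse_prompts,"You are a concise assistant. Answer strictly from the provided documents.","Answer the question using only the context above."
verbose_prompts,"You are a helpful assistant. Explain your answer and cite the documents used.","Answer the question using only the context above, and explain your reasoning."
"""
Path("prompt_sweep.csv").write_text(csv_text)
```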
+        """
+        for _index, row in self.experiment_parameters.iterrows():
+            self.experiment_name = row["experiment_name"]
+            self.retrieval_system_prompt = row["retrieval_system_prompt"]
+            self.retrieval_question_prompt = row["retrieval_question_prompt"]
+
+            self.write_rag_results()
+            self.do_evaluation()
+            self.write_evaluation_results()
+
+    def empirical_ci(self, df: pd.DataFrame) -> pd.DataFrame:
+        """
+        Calculate confidence intervals for aggregated metrics.
+        """
+
+        df_grouped = (
+            df.groupby(["experiment_name", "metric_name"])["score"]
+            .agg(["mean", "sem", "min", "max", "count"])
+            .reset_index()
+        )
+
+        ci = stats.t.interval(
+            confidence=0.95, df=df_grouped["count"] - 1, loc=df_grouped["mean"], scale=df_grouped["sem"]
+        )
+
+        df_grouped["ci_low"] = ci[0]
+        df_grouped["ci_high"] = ci[1]
+
+        return df_grouped
+
+    def create_visualisation_plus_grouped_results(self):
+        """
+        This function uses the stored experiment results to save the aggregated metrics using empirical_ci().
+        It also saves a barplot (here the confidence intervals are calculated by bootstrapping).
+        """
+        experiments = []
+        experiment_names = self.experiment_parameters["experiment_name"]
+        for experiment_name in experiment_names:
+            experiment = pd.read_csv(f"{self.V_RESULTS}/{experiment_name}_val_results.csv")
+            experiment["experiment_name"] = experiment_name
+            experiments.append(experiment)
+
+        experiments_df = pd.concat(experiments)
+
+        barplot = sns.barplot(experiments_df, x="score", y="metric_name", hue="experiment_name", errorbar=("ci", 95))
+        fig = barplot.get_figure()
+        fig.savefig(f"{self.V_RESULTS}/{self.experiment_file_name}_barplot.png", bbox_inches="tight")
+
+        experiment_metrics = self.empirical_ci(experiments_df)
+        experiment_metrics.to_csv(f"{self.V_RESULTS}/{self.experiment_file_name}_eval_results_full.csv")
+
+
+class Mutex(click.Option):
+    def __init__(self, *args, **kwargs):
+        self.not_required_if = kwargs.pop("not_required_if")
+        self.required_if_not_set = kwargs.pop("required_if_not_set", True)
+
+        if not self.not_required_if:
+            msg = "'not_required_if' parameter required"
+            raise ValueError(msg)
+
+        kwargs["help"] = (
+            kwargs.get("help", "") + " Option is mutually exclusive with " + ", ".join(self.not_required_if) + "."
+        ).strip()
+        super().__init__(*args, **kwargs)
+
+    def handle_parse_result(self, ctx, opts, args):
+        current_opt = self.name in opts
+        for mutex_opt in self.not_required_if:
+            if mutex_opt in opts:
+                if current_opt:
+                    msg = f"Illegal usage: '{self.name}' is mutually exclusive with {mutex_opt}."
+                    raise click.UsageError(msg)
+                self.prompt = None
+
+        if not current_opt and self.required_if_not_set and not any(opt in opts for opt in self.not_required_if):
+            msg = f"Illegal usage: Either '{self.name}' or one of {', '.join(self.not_required_if)} must be provided."
+            raise click.UsageError(msg)
+        return super().handle_parse_result(ctx, opts, args)
+
+
+@click.command()
+@click.option(
+    "--data_version",
+    required=True,
+    type=str,
+    help="Specify the data version you want to use.",
+)
+@click.option(
+    "--experiment_file_name",
+    cls=Mutex,
+    not_required_if=["benchmark"],
+    required_if_not_set=True,
+    type=str,
+    help="Specify the experiment data file name you want to use. 
(CSV)", +) +@click.option( + "--benchmark", + "-b", + cls=Mutex, + not_required_if=["experiment_file_name"], + required_if_not_set=True, + is_flag=True, + help="Use the baseline rag function to get benchmarking results.", +) +def main(data_version, experiment_file_name, benchmark): + get_experiment_results = GetExperimentResults() + get_experiment_results.set_data_version(data_version) + get_experiment_results.load_experiment_param_data(experiment_file_name=experiment_file_name, benchmark=benchmark) + get_experiment_results.load_chunks_from_jsonl_to_index() + get_experiment_results.loop_through_experiements() + get_experiment_results.create_visualisation_plus_grouped_results() + get_experiment_results.clear_index() + + +if __name__ == "__main__": + main() diff --git a/notebooks/evaluation/tests/test_rag_experiments.py b/notebooks/evaluation/tests/test_rag_experiments.py new file mode 100644 index 000000000..9a6034994 --- /dev/null +++ b/notebooks/evaluation/tests/test_rag_experiments.py @@ -0,0 +1,212 @@ +import json +import sys +from pathlib import Path +from unittest import mock + +import pytest + +# Hack - wasn't picking up imports properly +sys.path.append(str(Path(__file__).parents[2])) +import tempfile +import unittest +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pandas as pd +from elasticsearch import Elasticsearch +from elasticsearch.helpers import scan +from evaluation.rag_experiments import GetExperimentResults + +from redbox.models import Settings + + +@pytest.fixture() +def env(): + return Settings() + + +@pytest.fixture() +def es_client(): + with ( + mock.patch("elasticsearch.Elasticsearch.search") as mocked_search, + mock.patch("elasticsearch.client.IndicesClient.create") as mocked_index_create, + mock.patch("elasticsearch.Elasticsearch.index") as mocked_index, + mock.patch("elasticsearch.helpers.scan") as mocked_scan, + mock.patch("elasticsearch.Elasticsearch.delete_by_query") as mocked_delete_by_query, + ): + mocked_search.return_value = {"hits": {"hits": [{"_id": "1", "_source": {"field": "value"}}]}} + mocked_index_create.return_value = {"acknowledged": True} + mocked_index.return_value = {"result": "created"} + mocked_delete_by_query.return_value = {"deleted": 1} + mocked_scan.return_value = iter( + [{"_id": "1", "_source": {"field": "value"}}, {"_id": "2", "_source": {"field": "value"}}] + ) + + client = Elasticsearch(hosts=["http://localhost:9200"]) + client.indices = mock.Mock() + client.indices.create = mocked_index_create + client.search = mocked_search + client.index = mocked_index + client.delete_by_query = mocked_delete_by_query + + yield client, mocked_search, mocked_index_create, mocked_index, mocked_scan, mocked_delete_by_query + + +@pytest.mark.usefixtures("es_client") +class TestGetExperimentResults(unittest.TestCase): + @pytest.fixture(autouse=True) + def inject_fixtures(self, es_client): + ( + self.es_client, + self.mocked_search, + self.mocked_index_create, + self.mocked_index, + self.mocked_scan, + self.mocked_delete_by_query, + ) = es_client + + return es_client + + @classmethod + def setUpClass(cls): + super().setUpClass() + + def setUp(self): + self.get_experiment_results = GetExperimentResults() + self.get_experiment_results.set_data_version("0.2.3") + + self.get_experiment_results.ENV = MagicMock() + + self.get_experiment_results.ES_CLIENT = self.es_client + + self.temp_dir = tempfile.TemporaryDirectory() + self.get_experiment_results.V_RESULTS = self.temp_dir.name + + self.mock_synthetic_path = "/mock/synthetic" + 
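Stepping back to the command-line entry point defined above: a small, hedged sketch of how the mutually exclusive options behave, using click's test runner so nothing needs a live Elasticsearch. "0.2.3" matches the data version used in these tests; "prompt_sweep" is the hypothetical parameters file from the earlier sketch, and a real run would additionally need the versioned data and services available.

```python
from click.testing import CliRunner

from evaluation.rag_experiments import main  # assumes the same sys.path setup as these tests

runner = CliRunner()

# Passing both --benchmark and --experiment_file_name should be rejected by the Mutex option.
both = runner.invoke(main, ["--data_version", "0.2.3", "--benchmark", "--experiment_file_name", "prompt_sweep"])
print(both.exit_code, both.output)  # expected: non-zero exit code with an "Illegal usage" message

# Passing neither should also be rejected, since one of the two must be provided.
neither = runner.invoke(main, ["--data_version", "0.2.3"])
print(neither.exit_code, neither.output)

# A real run would look like one of:
#   python notebooks/evaluation/rag_experiments.py --data_version 0.2.3 --benchmark
#   python notebooks/evaluation/rag_experiments.py --data_version 0.2.3 --experiment_file_name prompt_sweep
```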
self.mock_complete_ragas_file = "mock_complete_ragas_synthetic_data.csv" + self.get_experiment_results.synthetic_data_dir = self.mock_synthetic_path + + def tearDown(self): + self.temp_dir.cleanup() + + @patch("jsonlines.open") + def test_load_chunks_from_jsonl_to_index(self, mock_jsonlines_open): + mock_jsonlines_open.return_value.__enter__.return_value = iter( + [json.dumps({"uuid": "1234", "parent_file_uuid": "abcd", "data": "test data"})] + ) + + file_uuids = self.get_experiment_results.load_chunks_from_jsonl_to_index() + assert "abcd" in file_uuids + self.get_experiment_results.ES_CLIENT.index.assert_called_once() + + @patch("jsonlines.open") + def test_load_chunks_from_empty_jsonl(self, mock_jsonlines_open): + mock_jsonlines_open.return_value.__enter__.return_value = iter([]) + + result = self.get_experiment_results.load_chunks_from_jsonl_to_index() + assert result == set() + + def test_load_experiment_param_data(self): + with patch("pandas.read_csv") as mock_read_csv: + mock_read_csv.return_value = pd.DataFrame( + { + "experiment_name": ["test_experiment"], + "retrieval_system_prompt": ["test_prompt"], + "retrieval_question_prompt": ["test_question_prompt"], + } + ) + + self.get_experiment_results.load_experiment_param_data("test_file") + assert self.get_experiment_results.experiment_parameters["experiment_name"][0] == "test_experiment" + + @patch("evaluation.rag_experiments.GetExperimentResults.get_rag_results") + @patch("pandas.DataFrame.to_csv") + def test_write_rag_results(self, mock_to_csv, mock_get_rag_results): + mock_get_rag_results.return_value = { + "output_text": "test_output", + "source_documents": [{"page_content": "test_content"}], + } + + with patch("pandas.read_csv") as mock_read_csv: + mock_read_csv.return_value = pd.DataFrame({"input": ["test_input"]}) + self.get_experiment_results.write_rag_results() + mock_to_csv.assert_called_once() + + @patch("evaluation.rag_experiments.pd.read_csv") + @patch("evaluation.rag_experiments.evaluate") + def test_do_evaluation(self, mock_evaluate, mock_read_csv): + # Setting up the mock return value for read_csv + mock_data = pd.DataFrame({"column1": [1, 2, 3], "column2": ["a", "b", "c"]}) + mock_read_csv.return_value = mock_data + + mock_eval_result = pd.DataFrame( + { + "input": ["input1", "input2", "input3"], + "actual_output": ["output1", "output2", "output3"], + "expected_output": ["expected1", "expected2", "expected3"], + "context": [["context1"], ["context2"], ["context3"]], + "retrieval_context": [["retrieval1"], ["retrieval2"], ["retrieval3"]], + "additional_metadata": [None, None, None], + "comments": [None, None, None], + } + ) + mock_evaluate.return_value = mock_eval_result + + result = self.get_experiment_results.do_evaluation() + + assert result is not None + assert isinstance(result, pd.DataFrame) + assert result.shape == (3, 7) + + @patch("seaborn.barplot") + @patch("pandas.concat") + @patch("pandas.read_csv") + def test_create_visualisation_plus_grouped_results(self, mock_read_csv, mock_concat, mock_barplot): # noqa: ARG002 + mock_read_csv.return_value = pd.DataFrame( + {"experiment_name": ["test_experiment"], "score": [0.5], "metric_name": ["test_metric"]} + ) + mock_concat.return_value = mock_read_csv.return_value + + self.get_experiment_results.experiment_parameters = {"experiment_name": ["test_experiment"]} + self.get_experiment_results.create_visualisation_plus_grouped_results() + + @patch("pandas.read_csv") + def test_create_visualisation_empty_data(self, mock_read_csv): + mock_read_csv.return_value = 
pd.DataFrame() + self.get_experiment_results.experiment_parameters = {"experiment_name": []} + + with pytest.raises(ValueError): # noqa: PT011 + self.get_experiment_results.create_visualisation_plus_grouped_results() + + @patch("jsonlines.open") + def test_clear_index(self, mock_jsonlines_open): + with patch("pandas.read_csv") as mock_read_csv: + mock_read_csv.return_value = pd.DataFrame( + { + "experiment_name": ["test_experiment"], + "retrieval_system_prompt": ["test_prompt"], + "retrieval_question_prompt": ["test_question_prompt"], + } + ) + + self.get_experiment_results.load_experiment_param_data("test_file") + + mock_jsonlines_open.return_value.__enter__.return_value = iter( + [json.dumps({"uuid": "1234", "parent_file_uuid": "abcd", "data": "test data"})] + ) + + self.get_experiment_results.load_chunks_from_jsonl_to_index() + + self.get_experiment_results.ES_CLIENT.index.assert_called_once() + + self.get_experiment_results.clear_index() + documents_after_clear = list( + scan(self.get_experiment_results.ES_CLIENT, query={"query": {"match_all": {}}}) + ) + assert len(documents_after_clear) == 0 + + self.get_experiment_results.clear_index() + + +if __name__ == "__main__": + unittest.main() diff --git a/poetry.lock b/poetry.lock index d13ec4a80..3804dab8d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohttp" @@ -3921,9 +3921,13 @@ files = [ {file = "lxml-5.2.2-cp36-cp36m-win_amd64.whl", hash = "sha256:edcfa83e03370032a489430215c1e7783128808fd3e2e0a3225deee278585196"}, {file = "lxml-5.2.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:28bf95177400066596cdbcfc933312493799382879da504633d16cf60bba735b"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a745cc98d504d5bd2c19b10c79c61c7c3df9222629f1b6210c0368177589fb8"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b590b39ef90c6b22ec0be925b211298e810b4856909c8ca60d27ffbca6c12e6"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b336b0416828022bfd5a2e3083e7f5ba54b96242159f83c7e3eebaec752f1716"}, + {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:c2faf60c583af0d135e853c86ac2735ce178f0e338a3c7f9ae8f622fd2eb788c"}, {file = "lxml-5.2.2-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:4bc6cb140a7a0ad1f7bc37e018d0ed690b7b6520ade518285dc3171f7a117905"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7ff762670cada8e05b32bf1e4dc50b140790909caa8303cfddc4d702b71ea184"}, {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:57f0a0bbc9868e10ebe874e9f129d2917750adf008fe7b9c1598c0fbbfdde6a6"}, + {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:a6d2092797b388342c1bc932077ad232f914351932353e2e8706851c870bca1f"}, {file = "lxml-5.2.2-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:60499fe961b21264e17a471ec296dcbf4365fbea611bf9e303ab69db7159ce61"}, {file = "lxml-5.2.2-cp37-cp37m-win32.whl", hash = "sha256:d9b342c76003c6b9336a80efcc766748a333573abf9350f4094ee46b006ec18f"}, {file = "lxml-5.2.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b16db2770517b8799c79aa80f4053cd6f8b716f21f8aca962725a9565ce3ee40"}, @@ -4465,6 +4469,22 @@ files = [ griffe = ">=0.47" 
mkdocstrings = ">=0.25" +[[package]] +name = "mock" +version = "5.1.0" +description = "Rolling backport of unittest.mock for all Pythons" +optional = false +python-versions = ">=3.6" +files = [ + {file = "mock-5.1.0-py3-none-any.whl", hash = "sha256:18c694e5ae8a208cdb3d2c20a993ca1a7b0efa258c247a1e565150f477f83744"}, + {file = "mock-5.1.0.tar.gz", hash = "sha256:5e96aad5ccda4718e0a229ed94b2024df75cc2d55575ba5762d31f5767b8767d"}, +] + +[package.extras] +build = ["blurb", "twine", "wheel"] +docs = ["sphinx"] +test = ["pytest", "pytest-cov"] + [[package]] name = "moto" version = "5.0.10" @@ -5560,6 +5580,7 @@ optional = false python-versions = ">=3.9" files = [ {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, + {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, @@ -5580,6 +5601,7 @@ files = [ {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, + {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, @@ -7039,6 +7061,7 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ 
-7996,6 +8019,27 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"] test = ["Cython", "array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] +[[package]] +name = "seaborn" +version = "0.13.2" +description = "Statistical data visualization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, + {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, +] + +[package.dependencies] +matplotlib = ">=3.4,<3.6.1 || >3.6.1" +numpy = ">=1.20,<1.24.0 || >1.24.0" +pandas = ">=1.2" + +[package.extras] +dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"] +docs = ["ipykernel", "nbconvert", "numpydoc", "pydata_sphinx_theme (==0.10.0rc2)", "pyyaml", "sphinx (<6.0.0)", "sphinx-copybutton", "sphinx-design", "sphinx-issues"] +stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"] + [[package]] name = "semantic-router" version = "0.0.48" @@ -10169,4 +10213,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.12" -content-hash = "0f6bc0402cbde8567d6cdd705acee1d3af4bdb2e7e713362877266e89b6533ce" +content-hash = "0d54ca3b9faad13a7d7c77be4047b29aa8bf07d57acb3a6166e9cdab2cb60421" diff --git a/pyproject.toml b/pyproject.toml index c5cc872b1..f7afa4d37 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,8 @@ faststream = {extras = ["redis"], version = "<0.5.0"} langchain-elasticsearch = "^0.2.0" +seaborn = "^0.13.2" +mock = "^5.1.0" [tool.poetry.group.api.dependencies] fastapi = "^0.111.0" uvicorn = "^0.30.1"
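seaborn and scipy do the statistical lifting in the evaluation script above. As a quick sanity check of the confidence-interval aggregation in `empirical_ci`, here is a self-contained worked example on invented scores; the experiment and metric names are placeholders.

```python
import pandas as pd
from scipy import stats

# Invented per-question scores for two experiments on one metric.
df = pd.DataFrame(
    {
        "experiment_name": ["benchmark"] * 4 + ["terse_prompts"] * 4,
        "metric_name": ["Faithfulness"] * 8,
        "score": [0.80, 0.70, 0.90, 0.85, 0.60, 0.65, 0.70, 0.75],
    }
)

# Same aggregation as empirical_ci(): group, take the mean and standard error,
# then build a 95% Student-t interval around the mean.
grouped = (
    df.groupby(["experiment_name", "metric_name"])["score"]
    .agg(["mean", "sem", "count"])
    .reset_index()
)
ci_low, ci_high = stats.t.interval(
    confidence=0.95, df=grouped["count"] - 1, loc=grouped["mean"], scale=grouped["sem"]
)
grouped["ci_low"], grouped["ci_high"] = ci_low, ci_high
print(grouped[["experiment_name", "mean", "ci_low", "ci_high"]])
```

Note that the saved barplot uses seaborn's bootstrap confidence intervals (errorbar=("ci", 95)), whereas this aggregated table uses the analytic t-interval, so the two can differ slightly.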