From bb5a2ae89cbcd13573e32012cc67f1d017812eea Mon Sep 17 00:00:00 2001
From: Guangya Liu
Date: Mon, 25 Mar 2024 15:45:16 -0400
Subject: [PATCH] eval (#167)

---
 eval/evaluation_with_langchain.ipynb | 521 ++++++++++++++++++++
 eval/integration_openai_sdk.ipynb    | 693 +++++++++++++++++++++++++++
 2 files changed, 1214 insertions(+)
 create mode 100644 eval/evaluation_with_langchain.ipynb
 create mode 100644 eval/integration_openai_sdk.ipynb

diff --git a/eval/evaluation_with_langchain.ipynb b/eval/evaluation_with_langchain.ipynb
new file mode 100644
index 0000000..52ed297
--- /dev/null
+++ b/eval/evaluation_with_langchain.ipynb
@@ -0,0 +1,521 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "SWL354n0DECo"
+   },
+   "source": [
+    "---\n",
+    "description: Cookbook that demonstrates how to run Langchain evaluations on data in Langfuse.\n",
+    "category: Evaluation\n",
+    "---\n",
+    "\n",
+    "# Run Langchain Evaluations on data in Langfuse\n",
+    "\n",
+    "This cookbook shows how model-based evaluations can be used to automate the evaluation of production completions in Langfuse. This example uses Langchain; the approach is adaptable to other evaluation libraries, and which library fits best depends heavily on the use case.\n",
+    "\n",
+    "This cookbook follows three steps:\n",
+    "1. Fetch production `generations` stored in Langfuse\n",
+    "2. Evaluate these `generations` using Langchain\n",
+    "3. Ingest results back into Langfuse as `scores`\n",
+    "\n",
+    "\n",
+    "----\n",
+    "Not using Langfuse yet? [Get started](https://langfuse.com/docs/get-started) by capturing LLM events."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WbfTYaTkEu3G"
+   },
+   "source": [
+    "### Setup\n",
+    "\n",
+    "First, you need to install Langfuse and Langchain via pip and set the environment variables, e.g. in the `.env` file loaded via `load_dotenv()` above.\n",
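+    "\n",
+    "As a minimal sketch: the Langfuse client and the OpenAI-based evaluators read their credentials from environment variables such as the ones below (the key values are placeholders, use your own Langfuse project keys and OpenAI API key):\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"LANGFUSE_PUBLIC_KEY\"] = \"pk-lf-...\"  # placeholder\n",
+    "os.environ[\"LANGFUSE_SECRET_KEY\"] = \"sk-lf-...\"  # placeholder\n",
+    "os.environ[\"LANGFUSE_HOST\"] = \"https://cloud.langfuse.com\"\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"  # placeholder\n",
+    "```"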
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "id": "Qclwxd9LRPAL"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully installed chevron-0.14.0 cohere-5.1.2 langchain-0.1.13 langchain-community-0.0.29 langchain-core-0.1.33 langchain-text-splitters-0.0.1 langfuse-2.21.1 langsmith-0.1.31 openai-1.14.3 orjson-3.9.15 tiktoken-0.6.0\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install langfuse langchain openai cohere tiktoken --upgrade"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "id": "CQhmQQpLRa1K"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# \"text-davinci-003\" has been deprecated by OpenAI; use its designated\n",
+    "# replacement completion model instead.\n",
+    "os.environ['EVAL_MODEL'] = \"gpt-3.5-turbo-instruct\"\n",
+    "\n",
+    "# Langchain Eval types\n",
+    "EVAL_TYPES={\n",
+    "    \"hallucination\": True,\n",
+    "    \"conciseness\": True,\n",
+    "    \"relevance\": True,\n",
+    "    \"coherence\": True,\n",
+    "    \"harmfulness\": True,\n",
+    "    \"maliciousness\": True,\n",
+    "    \"helpfulness\": True,\n",
+    "    \"controversiality\": True,\n",
+    "    \"misogyny\": True,\n",
+    "    \"criminality\": True,\n",
+    "    \"insensitivity\": True\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Yiwrz1-mavJ4"
+   },
+   "source": [
+    "Initialize the Langfuse Python SDK; more information can be found [here](https://langfuse.com/docs/sdk/python#1-installation)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "id": "8viV4KT5RMjA"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langfuse import Langfuse\n",
+    "\n",
+    "langfuse = Langfuse()\n",
+    "\n",
+    "langfuse.auth_check()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bjMZ1VLhF2Vv"
+   },
+   "source": [
+    "### Fetching data\n",
+    "\n",
+    "Load all `generations` from Langfuse, filtered by `name`, in this case `OpenAI`. Names are used in Langfuse to identify different types of generations within an application. Change it to the name of the generations you want to evaluate.\n",
+    "\n",
+    "Check out the [docs](https://langfuse.com/docs/sdk/python#generation) on how to set the name when ingesting an LLM Generation.\n",
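+    "\n",
+    "As a rough sketch (the trace name, model, and values below are made up for illustration), the `name` is attached when the generation is ingested, e.g. with the low-level SDK:\n",
+    "\n",
+    "```python\n",
+    "trace = langfuse.trace(name=\"my-app\")\n",
+    "trace.generation(\n",
+    "    name=\"OpenAI\",  # the name we filter on below\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    input=[{\"role\": \"user\", \"content\": \"1 + 1 = \"}],\n",
+    "    output=\"2\",\n",
+    ")\n",
+    "```"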
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "3r3jOEX0RvXi" + }, + "outputs": [], + "source": [ + "def fetch_all_pages(name=None, user_id = None, limit=50):\n", + " page = 1\n", + " all_data = []\n", + "\n", + " while True:\n", + " response = langfuse.get_generations(name=name, limit=limit, user_id=user_id, page=page)\n", + " if not response.data:\n", + " break\n", + "\n", + " all_data.extend(response.data)\n", + " page += 1\n", + "\n", + " return all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "cAnLShvjBDBU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ObservationsView(id='7fe0f7c8-4ee8-4dbd-a210-679cd7ca98f5', trace_id='c86e90c4-e601-4249-876f-e1246088fe8c', type='GENERATION', name='poet', start_time=datetime.datetime(2024, 3, 25, 19, 26, 59, 317000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 27, 3, 120000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 200, 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a poet. Create a poem about a city.'}, {'role': 'user', 'content': 'Sofia'}], version=None, metadata=None, output={'role': 'assistant', 'content': \"In the heart of the Balkans, where history meets modernity,\\nLies a city of beauty, Sofia, a place of pure serenity.\\nWith ancient ruins whispering tales of days long gone,\\nAnd vibrant street art that dances with the dawn.\\n\\nA melting pot of cultures, where East meets West,\\nSofia's charm will put your wandering soul to the test.\\nThe rhythm of the city pulses through its veins,\\nAs vibrant markets bustle and soothing fountains reign.\\n\\nBeneath the shadow of Vitosha, the mountain so grand,\\nSofia stands proud, a jewel in Bulgaria's hand.\\nWith its grand cathedrals and majestic domes,\\nIt's a city that calls you to wander and roam.\\n\\nFrom the bustling boulevards to quiet cobbled lanes,\\nSofia's spirit will stir in your heart like gentle rains.\\nSo come, wanderer, and let the city reveal,\\nThe magic and wonder that its streets conceal.\"}, usage=Usage(input=26, output=196, total=222, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.3e-05, calculated_output_cost=0.000294, calculated_total_cost=0.000307, latency=3.803), ObservationsView(id='e1e2e036-4cba-4c52-970d-43ef6b40b04d', trace_id='c86e90c4-e601-4249-876f-e1246088fe8c', type='GENERATION', name='geography-teacher', start_time=datetime.datetime(2024, 3, 25, 19, 26, 58, 180000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 26, 59, 316000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.'}, {'role': 'user', 'content': 'Bulgaria'}], version=None, metadata=None, output={'role': 'assistant', 'content': 'Sofia'}, usage=Usage(input=35, output=3, total=38, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.75e-05, calculated_output_cost=4.5e-06, calculated_total_cost=2.2e-05, latency=1.136), ObservationsView(id='55d14693-b04e-480d-93ab-9579c7a74946', trace_id='08b3cfcf-5a1c-4c4b-9bf8-a7db6fd859e4', type='GENERATION', name='poet', start_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 909000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 35, 552000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 200, 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a poet. Create a poem about a city.'}, {'role': 'user', 'content': 'Sofia'}], version=None, metadata=None, output={'role': 'assistant', 'content': \"In the heart of the Balkans, she stands so proud,\\nA city of shadows and light, both silent and loud.\\nWith ancient streets that weave and wind,\\nThrough history's tapestry, a story to find.\\n\\nSofia, the city of red-roofed homes,\\nWhere saints and sinners freely roam.\\nThe Alexander Nevsky Cathedral so grand,\\nA beacon of faith in this ancient land.\\n\\nAmong the bustling markets and café's cheer,\\nThe spirit of Sofia is ever near.\\nFrom Vitosha Mountain, her guardian high,\\nTo the Serdika ruins where old worlds lie.\\n\\nThe heartbeat of Bulgaria, a city so alive,\\nIn her cobblestone streets, stories thrive.\\nWith each passing moment, a new tale begun,\\nSofia, eternal, beneath the Balkan sun.\\n\\nHer people, vibrant, diverse and strong,\\nIn unity and resilience, they belong.\\nA city of contrasts, old and new,\\nSofia, forever in my heart, I'll hold\"}, usage=Usage(input=26, output=200, total=226, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.3e-05, calculated_output_cost=0.0003, calculated_total_cost=0.000313, latency=25.643), ObservationsView(id='51508482-5db8-42b6-be1a-dfd462bb24d3', trace_id='08b3cfcf-5a1c-4c4b-9bf8-a7db6fd859e4', type='GENERATION', name='geography-teacher', start_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 398000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 909000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.'}, {'role': 'user', 'content': 'Bulgaria'}], version=None, metadata=None, output={'role': 'assistant', 'content': 'Sofia'}, usage=Usage(input=35, output=3, total=38, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.75e-05, calculated_output_cost=4.5e-06, calculated_total_cost=2.2e-05, latency=0.511), ObservationsView(id='d73e68ae-57a0-4616-aa12-32f720c27461', trace_id='19e2347c-bb51-4a31-8f75-9ddb7c0259ac', type='GENERATION', name='test-function', start_time=datetime.datetime(2024, 3, 25, 19, 23, 57, 913000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 1, 304000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0613', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input={'messages': [{'role': 'user', 'content': 'Explain how to assemble a PC'}], 'functions': [{'name': 'get_answer_for_user_query', 'parameters': {'type': 'object', 'title': 'StepByStepAIResponse', 'required': ['title', 'steps'], 'properties': {'steps': {'type': 'array', 'items': {'type': 'string'}, 'title': 'Steps'}, 'title': {'type': 'string', 'title': 'Title'}}}, 'description': 'Get user answer in series of steps'}], 'function_call': {'name': 'get_answer_for_user_query'}}, version=None, metadata=None, output={'role': 'assistant', 'content': None, 'function_call': {'name': 'get_answer_for_user_query', 'arguments': '{\\n \"title\": \"How to assemble a PC\",\\n \"steps\": [\\n \"Step 1: Gather the necessary components\",\\n \"Step 2: Prepare the workspace\",\\n \"Step 3: Install the CPU\",\\n \"Step 4: Install the CPU cooler\",\\n \"Step 5: Install the RAM\",\\n \"Step 6: Install the motherboard\",\\n \"Step 7: Connect the power supply\",\\n \"Step 8: Connect the storage devices\",\\n \"Step 9: Install the graphics card\",\\n \"Step 10: Connect peripherals\",\\n \"Step 11: Check for proper connections\",\\n \"Step 12: Power on and test the PC\"\\n ]\\n}'}}, usage=Usage(input=70, output=148, total=218, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clrkwk4cc000808l51xmk4uic', input_price=1.5e-06, output_price=2e-06, total_price=None, calculated_input_cost=0.000105, calculated_output_cost=0.000296, calculated_total_cost=0.000401, latency=3.391), ObservationsView(id='fd1ae1bf-4e78-49dc-bca3-71e1188f4449', trace_id='be8221d3-3e09-4d8c-9fe8-fdea004f5ac5', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 40, 507000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 40, 987000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a very accurate calculator. 
You output only the result of the calculation.'}, {'role': 'user', 'content': '1 + 100 = '}], version=None, metadata={'someMetadataKey': 'someValue'}, output={'role': 'assistant', 'content': '101'}, usage=Usage(input=33, output=1, total=34, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.65e-05, calculated_output_cost=1.5e-06, calculated_total_cost=1.8e-05, latency=0.48), ObservationsView(id='b26567ae-8e23-4737-a291-ed6808f7f508', trace_id='21f515bc-710c-4a0a-bc51-49b657dbe691', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 34, 264000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 35, 313000, tzinfo=datetime.timezone.utc), completion_start_time=datetime.datetime(2024, 3, 25, 19, 23, 35, 312000, tzinfo=datetime.timezone.utc), model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a professional comedian.'}, {'role': 'user', 'content': 'Tell me a joke.'}], version=None, metadata={'someMetadataKey': 'someValue'}, output='Sure thing! Why did the scarecrow win an award? Because he was outstanding in his field!', usage=Usage(input=22, output=20, total=42, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.1e-05, calculated_output_cost=3e-05, calculated_total_cost=4.1e-05, latency=1.049), ObservationsView(id='68f2890e-ba81-4e63-be8d-acb3571eaff4', trace_id='c65a4fdd-1e09-4645-94ad-c28b1bbb4016', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 30, 759000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 32, 516000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a very accurate calculator. You output only the result of the calculation.'}, {'role': 'user', 'content': '1 + 1 = '}], version=None, metadata={'someMetadataKey': 'someValue'}, output={'role': 'assistant', 'content': '2'}, usage=Usage(input=33, output=1, total=34, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.65e-05, calculated_output_cost=1.5e-06, calculated_total_cost=1.8e-05, latency=1.757)]\n" + ] + } + ], + "source": [ + "# generations = fetch_all_pages(user_id='user:abc')\n", + "generations = fetch_all_pages()\n", + "print(generations)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hYM6UG_dGbb6" + }, + "source": [ + "### Set up evaluation functions\n", + "\n", + "In this section, we define functions to set up the Langchain eval based on the entries in `EVAL_TYPES`. Hallucinations require their own function. More on the Langchain evals can be found [here](https://python.langchain.com/docs/guides/evaluation/string/criteria_eval_chain)." 
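+    ,
+    "\n",
+    "\n",
+    "As a reference for the scoring step below, here is a rough sketch of the result shape these evaluators return (prompt texts and values are illustrative):\n",
+    "\n",
+    "```python\n",
+    "from langchain.evaluation import load_evaluator\n",
+    "\n",
+    "evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")\n",
+    "result = evaluator.evaluate_strings(\n",
+    "    prediction=\"Sofia is the capital of Bulgaria.\",\n",
+    "    input=\"What is the capital of Bulgaria?\",\n",
+    ")\n",
+    "# result is a dict, roughly: {\"reasoning\": \"...\", \"value\": \"Y\", \"score\": 1}\n",
+    "```"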
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "7NijTmslvyK8" + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator, EvaluatorType\n", + "from langchain import PromptTemplate, OpenAI, LLMChain\n", + "from langchain.evaluation.criteria import LabeledCriteriaEvalChain\n", + "\n", + "def get_evaluator_for_key(key: str):\n", + " llm = OpenAI(temperature=0, model=os.environ.get('EVAL_MODEL'))\n", + " return load_evaluator(\"criteria\", criteria=key, llm=llm)\n", + "\n", + "def get_hallucination_eval():\n", + " criteria = {\n", + " \"hallucination\": (\n", + " \"Does this submission contain information\"\n", + " \" not present in the input or reference?\"\n", + " ),\n", + " }\n", + " llm = OpenAI(temperature=0, model=os.environ.get('EVAL_MODEL'))\n", + "\n", + " return LabeledCriteriaEvalChain.from_llm(\n", + " llm=llm,\n", + " criteria=criteria,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzZZfztGdrIQ" + }, + "source": [ + "### Execute evaluation\n", + "\n", + "Below, we execute the evaluation for each `Generation` loaded above. Each score is ingested into Langfuse via [`langfuse.score()`](https://langfuse.com/docs/scores).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "qMa2OEtqvyGg" + }, + "outputs": [ + { + "ename": "NotFoundError", + "evalue": "Error code: 404 - {'error': {'message': 'The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[31], line 15\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(eval_result)\n\u001b[1;32m 13\u001b[0m langfuse\u001b[38;5;241m.\u001b[39mscore(name\u001b[38;5;241m=\u001b[39mcriterion, trace_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mtrace_id, observation_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mid, value\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m], comment\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreasoning\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m---> 15\u001b[0m \u001b[43mexecute_eval_and_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[31], line 7\u001b[0m, in \u001b[0;36mexecute_eval_and_score\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m criteria \u001b[38;5;241m=\u001b[39m [key \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m EVAL_TYPES\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mand\u001b[39;00m key \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhallucination\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m criterion \u001b[38;5;129;01min\u001b[39;00m criteria:\n\u001b[0;32m----> 7\u001b[0m eval_result \u001b[38;5;241m=\u001b[39m \u001b[43mget_evaluator_for_key\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate_strings\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 
8\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(eval_result)\n\u001b[1;32m 13\u001b[0m langfuse\u001b[38;5;241m.\u001b[39mscore(name\u001b[38;5;241m=\u001b[39mcriterion, trace_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mtrace_id, observation_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mid, value\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m], comment\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreasoning\u001b[39m\u001b[38;5;124m'\u001b[39m])\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/evaluation/schema.py:219\u001b[0m, in \u001b[0;36mStringEvaluator.evaluate_strings\u001b[0;34m(self, prediction, reference, input, **kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Evaluate Chain or LLM output, based on optional input and label.\u001b[39;00m\n\u001b[1;32m 209\u001b[0m \n\u001b[1;32m 210\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;124;03m dict: The evaluation results containing the score or value.\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m \u001b[38;5;66;03m# noqa: E501\u001b[39;00m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_evaluation_args(reference\u001b[38;5;241m=\u001b[39mreference, \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28minput\u001b[39m)\n\u001b[0;32m--> 219\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_evaluate_strings\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprediction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreference\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreference\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/evaluation/criteria/eval_chain.py:447\u001b[0m, in \u001b[0;36mCriteriaEvalChain._evaluate_strings\u001b[0;34m(self, prediction, reference, input, callbacks, tags, metadata, include_run_info, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Evaluate a prediction against the criteria.\u001b[39;00m\n\u001b[1;32m 414\u001b[0m \n\u001b[1;32m 415\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 445\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 446\u001b[0m input_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_eval_input(prediction, reference, \u001b[38;5;28minput\u001b[39m)\n\u001b[0;32m--> 447\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 448\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[43m \u001b[49m\u001b[43mtags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 451\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 452\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_run_info\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_run_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_output(result)\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:145\u001b[0m, in \u001b[0;36mdeprecated..deprecate..warning_emitting_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m warned \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 144\u001b[0m emit_warning()\n\u001b[0;32m--> 145\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapped\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:378\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Execute the chain.\u001b[39;00m\n\u001b[1;32m 347\u001b[0m \n\u001b[1;32m 348\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;124;03m `Chain.output_keys`.\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 371\u001b[0m config \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 372\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcallbacks\u001b[39m\u001b[38;5;124m\"\u001b[39m: callbacks,\n\u001b[1;32m 373\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtags\u001b[39m\u001b[38;5;124m\"\u001b[39m: tags,\n\u001b[1;32m 374\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m: metadata,\n\u001b[1;32m 375\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: run_name,\n\u001b[1;32m 376\u001b[0m }\n\u001b[0;32m--> 378\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mRunnableConfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_only_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_only_outputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_run_info\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_run_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:163\u001b[0m, in \u001b[0;36mChain.invoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 162\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 164\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_chain_end(outputs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m include_run_info:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:153\u001b[0m, in \u001b[0;36mChain.invoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_inputs(inputs)\n\u001b[1;32m 152\u001b[0m outputs \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m--> 153\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call(inputs)\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 158\u001b[0m final_outputs: Dict[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprep_outputs(\n\u001b[1;32m 159\u001b[0m inputs, outputs, return_only_outputs\n\u001b[1;32m 160\u001b[0m )\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/llm.py:103\u001b[0m, in 
\u001b[0;36mLLMChain._call\u001b[0;34m(self, inputs, run_manager)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_call\u001b[39m(\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 100\u001b[0m inputs: Dict[\u001b[38;5;28mstr\u001b[39m, Any],\n\u001b[1;32m 101\u001b[0m run_manager: Optional[CallbackManagerForChainRun] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 102\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]:\n\u001b[0;32m--> 103\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcreate_outputs(response)[\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/llm.py:115\u001b[0m, in \u001b[0;36mLLMChain.generate\u001b[0;34m(self, input_list, run_manager)\u001b[0m\n\u001b[1;32m 113\u001b[0m callbacks \u001b[38;5;241m=\u001b[39m run_manager\u001b[38;5;241m.\u001b[39mget_child() \u001b[38;5;28;01mif\u001b[39;00m run_manager \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm, BaseLanguageModel):\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 120\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 122\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm\u001b[38;5;241m.\u001b[39mbind(stop\u001b[38;5;241m=\u001b[39mstop, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_kwargs)\u001b[38;5;241m.\u001b[39mbatch(\n\u001b[1;32m 123\u001b[0m cast(List, prompts), {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcallbacks\u001b[39m\u001b[38;5;124m\"\u001b[39m: callbacks}\n\u001b[1;32m 124\u001b[0m )\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:569\u001b[0m, in \u001b[0;36mBaseLLM.generate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mgenerate_prompt\u001b[39m(\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 563\u001b[0m prompts: List[PromptValue],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 567\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m LLMResult:\n\u001b[1;32m 568\u001b[0m prompt_strings \u001b[38;5;241m=\u001b[39m [p\u001b[38;5;241m.\u001b[39mto_string() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt_strings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:748\u001b[0m, in \u001b[0;36mBaseLLM.generate\u001b[0;34m(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[0m\n\u001b[1;32m 731\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 732\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAsked to cache, but no cache found at `langchain.cache`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 733\u001b[0m )\n\u001b[1;32m 734\u001b[0m run_managers \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 735\u001b[0m callback_manager\u001b[38;5;241m.\u001b[39mon_llm_start(\n\u001b[1;32m 736\u001b[0m dumpd(\u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 746\u001b[0m )\n\u001b[1;32m 747\u001b[0m ]\n\u001b[0;32m--> 748\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 749\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mbool\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mnew_arg_supported\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 750\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 751\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n\u001b[1;32m 752\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(missing_prompts) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:606\u001b[0m, in \u001b[0;36mBaseLLM._generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m run_manager \u001b[38;5;129;01min\u001b[39;00m run_managers:\n\u001b[1;32m 605\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_llm_error(e, 
response\u001b[38;5;241m=\u001b[39mLLMResult(generations\u001b[38;5;241m=\u001b[39m[]))\n\u001b[0;32m--> 606\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n",
+      "\n",
+      "\u001b[0;31m[... intermediate frames from langchain_core, langchain_community and the OpenAI SDK truncated for brevity ...]\u001b[0m\n",
+      "\n",
+      "\u001b[0;31mNotFoundError\u001b[0m: Error code: 404 - {'error': {'message': 'The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}"
+     ]
+    }
+   ],
+   "source": [
+    "def execute_eval_and_score():\n",
+    "    # Evaluate each fetched generation against every enabled criterion.\n",
+    "    # Hallucination needs a reference text and is handled separately below.\n",
+    "    for generation in generations:\n",
+    "        criteria = [key for key, value in EVAL_TYPES.items() if value and key != \"hallucination\"]\n",
+    "\n",
+    "        for criterion in criteria:\n",
+    "            eval_result = get_evaluator_for_key(criterion).evaluate_strings(\n",
+    "                prediction=generation.output,\n",
+    "                input=generation.input,\n",
+    "            )\n",
+    "            print(eval_result)\n",
+    "\n",
+    "            # Ingest the evaluation back into Langfuse as a score on the generation\n",
+    "            langfuse.score(\n",
+    "                name=criterion,\n",
+    "                trace_id=generation.trace_id,\n",
+    "                observation_id=generation.id,\n",
+    "                value=eval_result[\"score\"],\n",
+    "                comment=eval_result[\"reasoning\"],\n",
+    "            )\n",
+    "\n",
+    "execute_eval_and_score()\n"
+   ]
+  },
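+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: the recorded run above failed with a 404 because OpenAI has deprecated the `text-davinci-003` model. If you hit this error, rebuild the evaluators on a currently supported model. A minimal sketch, assuming `get_evaluator_for_key` wraps LangChain's `load_evaluator` helper:\n",
+    "\n",
+    "```python\n",
+    "from langchain.evaluation import load_evaluator\n",
+    "from langchain_community.chat_models import ChatOpenAI\n",
+    "\n",
+    "def get_evaluator_for_key(key: str):\n",
+    "    # swap the deprecated completion model for a supported chat model\n",
+    "    llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
+    "    return load_evaluator(\"criteria\", criteria=key, llm=llm)\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,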
"metadata": { + "id": "YcTF-z8eeL0a" + }, + "outputs": [], + "source": [ + "# hallucination\n", + "\n", + "\n", + "def eval_hallucination():\n", + "\n", + " chain = get_hallucination_eval()\n", + "\n", + " for generation in generations:\n", + " eval_result = chain.evaluate_strings(\n", + " prediction=generation.output,\n", + " input=generation.input,\n", + " reference=generation.input\n", + " )\n", + " print(eval_result)\n", + " if eval_result is not None and eval_result[\"score\"] is not None and eval_result[\"reasoning\"] is not None:\n", + " langfuse.score(name='hallucination', trace_id=generation.trace_id, observation_id=generation.id, value=eval_result[\"score\"], comment=eval_result['reasoning'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "n4zeFEKlfjQ-" + }, + "outputs": [], + "source": [ + "if EVAL_TYPES.get(\"hallucination\") == True:\n", + " eval_hallucination()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "-ROOd8d8rdl6" + }, + "outputs": [], + "source": [ + "# SDK is async, make sure to await all requests\n", + "langfuse.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MsKpVyYdavJ5" + }, + "source": [ + "### See Scores in Langfuse\n", + "\n", + " In the Langfuse UI, you can filter Traces by `Scores` and look into the details for each. Check out Langfuse Analytics to understand the impact of new prompt versions or application releases on these scores.\n", + "\n", + "![Image of Trace](https://langfuse.com/images/docs/trace-conciseness-score.jpg)\n", + "_Example trace with conciseness score_\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CkeLD_ciVD2w" + }, + "source": [ + "## Get in touch\n", + "\n", + "Looking for a specific way to score your production data in Langfuse? Join the [Discord](https://langfuse.com/discord) and discuss your use case!" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/eval/integration_openai_sdk.ipynb b/eval/integration_openai_sdk.ipynb new file mode 100644 index 0000000..7679082 --- /dev/null +++ b/eval/integration_openai_sdk.ipynb @@ -0,0 +1,693 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ki7E44X5ViQB" + }, + "source": [ + "---\n", + "description: Drop-in replacement of OpenAI SDK to get full observability in Langfuse by changing only the import\n", + "category: Integrations\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mfMAzJYcirtK" + }, + "source": [ + "# Cookbook: OpenAI Integration (Python)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B0A389k2irtK" + }, + "source": [ + "This is a cookbook with examples of the Langfuse Integration for OpenAI (Python).\n", + "\n", + "Follow the [integration guide](https://langfuse.com/docs/integrations/openai/get-started) to add this integration to your OpenAI project." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uq04G_FSWjF-" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XYoil3FcOIQt" + }, + "source": [ + "The integration is compatible with OpenAI SDK versions `>=0.27.8`. It supports async functions and streaming for OpenAI SDK versions `>=1.0.0`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "hVOOiBtUPtOO" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langfuse in /Users/gyliu/venvlf/lib/python3.10/site-packages (2.21.1)\n", + "Requirement already satisfied: openai in /Users/gyliu/venvlf/lib/python3.10/site-packages (1.14.3)\n", + "Requirement already satisfied: wrapt<2.0,>=1.14 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (1.16.0)\n", + "Requirement already satisfied: packaging<24.0,>=23.2 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (23.2)\n", + "Requirement already satisfied: pydantic<3.0,>=1.10.7 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (2.4.0)\n", + "Requirement already satisfied: httpx<1.0,>=0.15.4 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (0.24.1)\n", + "Requirement already satisfied: chevron<0.15.0,>=0.14.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (0.14.0)\n", + "Requirement already satisfied: backoff<3.0.0,>=2.2.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (2.2.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (1.8.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (4.8.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: sniffio in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (1.3.0)\n", + "Requirement already satisfied: tqdm>4 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (4.66.1)\n", + "Requirement already satisfied: idna>=2.8 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n", + "Requirement already satisfied: exceptiongroup in /Users/gyliu/venvlf/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.1.3)\n", + "Requirement already satisfied: certifi in /Users/gyliu/venvlf/lib/python3.10/site-packages (from httpx<1.0,>=0.15.4->langfuse) (2023.7.22)\n", + "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from httpx<1.0,>=0.15.4->langfuse) (0.17.3)\n", + "Requirement already satisfied: pydantic-core==2.10.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic<3.0,>=1.10.7->langfuse) (2.10.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic<3.0,>=1.10.7->langfuse) (0.6.0)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from 
httpcore<0.18.0,>=0.15.0->httpx<1.0,>=0.15.4->langfuse) (0.14.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install langfuse openai --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "ldSEJ0bAP4sj" + }, + "outputs": [], + "source": [ + "# instead of: import openai\n", + "from langfuse.openai import openai" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "G8qkHd8oK_o9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# For debugging, checks the SDK connection with the server. Do not use in production as it adds latency.\n", + "from langfuse.openai import auth_check\n", + "\n", + "auth_check()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ovnAAdbaLmD" + }, + "source": [ + "## Examples\n", + "\n", + "### Chat completion" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "c8RhokKUP9I0" + }, + "outputs": [], + "source": [ + "completion = openai.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a very accurate calculator. You output only the result of the calculation.\"},\n", + " {\"role\": \"user\", \"content\": \"1 + 1 = \"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SAqxBgOqKTzO" + }, + "source": [ + "### Chat completion (streaming)\n", + "\n", + "Simple example using the OpenAI streaming functionality." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "b9gRlb2rKTaA" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sure thing! Why did the scarecrow win an award? Because he was outstanding in his field!None" + ] + } + ], + "source": [ + "completion = openai.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a professional comedian.\"},\n", + " {\"role\": \"user\", \"content\": \"Tell me a joke.\"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + " stream=True\n", + ")\n", + "\n", + "for chunk in completion:\n", + " print(chunk.choices[0].delta.content, end=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F2pvm0qLKg7Q" + }, + "source": [ + "### Chat completion (async)\n", + "\n", + "Simple example using the OpenAI async client. It takes the Langfuse configurations either from the environment variables or from the attributes on the `openai` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "Hggwggv_MKpV" + }, + "outputs": [], + "source": [ + "from langfuse.openai import AsyncOpenAI\n", + "\n", + "async_client = AsyncOpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ZIUKD8Z3KmvQ" + }, + "outputs": [], + "source": [ + "completion = await async_client.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a very accurate calculator. You output only the result of the calculation.\"},\n", + " {\"role\": \"user\", \"content\": \"1 + 100 = \"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M4iJpqYQirtM" + }, + "source": [ + "Go to https://cloud.langfuse.com or your own instance to see your generation.\n", + "\n", + "![Chat completion](https://langfuse.com/images/docs/openai-chat.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ky7CtCNzaSrn" + }, + "source": [ + "### Functions\n", + "\n", + "Simple example using Pydantic to generate the function schema." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "jJfBdHowaRgs" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pydantic in /Users/gyliu/venvlf/lib/python3.10/site-packages (2.4.0)\n", + "Collecting pydantic\n", + " Using cached pydantic-2.6.4-py3-none-any.whl (394 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic) (0.6.0)\n", + "Collecting pydantic-core==2.16.3\n", + " Using cached pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl (1.7 MB)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic) (4.8.0)\n", + "Installing collected packages: pydantic-core, pydantic\n", + " Attempting uninstall: pydantic-core\n", + " Found existing installation: pydantic_core 2.10.0\n", + " Uninstalling pydantic_core-2.10.0:\n", + " Successfully uninstalled pydantic_core-2.10.0\n", + " Attempting uninstall: pydantic\n", + " Found existing installation: pydantic 2.4.0\n", + " Uninstalling pydantic-2.4.0:\n", + " Successfully uninstalled pydantic-2.4.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "sqlmodel 0 requires pydantic[email]<=2.4,>=2.1.1, but you have pydantic 2.6.4 which is incompatible.\n", + "langflow 0.6.0a0 requires cohere<5.0.0,>=4.32.0, but you have cohere 5.1.2 which is incompatible.\n", + "langflow 0.6.0a0 requires langchain<0.1.0,>=0.0.327, but you have langchain 0.1.13 which is incompatible.\n", + "langflow 0.6.0a0 requires langfuse<2.0.0,>=1.1.11, but you have langfuse 2.21.1 which is incompatible.\n", + "langflow 0.6.0a0 requires openai<0.28.0,>=0.27.8, but you have openai 1.14.3 which is incompatible.\n", + "langflow 0.6.0a0 requires orjson==3.9.3, but you have orjson 3.9.15 which is incompatible.\n", + "langflow 0.6.0a0 requires tiktoken<0.6.0,>=0.5.0, but you have tiktoken 0.6.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed pydantic-2.6.4 pydantic-core-2.16.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install pydantic --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "2gA-zGk7VYYp" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/m3/cz7sy7cd4v7_mklwrfjs45wm0000gn/T/ipykernel_99748/2496491748.py:7: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/\n", + " schema = StepByStepAIResponse.schema() # returns a dict like JSON schema\n" + ] + } + ], + "source": [ + "from typing import List\n", + "from pydantic import BaseModel\n", + "\n", + "class StepByStepAIResponse(BaseModel):\n", + " title: str\n", + " steps: List[str]\n", + "schema = StepByStepAIResponse.schema() # returns a dict like JSON schema" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "ORtNcN4-afDC" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/m3/cz7sy7cd4v7_mklwrfjs45wm0000gn/T/ipykernel_99748/162860978.py:12: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/\n",
+      "  \"parameters\": StepByStepAIResponse.schema()\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "response = openai.chat.completions.create(\n",
+    "    name=\"test-function\",\n",
+    "    model=\"gpt-3.5-turbo-0613\",\n",
+    "    messages=[\n",
+    "         {\"role\": \"user\", \"content\": \"Explain how to assemble a PC\"}\n",
+    "    ],\n",
+    "    functions=[\n",
+    "        {\n",
+    "            \"name\": \"get_answer_for_user_query\",\n",
+    "            \"description\": \"Get user answer in series of steps\",\n",
+    "            \"parameters\": StepByStepAIResponse.schema()\n",
+    "        }\n",
+    "    ],\n",
+    "    function_call={\"name\": \"get_answer_for_user_query\"}\n",
+    ")\n",
+    "\n",
+    "output = json.loads(response.choices[0].message.function_call.arguments)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qurrm-Ntp24O"
+   },
+   "source": [
+    "Go to https://cloud.langfuse.com or your own instance to see your generation.\n",
+    "\n",
+    "![Function](https://langfuse.com/images/docs/openai-function.png)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Su1OaQq3rPPh"
+   },
+   "source": [
+    "### Group multiple generations into a single trace\n",
+    "\n",
+    "Many applications require more than one OpenAI call. The `@observe()` decorator allows you to nest all LLM calls of a single API invocation into the same `trace` in Langfuse."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "id": "zMDVxzS1ltWU"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "In the heart of the Balkans, she stands so proud,\n",
+      "A city of shadows and light, both silent and loud.\n",
+      "With ancient streets that weave and wind,\n",
+      "Through history's tapestry, a story to find.\n",
+      "\n",
+      "Sofia, the city of red-roofed homes,\n",
+      "Where saints and sinners freely roam.\n",
+      "The Alexander Nevsky Cathedral so grand,\n",
+      "A beacon of faith in this ancient land.\n",
+      "\n",
+      "Among the bustling markets and café's cheer,\n",
+      "The spirit of Sofia is ever near.\n",
+      "From Vitosha Mountain, her guardian high,\n",
+      "To the Serdika ruins where old worlds lie.\n",
+      "\n",
+      "The heartbeat of Bulgaria, a city so alive,\n",
+      "In her cobblestone streets, stories thrive.\n",
+      "With each passing moment, a new tale begun,\n",
+      "Sofia, eternal, beneath the Balkan sun.\n",
+      "\n",
+      "Her people, vibrant, diverse and strong,\n",
+      "In unity and resilience, they belong.\n",
+      "A city of contrasts, old and new,\n",
+      "Sofia, forever in my heart, I'll hold\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langfuse.openai import openai\n",
+    "from langfuse.decorators import observe\n",
+    "\n",
+    "@observe() # decorator to automatically create trace and nest generations\n",
+    "def main(country: str, user_id: str, **kwargs) -> str:\n",
+    "  # nested generation 1: use openai to get capital of country\n",
+    "  capital = openai.chat.completions.create(\n",
+    "    name=\"geography-teacher\",\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    messages=[\n",
+    "      {\"role\": \"system\", \"content\": \"You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.\"},\n", + " {\"role\": \"user\", \"content\": country}],\n", + " temperature=0,\n", + " ).choices[0].message.content\n", + "\n", + " # nested generation 2: use openai to write poem on capital\n", + " poem = openai.chat.completions.create(\n", + " name=\"poet\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a poet. Create a poem about a city.\"},\n", + " {\"role\": \"user\", \"content\": capital}],\n", + " temperature=1,\n", + " max_tokens=200,\n", + " ).choices[0].message.content\n", + "\n", + " return poem\n", + "\n", + "# run main function and let Langfuse decorator do the rest\n", + "print(main(\"Bulgaria\", \"admin\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ehx2NZuIrPPh" + }, + "source": [ + "Go to https://cloud.langfuse.com or your own instance to see your trace.\n", + "\n", + "![Trace with multiple OpenAI calls](https://langfuse.com/images/docs/openai-trace-grouped.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-HeMqTWgK4xL" + }, + "source": [ + "#### Fully featured: Interoperability with Langfuse SDK\n", + "\n", + "The `trace` is a core object in Langfuse and you can add rich metadata to it. See [Python SDK docs](https://langfuse.com/docs/sdk/python#traces-1) for full documentation on this.\n", + "\n", + "Some of the functionality enabled by custom traces:\n", + "- custom name to identify a specific trace-type\n", + "- user-level tracking\n", + "- experiment tracking via versions and releases\n", + "- custom metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "28to65wpK4xL" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the heart of the Balkans, where history meets modernity,\n", + "Lies a city of beauty, Sofia, a place of pure serenity.\n", + "With ancient ruins whispering tales of days long gone,\n", + "And vibrant street art that dances with the dawn.\n", + "\n", + "A melting pot of cultures, where East meets West,\n", + "Sofia's charm will put your wandering soul to the test.\n", + "The rhythm of the city pulses through its veins,\n", + "As vibrant markets bustle and soothing fountains reign.\n", + "\n", + "Beneath the shadow of Vitosha, the mountain so grand,\n", + "Sofia stands proud, a jewel in Bulgaria's hand.\n", + "With its grand cathedrals and majestic domes,\n", + "It's a city that calls you to wander and roam.\n", + "\n", + "From the bustling boulevards to quiet cobbled lanes,\n", + "Sofia's spirit will stir in your heart like gentle rains.\n", + "So come, wanderer, and let the city reveal,\n", + "The magic and wonder that its streets conceal.\n" + ] + } + ], + "source": [ + "import uuid\n", + "from langfuse.openai import openai\n", + "from langfuse.decorators import langfuse_context, observe\n", + "\n", + "@observe() # decorator to automatically create trace and nest generations\n", + "def main(country: str, user_id: str, **kwargs) -> str:\n", + " # nested generation 1: use openai to get capital of country\n", + " capital = openai.chat.completions.create(\n", + " name=\"geography-teacher\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.\"},\n",
+    "      {\"role\": \"user\", \"content\": country}],\n",
+    "    temperature=0,\n",
+    "  ).choices[0].message.content\n",
+    "\n",
+    "  # nested generation 2: use openai to write poem on capital\n",
+    "  poem = openai.chat.completions.create(\n",
+    "    name=\"poet\",\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    messages=[\n",
+    "      {\"role\": \"system\", \"content\": \"You are a poet. Create a poem about a city.\"},\n",
+    "      {\"role\": \"user\", \"content\": capital}],\n",
+    "    temperature=1,\n",
+    "    max_tokens=200,\n",
+    "  ).choices[0].message.content\n",
+    "\n",
+    "  # rename the trace and set attributes (e.g., metadata) as needed\n",
+    "  langfuse_context.update_current_trace(\n",
+    "      name=\"City poem generator\",\n",
+    "      session_id=\"1234\",\n",
+    "      user_id=user_id,\n",
+    "      tags=[\"tag1\", \"tag2\"],\n",
+    "      public=True,\n",
+    "      metadata={\n",
+    "          \"env\": \"development\",\n",
+    "      },\n",
+    "      release=\"v0.0.21\"\n",
+    "  )\n",
+    "\n",
+    "  return poem\n",
+    "\n",
+    "# create a random trace_id; you could also use an existing id from your application, e.g. a conversation id\n",
+    "trace_id = str(uuid.uuid4())\n",
+    "\n",
+    "# run the main function, set your own id, and let the Langfuse decorator do the rest\n",
+    "print(main(\"Bulgaria\", \"admin\", langfuse_observation_id=trace_id))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "O3jxed-VrPPi"
+   },
+   "source": [
+    "### Programmatically add scores"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "uMO6tn53rPPi"
+   },
+   "source": [
+    "You can add [scores](https://langfuse.com/docs/scores) to the trace, e.g. to record user feedback or a programmatic evaluation. Scores are used throughout Langfuse to filter traces and are shown on the dashboard. See the docs on scores for more details.\n",
+    "\n",
+    "The score is associated with the trace via the `trace_id`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "id": "J0argbJhrPPi"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langfuse import Langfuse\n",
+    "from langfuse.decorators import langfuse_context, observe\n",
+    "\n",
+    "langfuse = Langfuse()\n",
+    "\n",
+    "@observe() # decorator to automatically create trace and nest generations\n",
+    "def main():\n",
+    "    # get the trace_id of the current trace\n",
+    "    trace_id = langfuse_context.get_current_trace_id()\n",
+    "\n",
+    "    # rest of your application ...\n",
+    "\n",
+    "    return \"res\", trace_id\n",
+    "\n",
+    "# execute the main function to generate a trace\n",
+    "_, trace_id = main()\n",
+    "\n",
+    "# score the trace from outside the trace context\n",
+    "langfuse.score(\n",
+    "    trace_id=trace_id,\n",
+    "    name=\"my-score-name\",\n",
+    "    value=1\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}