From bb5a2ae89cbcd13573e32012cc67f1d017812eea Mon Sep 17 00:00:00 2001
From: Guangya Liu
Date: Mon, 25 Mar 2024 15:45:16 -0400
Subject: [PATCH] eval (#167)

---
 eval/evaluation_with_langchain.ipynb | 521 ++++++++++++++++++++
 eval/integration_openai_sdk.ipynb    | 693 +++++++++++++++++++++++++++
 2 files changed, 1214 insertions(+)
 create mode 100644 eval/evaluation_with_langchain.ipynb
 create mode 100644 eval/integration_openai_sdk.ipynb

diff --git a/eval/evaluation_with_langchain.ipynb b/eval/evaluation_with_langchain.ipynb
new file mode 100644
index 0000000..52ed297
--- /dev/null
+++ b/eval/evaluation_with_langchain.ipynb
@@ -0,0 +1,521 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "SWL354n0DECo"
+   },
+   "source": [
+    "---\n",
+    "description: Cookbook that demonstrates how to run Langchain evaluations on data in Langfuse.\n",
+    "category: Evaluation\n",
+    "---\n",
+    "\n",
+    "# Run Langchain Evaluations on data in Langfuse\n",
+    "\n",
+    "This cookbook shows how model-based evaluations can be used to automate the evaluation of production completions in Langfuse. This example uses Langchain; the approach is adaptable to other evaluation libraries, and which library fits best depends heavily on the use case.\n",
+    "\n",
+    "This cookbook follows three steps:\n",
+    "1. Fetch production `generations` stored in Langfuse\n",
+    "2. Evaluate these `generations` using Langchain\n",
+    "3. Ingest results back into Langfuse as `scores`\n",
+    "\n",
+    "\n",
+    "----\n",
+    "Not using Langfuse yet? [Get started](https://langfuse.com/docs/get-started) by capturing LLM events."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from dotenv import load_dotenv\n",
+    "load_dotenv()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "WbfTYaTkEu3G"
+   },
+   "source": [
+    "### Setup\n",
+    "\n",
+    "First, you need to install Langfuse and Langchain via pip and set the environment variables, e.g. in the `.env` file loaded via `load_dotenv()` above.\n",
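+    "\n",
+    "As a minimal sketch: the Langfuse client and the OpenAI-based evaluators read their credentials from environment variables such as the ones below (the key values are placeholders, use your own Langfuse project keys and OpenAI API key):\n",
+    "\n",
+    "```python\n",
+    "import os\n",
+    "\n",
+    "os.environ[\"LANGFUSE_PUBLIC_KEY\"] = \"pk-lf-...\"  # placeholder\n",
+    "os.environ[\"LANGFUSE_SECRET_KEY\"] = \"sk-lf-...\"  # placeholder\n",
+    "os.environ[\"LANGFUSE_HOST\"] = \"https://cloud.langfuse.com\"\n",
+    "os.environ[\"OPENAI_API_KEY\"] = \"sk-...\"  # placeholder\n",
+    "```"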
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "id": "Qclwxd9LRPAL"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Successfully installed chevron-0.14.0 cohere-5.1.2 langchain-0.1.13 langchain-community-0.0.29 langchain-core-0.1.33 langchain-text-splitters-0.0.1 langfuse-2.21.1 langsmith-0.1.31 openai-1.14.3 orjson-3.9.15 tiktoken-0.6.0\n",
+      "Note: you may need to restart the kernel to use updated packages.\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pip install langfuse langchain openai cohere tiktoken --upgrade"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "id": "CQhmQQpLRa1K"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "# \"text-davinci-003\" has been deprecated by OpenAI; use its designated\n",
+    "# replacement completion model instead.\n",
+    "os.environ['EVAL_MODEL'] = \"gpt-3.5-turbo-instruct\"\n",
+    "\n",
+    "# Langchain Eval types\n",
+    "EVAL_TYPES={\n",
+    "    \"hallucination\": True,\n",
+    "    \"conciseness\": True,\n",
+    "    \"relevance\": True,\n",
+    "    \"coherence\": True,\n",
+    "    \"harmfulness\": True,\n",
+    "    \"maliciousness\": True,\n",
+    "    \"helpfulness\": True,\n",
+    "    \"controversiality\": True,\n",
+    "    \"misogyny\": True,\n",
+    "    \"criminality\": True,\n",
+    "    \"insensitivity\": True\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Yiwrz1-mavJ4"
+   },
+   "source": [
+    "Initialize the Langfuse Python SDK; more information can be found [here](https://langfuse.com/docs/sdk/python#1-installation)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "id": "8viV4KT5RMjA"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langfuse import Langfuse\n",
+    "\n",
+    "langfuse = Langfuse()\n",
+    "\n",
+    "langfuse.auth_check()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "bjMZ1VLhF2Vv"
+   },
+   "source": [
+    "### Fetching data\n",
+    "\n",
+    "Load all `generations` from Langfuse, filtered by `name`, in this case `OpenAI`. Names are used in Langfuse to identify different types of generations within an application. Change it to the name of the generations you want to evaluate.\n",
+    "\n",
+    "Check out the [docs](https://langfuse.com/docs/sdk/python#generation) on how to set the name when ingesting an LLM Generation.\n",
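+    "\n",
+    "As a rough sketch (the trace name, model, and values below are made up for illustration), the `name` is attached when the generation is ingested, e.g. with the low-level SDK:\n",
+    "\n",
+    "```python\n",
+    "trace = langfuse.trace(name=\"my-app\")\n",
+    "trace.generation(\n",
+    "    name=\"OpenAI\",  # the name we filter on below\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    input=[{\"role\": \"user\", \"content\": \"1 + 1 = \"}],\n",
+    "    output=\"2\",\n",
+    ")\n",
+    "```"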
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "3r3jOEX0RvXi" + }, + "outputs": [], + "source": [ + "def fetch_all_pages(name=None, user_id = None, limit=50):\n", + " page = 1\n", + " all_data = []\n", + "\n", + " while True:\n", + " response = langfuse.get_generations(name=name, limit=limit, user_id=user_id, page=page)\n", + " if not response.data:\n", + " break\n", + "\n", + " all_data.extend(response.data)\n", + " page += 1\n", + "\n", + " return all_data" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "cAnLShvjBDBU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ObservationsView(id='7fe0f7c8-4ee8-4dbd-a210-679cd7ca98f5', trace_id='c86e90c4-e601-4249-876f-e1246088fe8c', type='GENERATION', name='poet', start_time=datetime.datetime(2024, 3, 25, 19, 26, 59, 317000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 27, 3, 120000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 200, 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a poet. Create a poem about a city.'}, {'role': 'user', 'content': 'Sofia'}], version=None, metadata=None, output={'role': 'assistant', 'content': \"In the heart of the Balkans, where history meets modernity,\\nLies a city of beauty, Sofia, a place of pure serenity.\\nWith ancient ruins whispering tales of days long gone,\\nAnd vibrant street art that dances with the dawn.\\n\\nA melting pot of cultures, where East meets West,\\nSofia's charm will put your wandering soul to the test.\\nThe rhythm of the city pulses through its veins,\\nAs vibrant markets bustle and soothing fountains reign.\\n\\nBeneath the shadow of Vitosha, the mountain so grand,\\nSofia stands proud, a jewel in Bulgaria's hand.\\nWith its grand cathedrals and majestic domes,\\nIt's a city that calls you to wander and roam.\\n\\nFrom the bustling boulevards to quiet cobbled lanes,\\nSofia's spirit will stir in your heart like gentle rains.\\nSo come, wanderer, and let the city reveal,\\nThe magic and wonder that its streets conceal.\"}, usage=Usage(input=26, output=196, total=222, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.3e-05, calculated_output_cost=0.000294, calculated_total_cost=0.000307, latency=3.803), ObservationsView(id='e1e2e036-4cba-4c52-970d-43ef6b40b04d', trace_id='c86e90c4-e601-4249-876f-e1246088fe8c', type='GENERATION', name='geography-teacher', start_time=datetime.datetime(2024, 3, 25, 19, 26, 58, 180000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 26, 59, 316000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.'}, {'role': 'user', 'content': 'Bulgaria'}], version=None, metadata=None, output={'role': 'assistant', 'content': 'Sofia'}, usage=Usage(input=35, output=3, total=38, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.75e-05, calculated_output_cost=4.5e-06, calculated_total_cost=2.2e-05, latency=1.136), ObservationsView(id='55d14693-b04e-480d-93ab-9579c7a74946', trace_id='08b3cfcf-5a1c-4c4b-9bf8-a7db6fd859e4', type='GENERATION', name='poet', start_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 909000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 35, 552000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 200, 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a poet. Create a poem about a city.'}, {'role': 'user', 'content': 'Sofia'}], version=None, metadata=None, output={'role': 'assistant', 'content': \"In the heart of the Balkans, she stands so proud,\\nA city of shadows and light, both silent and loud.\\nWith ancient streets that weave and wind,\\nThrough history's tapestry, a story to find.\\n\\nSofia, the city of red-roofed homes,\\nWhere saints and sinners freely roam.\\nThe Alexander Nevsky Cathedral so grand,\\nA beacon of faith in this ancient land.\\n\\nAmong the bustling markets and café's cheer,\\nThe spirit of Sofia is ever near.\\nFrom Vitosha Mountain, her guardian high,\\nTo the Serdika ruins where old worlds lie.\\n\\nThe heartbeat of Bulgaria, a city so alive,\\nIn her cobblestone streets, stories thrive.\\nWith each passing moment, a new tale begun,\\nSofia, eternal, beneath the Balkan sun.\\n\\nHer people, vibrant, diverse and strong,\\nIn unity and resilience, they belong.\\nA city of contrasts, old and new,\\nSofia, forever in my heart, I'll hold\"}, usage=Usage(input=26, output=200, total=226, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.3e-05, calculated_output_cost=0.0003, calculated_total_cost=0.000313, latency=25.643), ObservationsView(id='51508482-5db8-42b6-be1a-dfd462bb24d3', trace_id='08b3cfcf-5a1c-4c4b-9bf8-a7db6fd859e4', type='GENERATION', name='geography-teacher', start_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 398000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 9, 909000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.'}, {'role': 'user', 'content': 'Bulgaria'}], version=None, metadata=None, output={'role': 'assistant', 'content': 'Sofia'}, usage=Usage(input=35, output=3, total=38, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.75e-05, calculated_output_cost=4.5e-06, calculated_total_cost=2.2e-05, latency=0.511), ObservationsView(id='d73e68ae-57a0-4616-aa12-32f720c27461', trace_id='19e2347c-bb51-4a31-8f75-9ddb7c0259ac', type='GENERATION', name='test-function', start_time=datetime.datetime(2024, 3, 25, 19, 23, 57, 913000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 24, 1, 304000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0613', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 1, 'presence_penalty': 0, 'frequency_penalty': 0}, input={'messages': [{'role': 'user', 'content': 'Explain how to assemble a PC'}], 'functions': [{'name': 'get_answer_for_user_query', 'parameters': {'type': 'object', 'title': 'StepByStepAIResponse', 'required': ['title', 'steps'], 'properties': {'steps': {'type': 'array', 'items': {'type': 'string'}, 'title': 'Steps'}, 'title': {'type': 'string', 'title': 'Title'}}}, 'description': 'Get user answer in series of steps'}], 'function_call': {'name': 'get_answer_for_user_query'}}, version=None, metadata=None, output={'role': 'assistant', 'content': None, 'function_call': {'name': 'get_answer_for_user_query', 'arguments': '{\\n \"title\": \"How to assemble a PC\",\\n \"steps\": [\\n \"Step 1: Gather the necessary components\",\\n \"Step 2: Prepare the workspace\",\\n \"Step 3: Install the CPU\",\\n \"Step 4: Install the CPU cooler\",\\n \"Step 5: Install the RAM\",\\n \"Step 6: Install the motherboard\",\\n \"Step 7: Connect the power supply\",\\n \"Step 8: Connect the storage devices\",\\n \"Step 9: Install the graphics card\",\\n \"Step 10: Connect peripherals\",\\n \"Step 11: Check for proper connections\",\\n \"Step 12: Power on and test the PC\"\\n ]\\n}'}}, usage=Usage(input=70, output=148, total=218, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clrkwk4cc000808l51xmk4uic', input_price=1.5e-06, output_price=2e-06, total_price=None, calculated_input_cost=0.000105, calculated_output_cost=0.000296, calculated_total_cost=0.000401, latency=3.391), ObservationsView(id='fd1ae1bf-4e78-49dc-bca3-71e1188f4449', trace_id='be8221d3-3e09-4d8c-9fe8-fdea004f5ac5', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 40, 507000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 40, 987000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a very accurate calculator. 
You output only the result of the calculation.'}, {'role': 'user', 'content': '1 + 100 = '}], version=None, metadata={'someMetadataKey': 'someValue'}, output={'role': 'assistant', 'content': '101'}, usage=Usage(input=33, output=1, total=34, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.65e-05, calculated_output_cost=1.5e-06, calculated_total_cost=1.8e-05, latency=0.48), ObservationsView(id='b26567ae-8e23-4737-a291-ed6808f7f508', trace_id='21f515bc-710c-4a0a-bc51-49b657dbe691', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 34, 264000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 35, 313000, tzinfo=datetime.timezone.utc), completion_start_time=datetime.datetime(2024, 3, 25, 19, 23, 35, 312000, tzinfo=datetime.timezone.utc), model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a professional comedian.'}, {'role': 'user', 'content': 'Tell me a joke.'}], version=None, metadata={'someMetadataKey': 'someValue'}, output='Sure thing! Why did the scarecrow win an award? Because he was outstanding in his field!', usage=Usage(input=22, output=20, total=42, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.1e-05, calculated_output_cost=3e-05, calculated_total_cost=4.1e-05, latency=1.049), ObservationsView(id='68f2890e-ba81-4e63-be8d-acb3571eaff4', trace_id='c65a4fdd-1e09-4645-94ad-c28b1bbb4016', type='GENERATION', name='test-chat', start_time=datetime.datetime(2024, 3, 25, 19, 23, 30, 759000, tzinfo=datetime.timezone.utc), end_time=datetime.datetime(2024, 3, 25, 19, 23, 32, 516000, tzinfo=datetime.timezone.utc), completion_start_time=None, model='gpt-3.5-turbo-0125', model_parameters={'top_p': 1, 'max_tokens': 'inf', 'temperature': 0, 'presence_penalty': 0, 'frequency_penalty': 0}, input=[{'role': 'system', 'content': 'You are a very accurate calculator. You output only the result of the calculation.'}, {'role': 'user', 'content': '1 + 1 = '}], version=None, metadata={'someMetadataKey': 'someValue'}, output={'role': 'assistant', 'content': '2'}, usage=Usage(input=33, output=1, total=34, unit=, input_cost=None, output_cost=None, total_cost=None), level=, status_message=None, parent_observation_id=None, prompt_id=None, model_id='clruwnahl00030al7ab9rark7', input_price=5e-07, output_price=1.5e-06, total_price=None, calculated_input_cost=1.65e-05, calculated_output_cost=1.5e-06, calculated_total_cost=1.8e-05, latency=1.757)]\n" + ] + } + ], + "source": [ + "# generations = fetch_all_pages(user_id='user:abc')\n", + "generations = fetch_all_pages()\n", + "print(generations)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hYM6UG_dGbb6" + }, + "source": [ + "### Set up evaluation functions\n", + "\n", + "In this section, we define functions to set up the Langchain eval based on the entries in `EVAL_TYPES`. Hallucinations require their own function. More on the Langchain evals can be found [here](https://python.langchain.com/docs/guides/evaluation/string/criteria_eval_chain)." 
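+    ,
+    "\n",
+    "\n",
+    "As a reference for the scoring step below, here is a rough sketch of the result shape these evaluators return (prompt texts and values are illustrative):\n",
+    "\n",
+    "```python\n",
+    "from langchain.evaluation import load_evaluator\n",
+    "\n",
+    "evaluator = load_evaluator(\"criteria\", criteria=\"conciseness\")\n",
+    "result = evaluator.evaluate_strings(\n",
+    "    prediction=\"Sofia is the capital of Bulgaria.\",\n",
+    "    input=\"What is the capital of Bulgaria?\",\n",
+    ")\n",
+    "# result is a dict, roughly: {\"reasoning\": \"...\", \"value\": \"Y\", \"score\": 1}\n",
+    "```"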
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "7NijTmslvyK8" + }, + "outputs": [], + "source": [ + "from langchain.evaluation import load_evaluator, EvaluatorType\n", + "from langchain import PromptTemplate, OpenAI, LLMChain\n", + "from langchain.evaluation.criteria import LabeledCriteriaEvalChain\n", + "\n", + "def get_evaluator_for_key(key: str):\n", + " llm = OpenAI(temperature=0, model=os.environ.get('EVAL_MODEL'))\n", + " return load_evaluator(\"criteria\", criteria=key, llm=llm)\n", + "\n", + "def get_hallucination_eval():\n", + " criteria = {\n", + " \"hallucination\": (\n", + " \"Does this submission contain information\"\n", + " \" not present in the input or reference?\"\n", + " ),\n", + " }\n", + " llm = OpenAI(temperature=0, model=os.environ.get('EVAL_MODEL'))\n", + "\n", + " return LabeledCriteriaEvalChain.from_llm(\n", + " llm=llm,\n", + " criteria=criteria,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tzZZfztGdrIQ" + }, + "source": [ + "### Execute evaluation\n", + "\n", + "Below, we execute the evaluation for each `Generation` loaded above. Each score is ingested into Langfuse via [`langfuse.score()`](https://langfuse.com/docs/scores).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "qMa2OEtqvyGg" + }, + "outputs": [ + { + "ename": "NotFoundError", + "evalue": "Error code: 404 - {'error': {'message': 'The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[31], line 15\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(eval_result)\n\u001b[1;32m 13\u001b[0m langfuse\u001b[38;5;241m.\u001b[39mscore(name\u001b[38;5;241m=\u001b[39mcriterion, trace_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mtrace_id, observation_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mid, value\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m], comment\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreasoning\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m---> 15\u001b[0m \u001b[43mexecute_eval_and_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "Cell \u001b[0;32mIn[31], line 7\u001b[0m, in \u001b[0;36mexecute_eval_and_score\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m criteria \u001b[38;5;241m=\u001b[39m [key \u001b[38;5;28;01mfor\u001b[39;00m key, value \u001b[38;5;129;01min\u001b[39;00m EVAL_TYPES\u001b[38;5;241m.\u001b[39mitems() \u001b[38;5;28;01mif\u001b[39;00m value \u001b[38;5;129;01mand\u001b[39;00m key \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhallucination\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m criterion \u001b[38;5;129;01min\u001b[39;00m criteria:\n\u001b[0;32m----> 7\u001b[0m eval_result \u001b[38;5;241m=\u001b[39m \u001b[43mget_evaluator_for_key\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcriterion\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mevaluate_strings\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 
8\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moutput\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgeneration\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(eval_result)\n\u001b[1;32m 13\u001b[0m langfuse\u001b[38;5;241m.\u001b[39mscore(name\u001b[38;5;241m=\u001b[39mcriterion, trace_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mtrace_id, observation_id\u001b[38;5;241m=\u001b[39mgeneration\u001b[38;5;241m.\u001b[39mid, value\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m], comment\u001b[38;5;241m=\u001b[39meval_result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mreasoning\u001b[39m\u001b[38;5;124m'\u001b[39m])\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/evaluation/schema.py:219\u001b[0m, in \u001b[0;36mStringEvaluator.evaluate_strings\u001b[0;34m(self, prediction, reference, input, **kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Evaluate Chain or LLM output, based on optional input and label.\u001b[39;00m\n\u001b[1;32m 209\u001b[0m \n\u001b[1;32m 210\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 216\u001b[0m \u001b[38;5;124;03m dict: The evaluation results containing the score or value.\u001b[39;00m\n\u001b[1;32m 217\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m \u001b[38;5;66;03m# noqa: E501\u001b[39;00m\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_evaluation_args(reference\u001b[38;5;241m=\u001b[39mreference, \u001b[38;5;28minput\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;28minput\u001b[39m)\n\u001b[0;32m--> 219\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_evaluate_strings\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprediction\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreference\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreference\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/evaluation/criteria/eval_chain.py:447\u001b[0m, in \u001b[0;36mCriteriaEvalChain._evaluate_strings\u001b[0;34m(self, prediction, reference, input, callbacks, tags, metadata, include_run_info, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Evaluate a prediction against the criteria.\u001b[39;00m\n\u001b[1;32m 414\u001b[0m \n\u001b[1;32m 415\u001b[0m \u001b[38;5;124;03mParameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 444\u001b[0m \u001b[38;5;124;03m )\u001b[39;00m\n\u001b[1;32m 445\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 446\u001b[0m input_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_eval_input(prediction, reference, \u001b[38;5;28minput\u001b[39m)\n\u001b[0;32m--> 447\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 448\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 449\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 450\u001b[0m \u001b[43m \u001b[49m\u001b[43mtags\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtags\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 451\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 452\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_run_info\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_run_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_prepare_output(result)\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/_api/deprecation.py:145\u001b[0m, in \u001b[0;36mdeprecated..deprecate..warning_emitting_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 143\u001b[0m warned \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 144\u001b[0m emit_warning()\n\u001b[0;32m--> 145\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapped\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:378\u001b[0m, in \u001b[0;36mChain.__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Execute the chain.\u001b[39;00m\n\u001b[1;32m 347\u001b[0m \n\u001b[1;32m 348\u001b[0m \u001b[38;5;124;03mArgs:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 369\u001b[0m \u001b[38;5;124;03m `Chain.output_keys`.\u001b[39;00m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 371\u001b[0m config \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 372\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcallbacks\u001b[39m\u001b[38;5;124m\"\u001b[39m: callbacks,\n\u001b[1;32m 373\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtags\u001b[39m\u001b[38;5;124m\"\u001b[39m: tags,\n\u001b[1;32m 374\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m: metadata,\n\u001b[1;32m 375\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrun_name\u001b[39m\u001b[38;5;124m\"\u001b[39m: run_name,\n\u001b[1;32m 376\u001b[0m }\n\u001b[0;32m--> 378\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minvoke\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 379\u001b[0m \u001b[43m \u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 380\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\u001b[43mRunnableConfig\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43mk\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitems\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_only_outputs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_only_outputs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 382\u001b[0m \u001b[43m \u001b[49m\u001b[43minclude_run_info\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude_run_info\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:163\u001b[0m, in \u001b[0;36mChain.invoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 162\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_chain_error(e)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 164\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_chain_end(outputs)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m include_run_info:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/base.py:153\u001b[0m, in \u001b[0;36mChain.invoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_inputs(inputs)\n\u001b[1;32m 152\u001b[0m outputs \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m--> 153\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m new_arg_supported\n\u001b[1;32m 155\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call(inputs)\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 158\u001b[0m final_outputs: Dict[\u001b[38;5;28mstr\u001b[39m, Any] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprep_outputs(\n\u001b[1;32m 159\u001b[0m inputs, outputs, return_only_outputs\n\u001b[1;32m 160\u001b[0m )\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mBaseException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/llm.py:103\u001b[0m, in 
\u001b[0;36mLLMChain._call\u001b[0;34m(self, inputs, run_manager)\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_call\u001b[39m(\n\u001b[1;32m 99\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 100\u001b[0m inputs: Dict[\u001b[38;5;28mstr\u001b[39m, Any],\n\u001b[1;32m 101\u001b[0m run_manager: Optional[CallbackManagerForChainRun] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 102\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Dict[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]:\n\u001b[0;32m--> 103\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_manager\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrun_manager\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcreate_outputs(response)[\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain/chains/llm.py:115\u001b[0m, in \u001b[0;36mLLMChain.generate\u001b[0;34m(self, input_list, run_manager)\u001b[0m\n\u001b[1;32m 113\u001b[0m callbacks \u001b[38;5;241m=\u001b[39m run_manager\u001b[38;5;241m.\u001b[39mget_child() \u001b[38;5;28;01mif\u001b[39;00m run_manager \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm, BaseLanguageModel):\n\u001b[0;32m--> 115\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_prompt\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 116\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 117\u001b[0m \u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 119\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 120\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 122\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm\u001b[38;5;241m.\u001b[39mbind(stop\u001b[38;5;241m=\u001b[39mstop, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_kwargs)\u001b[38;5;241m.\u001b[39mbatch(\n\u001b[1;32m 123\u001b[0m cast(List, prompts), {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcallbacks\u001b[39m\u001b[38;5;124m\"\u001b[39m: callbacks}\n\u001b[1;32m 124\u001b[0m )\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:569\u001b[0m, in \u001b[0;36mBaseLLM.generate_prompt\u001b[0;34m(self, prompts, stop, callbacks, **kwargs)\u001b[0m\n\u001b[1;32m 561\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mgenerate_prompt\u001b[39m(\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 563\u001b[0m prompts: List[PromptValue],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 566\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 567\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m LLMResult:\n\u001b[1;32m 568\u001b[0m prompt_strings \u001b[38;5;241m=\u001b[39m [p\u001b[38;5;241m.\u001b[39mto_string() \u001b[38;5;28;01mfor\u001b[39;00m p \u001b[38;5;129;01min\u001b[39;00m prompts]\n\u001b[0;32m--> 569\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprompt_strings\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:748\u001b[0m, in \u001b[0;36mBaseLLM.generate\u001b[0;34m(self, prompts, stop, callbacks, tags, metadata, run_name, run_id, **kwargs)\u001b[0m\n\u001b[1;32m 731\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 732\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAsked to cache, but no cache found at `langchain.cache`.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 733\u001b[0m )\n\u001b[1;32m 734\u001b[0m run_managers \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 735\u001b[0m callback_manager\u001b[38;5;241m.\u001b[39mon_llm_start(\n\u001b[1;32m 736\u001b[0m dumpd(\u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 746\u001b[0m )\n\u001b[1;32m 747\u001b[0m ]\n\u001b[0;32m--> 748\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_generate_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 749\u001b[0m \u001b[43m \u001b[49m\u001b[43mprompts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrun_managers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mbool\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mnew_arg_supported\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 750\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 751\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output\n\u001b[1;32m 752\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(missing_prompts) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/langchain_core/language_models/llms.py:606\u001b[0m, in \u001b[0;36mBaseLLM._generate_helper\u001b[0;34m(self, prompts, stop, run_managers, new_arg_supported, **kwargs)\u001b[0m\n\u001b[1;32m 604\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m run_manager \u001b[38;5;129;01min\u001b[39;00m run_managers:\n\u001b[1;32m 605\u001b[0m run_manager\u001b[38;5;241m.\u001b[39mon_llm_error(e, 
response\u001b[38;5;241m=\u001b[39mLLMResult(generations\u001b[38;5;241m=\u001b[39m[]))\n\u001b[0;32m--> 606\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n",
+      "\n",
+      "\u001b[0;31m[... intermediate frames from langchain_core, langchain_community and the OpenAI SDK truncated for brevity ...]\u001b[0m\n",
+      "\n",
+      "\u001b[0;31mNotFoundError\u001b[0m: Error code: 404 - {'error': {'message': 'The model `text-davinci-003` has been deprecated, learn more here: https://platform.openai.com/docs/deprecations', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}"
+     ]
+    }
+   ],
+   "source": [
+    "def execute_eval_and_score():\n",
+    "    # Evaluate each fetched generation against every enabled criterion.\n",
+    "    # Hallucination needs a reference text and is handled separately below.\n",
+    "    for generation in generations:\n",
+    "        criteria = [key for key, value in EVAL_TYPES.items() if value and key != \"hallucination\"]\n",
+    "\n",
+    "        for criterion in criteria:\n",
+    "            eval_result = get_evaluator_for_key(criterion).evaluate_strings(\n",
+    "                prediction=generation.output,\n",
+    "                input=generation.input,\n",
+    "            )\n",
+    "            print(eval_result)\n",
+    "\n",
+    "            # Ingest the evaluation back into Langfuse as a score on the generation\n",
+    "            langfuse.score(\n",
+    "                name=criterion,\n",
+    "                trace_id=generation.trace_id,\n",
+    "                observation_id=generation.id,\n",
+    "                value=eval_result[\"score\"],\n",
+    "                comment=eval_result[\"reasoning\"],\n",
+    "            )\n",
+    "\n",
+    "execute_eval_and_score()\n"
+   ]
+  },
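+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note: the recorded run above failed with a 404 because OpenAI has deprecated the `text-davinci-003` model. If you hit this error, rebuild the evaluators on a currently supported model. A minimal sketch, assuming `get_evaluator_for_key` wraps LangChain's `load_evaluator` helper:\n",
+    "\n",
+    "```python\n",
+    "from langchain.evaluation import load_evaluator\n",
+    "from langchain_community.chat_models import ChatOpenAI\n",
+    "\n",
+    "def get_evaluator_for_key(key: str):\n",
+    "    # swap the deprecated completion model for a supported chat model\n",
+    "    llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
+    "    return load_evaluator(\"criteria\", criteria=key, llm=llm)\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,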
"metadata": { + "id": "YcTF-z8eeL0a" + }, + "outputs": [], + "source": [ + "# hallucination\n", + "\n", + "\n", + "def eval_hallucination():\n", + "\n", + " chain = get_hallucination_eval()\n", + "\n", + " for generation in generations:\n", + " eval_result = chain.evaluate_strings(\n", + " prediction=generation.output,\n", + " input=generation.input,\n", + " reference=generation.input\n", + " )\n", + " print(eval_result)\n", + " if eval_result is not None and eval_result[\"score\"] is not None and eval_result[\"reasoning\"] is not None:\n", + " langfuse.score(name='hallucination', trace_id=generation.trace_id, observation_id=generation.id, value=eval_result[\"score\"], comment=eval_result['reasoning'])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "id": "n4zeFEKlfjQ-" + }, + "outputs": [], + "source": [ + "if EVAL_TYPES.get(\"hallucination\") == True:\n", + " eval_hallucination()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "-ROOd8d8rdl6" + }, + "outputs": [], + "source": [ + "# SDK is async, make sure to await all requests\n", + "langfuse.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MsKpVyYdavJ5" + }, + "source": [ + "### See Scores in Langfuse\n", + "\n", + " In the Langfuse UI, you can filter Traces by `Scores` and look into the details for each. Check out Langfuse Analytics to understand the impact of new prompt versions or application releases on these scores.\n", + "\n", + "![Image of Trace](https://langfuse.com/images/docs/trace-conciseness-score.jpg)\n", + "_Example trace with conciseness score_\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CkeLD_ciVD2w" + }, + "source": [ + "## Get in touch\n", + "\n", + "Looking for a specific way to score your production data in Langfuse? Join the [Discord](https://langfuse.com/discord) and discuss your use case!" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/eval/integration_openai_sdk.ipynb b/eval/integration_openai_sdk.ipynb new file mode 100644 index 0000000..7679082 --- /dev/null +++ b/eval/integration_openai_sdk.ipynb @@ -0,0 +1,693 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ki7E44X5ViQB" + }, + "source": [ + "---\n", + "description: Drop-in replacement of OpenAI SDK to get full observability in Langfuse by changing only the import\n", + "category: Integrations\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mfMAzJYcirtK" + }, + "source": [ + "# Cookbook: OpenAI Integration (Python)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B0A389k2irtK" + }, + "source": [ + "This is a cookbook with examples of the Langfuse Integration for OpenAI (Python).\n", + "\n", + "Follow the [integration guide](https://langfuse.com/docs/integrations/openai/get-started) to add this integration to your OpenAI project." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uq04G_FSWjF-" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XYoil3FcOIQt" + }, + "source": [ + "The integration is compatible with OpenAI SDK versions `>=0.27.8`. It supports async functions and streaming for OpenAI SDK versions `>=1.0.0`." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "hVOOiBtUPtOO" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: langfuse in /Users/gyliu/venvlf/lib/python3.10/site-packages (2.21.1)\n", + "Requirement already satisfied: openai in /Users/gyliu/venvlf/lib/python3.10/site-packages (1.14.3)\n", + "Requirement already satisfied: wrapt<2.0,>=1.14 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (1.16.0)\n", + "Requirement already satisfied: packaging<24.0,>=23.2 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (23.2)\n", + "Requirement already satisfied: pydantic<3.0,>=1.10.7 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (2.4.0)\n", + "Requirement already satisfied: httpx<1.0,>=0.15.4 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (0.24.1)\n", + "Requirement already satisfied: chevron<0.15.0,>=0.14.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (0.14.0)\n", + "Requirement already satisfied: backoff<3.0.0,>=2.2.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from langfuse) (2.2.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (1.8.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (4.8.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: sniffio in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (1.3.0)\n", + "Requirement already satisfied: tqdm>4 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from openai) (4.66.1)\n", + "Requirement already satisfied: idna>=2.8 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.4)\n", + "Requirement already satisfied: exceptiongroup in /Users/gyliu/venvlf/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.1.3)\n", + "Requirement already satisfied: certifi in /Users/gyliu/venvlf/lib/python3.10/site-packages (from httpx<1.0,>=0.15.4->langfuse) (2023.7.22)\n", + "Requirement already satisfied: httpcore<0.18.0,>=0.15.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from httpx<1.0,>=0.15.4->langfuse) (0.17.3)\n", + "Requirement already satisfied: pydantic-core==2.10.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic<3.0,>=1.10.7->langfuse) (2.10.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic<3.0,>=1.10.7->langfuse) (0.6.0)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from 
httpcore<0.18.0,>=0.15.0->httpx<1.0,>=0.15.4->langfuse) (0.14.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install langfuse openai --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "ldSEJ0bAP4sj" + }, + "outputs": [], + "source": [ + "# instead of: import openai\n", + "from langfuse.openai import openai" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "G8qkHd8oK_o9" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# For debugging, checks the SDK connection with the server. Do not use in production as it adds latency.\n", + "from langfuse.openai import auth_check\n", + "\n", + "auth_check()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ovnAAdbaLmD" + }, + "source": [ + "## Examples\n", + "\n", + "### Chat completion" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "c8RhokKUP9I0" + }, + "outputs": [], + "source": [ + "completion = openai.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a very accurate calculator. You output only the result of the calculation.\"},\n", + " {\"role\": \"user\", \"content\": \"1 + 1 = \"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SAqxBgOqKTzO" + }, + "source": [ + "### Chat completion (streaming)\n", + "\n", + "Simple example using the OpenAI streaming functionality." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "b9gRlb2rKTaA" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sure thing! Why did the scarecrow win an award? Because he was outstanding in his field!None" + ] + } + ], + "source": [ + "completion = openai.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a professional comedian.\"},\n", + " {\"role\": \"user\", \"content\": \"Tell me a joke.\"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + " stream=True\n", + ")\n", + "\n", + "for chunk in completion:\n", + " print(chunk.choices[0].delta.content, end=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F2pvm0qLKg7Q" + }, + "source": [ + "### Chat completion (async)\n", + "\n", + "Simple example using the OpenAI async client. It takes the Langfuse configurations either from the environment variables or from the attributes on the `openai` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "Hggwggv_MKpV" + }, + "outputs": [], + "source": [ + "from langfuse.openai import AsyncOpenAI\n", + "\n", + "async_client = AsyncOpenAI()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "ZIUKD8Z3KmvQ" + }, + "outputs": [], + "source": [ + "completion = await async_client.chat.completions.create(\n", + " name=\"test-chat\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a very accurate calculator. You output only the result of the calculation.\"},\n", + " {\"role\": \"user\", \"content\": \"1 + 100 = \"}],\n", + " temperature=0,\n", + " metadata={\"someMetadataKey\": \"someValue\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M4iJpqYQirtM" + }, + "source": [ + "Go to https://cloud.langfuse.com or your own instance to see your generation.\n", + "\n", + "![Chat completion](https://langfuse.com/images/docs/openai-chat.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ky7CtCNzaSrn" + }, + "source": [ + "### Functions\n", + "\n", + "Simple example using Pydantic to generate the function schema." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "jJfBdHowaRgs" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pydantic in /Users/gyliu/venvlf/lib/python3.10/site-packages (2.4.0)\n", + "Collecting pydantic\n", + " Using cached pydantic-2.6.4-py3-none-any.whl (394 kB)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic) (0.6.0)\n", + "Collecting pydantic-core==2.16.3\n", + " Using cached pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl (1.7 MB)\n", + "Requirement already satisfied: typing-extensions>=4.6.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pydantic) (4.8.0)\n", + "Installing collected packages: pydantic-core, pydantic\n", + " Attempting uninstall: pydantic-core\n", + " Found existing installation: pydantic_core 2.10.0\n", + " Uninstalling pydantic_core-2.10.0:\n", + " Successfully uninstalled pydantic_core-2.10.0\n", + " Attempting uninstall: pydantic\n", + " Found existing installation: pydantic 2.4.0\n", + " Uninstalling pydantic-2.4.0:\n", + " Successfully uninstalled pydantic-2.4.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "sqlmodel 0 requires pydantic[email]<=2.4,>=2.1.1, but you have pydantic 2.6.4 which is incompatible.\n", + "langflow 0.6.0a0 requires cohere<5.0.0,>=4.32.0, but you have cohere 5.1.2 which is incompatible.\n", + "langflow 0.6.0a0 requires langchain<0.1.0,>=0.0.327, but you have langchain 0.1.13 which is incompatible.\n", + "langflow 0.6.0a0 requires langfuse<2.0.0,>=1.1.11, but you have langfuse 2.21.1 which is incompatible.\n", + "langflow 0.6.0a0 requires openai<0.28.0,>=0.27.8, but you have openai 1.14.3 which is incompatible.\n", + "langflow 0.6.0a0 requires orjson==3.9.3, but you have orjson 3.9.15 which is incompatible.\n", + "langflow 0.6.0a0 requires tiktoken<0.6.0,>=0.5.0, but you have tiktoken 0.6.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed pydantic-2.6.4 pydantic-core-2.16.3\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install pydantic --upgrade" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "2gA-zGk7VYYp" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/m3/cz7sy7cd4v7_mklwrfjs45wm0000gn/T/ipykernel_99748/2496491748.py:7: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/\n", + " schema = StepByStepAIResponse.schema() # returns a dict like JSON schema\n" + ] + } + ], + "source": [ + "from typing import List\n", + "from pydantic import BaseModel\n", + "\n", + "class StepByStepAIResponse(BaseModel):\n", + " title: str\n", + " steps: List[str]\n", + "schema = StepByStepAIResponse.schema() # returns a dict like JSON schema" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "id": "ORtNcN4-afDC" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/m3/cz7sy7cd4v7_mklwrfjs45wm0000gn/T/ipykernel_99748/162860978.py:12: PydanticDeprecatedSince20: The `schema` method is deprecated; use `model_json_schema` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/\n",
+      "  \"parameters\": StepByStepAIResponse.schema()\n"
+     ]
+    }
+   ],
+   "source": [
+    "import json\n",
+    "response = openai.chat.completions.create(\n",
+    "    name=\"test-function\",\n",
+    "    model=\"gpt-3.5-turbo-0613\",\n",
+    "    messages=[\n",
+    "         {\"role\": \"user\", \"content\": \"Explain how to assemble a PC\"}\n",
+    "    ],\n",
+    "    functions=[\n",
+    "        {\n",
+    "            \"name\": \"get_answer_for_user_query\",\n",
+    "            \"description\": \"Get user answer in series of steps\",\n",
+    "            \"parameters\": StepByStepAIResponse.schema()\n",
+    "        }\n",
+    "    ],\n",
+    "    function_call={\"name\": \"get_answer_for_user_query\"}\n",
+    ")\n",
+    "\n",
+    "output = json.loads(response.choices[0].message.function_call.arguments)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "qurrm-Ntp24O"
+   },
+   "source": [
+    "Go to https://cloud.langfuse.com or your own instance to see your generation.\n",
+    "\n",
+    "![Function](https://langfuse.com/images/docs/openai-function.png)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "Su1OaQq3rPPh"
+   },
+   "source": [
+    "### Group multiple generations into a single trace\n",
+    "\n",
+    "Many applications require more than one OpenAI call. The `@observe()` decorator allows you to nest all LLM calls of a single API invocation into the same `trace` in Langfuse."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "id": "zMDVxzS1ltWU"
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "In the heart of the Balkans, she stands so proud,\n",
+      "A city of shadows and light, both silent and loud.\n",
+      "With ancient streets that weave and wind,\n",
+      "Through history's tapestry, a story to find.\n",
+      "\n",
+      "Sofia, the city of red-roofed homes,\n",
+      "Where saints and sinners freely roam.\n",
+      "The Alexander Nevsky Cathedral so grand,\n",
+      "A beacon of faith in this ancient land.\n",
+      "\n",
+      "Among the bustling markets and café's cheer,\n",
+      "The spirit of Sofia is ever near.\n",
+      "From Vitosha Mountain, her guardian high,\n",
+      "To the Serdika ruins where old worlds lie.\n",
+      "\n",
+      "The heartbeat of Bulgaria, a city so alive,\n",
+      "In her cobblestone streets, stories thrive.\n",
+      "With each passing moment, a new tale begun,\n",
+      "Sofia, eternal, beneath the Balkan sun.\n",
+      "\n",
+      "Her people, vibrant, diverse and strong,\n",
+      "In unity and resilience, they belong.\n",
+      "A city of contrasts, old and new,\n",
+      "Sofia, forever in my heart, I'll hold\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langfuse.openai import openai\n",
+    "from langfuse.decorators import observe\n",
+    "\n",
+    "@observe() # decorator to automatically create trace and nest generations\n",
+    "def main(country: str, user_id: str, **kwargs) -> str:\n",
+    "  # nested generation 1: use openai to get capital of country\n",
+    "  capital = openai.chat.completions.create(\n",
+    "    name=\"geography-teacher\",\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    messages=[\n",
+    "      {\"role\": \"system\", \"content\": \"You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.\"},\n", + " {\"role\": \"user\", \"content\": country}],\n", + " temperature=0,\n", + " ).choices[0].message.content\n", + "\n", + " # nested generation 2: use openai to write poem on capital\n", + " poem = openai.chat.completions.create(\n", + " name=\"poet\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a poet. Create a poem about a city.\"},\n", + " {\"role\": \"user\", \"content\": capital}],\n", + " temperature=1,\n", + " max_tokens=200,\n", + " ).choices[0].message.content\n", + "\n", + " return poem\n", + "\n", + "# run main function and let Langfuse decorator do the rest\n", + "print(main(\"Bulgaria\", \"admin\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ehx2NZuIrPPh" + }, + "source": [ + "Go to https://cloud.langfuse.com or your own instance to see your trace.\n", + "\n", + "![Trace with multiple OpenAI calls](https://langfuse.com/images/docs/openai-trace-grouped.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-HeMqTWgK4xL" + }, + "source": [ + "#### Fully featured: Interoperability with Langfuse SDK\n", + "\n", + "The `trace` is a core object in Langfuse and you can add rich metadata to it. See [Python SDK docs](https://langfuse.com/docs/sdk/python#traces-1) for full documentation on this.\n", + "\n", + "Some of the functionality enabled by custom traces:\n", + "- custom name to identify a specific trace-type\n", + "- user-level tracking\n", + "- experiment tracking via versions and releases\n", + "- custom metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "28to65wpK4xL" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the heart of the Balkans, where history meets modernity,\n", + "Lies a city of beauty, Sofia, a place of pure serenity.\n", + "With ancient ruins whispering tales of days long gone,\n", + "And vibrant street art that dances with the dawn.\n", + "\n", + "A melting pot of cultures, where East meets West,\n", + "Sofia's charm will put your wandering soul to the test.\n", + "The rhythm of the city pulses through its veins,\n", + "As vibrant markets bustle and soothing fountains reign.\n", + "\n", + "Beneath the shadow of Vitosha, the mountain so grand,\n", + "Sofia stands proud, a jewel in Bulgaria's hand.\n", + "With its grand cathedrals and majestic domes,\n", + "It's a city that calls you to wander and roam.\n", + "\n", + "From the bustling boulevards to quiet cobbled lanes,\n", + "Sofia's spirit will stir in your heart like gentle rains.\n", + "So come, wanderer, and let the city reveal,\n", + "The magic and wonder that its streets conceal.\n" + ] + } + ], + "source": [ + "import uuid\n", + "from langfuse.openai import openai\n", + "from langfuse.decorators import langfuse_context, observe\n", + "\n", + "@observe() # decorator to automatically create trace and nest generations\n", + "def main(country: str, user_id: str, **kwargs) -> str:\n", + " # nested generation 1: use openai to get capital of country\n", + " capital = openai.chat.completions.create(\n", + " name=\"geography-teacher\",\n", + " model=\"gpt-3.5-turbo\",\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a Geography teacher helping students learn the capitals of countries. 
Output only the capital when being asked.\"},\n",
+    "      {\"role\": \"user\", \"content\": country}],\n",
+    "    temperature=0,\n",
+    "  ).choices[0].message.content\n",
+    "\n",
+    "  # nested generation 2: use openai to write poem on capital\n",
+    "  poem = openai.chat.completions.create(\n",
+    "    name=\"poet\",\n",
+    "    model=\"gpt-3.5-turbo\",\n",
+    "    messages=[\n",
+    "      {\"role\": \"system\", \"content\": \"You are a poet. Create a poem about a city.\"},\n",
+    "      {\"role\": \"user\", \"content\": capital}],\n",
+    "    temperature=1,\n",
+    "    max_tokens=200,\n",
+    "  ).choices[0].message.content\n",
+    "\n",
+    "  # rename the trace and set attributes (e.g., metadata) as needed\n",
+    "  langfuse_context.update_current_trace(\n",
+    "      name=\"City poem generator\",\n",
+    "      session_id=\"1234\",\n",
+    "      user_id=user_id,\n",
+    "      tags=[\"tag1\", \"tag2\"],\n",
+    "      public=True,\n",
+    "      metadata={\n",
+    "          \"env\": \"development\",\n",
+    "      },\n",
+    "      release=\"v0.0.21\"\n",
+    "  )\n",
+    "\n",
+    "  return poem\n",
+    "\n",
+    "# create a random trace_id; you could also use an existing id from your application, e.g. a conversation id\n",
+    "trace_id = str(uuid.uuid4())\n",
+    "\n",
+    "# run the main function, set your own id, and let the Langfuse decorator do the rest\n",
+    "print(main(\"Bulgaria\", \"admin\", langfuse_observation_id=trace_id))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "O3jxed-VrPPi"
+   },
+   "source": [
+    "### Programmatically add scores"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "uMO6tn53rPPi"
+   },
+   "source": [
+    "You can add [scores](https://langfuse.com/docs/scores) to the trace, e.g. to record user feedback or a programmatic evaluation. Scores are used throughout Langfuse to filter traces and are shown on the dashboard. See the docs on scores for more details.\n",
+    "\n",
+    "The score is associated with the trace via the `trace_id`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "id": "J0argbJhrPPi"
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from langfuse import Langfuse\n",
+    "from langfuse.decorators import langfuse_context, observe\n",
+    "\n",
+    "langfuse = Langfuse()\n",
+    "\n",
+    "@observe() # decorator to automatically create trace and nest generations\n",
+    "def main():\n",
+    "    # get the trace_id of the current trace\n",
+    "    trace_id = langfuse_context.get_current_trace_id()\n",
+    "\n",
+    "    # rest of your application ...\n",
+    "\n",
+    "    return \"res\", trace_id\n",
+    "\n",
+    "# execute the main function to generate a trace\n",
+    "_, trace_id = main()\n",
+    "\n",
+    "# score the trace from outside the trace context\n",
+    "langfuse.score(\n",
+    "    trace_id=trace_id,\n",
+    "    name=\"my-score-name\",\n",
+    "    value=1\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}