From cc5a14e04e492f6de7ce98b8dbfe215c3890fc27 Mon Sep 17 00:00:00 2001 From: Guangya Liu Date: Tue, 26 Mar 2024 13:16:12 -0400 Subject: [PATCH] watsonx eval (#168) --- ...plication-langchain-quality-evals-v2.ipynb | 986 +++++++++++++++++ ...-application-langchain-quality-evals.ipynb | 990 ++++++++++++++++++ 2 files changed, 1976 insertions(+) create mode 100644 eval/llm-powered-application-langchain-quality-evals-v2.ipynb create mode 100644 eval/llm-powered-application-langchain-quality-evals.ipynb diff --git a/eval/llm-powered-application-langchain-quality-evals-v2.ipynb b/eval/llm-powered-application-langchain-quality-evals-v2.ipynb new file mode 100644 index 0000000..56e96ed --- /dev/null +++ b/eval/llm-powered-application-langchain-quality-evals-v2.ipynb @@ -0,0 +1,986 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1e26f660", + "metadata": { + "id": "701ceb425feb4278992e00794f1ac0dd" + }, + "source": [ + "# For a langchain powered multi-chain application, demonstrate how to evaluate the LLM Quality Metrics using IBM watsonx.governance - monitoring toolkit." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b89f7add-1c48-42f8-82c1-0347bedad786", + "metadata": { + "id": "db3d6273-8347-4c25-8a2a-2de834c8756e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from httpcore<0.18.0,>=0.15.0->httpx<1,>=0.23.0->openai<2.0.0,>=1.10.0->langchain-openai) (0.14.0)\n" + ] + } + ], + "source": [ + "!pip install ibm-watson-machine-learning langchain-openai \"pydantic>=1.10.0\" langchain \"sqlalchemy==1.4.47\" | tail -n 1" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "541a0315", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "79509547-4901-43e6-aa8c-d46aa53eb863", + "metadata": { + "id": "24d2d3d9-a4bd-41f6-990e-2f9b30b2c298" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "225f3735", + "metadata": { + "id": "597f49a6e85b40bd9191eac9239dfe64" + }, + "outputs": [], + "source": [ + "# langchain imports\n", + "from langchain.chains import LLMChain\n", + "from langchain import PromptTemplate\n", + "from langchain.chains import SequentialChain\n", + "\n", + "# watsonx.ai imports\n", + "from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes\n", + "from ibm_watson_machine_learning.foundation_models import Model\n", + "from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM\n", + "from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams\n", + "from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods" + ] + }, + { + "cell_type": "markdown", + "id": "8b159260-ba3f-424b-9a53-a15934b70450", + "metadata": {}, + "source": [ + "## IBM watsonx.gov Access Details" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ccc0eefa-0a0b-4554-9a6e-436640943fd7", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "IAM_URL=\"https://iam.ng.bluemix.net/oidc/token\"" + ] + }, + { + "cell_type": "markdown", + "id": "caf9c9a3", + "metadata": { + "id": "cf8e0dc758bb404e9463790f97251b71" + }, + "source": [ + "# LLM 1 - using Azure OpenAI GPT Model" + ] + }, + { + "cell_type": "markdown", + "id": "12a0e49c", + "metadata": { + "id": "d76b6a6cd55d4099aab31d62bb6fb31c" + }, + "source": [ + "## Summarization prompt - summarize the mobile issue" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4c6c356d", + "metadata": { + "id": "0cf8a813dd2a4c7f8ec05bd5e569f22b" + }, + "outputs": [], + "source": [ + "summarization_prompt_template_text = \"\"\"\n", + "Please provide a summary of the following text with maximum of 20 words. Do not include the Issue in the output.\n", + "\n", + "Issue: Some apps continue to run processes in the background even when not actively in use, leading to increased battery drain. Users can identify and restrict background activity for specific apps in their phone settings.\n", + "Summary: Certain apps running in the background excessively consume battery power.\n", + "\n", + "Issue: Devices with insufficient RAM struggle to handle multiple apps simultaneously, leading to slow performance. Users can close background apps and consider upgrading to a device with more RAM if multitasking is essential.\n", + "Summary: Limited Random Access Memory (RAM) affecting multitasking capabilities.\n", + "\n", + "Issue: Users may encounter issues where the device fails to authenticate with a Wi-Fi network, often requiring re-entry of the correct password or troubleshooting router settings.\n", + "Summary: Inability to connect to a Wi-Fi network due to authentication problems.\n", + "\n", + "Issue: {content}\n", + "Summary: \n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3695aa51", + "metadata": { + "id": "40b5eba806d24fa08d1725af047f5e9f" + }, + "outputs": [], + "source": [ + "summarization_prompt = PromptTemplate(\n", + " input_variables=[\"content\"],\n", + " template=summarization_prompt_template_text\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "b0f42633", + "metadata": { + "id": "ede59bf08c534b1d85ad4325107f0b03" + }, + "source": [ + "# LLM 2 - using IBM watsonx.ai FLAN_T5_XXL Model" + ] + }, + { + "cell_type": "markdown", + "id": "91f72403", + "metadata": { + "id": "ee63717e13984ca88f2138fbff28c705" + }, + "source": [ + "## Issue classification prompt - summarize the mobile issue\n", + "\n", + "### BatteryPerformance or StorageDataManagement or ConnectivityAndNetwork" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a413ebf5", + "metadata": { + "id": "8ee8ccc463974802b615a5757f6b9d91" + }, + "outputs": [], + "source": [ + "classification_prompt_template_text = \"\"\"\n", + "Classify the following as BatteryPerformance or StorageDataManagement or ConnectivityAndNetwork.\n", + "\n", + "Issue: Certain apps running in the background excessively consume battery power.\n", + "Type: BatteryPerformance\n", + "\n", + "Issue: Limited Random Access Memory (RAM) affecting multitasking capabilities.\n", + "Type: StorageDataManagement\n", + "\n", + "Issue: Inability to connect to a Wi-Fi network due to authentication problems.\n", + "Type: ConnectivityAndNetwork\n", + "\n", + "Issue: {summary}\n", + "Type:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "94b8f1e7", + "metadata": { + "id": "7dd8384a45a24af592516d977eb122b8" + }, + "outputs": [], + "source": [ + "classification_prompt = PromptTemplate(\n", + " input_variables=[\"summary\"],\n", + " template=classification_prompt_template_text\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fd473b08-7ab9-4111-9b38-e0a6ef7ad78c", + "metadata": { + "id": "fea40e5f-502e-4e6a-a8b5-86abd9797c89" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['FLAN_T5_XXL', 'FLAN_UL2', 'MT0_XXL', 'GPT_NEOX', 'MPT_7B_INSTRUCT2', 'STARCODER', 'LLAMA_2_70B_CHAT', 'LLAMA_2_13B_CHAT', 'GRANITE_13B_INSTRUCT', 'GRANITE_13B_CHAT', 'FLAN_T5_XL', 'GRANITE_13B_CHAT_V2', 'GRANITE_13B_INSTRUCT_V2', 'ELYZA_JAPANESE_LLAMA_2_7B_INSTRUCT', 'MIXTRAL_8X7B_INSTRUCT_V01_Q']\n" + ] + } + ], + "source": [ + "print([model.name for model in ModelTypes])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5fa90049-32c3-464e-bbb7-75e463759a8b", + "metadata": { + "id": "7c6adfeb-d2f8-4c7b-b7b6-43118746fa11" + }, + "outputs": [], + "source": [ + "model_id_2 = ModelTypes.FLAN_T5_XXL" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "948b3a6a-ab40-48ca-86b7-08b3a85597e3", + "metadata": { + "id": "14c9c8f2-b981-433c-be1a-d4d416f1f982" + }, + "outputs": [], + "source": [ + "\n", + "import os\n", + "\n", + "flan_t5_model = Model(\n", + " model_id=model_id_2,\n", + " params={\n", + " \"decoding_method\": \"sample\",\n", + " \"max_new_tokens\": 10,\n", + " \"min_new_tokens\": 0,\n", + " \"temperature\":0.0\n", + " },\n", + " credentials={\n", + " \"apikey\": os.getenv(\"IAM_API_KEY\", None),\n", + " \"url\": \"https://us-south.ml.cloud.ibm.com\"\n", + " },\n", + " project_id=os.getenv(\"PROJECT_ID\"))" + ] + }, + { + "cell_type": "markdown", + "id": "31535c3b", + "metadata": { + "id": "73bf9a85-8ad5-44ed-ace8-8240cc9b49f2" + }, + "source": [ + "## Second element of the LLM chain using the watsonx model" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cc7a2456-6737-4799-b2fa-c5b2eb66317a", + "metadata": { + "id": "daa0f501-2d8a-489f-8244-c55eb881b96f" + }, + "outputs": [], + "source": [ + "summarization_prompt_flan_t5 = LLMChain(llm=flan_t5_model.to_langchain(), prompt=classification_prompt, output_key='issue_type')" + ] + }, + { + "cell_type": "markdown", + "id": "0ffd0aca", + "metadata": { + "id": "c62b2a601a7947b8a2d6e749e819c2b3" + }, + "source": [ + "# LLM 3 - using watsonx.ai FLAN_T5_XXL model" + ] + }, + { + "cell_type": "markdown", + "id": "a19da68f", + "metadata": { + "id": "1f7c58d6e3904ee1892fd331a17292b7" + }, + "source": [ + "## Generate issue resolution" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "84c780d4", + "metadata": { + "id": "f12df9ff94b1463381c26b846751f643" + }, + "outputs": [], + "source": [ + "resolution_template_text = \"\"\"\n", + "Provide a resolution for this mobile issue type in a maximum of 10 words.\n", + "\n", + "Issue Type: BatteryPerformance\n", + "Resolution: Optimize background app usage to conserve battery.\n", + "\n", + "Issue Type: StorageDataManagement\n", + "Resolution: Optimize apps, clear cache, upgrade RAM if possible.\n", + "\n", + "Issue Type: ConnectivityAndNetwork\n", + "Resolution: Check Wi-Fi password, reset router, restart device.\n", + "\n", + "Issue Type: {issue_type}\n", + "Resolution: \n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "01908b70", + "metadata": { + "id": "6372510d334e4e3584b130af6b4ceabe" + }, + "outputs": [], + "source": [ + "resolution_template = PromptTemplate(\n", + " input_variables=[\"issue_type\"],\n", + " template=resolution_template_text\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "8d6b5949-b740-49c2-bb85-ac8af5481aaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['FLAN_T5_XXL', 'FLAN_UL2', 'MT0_XXL', 'GPT_NEOX', 'MPT_7B_INSTRUCT2', 'STARCODER', 'LLAMA_2_70B_CHAT', 'LLAMA_2_13B_CHAT', 'GRANITE_13B_INSTRUCT', 'GRANITE_13B_CHAT', 'FLAN_T5_XL', 'GRANITE_13B_CHAT_V2', 'GRANITE_13B_INSTRUCT_V2', 'ELYZA_JAPANESE_LLAMA_2_7B_INSTRUCT', 'MIXTRAL_8X7B_INSTRUCT_V01_Q']\n" + ] + } + ], + "source": [ + "print([model.name for model in ModelTypes])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "0e02a7f0-6df2-4087-9a25-0fb9e9d18aec", + "metadata": {}, + "outputs": [], + "source": [ + "model_id_3 = ModelTypes.LLAMA_2_13B_CHAT" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "5ed7087e-6e24-47fe-a287-1d43df3c53c6", + "metadata": {}, + "outputs": [], + "source": [ + "resolution_model = Model(\n", + " model_id=model_id_3,\n", + " params={\n", + " \"decoding_method\": \"sample\",\n", + " \"max_new_tokens\": 10,\n", + " \"min_new_tokens\": 0,\n", + " \"temperature\":0.0\n", + " },\n", + " credentials={\n", + " \"apikey\": os.getenv(\"IAM_API_KEY\", None),\n", + " \"url\": \"https://us-south.ml.cloud.ibm.com\"\n", + " },\n", + " project_id=os.getenv(\"PROJECT_ID\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "1dca0465", + "metadata": { + "id": "7b57a396f60a4fe7bdab5063aab25ef8" + }, + "outputs": [], + "source": [ + "issue_resolution_flan_t5 = LLMChain(llm=resolution_model.to_langchain(), prompt=resolution_template, output_key='resolution')" + ] + }, + { + "cell_type": "markdown", + "id": "a38a5cf3", + "metadata": { + "id": "ef42dff279704fef905bac600fe61196" + }, + "source": [ + "# IBM watsonx.governance-monitoring toolkit to evaluate the output of summarization element of the LLM chain" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "57a02827", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting ibm_cloud_sdk_core\n", + " Downloading ibm-cloud-sdk-core-3.19.2.tar.gz (61 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.0/62.0 kB\u001b[0m \u001b[31m632.1 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m0:01\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: python-dateutil<3.0.0,>=2.8.2 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm_cloud_sdk_core) (2.8.2)\n", + "Collecting PyJWT<3.0.0,>=2.8.0\n", + " Using cached PyJWT-2.8.0-py3-none-any.whl (22 kB)\n", + "Collecting urllib3<3.0.0,>=2.1.0\n", + " Using cached urllib3-2.2.1-py3-none-any.whl (121 kB)\n", + "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm_cloud_sdk_core) (2.31.0)\n", + "Requirement already satisfied: six>=1.5 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from python-dateutil<3.0.0,>=2.8.2->ibm_cloud_sdk_core) (1.16.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0.0,>=2.31.0->ibm_cloud_sdk_core) (2023.7.22)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0.0,>=2.31.0->ibm_cloud_sdk_core) (3.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0.0,>=2.31.0->ibm_cloud_sdk_core) (3.3.2)\n", + "Building wheels for collected packages: ibm_cloud_sdk_core\n", + " Building wheel for ibm_cloud_sdk_core (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for ibm_cloud_sdk_core: filename=ibm_cloud_sdk_core-3.19.2-py3-none-any.whl size=98827 sha256=4f53e2e5d1a6e4f289ca9fb6fc98242015af212d6f048039a6c12c3d3b873c9a\n", + " Stored in directory: /Users/gyliu/Library/Caches/pip/wheels/90/05/db/f544eb28b85089bf6cf29cb9e30f56b471bf59d56a46c92519\n", + "Successfully built ibm_cloud_sdk_core\n", + "Installing collected packages: urllib3, PyJWT, ibm_cloud_sdk_core\n", + " Attempting uninstall: urllib3\n", + " Found existing installation: urllib3 1.26.18\n", + " Uninstalling urllib3-1.26.18:\n", + " Successfully uninstalled urllib3-1.26.18\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "qdrant-client 1.6.4 requires urllib3<2.0.0,>=1.26.14, but you have urllib3 2.2.1 which is incompatible.\n", + "langflow 0.6.0a0 requires cohere<5.0.0,>=4.32.0, but you have cohere 5.1.2 which is incompatible.\n", + "langflow 0.6.0a0 requires langchain<0.1.0,>=0.0.327, but you have langchain 0.1.13 which is incompatible.\n", + "langflow 0.6.0a0 requires langfuse<2.0.0,>=1.1.11, but you have langfuse 2.21.1 which is incompatible.\n", + "langflow 0.6.0a0 requires openai<0.28.0,>=0.27.8, but you have openai 1.14.3 which is incompatible.\n", + "langflow 0.6.0a0 requires orjson==3.9.3, but you have orjson 3.9.15 which is incompatible.\n", + "langflow 0.6.0a0 requires pandas==2.0.3, but you have pandas 1.5.3 which is incompatible.\n", + "langflow 0.6.0a0 requires tiktoken<0.6.0,>=0.5.0, but you have tiktoken 0.6.0 which is incompatible.\n", + "jina 3.15.2 requires urllib3<2.0.0, but you have urllib3 2.2.1 which is incompatible.\n", + "ibm-cos-sdk-core 2.13.4 requires urllib3<2.2,>=1.26.18, but you have urllib3 2.2.1 which is incompatible.\n", + "botocore 1.32.0 requires urllib3<2.1,>=1.25.4; python_version >= \"3.10\", but you have urllib3 2.2.1 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed PyJWT-2.8.0 ibm_cloud_sdk_core-3.19.2 urllib3-2.2.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install ibm_cloud_sdk_core" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "27cdc532", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting ibm_watson_openscale\n", + " Downloading ibm_watson_openscale-3.0.14-py3-none-any.whl (173 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m173.7/173.7 kB\u001b[0m \u001b[31m850.7 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hCollecting ibm-cloud-sdk-core==3.10.1\n", + " Downloading ibm-cloud-sdk-core-3.10.1.tar.gz (40 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.0/40.0 kB\u001b[0m \u001b[31m1.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: requests<3.0,>=2.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm_watson_openscale) (2.31.0)\n", + "Requirement already satisfied: python-dateutil>=2.5.3 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm_watson_openscale) (2.8.2)\n", + "Requirement already satisfied: pandas>=0.25.3 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm_watson_openscale) (1.5.3)\n", + "Requirement already satisfied: PyJWT<3.0.0,>=2.0.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from ibm-cloud-sdk-core==3.10.1->ibm_watson_openscale) (2.8.0)\n", + "Requirement already satisfied: numpy>=1.21.0 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pandas>=0.25.3->ibm_watson_openscale) (1.26.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from pandas>=0.25.3->ibm_watson_openscale) (2023.3.post1)\n", + "Requirement already satisfied: six>=1.5 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from python-dateutil>=2.5.3->ibm_watson_openscale) (1.16.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0,>=2.0->ibm_watson_openscale) (3.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0,>=2.0->ibm_watson_openscale) (2023.7.22)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0,>=2.0->ibm_watson_openscale) (2.2.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/gyliu/venvlf/lib/python3.10/site-packages (from requests<3.0,>=2.0->ibm_watson_openscale) (3.3.2)\n", + "Installing collected packages: ibm-cloud-sdk-core, ibm_watson_openscale\n", + " Attempting uninstall: ibm-cloud-sdk-core\n", + " Found existing installation: ibm-cloud-sdk-core 3.19.2\n", + " Uninstalling ibm-cloud-sdk-core-3.19.2:\n", + " Successfully uninstalled ibm-cloud-sdk-core-3.19.2\n", + "\u001b[33m DEPRECATION: ibm-cloud-sdk-core is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m Running setup.py install for ibm-cloud-sdk-core ... \u001b[?25ldone\n", + "\u001b[?25hSuccessfully installed ibm-cloud-sdk-core-3.10.1 ibm_watson_openscale-3.0.14\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install ibm_watson_openscale" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "7088b534", + "metadata": { + "id": "314cfad082b4423186ef0f3f8e834d4e" + }, + "outputs": [ + { + "ename": "AuthorizationError", + "evalue": "You are not authorized to access AI OpenScale instance None", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAuthorizationError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[28], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mibm_watson_openscale\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msupporting_classes\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;241m*\u001b[39m\n\u001b[1;32m 7\u001b[0m authenticator \u001b[38;5;241m=\u001b[39m IAMAuthenticator(apikey\u001b[38;5;241m=\u001b[39mos\u001b[38;5;241m.\u001b[39mgetenv(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCLOUD_API_KEY\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[0;32m----> 8\u001b[0m client \u001b[38;5;241m=\u001b[39m \u001b[43mAPIClient\u001b[49m\u001b[43m(\u001b[49m\u001b[43mauthenticator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mauthenticator\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m client\u001b[38;5;241m.\u001b[39mversion\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/ibm_watson_openscale/client.py:187\u001b[0m, in \u001b[0;36mWatsonOpenScaleV2Adapter.__init__\u001b[0;34m(self, authenticator, service_url, service_instance_id, bedrock_url)\u001b[0m\n\u001b[1;32m 185\u001b[0m token \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mauthenticator\u001b[38;5;241m.\u001b[39mbearer_token\n\u001b[1;32m 186\u001b[0m entitlements \u001b[38;5;241m=\u001b[39m EntitlementClient(service_url, token, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mservice_instance_id)\n\u001b[0;32m--> 187\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mplan_name \u001b[38;5;241m=\u001b[39m \u001b[43mentitlements\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_entitled\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmonitor_instances \u001b[38;5;241m=\u001b[39m MonitorInstances(\u001b[38;5;28mself\u001b[39m) \n\u001b[1;32m 189\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmonitor_definitions \u001b[38;5;241m=\u001b[39m MonitorDefinitions(\u001b[38;5;28mself\u001b[39m)\n", + "File \u001b[0;32m~/venvlf/lib/python3.10/site-packages/ibm_watson_openscale/utils/entitlement_client.py:50\u001b[0m, in \u001b[0;36mEntitlementClient.is_entitled\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m current_instance:\n\u001b[0;32m---> 50\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m AuthorizationError(msg)\n\u001b[1;32m 52\u001b[0m instance_id \u001b[38;5;241m=\u001b[39m current_instance[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mid\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 53\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mplan_name\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;129;01min\u001b[39;00m current_instance:\n", + "\u001b[0;31mAuthorizationError\u001b[0m: You are not authorized to access AI OpenScale instance None" + ] + } + ], + "source": [ + "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator,BearerTokenAuthenticator\n", + "\n", + "from ibm_watson_openscale import *\n", + "from ibm_watson_openscale.supporting_classes.enums import *\n", + "from ibm_watson_openscale.supporting_classes import *\n", + "\n", + "authenticator = IAMAuthenticator(apikey=os.getenv(\"CLOUD_API_KEY\"))\n", + "client = APIClient(authenticator=authenticator)\n", + "client.version" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "ded22e1d", + "metadata": { + "id": "54c6824867cf4cdf8b9b08853066574d" + }, + "outputs": [], + "source": [ + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup\n", + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMSummarizationMetrics\n", + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMGenerationMetrics\n", + "\n", + "summarization_metric_config = { \n", + " \"configuration\": {\n", + " LLMTextMetricGroup.SUMMARIZATION.value: {\n", + " LLMSummarizationMetrics.ROUGE_SCORE.value: {},\n", + " LLMSummarizationMetrics.SARI.value: {},\n", + " LLMSummarizationMetrics.METEOR.value: {},\n", + " LLMSummarizationMetrics.BLEU.value: {},\n", + " LLMSummarizationMetrics.FLESCH.value: {}\n", + " }\n", + " }\n", + "}\n", + "\n", + "generation_metric_config = { \n", + " \"configuration\": {\n", + " LLMTextMetricGroup.GENERATION.value: {\n", + " LLMGenerationMetrics.BLEU.value: {},\n", + " LLMGenerationMetrics.ROUGE_SCORE.value: {},\n", + " LLMGenerationMetrics.FLESCH.value: {},\n", + " LLMGenerationMetrics.METEOR.value: {}, \n", + " LLMGenerationMetrics.NORMALIZED_RECALL.value: {},\n", + " LLMGenerationMetrics.NORMALIZED_PRECISION.value: {},\n", + " LLMGenerationMetrics.NORMALIZED_F1_SCORE.value: {}\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "c9f91b94-2339-45fd-86c6-62801b77ec6e", + "metadata": { + "id": "015b0a57-0108-42ac-9628-6e06e8a69eb2" + }, + "source": [ + "# LLM Chain Callback Handler" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "ea02af3e-32fe-4fd2-b43e-c1975320c9a4", + "metadata": { + "id": "bad208fb-fa3d-4600-8ad7-c88472658883" + }, + "outputs": [], + "source": [ + "from langchain.callbacks.base import BaseCallbackHandler\n", + "from langchain.schema import LLMResult\n", + "from typing import Any, Dict, List, Optional, Union\n", + "import pandas as pd\n", + "import json\n", + "\n", + "class MyCustomHandler(BaseCallbackHandler):\n", + " prompts_text = None\n", + " summary_ground_truth = None\n", + " resolution_ground_truth = None\n", + "\n", + " def __init__(self, summary_ground_truth: str = \"\", resolution_ground_truth: str = \"\"):\n", + " self.summary_ground_truth = summary_ground_truth\n", + " self.resolution_ground_truth = resolution_ground_truth \n", + " print(self.summary_ground_truth)\n", + " print(self.resolution_ground_truth)\n", + "\n", + " def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:\n", + " print('Inside on_llm_start')\n", + " \n", + " def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:\n", + " print('Inside on_llm_end')\n", + "\n", + " def on_chain_start(self, serialized: Dict[str, Any], prompts: Dict[str, Any], **kwargs: Any) -> Any:\n", + " print('Inside on_chain_start')\n", + " self.prompts_text = prompts\n", + " print(self.prompts_text)\n", + " \n", + " def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:\n", + " print('Inside on_chain_end')\n", + " \n", + " overall_context = self.prompts_text['content']\n", + " generated_summary = outputs['summary']\n", + " resolution = outputs['resolution'] \n", + "\n", + " print('Evaluating Summarization quality..')\n", + " df_input = pd.DataFrame({'input_text': [overall_context]})\n", + " df_reference = pd.DataFrame({'ground_truth': [self.summary_ground_truth]})\n", + " df_output = pd.DataFrame({'generated_summary': [generated_summary]})\n", + " evals = client.llm_metrics.compute_metrics(summarization_metric_config, \n", + " sources = df_input, \n", + " predictions = df_output, \n", + " references = df_reference)\n", + " print('Summarization evaluation results:')\n", + " print(json.dumps(evals, indent=2))\n", + "\n", + " print('\\n')\n", + " \n", + " print('Evaluating Content generation quality..')\n", + " df_reference = pd.DataFrame({'ground_truth': [self.resolution_ground_truth]})\n", + " df_output = pd.DataFrame({'resolution': [resolution]})\n", + " evals = client.llm_metrics.compute_metrics(generation_metric_config, \n", + " sources = df_input, \n", + " predictions = df_output, \n", + " references = df_reference)\n", + " print('Content generation evaluation results:')\n", + " print(json.dumps(evals, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "721bdb5a-b9c9-4964-879d-7772970de60d", + "metadata": { + "id": "bcf632ac-d2ee-417d-a8e0-9da42e3655f5" + }, + "source": [ + "# Chaining.." + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "18837b68-1971-4580-8ebd-df25967dedab", + "metadata": { + "id": "e85a0737-4118-423b-b93d-faeb8885b93c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.\n", + "Optimize background app usage to conserve battery.\n" + ] + } + ], + "source": [ + "chain = SequentialChain(chains=[summarization_prompt_azure_openai, summarization_prompt_flan_t5, issue_resolution_flan_t5], \n", + " input_variables=[\"content\"],\n", + " output_variables=[\"summary\", \"issue_type\", \"resolution\"],\n", + " callbacks=[MyCustomHandler(\n", + " summary_ground_truth = 'Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.',\n", + " resolution_ground_truth = 'Optimize background app usage to conserve battery.')],\n", + " verbose=True)" + ] + }, + { + "cell_type": "markdown", + "id": "6f8ebc11", + "metadata": { + "id": "5cbe4fcb5b2b418c9fa87505e6a7c11c" + }, + "source": [ + "## Invoke the LLM chain with callback handler" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "ca6c492a-01e4-4d62-8c97-7980f22c87d0", + "metadata": { + "id": "49393877-7c66-4302-9b38-5aee24a9e483" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inside on_chain_start\n", + "{'content': 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.'}\n", + "\n", + "\n", + "\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n", + "Inside on_chain_end\n", + "Evaluating Summarization quality..\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to /home/hadoop/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/hadoop/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/hadoop/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarization evaluation results:\n", + "{\n", + " \"flesch\": {\n", + " \"flesch_reading_ease\": {\n", + " \"metric_value\": 28.84,\n", + " \"mean\": 28.84,\n", + " \"min\": 28.84,\n", + " \"max\": 28.84,\n", + " \"std\": 0.0\n", + " },\n", + " \"flesch_kincaid_grade\": {\n", + " \"metric_value\": 13.5,\n", + " \"mean\": 13.5,\n", + " \"min\": 13.5,\n", + " \"max\": 13.5,\n", + " \"std\": 0.0\n", + " }\n", + " },\n", + " \"bleu\": {\n", + " \"precisions\": [\n", + " 1.0,\n", + " 1.0,\n", + " 1.0,\n", + " 1.0\n", + " ],\n", + " \"brevity_penalty\": 1.0,\n", + " \"length_ratio\": 1.0,\n", + " \"translation_length\": 19,\n", + " \"reference_length\": 19,\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"meteor\": {\n", + " \"metric_value\": 0.9999271030762502\n", + " },\n", + " \"rouge_score\": {\n", + " \"rouge1\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rouge2\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rougeL\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rougeLsum\": {\n", + " \"metric_value\": 1.0\n", + " }\n", + " },\n", + " \"sari\": {\n", + " \"metric_value\": 99.12280701754386\n", + " }\n", + "}\n", + "\n", + "\n", + "Evaluating Content generation quality..\n", + "Content generation evaluation results:\n", + "{\n", + " \"flesch\": {\n", + " \"flesch_reading_ease\": {\n", + " \"metric_value\": 64.37,\n", + " \"mean\": 64.37,\n", + " \"min\": 64.37,\n", + " \"max\": 64.37,\n", + " \"std\": 0.0\n", + " },\n", + " \"flesch_kincaid_grade\": {\n", + " \"metric_value\": 6.0,\n", + " \"mean\": 6.0,\n", + " \"min\": 6.0,\n", + " \"max\": 6.0,\n", + " \"std\": 0.0\n", + " }\n", + " },\n", + " \"bleu\": {\n", + " \"precisions\": [\n", + " 0.625,\n", + " 0.5714285714285714,\n", + " 0.5,\n", + " 0.4\n", + " ],\n", + " \"brevity_penalty\": 1.0,\n", + " \"length_ratio\": 1.0,\n", + " \"translation_length\": 8,\n", + " \"reference_length\": 8,\n", + " \"metric_value\": 0.5169731539571706\n", + " },\n", + " \"meteor\": {\n", + " \"metric_value\": 0.6225\n", + " },\n", + " \"normalized_f1\": {\n", + " \"metric_value\": 0.7692307692307692,\n", + " \"mean\": 0.7692307692307692,\n", + " \"min\": 0.7692307692307692,\n", + " \"max\": 0.7692307692307692,\n", + " \"std\": 0.0\n", + " },\n", + " \"normalized_precision\": {\n", + " \"metric_value\": 0.8333333333333334,\n", + " \"mean\": 0.8333333333333334,\n", + " \"min\": 0.8333333333333334,\n", + " \"max\": 0.8333333333333334,\n", + " \"std\": 0.0\n", + " },\n", + " \"normalized_recall\": {\n", + " \"metric_value\": 0.7142857142857143,\n", + " \"mean\": 0.7142857142857143,\n", + " \"min\": 0.7142857142857143,\n", + " \"max\": 0.7142857142857143,\n", + " \"std\": 0.0\n", + " },\n", + " \"rouge_score\": {\n", + " \"rouge1\": {\n", + " \"metric_value\": 0.7143\n", + " },\n", + " \"rouge2\": {\n", + " \"metric_value\": 0.6667\n", + " },\n", + " \"rougeL\": {\n", + " \"metric_value\": 0.7143\n", + " },\n", + " \"rougeLsum\": {\n", + " \"metric_value\": 0.7143\n", + " }\n", + " }\n", + "}\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to /home/hadoop/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/hadoop/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/hadoop/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "{'content': 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.',\n", + " 'summary': 'Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.',\n", + " 'issue_type': 'BatteryPerformance',\n", + " 'resolution': 'a) Manage background app usage to conserve'}" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue = 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. \\\n", + "Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.'\n", + "\n", + "chain.invoke({\"content\" :issue})" + ] + }, + { + "cell_type": "markdown", + "id": "96bbc341", + "metadata": { + "id": "34003e24-2223-4c60-8f15-eabc617956dc" + }, + "source": [ + "Author: ravi.chamarthy@in.ibm.com" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/eval/llm-powered-application-langchain-quality-evals.ipynb b/eval/llm-powered-application-langchain-quality-evals.ipynb new file mode 100644 index 0000000..70660f2 --- /dev/null +++ b/eval/llm-powered-application-langchain-quality-evals.ipynb @@ -0,0 +1,990 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1e26f660", + "metadata": { + "id": "701ceb425feb4278992e00794f1ac0dd" + }, + "source": [ + "# For a langchain powered multi-chain application, demonstrate how to evaluate the LLM Quality Metrics using IBM watsonx.governance - monitoring toolkit." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b89f7add-1c48-42f8-82c1-0347bedad786", + "metadata": { + "id": "db3d6273-8347-4c25-8a2a-2de834c8756e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[33m DEPRECATION: sqlalchemy is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m DEPRECATION: ibm-cos-sdk-core is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m DEPRECATION: ibm-cos-sdk-s3transfer is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[33m DEPRECATION: ibm-cos-sdk is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559\u001b[0m\u001b[33m\n", + "\u001b[0m\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", + "sqlmodel 0 requires pydantic[email]<=2.4,>=2.1.1, but you have pydantic 2.6.4 which is incompatible.\n", + "sqlmodel 0 requires SQLAlchemy<=2.0.11,>=2.0.0, but you have sqlalchemy 1.4.47 which is incompatible.\n", + "langflow 0.6.0a0 requires cohere<5.0.0,>=4.32.0, but you have cohere 5.1.2 which is incompatible.\n", + "langflow 0.6.0a0 requires langchain<0.1.0,>=0.0.327, but you have langchain 0.1.13 which is incompatible.\n", + "langflow 0.6.0a0 requires langfuse<2.0.0,>=1.1.11, but you have langfuse 2.21.1 which is incompatible.\n", + "langflow 0.6.0a0 requires openai<0.28.0,>=0.27.8, but you have openai 1.14.3 which is incompatible.\n", + "langflow 0.6.0a0 requires orjson==3.9.3, but you have orjson 3.9.15 which is incompatible.\n", + "langflow 0.6.0a0 requires pandas==2.0.3, but you have pandas 1.5.3 which is incompatible.\n", + "langflow 0.6.0a0 requires tiktoken<0.6.0,>=0.5.0, but you have tiktoken 0.6.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Successfully installed ibm-cos-sdk-2.13.4 ibm-cos-sdk-core-2.13.4 ibm-cos-sdk-s3transfer-2.13.4 ibm-watson-machine-learning-1.0.353 langchain-openai-0.1.1 lomond-0.3.3 pandas-1.5.3 sqlalchemy-1.4.47\n" + ] + } + ], + "source": [ + "!pip install ibm-watson-machine-learning langchain-openai \"pydantic>=1.10.0\" langchain \"sqlalchemy==1.4.47\" | tail -n 1" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "b7e7fd50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from dotenv import load_dotenv\n", + "load_dotenv()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "79509547-4901-43e6-aa8c-d46aa53eb863", + "metadata": { + "id": "24d2d3d9-a4bd-41f6-990e-2f9b30b2c298" + }, + "outputs": [], + "source": [ + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "225f3735", + "metadata": { + "id": "597f49a6e85b40bd9191eac9239dfe64" + }, + "outputs": [], + "source": [ + "# langchain imports\n", + "from langchain.chains import LLMChain\n", + "from langchain import PromptTemplate\n", + "from langchain.chains import SequentialChain\n", + "\n", + "# watsonx.ai imports\n", + "from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes\n", + "from ibm_watson_machine_learning.foundation_models import Model\n", + "from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM\n", + "from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams\n", + "from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods" + ] + }, + { + "cell_type": "markdown", + "id": "c3a1d597-d71a-4c6c-8438-f533b3a269c5", + "metadata": {}, + "source": [ + "## Azure OpenAI Access Details" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "95adceb5-bbb7-4c83-a5b5-6502dabf25ff", + "metadata": {}, + "outputs": [], + "source": [ + "# PLEASE DO NOT USE THESE. Otherwise Ravi Chamarthy gets charged.\n", + "API_TYPE = \"azure\"\n", + "BASE_URL = \"https://azure-openai-instance-default.openai.azure.com/\"\n", + "API_VERSION = \"2023-09-15-preview\"\n", + "AZURE_OPENAI_API_KEY = 'xxxx'\n", + "DEPLOYMENT_NAME = 'azure-openai-deployment-001'" + ] + }, + { + "cell_type": "markdown", + "id": "2b151f07-6728-4c54-bcda-90f55f8ec0dd", + "metadata": {}, + "source": [ + "## IBM watsonx.ai Access Details" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "e4c85228-abe4-42b0-b7ba-f11335026b93", + "metadata": {}, + "outputs": [], + "source": [ + "# Using YS1Dev as doing multiple scoring, and the account is white listed\n", + "test_GEN_API_KEY = '_iXE92G6-xxxx-xxxxx-'\n", + "GEN_API_KEY = test_GEN_API_KEY\n", + "\n", + "test_api_endpoint = 'https://wml-xxxx.ml.xxxx.cloud.ibm.com/ml/v1-beta/generation/text?version=2023-05-28'\n", + "api_endpoint = test_api_endpoint\n", + "\n", + "test_project_id = 'xxxx-fcdd-4271-bbd7-xxxx'\n", + "project_id = test_project_id\n", + "\n", + "test_endpoint_url = \"https://wml-xxxx.ml.xxxx.cloud.ibm.com\"\n", + "endpoint_url = test_endpoint_url" + ] + }, + { + "cell_type": "markdown", + "id": "8b159260-ba3f-424b-9a53-a15934b70450", + "metadata": {}, + "source": [ + "## IBM watsonx.gov Access Details" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ccc0eefa-0a0b-4554-9a6e-436640943fd7", + "metadata": {}, + "outputs": [], + "source": [ + "CLOUD_API_KEY = \"K-xxxx\"\n", + "IAM_URL=\"https://iam.ng.bluemix.net/oidc/token\"" + ] + }, + { + "cell_type": "markdown", + "id": "caf9c9a3", + "metadata": { + "id": "cf8e0dc758bb404e9463790f97251b71" + }, + "source": [ + "# LLM 1 - using Azure OpenAI GPT Model" + ] + }, + { + "cell_type": "markdown", + "id": "12a0e49c", + "metadata": { + "id": "d76b6a6cd55d4099aab31d62bb6fb31c" + }, + "source": [ + "## Summarization prompt - summarize the mobile issue" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4c6c356d", + "metadata": { + "id": "0cf8a813dd2a4c7f8ec05bd5e569f22b" + }, + "outputs": [], + "source": [ + "summarization_prompt_template_text = \"\"\"\n", + "Please provide a summary of the following text with maximum of 20 words. Do not include the Issue in the output.\n", + "\n", + "Issue: Some apps continue to run processes in the background even when not actively in use, leading to increased battery drain. Users can identify and restrict background activity for specific apps in their phone settings.\n", + "Summary: Certain apps running in the background excessively consume battery power.\n", + "\n", + "Issue: Devices with insufficient RAM struggle to handle multiple apps simultaneously, leading to slow performance. Users can close background apps and consider upgrading to a device with more RAM if multitasking is essential.\n", + "Summary: Limited Random Access Memory (RAM) affecting multitasking capabilities.\n", + "\n", + "Issue: Users may encounter issues where the device fails to authenticate with a Wi-Fi network, often requiring re-entry of the correct password or troubleshooting router settings.\n", + "Summary: Inability to connect to a Wi-Fi network due to authentication problems.\n", + "\n", + "Issue: {content}\n", + "Summary: \n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3695aa51", + "metadata": { + "id": "40b5eba806d24fa08d1725af047f5e9f" + }, + "outputs": [], + "source": [ + "summarization_prompt = PromptTemplate(\n", + " input_variables=[\"content\"],\n", + " template=summarization_prompt_template_text\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "6d8f0120", + "metadata": { + "id": "5bc82fcfac684a5c8783f1fea0695fca" + }, + "outputs": [], + "source": [ + "from langchain_openai import AzureOpenAI\n", + "azure_openai_model = AzureOpenAI(\n", + " azure_endpoint=BASE_URL,\n", + " openai_api_version=API_VERSION,\n", + " deployment_name=DEPLOYMENT_NAME,\n", + " openai_api_key=AZURE_OPENAI_API_KEY,\n", + " openai_api_type = API_TYPE,\n", + " temperature=0.0,\n", + " max_tokens=20,\n", + " stop='\\n',\n", + " top_p=0.5,\n", + " frequency_penalty=0,\n", + " presence_penalty=0\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "83c2df30", + "metadata": { + "id": "414911323e2c4d0f837efde3b1af4ac3" + }, + "source": [ + "## First element of the LLM chain using the Azure OpenAI Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "8cba9083", + "metadata": { + "id": "24194490949541dcb3e0b52834204a99" + }, + "outputs": [], + "source": [ + "summarization_prompt_azure_openai = LLMChain(llm=azure_openai_model, \n", + " prompt=summarization_prompt, \n", + " output_key='summary')" + ] + }, + { + "cell_type": "markdown", + "id": "b0f42633", + "metadata": { + "id": "ede59bf08c534b1d85ad4325107f0b03" + }, + "source": [ + "# LLM 2 - using IBM watsonx.ai FLAN_T5_XXL Model" + ] + }, + { + "cell_type": "markdown", + "id": "91f72403", + "metadata": { + "id": "ee63717e13984ca88f2138fbff28c705" + }, + "source": [ + "## Issue classification prompt - summarize the mobile issue\n", + "\n", + "### BatteryPerformance or StorageDataManagement or ConnectivityAndNetwork" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a413ebf5", + "metadata": { + "id": "8ee8ccc463974802b615a5757f6b9d91" + }, + "outputs": [], + "source": [ + "classification_prompt_template_text = \"\"\"\n", + "Classify the following as BatteryPerformance or StorageDataManagement or ConnectivityAndNetwork.\n", + "\n", + "Issue: Certain apps running in the background excessively consume battery power.\n", + "Type: BatteryPerformance\n", + "\n", + "Issue: Limited Random Access Memory (RAM) affecting multitasking capabilities.\n", + "Type: StorageDataManagement\n", + "\n", + "Issue: Inability to connect to a Wi-Fi network due to authentication problems.\n", + "Type: ConnectivityAndNetwork\n", + "\n", + "Issue: {summary}\n", + "Type:\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "94b8f1e7", + "metadata": { + "id": "7dd8384a45a24af592516d977eb122b8" + }, + "outputs": [], + "source": [ + "classification_prompt = PromptTemplate(\n", + " input_variables=[\"summary\"],\n", + " template=classification_prompt_template_text\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "fd473b08-7ab9-4111-9b38-e0a6ef7ad78c", + "metadata": { + "id": "fea40e5f-502e-4e6a-a8b5-86abd9797c89" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['FLAN_T5_XXL', 'FLAN_UL2', 'MT0_XXL', 'GPT_NEOX', 'MPT_7B_INSTRUCT2', 'STARCODER', 'LLAMA_2_70B_CHAT', 'LLAMA_2_13B_CHAT', 'GRANITE_13B_INSTRUCT', 'GRANITE_13B_CHAT']\n" + ] + } + ], + "source": [ + "print([model.name for model in ModelTypes])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5fa90049-32c3-464e-bbb7-75e463759a8b", + "metadata": { + "id": "7c6adfeb-d2f8-4c7b-b7b6-43118746fa11" + }, + "outputs": [], + "source": [ + "model_id_2 = ModelTypes.FLAN_T5_XXL" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "948b3a6a-ab40-48ca-86b7-08b3a85597e3", + "metadata": { + "id": "14c9c8f2-b981-433c-be1a-d4d416f1f982" + }, + "outputs": [], + "source": [ + "flan_t5_model = Model(\n", + " model_id=model_id_2,\n", + " params={\n", + " \"decoding_method\": \"sample\",\n", + " \"max_new_tokens\": 10,\n", + " \"min_new_tokens\": 0,\n", + " \"temperature\":0.0\n", + " },\n", + " credentials={\n", + " \"apikey\": GEN_API_KEY,\n", + " \"url\": endpoint_url\n", + " },\n", + " project_id=project_id)" + ] + }, + { + "cell_type": "markdown", + "id": "31535c3b", + "metadata": { + "id": "73bf9a85-8ad5-44ed-ace8-8240cc9b49f2" + }, + "source": [ + "## Second element of the LLM chain using the watsonx model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "cc7a2456-6737-4799-b2fa-c5b2eb66317a", + "metadata": { + "id": "daa0f501-2d8a-489f-8244-c55eb881b96f" + }, + "outputs": [], + "source": [ + "summarization_prompt_flan_t5 = LLMChain(llm=flan_t5_model.to_langchain(), prompt=classification_prompt, output_key='issue_type')" + ] + }, + { + "cell_type": "markdown", + "id": "0ffd0aca", + "metadata": { + "id": "c62b2a601a7947b8a2d6e749e819c2b3" + }, + "source": [ + "# LLM 3 - using watsonx.ai FLAN_T5_XXL model" + ] + }, + { + "cell_type": "markdown", + "id": "a19da68f", + "metadata": { + "id": "1f7c58d6e3904ee1892fd331a17292b7" + }, + "source": [ + "## Generate issue resolution" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "84c780d4", + "metadata": { + "id": "f12df9ff94b1463381c26b846751f643" + }, + "outputs": [], + "source": [ + "resolution_template_text = \"\"\"\n", + "Provide a resolution for this mobile issue type in a maximum of 10 words.\n", + "\n", + "Issue Type: BatteryPerformance\n", + "Resolution: Optimize background app usage to conserve battery.\n", + "\n", + "Issue Type: StorageDataManagement\n", + "Resolution: Optimize apps, clear cache, upgrade RAM if possible.\n", + "\n", + "Issue Type: ConnectivityAndNetwork\n", + "Resolution: Check Wi-Fi password, reset router, restart device.\n", + "\n", + "Issue Type: {issue_type}\n", + "Resolution: \n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "01908b70", + "metadata": { + "id": "6372510d334e4e3584b130af6b4ceabe" + }, + "outputs": [], + "source": [ + "resolution_template = PromptTemplate(\n", + " input_variables=[\"issue_type\"],\n", + " template=resolution_template_text\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "8d6b5949-b740-49c2-bb85-ac8af5481aaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['FLAN_T5_XXL', 'FLAN_UL2', 'MT0_XXL', 'GPT_NEOX', 'MPT_7B_INSTRUCT2', 'STARCODER', 'LLAMA_2_70B_CHAT', 'LLAMA_2_13B_CHAT', 'GRANITE_13B_INSTRUCT', 'GRANITE_13B_CHAT']\n" + ] + } + ], + "source": [ + "print([model.name for model in ModelTypes])" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "0e02a7f0-6df2-4087-9a25-0fb9e9d18aec", + "metadata": {}, + "outputs": [], + "source": [ + "model_id_3 = ModelTypes.LLAMA_2_13B_CHAT" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "5ed7087e-6e24-47fe-a287-1d43df3c53c6", + "metadata": {}, + "outputs": [], + "source": [ + "resolution_model = Model(\n", + " model_id=model_id_3,\n", + " params={\n", + " \"decoding_method\": \"sample\",\n", + " \"max_new_tokens\": 10,\n", + " \"min_new_tokens\": 0,\n", + " \"temperature\":0.0\n", + " },\n", + " credentials={\n", + " \"apikey\": GEN_API_KEY,\n", + " \"url\": endpoint_url\n", + " },\n", + " project_id=project_id)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "1dca0465", + "metadata": { + "id": "7b57a396f60a4fe7bdab5063aab25ef8" + }, + "outputs": [], + "source": [ + "issue_resolution_flan_t5 = LLMChain(llm=resolution_model.to_langchain(), prompt=resolution_template, output_key='resolution')" + ] + }, + { + "cell_type": "markdown", + "id": "a38a5cf3", + "metadata": { + "id": "ef42dff279704fef905bac600fe61196" + }, + "source": [ + "# IBM watsonx.governance-monitoring toolkit to evaluate the output of summarization element of the LLM chain" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "7088b534", + "metadata": { + "id": "314cfad082b4423186ef0f3f8e834d4e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'3.0.35'" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ibm_cloud_sdk_core.authenticators import IAMAuthenticator,BearerTokenAuthenticator\n", + "\n", + "from ibm_watson_openscale import *\n", + "from ibm_watson_openscale.supporting_classes.enums import *\n", + "from ibm_watson_openscale.supporting_classes import *\n", + "\n", + "authenticator = IAMAuthenticator(apikey=CLOUD_API_KEY)\n", + "client = APIClient(authenticator=authenticator)\n", + "client.version" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "ded22e1d", + "metadata": { + "id": "54c6824867cf4cdf8b9b08853066574d" + }, + "outputs": [], + "source": [ + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMTextMetricGroup\n", + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMSummarizationMetrics\n", + "from ibm_metrics_plugin.metrics.llm.utils.constants import LLMGenerationMetrics\n", + "\n", + "summarization_metric_config = { \n", + " \"configuration\": {\n", + " LLMTextMetricGroup.SUMMARIZATION.value: {\n", + " LLMSummarizationMetrics.ROUGE_SCORE.value: {},\n", + " LLMSummarizationMetrics.SARI.value: {},\n", + " LLMSummarizationMetrics.METEOR.value: {},\n", + " LLMSummarizationMetrics.BLEU.value: {},\n", + " LLMSummarizationMetrics.FLESCH.value: {}\n", + " }\n", + " }\n", + "}\n", + "\n", + "generation_metric_config = { \n", + " \"configuration\": {\n", + " LLMTextMetricGroup.GENERATION.value: {\n", + " LLMGenerationMetrics.BLEU.value: {},\n", + " LLMGenerationMetrics.ROUGE_SCORE.value: {},\n", + " LLMGenerationMetrics.FLESCH.value: {},\n", + " LLMGenerationMetrics.METEOR.value: {}, \n", + " LLMGenerationMetrics.NORMALIZED_RECALL.value: {},\n", + " LLMGenerationMetrics.NORMALIZED_PRECISION.value: {},\n", + " LLMGenerationMetrics.NORMALIZED_F1_SCORE.value: {}\n", + " }\n", + " }\n", + "}" + ] + }, + { + "cell_type": "markdown", + "id": "c9f91b94-2339-45fd-86c6-62801b77ec6e", + "metadata": { + "id": "015b0a57-0108-42ac-9628-6e06e8a69eb2" + }, + "source": [ + "# LLM Chain Callback Handler" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "ea02af3e-32fe-4fd2-b43e-c1975320c9a4", + "metadata": { + "id": "bad208fb-fa3d-4600-8ad7-c88472658883" + }, + "outputs": [], + "source": [ + "from langchain.callbacks.base import BaseCallbackHandler\n", + "from langchain.schema import LLMResult\n", + "from typing import Any, Dict, List, Optional, Union\n", + "import pandas as pd\n", + "import json\n", + "\n", + "class MyCustomHandler(BaseCallbackHandler):\n", + " prompts_text = None\n", + " summary_ground_truth = None\n", + " resolution_ground_truth = None\n", + "\n", + " def __init__(self, summary_ground_truth: str = \"\", resolution_ground_truth: str = \"\"):\n", + " self.summary_ground_truth = summary_ground_truth\n", + " self.resolution_ground_truth = resolution_ground_truth \n", + " print(self.summary_ground_truth)\n", + " print(self.resolution_ground_truth)\n", + "\n", + " def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> Any:\n", + " print('Inside on_llm_start')\n", + " \n", + " def on_llm_end(self, response: LLMResult, **kwargs: Any) -> Any:\n", + " print('Inside on_llm_end')\n", + "\n", + " def on_chain_start(self, serialized: Dict[str, Any], prompts: Dict[str, Any], **kwargs: Any) -> Any:\n", + " print('Inside on_chain_start')\n", + " self.prompts_text = prompts\n", + " print(self.prompts_text)\n", + " \n", + " def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> Any:\n", + " print('Inside on_chain_end')\n", + " \n", + " overall_context = self.prompts_text['content']\n", + " generated_summary = outputs['summary']\n", + " resolution = outputs['resolution'] \n", + "\n", + " print('Evaluating Summarization quality..')\n", + " df_input = pd.DataFrame({'input_text': [overall_context]})\n", + " df_reference = pd.DataFrame({'ground_truth': [self.summary_ground_truth]})\n", + " df_output = pd.DataFrame({'generated_summary': [generated_summary]})\n", + " evals = client.llm_metrics.compute_metrics(summarization_metric_config, \n", + " sources = df_input, \n", + " predictions = df_output, \n", + " references = df_reference)\n", + " print('Summarization evaluation results:')\n", + " print(json.dumps(evals, indent=2))\n", + "\n", + " print('\\n')\n", + " \n", + " print('Evaluating Content generation quality..')\n", + " df_reference = pd.DataFrame({'ground_truth': [self.resolution_ground_truth]})\n", + " df_output = pd.DataFrame({'resolution': [resolution]})\n", + " evals = client.llm_metrics.compute_metrics(generation_metric_config, \n", + " sources = df_input, \n", + " predictions = df_output, \n", + " references = df_reference)\n", + " print('Content generation evaluation results:')\n", + " print(json.dumps(evals, indent=2))" + ] + }, + { + "cell_type": "markdown", + "id": "721bdb5a-b9c9-4964-879d-7772970de60d", + "metadata": { + "id": "bcf632ac-d2ee-417d-a8e0-9da42e3655f5" + }, + "source": [ + "# Chaining.." + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "18837b68-1971-4580-8ebd-df25967dedab", + "metadata": { + "id": "e85a0737-4118-423b-b93d-faeb8885b93c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.\n", + "Optimize background app usage to conserve battery.\n" + ] + } + ], + "source": [ + "chain = SequentialChain(chains=[summarization_prompt_azure_openai, summarization_prompt_flan_t5, issue_resolution_flan_t5], \n", + " input_variables=[\"content\"],\n", + " output_variables=[\"summary\", \"issue_type\", \"resolution\"],\n", + " callbacks=[MyCustomHandler(\n", + " summary_ground_truth = 'Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.',\n", + " resolution_ground_truth = 'Optimize background app usage to conserve battery.')],\n", + " verbose=True)" + ] + }, + { + "cell_type": "markdown", + "id": "6f8ebc11", + "metadata": { + "id": "5cbe4fcb5b2b418c9fa87505e6a7c11c" + }, + "source": [ + "## Invoke the LLM chain with callback handler" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "ca6c492a-01e4-4d62-8c97-7980f22c87d0", + "metadata": { + "id": "49393877-7c66-4302-9b38-5aee24a9e483" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Inside on_chain_start\n", + "{'content': 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.'}\n", + "\n", + "\n", + "\u001b[1m> Entering new SequentialChain chain...\u001b[0m\n", + "Inside on_chain_end\n", + "Evaluating Summarization quality..\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to /home/hadoop/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/hadoop/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/hadoop/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Summarization evaluation results:\n", + "{\n", + " \"flesch\": {\n", + " \"flesch_reading_ease\": {\n", + " \"metric_value\": 28.84,\n", + " \"mean\": 28.84,\n", + " \"min\": 28.84,\n", + " \"max\": 28.84,\n", + " \"std\": 0.0\n", + " },\n", + " \"flesch_kincaid_grade\": {\n", + " \"metric_value\": 13.5,\n", + " \"mean\": 13.5,\n", + " \"min\": 13.5,\n", + " \"max\": 13.5,\n", + " \"std\": 0.0\n", + " }\n", + " },\n", + " \"bleu\": {\n", + " \"precisions\": [\n", + " 1.0,\n", + " 1.0,\n", + " 1.0,\n", + " 1.0\n", + " ],\n", + " \"brevity_penalty\": 1.0,\n", + " \"length_ratio\": 1.0,\n", + " \"translation_length\": 19,\n", + " \"reference_length\": 19,\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"meteor\": {\n", + " \"metric_value\": 0.9999271030762502\n", + " },\n", + " \"rouge_score\": {\n", + " \"rouge1\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rouge2\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rougeL\": {\n", + " \"metric_value\": 1.0\n", + " },\n", + " \"rougeLsum\": {\n", + " \"metric_value\": 1.0\n", + " }\n", + " },\n", + " \"sari\": {\n", + " \"metric_value\": 99.12280701754386\n", + " }\n", + "}\n", + "\n", + "\n", + "Evaluating Content generation quality..\n", + "Content generation evaluation results:\n", + "{\n", + " \"flesch\": {\n", + " \"flesch_reading_ease\": {\n", + " \"metric_value\": 64.37,\n", + " \"mean\": 64.37,\n", + " \"min\": 64.37,\n", + " \"max\": 64.37,\n", + " \"std\": 0.0\n", + " },\n", + " \"flesch_kincaid_grade\": {\n", + " \"metric_value\": 6.0,\n", + " \"mean\": 6.0,\n", + " \"min\": 6.0,\n", + " \"max\": 6.0,\n", + " \"std\": 0.0\n", + " }\n", + " },\n", + " \"bleu\": {\n", + " \"precisions\": [\n", + " 0.625,\n", + " 0.5714285714285714,\n", + " 0.5,\n", + " 0.4\n", + " ],\n", + " \"brevity_penalty\": 1.0,\n", + " \"length_ratio\": 1.0,\n", + " \"translation_length\": 8,\n", + " \"reference_length\": 8,\n", + " \"metric_value\": 0.5169731539571706\n", + " },\n", + " \"meteor\": {\n", + " \"metric_value\": 0.6225\n", + " },\n", + " \"normalized_f1\": {\n", + " \"metric_value\": 0.7692307692307692,\n", + " \"mean\": 0.7692307692307692,\n", + " \"min\": 0.7692307692307692,\n", + " \"max\": 0.7692307692307692,\n", + " \"std\": 0.0\n", + " },\n", + " \"normalized_precision\": {\n", + " \"metric_value\": 0.8333333333333334,\n", + " \"mean\": 0.8333333333333334,\n", + " \"min\": 0.8333333333333334,\n", + " \"max\": 0.8333333333333334,\n", + " \"std\": 0.0\n", + " },\n", + " \"normalized_recall\": {\n", + " \"metric_value\": 0.7142857142857143,\n", + " \"mean\": 0.7142857142857143,\n", + " \"min\": 0.7142857142857143,\n", + " \"max\": 0.7142857142857143,\n", + " \"std\": 0.0\n", + " },\n", + " \"rouge_score\": {\n", + " \"rouge1\": {\n", + " \"metric_value\": 0.7143\n", + " },\n", + " \"rouge2\": {\n", + " \"metric_value\": 0.6667\n", + " },\n", + " \"rougeL\": {\n", + " \"metric_value\": 0.7143\n", + " },\n", + " \"rougeLsum\": {\n", + " \"metric_value\": 0.7143\n", + " }\n", + " }\n", + "}\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package wordnet to /home/hadoop/nltk_data...\n", + "[nltk_data] Package wordnet is already up-to-date!\n", + "[nltk_data] Downloading package punkt to /home/hadoop/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package omw-1.4 to /home/hadoop/nltk_data...\n", + "[nltk_data] Package omw-1.4 is already up-to-date!\n" + ] + }, + { + "data": { + "text/plain": [ + "{'content': 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.',\n", + " 'summary': 'Push notifications from apps can drain battery life, users can manage notification settings to reduce battery consumption.',\n", + " 'issue_type': 'BatteryPerformance',\n", + " 'resolution': 'a) Manage background app usage to conserve'}" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "issue = 'Apps that send push notifications at high frequencies can contribute to increased battery consumption. \\\n", + "Users can manage notification settings, disable unnecessary alerts, and set longer intervals for non-essential updates.'\n", + "\n", + "chain.invoke({\"content\" :issue})" + ] + }, + { + "cell_type": "markdown", + "id": "96bbc341", + "metadata": { + "id": "34003e24-2223-4c60-8f15-eabc617956dc" + }, + "source": [ + "Author: ravi.chamarthy@in.ibm.com" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}