diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 0000000000000..746565c4837ff --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,15 @@ +blank_issues_enabled: false +version: 2.1 +contact_links: + - name: 🤔 Question or Problem + about: Ask a question or ask about a problem in GitHub Discussions. + url: https://www.github.com/langchain-ai/langchain/discussions/categories/q-a + - name: Discord + url: https://discord.gg/6adMQxSpJS + about: General community discussions + - name: Feature Request + url: https://www.github.com/langchain-ai/langchain/discussions/categories/ideas + about: Suggest a feature or an idea + - name: Show and tell + about: Show what you built with LangChain + url: https://www.github.com/langchain-ai/langchain/discussions/categories/show-and-tell diff --git a/.github/ISSUE_TEMPLATE/privileged.yml b/.github/ISSUE_TEMPLATE/privileged.yml new file mode 100644 index 0000000000000..787b8bdbab9b4 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/privileged.yml @@ -0,0 +1,25 @@ +name: 🔒 Privileged +description: You are a LangChain maintainer, or were asked directly by a maintainer to create an issue here. If not, check the other options. +body: + - type: markdown + attributes: + value: | + Thanks for your interest in LangChain! 🚀 + + If you are not a LangChain maintainer or were not asked directly by a maintainer to create an issue, then please start the conversation in a [Question in GitHub Discussions](https://github.com/langchain-ai/langchain/discussions/categories/q-a) instead. + + You are a LangChain maintainer if you maintain any of the packages inside of the LangChain repository + or are a regular contributor to LangChain with previous merged pull requests. + - type: checkboxes + id: privileged + attributes: + label: Privileged issue + description: Confirm that you are allowed to create an issue here. + options: + - label: I am a LangChain maintainer, or was asked directly by a LangChain maintainer to create an issue here. + required: true + - type: textarea + id: content + attributes: + label: Issue Content + description: Add the content of the issue here. diff --git a/.github/workflows/.codespell-exclude b/.github/workflows/.codespell-exclude new file mode 100644 index 0000000000000..d74ecbfb99db8 --- /dev/null +++ b/.github/workflows/.codespell-exclude @@ -0,0 +1,7 @@ +libs/community/langchain_community/llms/yuan2.py +"NotIn": "not in", +- `/checkin`: Check-in +docs/docs/integrations/providers/trulens.mdx +self.assertIn( +from trulens_eval import Tru +tru = Tru() diff --git a/.github/workflows/check_new_docs.yml b/.github/workflows/check_new_docs.yml new file mode 100644 index 0000000000000..09fdd4d18e24d --- /dev/null +++ b/.github/workflows/check_new_docs.yml @@ -0,0 +1,31 @@ +--- +name: Integration docs lint + +on: + push: + branches: [master] + pull_request: + +# If another push to the same PR or branch happens while this workflow is still running, +# cancel the earlier run in favor of the next run. +# +# There's no point in testing an outdated version of the code. GitHub only allows +# a limited number of job runners to be active at the same time, so it's better to cancel +# pointless jobs early so that more useful jobs can run sooner.
+concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.10' + - id: files + uses: Ana06/get-changed-files@v2.2.0 + - name: Check new docs + run: | + python docs/scripts/check_templates.py ${{ steps.files.outputs.added }} diff --git a/cookbook/nomic_multimodal_rag.ipynb b/cookbook/nomic_multimodal_rag.ipynb new file mode 100644 index 0000000000000..ba8a77ace2903 --- /dev/null +++ b/cookbook/nomic_multimodal_rag.ipynb @@ -0,0 +1,497 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "id": "9fc3897d-176f-4729-8fd1-cfb4add53abd", + "metadata": {}, + "source": [ + "## Nomic multi-modal RAG\n", + "\n", + "Many documents contain a mixture of content types, including text and images. \n", + "\n", + "Yet, information captured in images is lost in most RAG applications.\n", + "\n", + "With the emergence of multimodal LLMs, like [GPT-4V](https://openai.com/research/gpt-4v-system-card), it is worth considering how to utilize images in RAG:\n", + "\n", + "In this demo we:\n", + "\n", + "* Use multimodal embeddings from Nomic Embed [Vision](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) and [Text](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) to embed images and text\n", + "* Retrieve both using similarity search\n", + "* Pass raw images and text chunks to a multimodal LLM for answer synthesis \n", + "\n", + "## Signup\n", + "\n", + "Get your API token, then run:\n", + "```\n", + "! nomic login\n", + "```\n", + "\n", + "Then run with your generated API token:\n", + "```\n", + "! nomic login < token > \n", + "```\n", + "\n", + "## Packages\n", + "\n", + "For `unstructured`, you will also need `poppler` ([installation instructions](https://pdf2image.readthedocs.io/en/latest/installation.html)) and `tesseract` ([installation instructions](https://tesseract-ocr.github.io/tessdoc/Installation.html)) on your system." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54926b9b-75c2-4cd4-8f14-b3882a0d370b", + "metadata": {}, + "outputs": [], + "source": [ + "! nomic login token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "febbc459-ebba-4c1a-a52b-fed7731593f8", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "! pip install -U langchain-nomic langchain_community tiktoken langchain-openai chromadb langchain # (newest versions required for multi-modal)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acbdc603-39e2-4a5f-836c-2bbaecd46b0b", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "# lock to 0.10.19 due to a persistent bug in more recent versions\n", + "! pip install \"unstructured[all-docs]==0.10.19\" pillow pydantic lxml matplotlib tiktoken" + ] + }, + { + "cell_type": "markdown", + "id": "1e94b3fb-8e3e-4736-be0a-ad881626c7bd", + "metadata": {}, + "source": [ + "## Data Loading\n", + "\n", + "### Partition PDF text and images\n", + " \n", + "Let's look at example PDFs containing interesting images.\n", + "\n", + "1/ Art from the J. Paul Getty Museum:\n", + "\n", + " * Here is a [zip file](https://drive.google.com/file/d/18kRKbq2dqAhhJ3DfZRnYcTBEUfYxe1YR/view?usp=sharing) with the PDF and the already extracted images. 
\n", + "* https://www.getty.edu/publications/resources/virtuallibrary/0892360224.pdf\n", + "\n", + "2/ Famous photographs from library of congress:\n", + "\n", + "* https://www.loc.gov/lcm/pdf/LCM_2020_1112.pdf\n", + "* We'll use this as an example below\n", + "\n", + "We can use `partition_pdf` below from [Unstructured](https://unstructured-io.github.io/unstructured/introduction.html#key-concepts) to extract text and images.\n", + "\n", + "To supply this to extract the images:\n", + "```\n", + "extract_images_in_pdf=True\n", + "```\n", + "\n", + "\n", + "\n", + "If using this zip file, then you can simply process the text only with:\n", + "```\n", + "extract_images_in_pdf=False\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9646b524-71a7-4b2a-bdc8-0b81f77e968f", + "metadata": {}, + "outputs": [], + "source": [ + "# Folder with pdf and extracted images\n", + "from pathlib import Path\n", + "\n", + "# replace with actual path to images\n", + "path = Path(\"../art\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "77f096ab-a933-41d0-8f4e-1efc83998fc3", + "metadata": {}, + "outputs": [], + "source": [ + "path.resolve()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bc4839c0-8773-4a07-ba59-5364501269b2", + "metadata": {}, + "outputs": [], + "source": [ + "# Extract images, tables, and chunk text\n", + "from unstructured.partition.pdf import partition_pdf\n", + "\n", + "raw_pdf_elements = partition_pdf(\n", + " filename=str(path.resolve()) + \"/getty.pdf\",\n", + " extract_images_in_pdf=False,\n", + " infer_table_structure=True,\n", + " chunking_strategy=\"by_title\",\n", + " max_characters=4000,\n", + " new_after_n_chars=3800,\n", + " combine_text_under_n_chars=2000,\n", + " image_output_dir_path=path,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "969545ad", + "metadata": {}, + "outputs": [], + "source": [ + "# Categorize text elements by type\n", + "tables = []\n", + "texts = []\n", + "for element in raw_pdf_elements:\n", + " if \"unstructured.documents.elements.Table\" in str(type(element)):\n", + " tables.append(str(element))\n", + " elif \"unstructured.documents.elements.CompositeElement\" in str(type(element)):\n", + " texts.append(str(element))" + ] + }, + { + "cell_type": "markdown", + "id": "5d8e6349-1547-4cbf-9c6f-491d8610ec10", + "metadata": {}, + "source": [ + "## Multi-modal embeddings with our document\n", + "\n", + "We will use [nomic-embed-vision-v1.5](https://huggingface.co/nomic-ai/nomic-embed-vision-v1.5) embeddings. This model is aligned \n", + "to [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) allowing for multimodal semantic search and Multimodal RAG!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bc15842-cb95-4f84-9eb5-656b0282a800", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import uuid\n", + "\n", + "import chromadb\n", + "import numpy as np\n", + "from langchain_community.vectorstores import Chroma\n", + "from langchain_nomic import NomicEmbeddings\n", + "from PIL import Image as _PILImage\n", + "\n", + "# Create chroma\n", + "text_vectorstore = Chroma(\n", + " collection_name=\"mm_rag_clip_photos_text\",\n", + " embedding_function=NomicEmbeddings(\n", + " vision_model=\"nomic-embed-vision-v1.5\", model=\"nomic-embed-text-v1.5\"\n", + " ),\n", + ")\n", + "image_vectorstore = Chroma(\n", + " collection_name=\"mm_rag_clip_photos_image\",\n", + " embedding_function=NomicEmbeddings(\n", + " vision_model=\"nomic-embed-vision-v1.5\", model=\"nomic-embed-text-v1.5\"\n", + " ),\n", + ")\n", + "\n", + "# Get image URIs with .jpg extension only\n", + "image_uris = sorted(\n", + " [\n", + " os.path.join(path, image_name)\n", + " for image_name in os.listdir(path)\n", + " if image_name.endswith(\".jpg\")\n", + " ]\n", + ")\n", + "\n", + "# Add images\n", + "image_vectorstore.add_images(uris=image_uris)\n", + "\n", + "# Add documents\n", + "text_vectorstore.add_texts(texts=texts)\n", + "\n", + "# Make retriever\n", + "image_retriever = image_vectorstore.as_retriever()\n", + "text_retriever = text_vectorstore.as_retriever()" + ] + }, + { + "cell_type": "markdown", + "id": "02a186d0-27e0-4820-8092-63b5349dd25d", + "metadata": {}, + "source": [ + "## RAG\n", + "\n", + "`vectorstore.add_images` will store / retrieve images as base64 encoded strings.\n", + "\n", + "These can be passed to [GPT-4V](https://platform.openai.com/docs/guides/vision)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "344f56a8-0dc3-433e-851c-3f7600c7a72b", + "metadata": {}, + "outputs": [], + "source": [ + "import base64\n", + "import io\n", + "from io import BytesIO\n", + "\n", + "import numpy as np\n", + "from PIL import Image\n", + "\n", + "\n", + "def resize_base64_image(base64_string, size=(128, 128)):\n", + " \"\"\"\n", + " Resize an image encoded as a Base64 string.\n", + "\n", + " Args:\n", + " base64_string (str): Base64 string of the original image.\n", + " size (tuple): Desired size of the image as (width, height).\n", + "\n", + " Returns:\n", + " str: Base64 string of the resized image.\n", + " \"\"\"\n", + " # Decode the Base64 string\n", + " img_data = base64.b64decode(base64_string)\n", + " img = Image.open(io.BytesIO(img_data))\n", + "\n", + " # Resize the image\n", + " resized_img = img.resize(size, Image.LANCZOS)\n", + "\n", + " # Save the resized image to a bytes buffer\n", + " buffered = io.BytesIO()\n", + " resized_img.save(buffered, format=img.format)\n", + "\n", + " # Encode the resized image to Base64\n", + " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", + "\n", + "\n", + "def is_base64(s):\n", + " \"\"\"Check if a string is Base64 encoded\"\"\"\n", + " try:\n", + " return base64.b64encode(base64.b64decode(s)) == s.encode()\n", + " except Exception:\n", + " return False\n", + "\n", + "\n", + "def split_image_text_types(docs):\n", + " \"\"\"Split numpy array images and texts\"\"\"\n", + " images = []\n", + " text = []\n", + " for doc in docs:\n", + " doc = doc.page_content # Extract Document contents\n", + " if is_base64(doc):\n", + " # Resize image to avoid OAI server error\n", + " images.append(\n", + " resize_base64_image(doc, size=(250, 250))\n", + " ) # 
base64 encoded str\n", + " else:\n", + " text.append(doc)\n", + " return {\"images\": images, \"texts\": text}" + ] + }, + { + "cell_type": "markdown", + "id": "23a2c1d8-fea6-4152-b184-3172dd46c735", + "metadata": {}, + "source": [ + "Currently, we format the inputs using a `RunnableLambda` while we add image support to `ChatPromptTemplates`.\n", + "\n", + "Our runnable follows the classic RAG flow - \n", + "\n", + "* We first compute the context (both \"texts\" and \"images\" in this case) and the question (just a RunnablePassthrough here) \n", + "* Then we pass this into our prompt template, which is a custom function that formats the message for the gpt-4-vision-preview model. \n", + "* And finally we parse the output as a string." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d8919dc-c238-4746-86ba-45d940a7d260", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c93fab3-74c4-4f1d-958a-0bc4cdd0797e", + "metadata": {}, + "outputs": [], + "source": [ + "from operator import itemgetter\n", + "\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "\n", + "def prompt_func(data_dict):\n", + " # Joining the context texts into a single string\n", + " formatted_texts = \"\\n\".join(data_dict[\"text_context\"][\"texts\"])\n", + " messages = []\n", + "\n", + " # Adding image(s) to the messages if present\n", + " if data_dict[\"image_context\"][\"images\"]:\n", + " image_message = {\n", + " \"type\": \"image_url\",\n", + " \"image_url\": {\n", + " \"url\": f\"data:image/jpeg;base64,{data_dict['image_context']['images'][0]}\"\n", + " },\n", + " }\n", + " messages.append(image_message)\n", + "\n", + " # Adding the text message for analysis\n", + " text_message = {\n", + " \"type\": \"text\",\n", + " \"text\": (\n", + " \"As an expert art critic and historian, your task is to analyze and interpret images, \"\n", + " \"considering their historical and cultural significance. Alongside the images, you will be \"\n", + " \"provided with related text to offer context. Both will be retrieved from a vectorstore based \"\n", + " \"on user-input keywords. 
Please use your extensive knowledge and analytical skills to provide a \"\n", + "            \"comprehensive summary that includes:\\n\"\n", + "            \"- A detailed description of the visual elements in the image.\\n\"\n", + "            \"- The historical and cultural context of the image.\\n\"\n", + "            \"- An interpretation of the image's symbolism and meaning.\\n\"\n", + "            \"- Connections between the image and the related text.\\n\\n\"\n", + "            f\"User-provided keywords: {data_dict['question']}\\n\\n\"\n", + "            \"Text and / or tables:\\n\"\n", + "            f\"{formatted_texts}\"\n", + "        ),\n", + "    }\n", + "    messages.append(text_message)\n", + "\n", + "    return [HumanMessage(content=messages)]\n", + "\n", + "\n", + "model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", + "\n", + "# RAG pipeline\n", + "chain = (\n", + "    {\n", + "        \"text_context\": text_retriever | RunnableLambda(split_image_text_types),\n", + "        \"image_context\": image_retriever | RunnableLambda(split_image_text_types),\n", + "        \"question\": RunnablePassthrough(),\n", + "    }\n", + "    | RunnableLambda(prompt_func)\n", + "    | model\n", + "    | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "1566096d-97c2-4ddc-ba4a-6ef88c525e4e", + "metadata": {}, + "source": [ + "## Test retrieval and run RAG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "90121e56-674b-473b-871d-6e4753fd0c45", + "metadata": {}, + "outputs": [], + "source": [ + "from IPython.display import HTML, display\n", + "\n", + "\n", + "def plt_img_base64(img_base64):\n", + "    # Create an HTML img tag with the base64 string as the source\n", + "    image_html = f'<img src=\"data:image/jpeg;base64,{img_base64}\" />'\n", + "\n", + "    # Display the image by rendering the HTML\n", + "    display(HTML(image_html))\n", + "\n", + "\n", + "docs = text_retriever.invoke(\"Women with children\", k=5)\n", + "for doc in docs:\n", + "    if is_base64(doc.page_content):\n", + "        plt_img_base64(doc.page_content)\n", + "    else:\n", + "        print(doc.page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44eaa532-f035-4c04-b578-02339d42554c", + "metadata": {}, + "outputs": [], + "source": [ + "docs = image_retriever.invoke(\"Women with children\", k=5)\n", + "for doc in docs:\n", + "    if is_base64(doc.page_content):\n", + "        plt_img_base64(doc.page_content)\n", + "    else:\n", + "        print(doc.page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69fb15fd-76fc-49b4-806d-c4db2990027d", + "metadata": {}, + "outputs": [], + "source": [ + "chain.invoke(\"Women with children\")" + ] + }, + { + "cell_type": "markdown", + "id": "227f08b8-e732-4089-b65c-6eb6f9e48f15", + "metadata": {}, + "source": [ + "We can see the images retrieved in the LangSmith trace:\n", + "\n", + "LangSmith [trace](https://smith.langchain.com/public/69c558a5-49dc-4c60-a49b-3adbb70f74c5/r/e872c2c8-528c-468f-aefd-8b5cd730a673)."
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/api_reference/templates/runnable_non_pydantic.rst b/docs/api_reference/templates/runnable_non_pydantic.rst new file mode 100644 index 0000000000000..7b9f8681e61d3 --- /dev/null +++ b/docs/api_reference/templates/runnable_non_pydantic.rst @@ -0,0 +1,39 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface `. 🏃 + + +.. currentmodule:: {{ module }} + +.. autoclass:: {{ objname }} + + {% block attributes %} + {% if attributes %} + .. rubric:: {{ _('Attributes') }} + + .. autosummary:: + {% for item in attributes %} + ~{{ name }}.{{ item }} + {%- endfor %} + {% endif %} + {% endblock %} + + {% block methods %} + {% if methods %} + .. rubric:: {{ _('Methods') }} + + .. autosummary:: + {% for item in methods %} + ~{{ name }}.{{ item }} + {%- endfor %} + + {% for item in methods %} + .. automethod:: {{ name }}.{{ item }} + {%- endfor %} + + {% endif %} + {% endblock %} + + +.. example_links:: {{ objname }} diff --git a/docs/api_reference/templates/runnable_pydantic.rst b/docs/api_reference/templates/runnable_pydantic.rst new file mode 100644 index 0000000000000..9532a5da642ee --- /dev/null +++ b/docs/api_reference/templates/runnable_pydantic.rst @@ -0,0 +1,22 @@ +:mod:`{{module}}`.{{objname}} +{{ underline }}============== + +.. NOTE:: {{objname}} implements the standard :py:class:`Runnable Interface `. 🏃 + +.. currentmodule:: {{ module }} + +.. autopydantic_model:: {{ objname }} + :model-show-json: False + :model-show-config-summary: False + :model-show-validator-members: False + :model-show-field-summary: False + :field-signature-prefix: param + :members: + :undoc-members: + :inherited-members: + :member-order: groupwise + :show-inheritance: True + :special-members: __call__ + :exclude-members: construct, copy, dict, from_orm, parse_file, parse_obj, parse_raw, schema, schema_json, update_forward_refs, validate, json, is_lc_serializable, to_json, to_json_not_implemented, lc_secrets, lc_attributes, lc_id, get_lc_namespace, invoke, ainvoke, batch, abatch, batch_as_completed, abatch_as_completed, astream_log, stream, astream, astream_events, transform, atransform, get_output_schema, get_prompts, configurable_fields, configurable_alternatives, config_schema, map, pick, pipe, with_listeners, with_alisteners, with_config, with_fallbacks, with_types, with_retry, InputType, OutputType, config_specs, output_schema, get_input_schema, get_graph, get_name, input_schema, name, bind, assign + +.. 
example_links:: {{ objname }} diff --git a/docs/docs/additional_resources/dependents.mdx b/docs/docs/additional_resources/dependents.mdx new file mode 100644 index 0000000000000..a09df5027ecdc --- /dev/null +++ b/docs/docs/additional_resources/dependents.mdx @@ -0,0 +1,554 @@ +# Dependents + +Dependents stats for `langchain-ai/langchain` + +[![](https://img.shields.io/static/v1?label=Used%20by&message=41717&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents) +[![](https://img.shields.io/static/v1?label=Used%20by%20(public)&message=538&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents) +[![](https://img.shields.io/static/v1?label=Used%20by%20(private)&message=41179&color=informational&logo=slickpic)](https://github.com/langchain-ai/langchain/network/dependents) + + +[update: `2023-12-08`; only dependent repositories with Stars > 100] + + +| Repository | Stars | +| :-------- | -----: | +|[AntonOsika/gpt-engineer](https://github.com/AntonOsika/gpt-engineer) | 46514 | +|[imartinez/privateGPT](https://github.com/imartinez/privateGPT) | 44439 | +|[LAION-AI/Open-Assistant](https://github.com/LAION-AI/Open-Assistant) | 35906 | +|[hpcaitech/ColossalAI](https://github.com/hpcaitech/ColossalAI) | 35528 | +|[moymix/TaskMatrix](https://github.com/moymix/TaskMatrix) | 34342 | +|[geekan/MetaGPT](https://github.com/geekan/MetaGPT) | 31126 | +|[streamlit/streamlit](https://github.com/streamlit/streamlit) | 28911 | +|[reworkd/AgentGPT](https://github.com/reworkd/AgentGPT) | 27833 | +|[StanGirard/quivr](https://github.com/StanGirard/quivr) | 26032 | +|[OpenBB-finance/OpenBBTerminal](https://github.com/OpenBB-finance/OpenBBTerminal) | 24946 | +|[run-llama/llama_index](https://github.com/run-llama/llama_index) | 24859 | +|[jmorganca/ollama](https://github.com/jmorganca/ollama) | 20849 | +|[openai/chatgpt-retrieval-plugin](https://github.com/openai/chatgpt-retrieval-plugin) | 20249 | +|[chatchat-space/Langchain-Chatchat](https://github.com/chatchat-space/Langchain-Chatchat) | 19305 | +|[mindsdb/mindsdb](https://github.com/mindsdb/mindsdb) | 19172 | +|[PromtEngineer/localGPT](https://github.com/PromtEngineer/localGPT) | 17528 | +|[cube-js/cube](https://github.com/cube-js/cube) | 16575 | +|[mlflow/mlflow](https://github.com/mlflow/mlflow) | 16000 | +|[mudler/LocalAI](https://github.com/mudler/LocalAI) | 14067 | +|[logspace-ai/langflow](https://github.com/logspace-ai/langflow) | 13679 | +|[GaiZhenbiao/ChuanhuChatGPT](https://github.com/GaiZhenbiao/ChuanhuChatGPT) | 13648 | +|[arc53/DocsGPT](https://github.com/arc53/DocsGPT) | 13423 | +|[openai/evals](https://github.com/openai/evals) | 12649 | +|[airbytehq/airbyte](https://github.com/airbytehq/airbyte) | 12460 | +|[langgenius/dify](https://github.com/langgenius/dify) | 11859 | +|[databrickslabs/dolly](https://github.com/databrickslabs/dolly) | 10672 | +|[AIGC-Audio/AudioGPT](https://github.com/AIGC-Audio/AudioGPT) | 9437 | +|[langchain-ai/langchainjs](https://github.com/langchain-ai/langchainjs) | 9227 | +|[gventuri/pandas-ai](https://github.com/gventuri/pandas-ai) | 9203 | +|[aws/amazon-sagemaker-examples](https://github.com/aws/amazon-sagemaker-examples) | 9079 | +|[h2oai/h2ogpt](https://github.com/h2oai/h2ogpt) | 8945 | +|[PipedreamHQ/pipedream](https://github.com/PipedreamHQ/pipedream) | 7550 | +|[bentoml/OpenLLM](https://github.com/bentoml/OpenLLM) | 6957 | +|[THUDM/ChatGLM3](https://github.com/THUDM/ChatGLM3) | 6801 | 
+|[microsoft/promptflow](https://github.com/microsoft/promptflow) | 6776 | +|[cpacker/MemGPT](https://github.com/cpacker/MemGPT) | 6642 | +|[joshpxyne/gpt-migrate](https://github.com/joshpxyne/gpt-migrate) | 6482 | +|[zauberzeug/nicegui](https://github.com/zauberzeug/nicegui) | 6037 | +|[embedchain/embedchain](https://github.com/embedchain/embedchain) | 6023 | +|[mage-ai/mage-ai](https://github.com/mage-ai/mage-ai) | 6019 | +|[assafelovic/gpt-researcher](https://github.com/assafelovic/gpt-researcher) | 5936 | +|[sweepai/sweep](https://github.com/sweepai/sweep) | 5855 | +|[wenda-LLM/wenda](https://github.com/wenda-LLM/wenda) | 5766 | +|[zilliztech/GPTCache](https://github.com/zilliztech/GPTCache) | 5710 | +|[pdm-project/pdm](https://github.com/pdm-project/pdm) | 5665 | +|[GreyDGL/PentestGPT](https://github.com/GreyDGL/PentestGPT) | 5568 | +|[gkamradt/langchain-tutorials](https://github.com/gkamradt/langchain-tutorials) | 5507 | +|[Shaunwei/RealChar](https://github.com/Shaunwei/RealChar) | 5501 | +|[facebookresearch/llama-recipes](https://github.com/facebookresearch/llama-recipes) | 5477 | +|[serge-chat/serge](https://github.com/serge-chat/serge) | 5221 | +|[run-llama/rags](https://github.com/run-llama/rags) | 4916 | +|[openchatai/OpenChat](https://github.com/openchatai/OpenChat) | 4870 | +|[danswer-ai/danswer](https://github.com/danswer-ai/danswer) | 4774 | +|[langchain-ai/opengpts](https://github.com/langchain-ai/opengpts) | 4709 | +|[postgresml/postgresml](https://github.com/postgresml/postgresml) | 4639 | +|[MineDojo/Voyager](https://github.com/MineDojo/Voyager) | 4582 | +|[intel-analytics/BigDL](https://github.com/intel-analytics/BigDL) | 4581 | +|[yihong0618/xiaogpt](https://github.com/yihong0618/xiaogpt) | 4359 | +|[RayVentura/ShortGPT](https://github.com/RayVentura/ShortGPT) | 4357 | +|[Azure-Samples/azure-search-openai-demo](https://github.com/Azure-Samples/azure-search-openai-demo) | 4317 | +|[madawei2699/myGPTReader](https://github.com/madawei2699/myGPTReader) | 4289 | +|[apache/nifi](https://github.com/apache/nifi) | 4098 | +|[langchain-ai/chat-langchain](https://github.com/langchain-ai/chat-langchain) | 4091 | +|[aiwaves-cn/agents](https://github.com/aiwaves-cn/agents) | 4073 | +|[krishnaik06/The-Grand-Complete-Data-Science-Materials](https://github.com/krishnaik06/The-Grand-Complete-Data-Science-Materials) | 4065 | +|[khoj-ai/khoj](https://github.com/khoj-ai/khoj) | 4016 | +|[Azure/azure-sdk-for-python](https://github.com/Azure/azure-sdk-for-python) | 3941 | +|[PrefectHQ/marvin](https://github.com/PrefectHQ/marvin) | 3915 | +|[OpenBMB/ToolBench](https://github.com/OpenBMB/ToolBench) | 3799 | +|[marqo-ai/marqo](https://github.com/marqo-ai/marqo) | 3771 | +|[kyegomez/tree-of-thoughts](https://github.com/kyegomez/tree-of-thoughts) | 3688 | +|[Unstructured-IO/unstructured](https://github.com/Unstructured-IO/unstructured) | 3543 | +|[llm-workflow-engine/llm-workflow-engine](https://github.com/llm-workflow-engine/llm-workflow-engine) | 3515 | +|[shroominic/codeinterpreter-api](https://github.com/shroominic/codeinterpreter-api) | 3425 | +|[openchatai/OpenCopilot](https://github.com/openchatai/OpenCopilot) | 3418 | +|[josStorer/RWKV-Runner](https://github.com/josStorer/RWKV-Runner) | 3297 | +|[whitead/paper-qa](https://github.com/whitead/paper-qa) | 3280 | +|[homanp/superagent](https://github.com/homanp/superagent) | 3258 | +|[ParisNeo/lollms-webui](https://github.com/ParisNeo/lollms-webui) | 3199 | +|[OpenBMB/AgentVerse](https://github.com/OpenBMB/AgentVerse) | 3099 | 
+|[project-baize/baize-chatbot](https://github.com/project-baize/baize-chatbot) | 3090 | +|[OpenGVLab/InternGPT](https://github.com/OpenGVLab/InternGPT) | 2989 | +|[xlang-ai/OpenAgents](https://github.com/xlang-ai/OpenAgents) | 2825 | +|[dataelement/bisheng](https://github.com/dataelement/bisheng) | 2797 | +|[Mintplex-Labs/anything-llm](https://github.com/Mintplex-Labs/anything-llm) | 2784 | +|[OpenBMB/BMTools](https://github.com/OpenBMB/BMTools) | 2734 | +|[run-llama/llama-hub](https://github.com/run-llama/llama-hub) | 2721 | +|[SamurAIGPT/EmbedAI](https://github.com/SamurAIGPT/EmbedAI) | 2647 | +|[NVIDIA/NeMo-Guardrails](https://github.com/NVIDIA/NeMo-Guardrails) | 2637 | +|[X-D-Lab/LangChain-ChatGLM-Webui](https://github.com/X-D-Lab/LangChain-ChatGLM-Webui) | 2532 | +|[GerevAI/gerev](https://github.com/GerevAI/gerev) | 2517 | +|[keephq/keep](https://github.com/keephq/keep) | 2448 | +|[yanqiangmiffy/Chinese-LangChain](https://github.com/yanqiangmiffy/Chinese-LangChain) | 2397 | +|[OpenGVLab/Ask-Anything](https://github.com/OpenGVLab/Ask-Anything) | 2324 | +|[IntelligenzaArtificiale/Free-Auto-GPT](https://github.com/IntelligenzaArtificiale/Free-Auto-GPT) | 2241 | +|[YiVal/YiVal](https://github.com/YiVal/YiVal) | 2232 | +|[jupyterlab/jupyter-ai](https://github.com/jupyterlab/jupyter-ai) | 2189 | +|[Farama-Foundation/PettingZoo](https://github.com/Farama-Foundation/PettingZoo) | 2136 | +|[microsoft/TaskWeaver](https://github.com/microsoft/TaskWeaver) | 2126 | +|[hwchase17/notion-qa](https://github.com/hwchase17/notion-qa) | 2083 | +|[FlagOpen/FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding) | 2053 | +|[paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) | 1999 | +|[hegelai/prompttools](https://github.com/hegelai/prompttools) | 1984 | +|[mckinsey/vizro](https://github.com/mckinsey/vizro) | 1951 | +|[vocodedev/vocode-python](https://github.com/vocodedev/vocode-python) | 1868 | +|[dot-agent/openAMS](https://github.com/dot-agent/openAMS) | 1796 | +|[explodinggradients/ragas](https://github.com/explodinggradients/ragas) | 1766 | +|[AI-Citizen/SolidGPT](https://github.com/AI-Citizen/SolidGPT) | 1761 | +|[Kav-K/GPTDiscord](https://github.com/Kav-K/GPTDiscord) | 1696 | +|[run-llama/sec-insights](https://github.com/run-llama/sec-insights) | 1654 | +|[avinashkranjan/Amazing-Python-Scripts](https://github.com/avinashkranjan/Amazing-Python-Scripts) | 1635 | +|[microsoft/WhatTheHack](https://github.com/microsoft/WhatTheHack) | 1629 | +|[noahshinn/reflexion](https://github.com/noahshinn/reflexion) | 1625 | +|[psychic-api/psychic](https://github.com/psychic-api/psychic) | 1618 | +|[Forethought-Technologies/AutoChain](https://github.com/Forethought-Technologies/AutoChain) | 1611 | +|[pinterest/querybook](https://github.com/pinterest/querybook) | 1586 | +|[refuel-ai/autolabel](https://github.com/refuel-ai/autolabel) | 1553 | +|[jina-ai/langchain-serve](https://github.com/jina-ai/langchain-serve) | 1537 | +|[jina-ai/dev-gpt](https://github.com/jina-ai/dev-gpt) | 1522 | +|[agiresearch/OpenAGI](https://github.com/agiresearch/OpenAGI) | 1493 | +|[ttengwang/Caption-Anything](https://github.com/ttengwang/Caption-Anything) | 1484 | +|[greshake/llm-security](https://github.com/greshake/llm-security) | 1483 | +|[promptfoo/promptfoo](https://github.com/promptfoo/promptfoo) | 1480 | +|[milvus-io/bootcamp](https://github.com/milvus-io/bootcamp) | 1477 | +|[richardyc/Chrome-GPT](https://github.com/richardyc/Chrome-GPT) | 1475 | +|[melih-unsal/DemoGPT](https://github.com/melih-unsal/DemoGPT) | 1428 | 
+|[YORG-AI/Open-Assistant](https://github.com/YORG-AI/Open-Assistant) | 1419 | +|[101dotxyz/GPTeam](https://github.com/101dotxyz/GPTeam) | 1416 | +|[jina-ai/thinkgpt](https://github.com/jina-ai/thinkgpt) | 1408 | +|[mmz-001/knowledge_gpt](https://github.com/mmz-001/knowledge_gpt) | 1398 | +|[intel/intel-extension-for-transformers](https://github.com/intel/intel-extension-for-transformers) | 1387 | +|[Azure/azureml-examples](https://github.com/Azure/azureml-examples) | 1385 | +|[lunasec-io/lunasec](https://github.com/lunasec-io/lunasec) | 1367 | +|[eyurtsev/kor](https://github.com/eyurtsev/kor) | 1355 | +|[xusenlinzy/api-for-open-llm](https://github.com/xusenlinzy/api-for-open-llm) | 1325 | +|[griptape-ai/griptape](https://github.com/griptape-ai/griptape) | 1323 | +|[SuperDuperDB/superduperdb](https://github.com/SuperDuperDB/superduperdb) | 1290 | +|[cofactoryai/textbase](https://github.com/cofactoryai/textbase) | 1284 | +|[psychic-api/rag-stack](https://github.com/psychic-api/rag-stack) | 1260 | +|[filip-michalsky/SalesGPT](https://github.com/filip-michalsky/SalesGPT) | 1250 | +|[nod-ai/SHARK](https://github.com/nod-ai/SHARK) | 1237 | +|[pluralsh/plural](https://github.com/pluralsh/plural) | 1234 | +|[cheshire-cat-ai/core](https://github.com/cheshire-cat-ai/core) | 1194 | +|[LC1332/Chat-Haruhi-Suzumiya](https://github.com/LC1332/Chat-Haruhi-Suzumiya) | 1184 | +|[poe-platform/server-bot-quick-start](https://github.com/poe-platform/server-bot-quick-start) | 1182 | +|[microsoft/X-Decoder](https://github.com/microsoft/X-Decoder) | 1180 | +|[juncongmoo/chatllama](https://github.com/juncongmoo/chatllama) | 1171 | +|[visual-openllm/visual-openllm](https://github.com/visual-openllm/visual-openllm) | 1156 | +|[alejandro-ao/ask-multiple-pdfs](https://github.com/alejandro-ao/ask-multiple-pdfs) | 1153 | +|[ThousandBirdsInc/chidori](https://github.com/ThousandBirdsInc/chidori) | 1152 | +|[irgolic/AutoPR](https://github.com/irgolic/AutoPR) | 1137 | +|[SamurAIGPT/Camel-AutoGPT](https://github.com/SamurAIGPT/Camel-AutoGPT) | 1083 | +|[ray-project/llm-applications](https://github.com/ray-project/llm-applications) | 1080 | +|[run-llama/llama-lab](https://github.com/run-llama/llama-lab) | 1072 | +|[jiran214/GPT-vup](https://github.com/jiran214/GPT-vup) | 1041 | +|[MetaGLM/FinGLM](https://github.com/MetaGLM/FinGLM) | 1035 | +|[peterw/Chat-with-Github-Repo](https://github.com/peterw/Chat-with-Github-Repo) | 1020 | +|[Anil-matcha/ChatPDF](https://github.com/Anil-matcha/ChatPDF) | 991 | +|[langchain-ai/langserve](https://github.com/langchain-ai/langserve) | 983 | +|[THUDM/AgentTuning](https://github.com/THUDM/AgentTuning) | 976 | +|[rlancemartin/auto-evaluator](https://github.com/rlancemartin/auto-evaluator) | 975 | +|[codeacme17/examor](https://github.com/codeacme17/examor) | 964 | +|[all-in-aigc/gpts-works](https://github.com/all-in-aigc/gpts-works) | 946 | +|[Ikaros-521/AI-Vtuber](https://github.com/Ikaros-521/AI-Vtuber) | 946 | +|[microsoft/Llama-2-Onnx](https://github.com/microsoft/Llama-2-Onnx) | 898 | +|[cirediatpl/FigmaChain](https://github.com/cirediatpl/FigmaChain) | 895 | +|[ricklamers/shell-ai](https://github.com/ricklamers/shell-ai) | 893 | +|[modelscope/modelscope-agent](https://github.com/modelscope/modelscope-agent) | 893 | +|[seanpixel/Teenage-AGI](https://github.com/seanpixel/Teenage-AGI) | 886 | +|[ajndkr/lanarky](https://github.com/ajndkr/lanarky) | 880 | +|[kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference](https://github.com/kennethleungty/Llama-2-Open-Source-LLM-CPU-Inference) | 872 | 
+|[corca-ai/EVAL](https://github.com/corca-ai/EVAL) | 846 | +|[hwchase17/chat-your-data](https://github.com/hwchase17/chat-your-data) | 841 | +|[kreneskyp/ix](https://github.com/kreneskyp/ix) | 821 | +|[Link-AGI/AutoAgents](https://github.com/Link-AGI/AutoAgents) | 820 | +|[truera/trulens](https://github.com/truera/trulens) | 794 | +|[Dataherald/dataherald](https://github.com/Dataherald/dataherald) | 788 | +|[sunlabuiuc/PyHealth](https://github.com/sunlabuiuc/PyHealth) | 783 | +|[jondurbin/airoboros](https://github.com/jondurbin/airoboros) | 783 | +|[pyspark-ai/pyspark-ai](https://github.com/pyspark-ai/pyspark-ai) | 782 | +|[confident-ai/deepeval](https://github.com/confident-ai/deepeval) | 780 | +|[billxbf/ReWOO](https://github.com/billxbf/ReWOO) | 777 | +|[langchain-ai/streamlit-agent](https://github.com/langchain-ai/streamlit-agent) | 776 | +|[akshata29/entaoai](https://github.com/akshata29/entaoai) | 771 | +|[LambdaLabsML/examples](https://github.com/LambdaLabsML/examples) | 770 | +|[getmetal/motorhead](https://github.com/getmetal/motorhead) | 768 | +|[Dicklesworthstone/swiss_army_llama](https://github.com/Dicklesworthstone/swiss_army_llama) | 757 | +|[ruoccofabrizio/azure-open-ai-embeddings-qna](https://github.com/ruoccofabrizio/azure-open-ai-embeddings-qna) | 757 | +|[msoedov/langcorn](https://github.com/msoedov/langcorn) | 754 | +|[e-johnstonn/BriefGPT](https://github.com/e-johnstonn/BriefGPT) | 753 | +|[microsoft/sample-app-aoai-chatGPT](https://github.com/microsoft/sample-app-aoai-chatGPT) | 749 | +|[explosion/spacy-llm](https://github.com/explosion/spacy-llm) | 731 | +|[MiuLab/Taiwan-LLM](https://github.com/MiuLab/Taiwan-LLM) | 716 | +|[whyiyhw/chatgpt-wechat](https://github.com/whyiyhw/chatgpt-wechat) | 702 | +|[Azure-Samples/openai](https://github.com/Azure-Samples/openai) | 692 | +|[iusztinpaul/hands-on-llms](https://github.com/iusztinpaul/hands-on-llms) | 687 | +|[safevideo/autollm](https://github.com/safevideo/autollm) | 682 | +|[OpenGenerativeAI/GenossGPT](https://github.com/OpenGenerativeAI/GenossGPT) | 669 | +|[NoDataFound/hackGPT](https://github.com/NoDataFound/hackGPT) | 663 | +|[AILab-CVC/GPT4Tools](https://github.com/AILab-CVC/GPT4Tools) | 662 | +|[langchain-ai/auto-evaluator](https://github.com/langchain-ai/auto-evaluator) | 657 | +|[yvann-ba/Robby-chatbot](https://github.com/yvann-ba/Robby-chatbot) | 639 | +|[alexanderatallah/window.ai](https://github.com/alexanderatallah/window.ai) | 635 | +|[amosjyng/langchain-visualizer](https://github.com/amosjyng/langchain-visualizer) | 630 | +|[microsoft/PodcastCopilot](https://github.com/microsoft/PodcastCopilot) | 621 | +|[aws-samples/aws-genai-llm-chatbot](https://github.com/aws-samples/aws-genai-llm-chatbot) | 616 | +|[NeumTry/NeumAI](https://github.com/NeumTry/NeumAI) | 605 | +|[namuan/dr-doc-search](https://github.com/namuan/dr-doc-search) | 599 | +|[plastic-labs/tutor-gpt](https://github.com/plastic-labs/tutor-gpt) | 595 | +|[marimo-team/marimo](https://github.com/marimo-team/marimo) | 591 | +|[yakami129/VirtualWife](https://github.com/yakami129/VirtualWife) | 586 | +|[xuwenhao/geektime-ai-course](https://github.com/xuwenhao/geektime-ai-course) | 584 | +|[jonra1993/fastapi-alembic-sqlmodel-async](https://github.com/jonra1993/fastapi-alembic-sqlmodel-async) | 573 | +|[dgarnitz/vectorflow](https://github.com/dgarnitz/vectorflow) | 568 | +|[yeagerai/yeagerai-agent](https://github.com/yeagerai/yeagerai-agent) | 564 | +|[daveebbelaar/langchain-experiments](https://github.com/daveebbelaar/langchain-experiments) | 563 | 
+|[traceloop/openllmetry](https://github.com/traceloop/openllmetry) | 559 | +|[Agenta-AI/agenta](https://github.com/Agenta-AI/agenta) | 546 | +|[michaelthwan/searchGPT](https://github.com/michaelthwan/searchGPT) | 545 | +|[jina-ai/agentchain](https://github.com/jina-ai/agentchain) | 544 | +|[mckaywrigley/repo-chat](https://github.com/mckaywrigley/repo-chat) | 533 | +|[marella/chatdocs](https://github.com/marella/chatdocs) | 532 | +|[opentensor/bittensor](https://github.com/opentensor/bittensor) | 532 | +|[DjangoPeng/openai-quickstart](https://github.com/DjangoPeng/openai-quickstart) | 527 | +|[freddyaboulton/gradio-tools](https://github.com/freddyaboulton/gradio-tools) | 517 | +|[sidhq/Multi-GPT](https://github.com/sidhq/Multi-GPT) | 515 | +|[alejandro-ao/langchain-ask-pdf](https://github.com/alejandro-ao/langchain-ask-pdf) | 514 | +|[sajjadium/ctf-archives](https://github.com/sajjadium/ctf-archives) | 507 | +|[continuum-llms/chatgpt-memory](https://github.com/continuum-llms/chatgpt-memory) | 502 | +|[steamship-core/steamship-langchain](https://github.com/steamship-core/steamship-langchain) | 494 | +|[mpaepper/content-chatbot](https://github.com/mpaepper/content-chatbot) | 493 | +|[langchain-ai/langchain-aiplugin](https://github.com/langchain-ai/langchain-aiplugin) | 492 | +|[logan-markewich/llama_index_starter_pack](https://github.com/logan-markewich/llama_index_starter_pack) | 483 | +|[datawhalechina/llm-universe](https://github.com/datawhalechina/llm-universe) | 475 | +|[leondz/garak](https://github.com/leondz/garak) | 464 | +|[RedisVentures/ArXivChatGuru](https://github.com/RedisVentures/ArXivChatGuru) | 461 | +|[Anil-matcha/Chatbase](https://github.com/Anil-matcha/Chatbase) | 455 | +|[Aiyu-awa/luna-ai](https://github.com/Aiyu-awa/luna-ai) | 450 | +|[DataDog/dd-trace-py](https://github.com/DataDog/dd-trace-py) | 450 | +|[Azure-Samples/miyagi](https://github.com/Azure-Samples/miyagi) | 449 | +|[poe-platform/poe-protocol](https://github.com/poe-platform/poe-protocol) | 447 | +|[onlyphantom/llm-python](https://github.com/onlyphantom/llm-python) | 446 | +|[junruxiong/IncarnaMind](https://github.com/junruxiong/IncarnaMind) | 441 | +|[CarperAI/OpenELM](https://github.com/CarperAI/OpenELM) | 441 | +|[daodao97/chatdoc](https://github.com/daodao97/chatdoc) | 437 | +|[showlab/VLog](https://github.com/showlab/VLog) | 436 | +|[wandb/weave](https://github.com/wandb/weave) | 420 | +|[QwenLM/Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) | 419 | +|[huchenxucs/ChatDB](https://github.com/huchenxucs/ChatDB) | 416 | +|[jerlendds/osintbuddy](https://github.com/jerlendds/osintbuddy) | 411 | +|[monarch-initiative/ontogpt](https://github.com/monarch-initiative/ontogpt) | 408 | +|[mallorbc/Finetune_LLMs](https://github.com/mallorbc/Finetune_LLMs) | 406 | +|[JayZeeDesign/researcher-gpt](https://github.com/JayZeeDesign/researcher-gpt) | 405 | +|[rsaryev/talk-codebase](https://github.com/rsaryev/talk-codebase) | 401 | +|[langchain-ai/langsmith-cookbook](https://github.com/langchain-ai/langsmith-cookbook) | 398 | +|[mtenenholtz/chat-twitter](https://github.com/mtenenholtz/chat-twitter) | 398 | +|[morpheuslord/GPT_Vuln-analyzer](https://github.com/morpheuslord/GPT_Vuln-analyzer) | 391 | +|[MagnivOrg/prompt-layer-library](https://github.com/MagnivOrg/prompt-layer-library) | 387 | +|[JohnSnowLabs/langtest](https://github.com/JohnSnowLabs/langtest) | 384 | +|[mrwadams/attackgen](https://github.com/mrwadams/attackgen) | 381 | +|[codefuse-ai/Test-Agent](https://github.com/codefuse-ai/Test-Agent) | 380 | 
+|[personoids/personoids-lite](https://github.com/personoids/personoids-lite) | 379 | +|[mosaicml/examples](https://github.com/mosaicml/examples) | 378 | +|[steamship-packages/langchain-production-starter](https://github.com/steamship-packages/langchain-production-starter) | 370 | +|[FlagAI-Open/Aquila2](https://github.com/FlagAI-Open/Aquila2) | 365 | +|[Mintplex-Labs/vector-admin](https://github.com/Mintplex-Labs/vector-admin) | 365 | +|[NimbleBoxAI/ChainFury](https://github.com/NimbleBoxAI/ChainFury) | 357 | +|[BlackHC/llm-strategy](https://github.com/BlackHC/llm-strategy) | 354 | +|[lilacai/lilac](https://github.com/lilacai/lilac) | 352 | +|[preset-io/promptimize](https://github.com/preset-io/promptimize) | 351 | +|[yuanjie-ai/ChatLLM](https://github.com/yuanjie-ai/ChatLLM) | 347 | +|[andylokandy/gpt-4-search](https://github.com/andylokandy/gpt-4-search) | 346 | +|[zhoudaquan/ChatAnything](https://github.com/zhoudaquan/ChatAnything) | 343 | +|[rgomezcasas/dotfiles](https://github.com/rgomezcasas/dotfiles) | 343 | +|[tigerlab-ai/tiger](https://github.com/tigerlab-ai/tiger) | 342 | +|[HumanSignal/label-studio-ml-backend](https://github.com/HumanSignal/label-studio-ml-backend) | 334 | +|[nasa-petal/bidara](https://github.com/nasa-petal/bidara) | 334 | +|[momegas/megabots](https://github.com/momegas/megabots) | 334 | +|[Cheems-Seminar/grounded-segment-any-parts](https://github.com/Cheems-Seminar/grounded-segment-any-parts) | 330 | +|[CambioML/pykoi](https://github.com/CambioML/pykoi) | 326 | +|[Nuggt-dev/Nuggt](https://github.com/Nuggt-dev/Nuggt) | 326 | +|[wandb/edu](https://github.com/wandb/edu) | 326 | +|[Haste171/langchain-chatbot](https://github.com/Haste171/langchain-chatbot) | 324 | +|[sugarforever/LangChain-Tutorials](https://github.com/sugarforever/LangChain-Tutorials) | 322 | +|[liangwq/Chatglm_lora_multi-gpu](https://github.com/liangwq/Chatglm_lora_multi-gpu) | 321 | +|[ur-whitelab/chemcrow-public](https://github.com/ur-whitelab/chemcrow-public) | 320 | +|[itamargol/openai](https://github.com/itamargol/openai) | 318 | +|[gia-guar/JARVIS-ChatGPT](https://github.com/gia-guar/JARVIS-ChatGPT) | 304 | +|[SpecterOps/Nemesis](https://github.com/SpecterOps/Nemesis) | 302 | +|[facebookresearch/personal-timeline](https://github.com/facebookresearch/personal-timeline) | 302 | +|[hnawaz007/pythondataanalysis](https://github.com/hnawaz007/pythondataanalysis) | 301 | +|[Chainlit/cookbook](https://github.com/Chainlit/cookbook) | 300 | +|[airobotlab/KoChatGPT](https://github.com/airobotlab/KoChatGPT) | 300 | +|[GPT-Fathom/GPT-Fathom](https://github.com/GPT-Fathom/GPT-Fathom) | 299 | +|[kaarthik108/snowChat](https://github.com/kaarthik108/snowChat) | 299 | +|[kyegomez/swarms](https://github.com/kyegomez/swarms) | 296 | +|[LangStream/langstream](https://github.com/LangStream/langstream) | 295 | +|[genia-dev/GeniA](https://github.com/genia-dev/GeniA) | 294 | +|[shamspias/customizable-gpt-chatbot](https://github.com/shamspias/customizable-gpt-chatbot) | 291 | +|[TsinghuaDatabaseGroup/DB-GPT](https://github.com/TsinghuaDatabaseGroup/DB-GPT) | 290 | +|[conceptofmind/toolformer](https://github.com/conceptofmind/toolformer) | 283 | +|[sullivan-sean/chat-langchainjs](https://github.com/sullivan-sean/chat-langchainjs) | 283 | +|[AutoPackAI/beebot](https://github.com/AutoPackAI/beebot) | 282 | +|[pablomarin/GPT-Azure-Search-Engine](https://github.com/pablomarin/GPT-Azure-Search-Engine) | 282 | +|[gkamradt/LLMTest_NeedleInAHaystack](https://github.com/gkamradt/LLMTest_NeedleInAHaystack) | 280 | 
+|[gustavz/DataChad](https://github.com/gustavz/DataChad) | 280 | +|[Safiullah-Rahu/CSV-AI](https://github.com/Safiullah-Rahu/CSV-AI) | 278 | +|[hwchase17/chroma-langchain](https://github.com/hwchase17/chroma-langchain) | 275 | +|[AkshitIreddy/Interactive-LLM-Powered-NPCs](https://github.com/AkshitIreddy/Interactive-LLM-Powered-NPCs) | 268 | +|[ennucore/clippinator](https://github.com/ennucore/clippinator) | 267 | +|[artitw/text2text](https://github.com/artitw/text2text) | 264 | +|[anarchy-ai/LLM-VM](https://github.com/anarchy-ai/LLM-VM) | 263 | +|[wpydcr/LLM-Kit](https://github.com/wpydcr/LLM-Kit) | 262 | +|[streamlit/llm-examples](https://github.com/streamlit/llm-examples) | 262 | +|[paolorechia/learn-langchain](https://github.com/paolorechia/learn-langchain) | 262 | +|[yym68686/ChatGPT-Telegram-Bot](https://github.com/yym68686/ChatGPT-Telegram-Bot) | 261 | +|[PradipNichite/Youtube-Tutorials](https://github.com/PradipNichite/Youtube-Tutorials) | 259 | +|[radi-cho/datasetGPT](https://github.com/radi-cho/datasetGPT) | 259 | +|[ur-whitelab/exmol](https://github.com/ur-whitelab/exmol) | 259 | +|[ml6team/fondant](https://github.com/ml6team/fondant) | 254 | +|[bborn/howdoi.ai](https://github.com/bborn/howdoi.ai) | 254 | +|[rahulnyk/knowledge_graph](https://github.com/rahulnyk/knowledge_graph) | 253 | +|[recalign/RecAlign](https://github.com/recalign/RecAlign) | 248 | +|[hwchase17/langchain-streamlit-template](https://github.com/hwchase17/langchain-streamlit-template) | 248 | +|[fetchai/uAgents](https://github.com/fetchai/uAgents) | 247 | +|[arthur-ai/bench](https://github.com/arthur-ai/bench) | 247 | +|[miaoshouai/miaoshouai-assistant](https://github.com/miaoshouai/miaoshouai-assistant) | 246 | +|[RoboCoachTechnologies/GPT-Synthesizer](https://github.com/RoboCoachTechnologies/GPT-Synthesizer) | 244 | +|[langchain-ai/web-explorer](https://github.com/langchain-ai/web-explorer) | 242 | +|[kaleido-lab/dolphin](https://github.com/kaleido-lab/dolphin) | 242 | +|[PJLab-ADG/DriveLikeAHuman](https://github.com/PJLab-ADG/DriveLikeAHuman) | 241 | +|[stepanogil/autonomous-hr-chatbot](https://github.com/stepanogil/autonomous-hr-chatbot) | 238 | +|[WongSaang/chatgpt-ui-server](https://github.com/WongSaang/chatgpt-ui-server) | 236 | +|[nexus-stc/stc](https://github.com/nexus-stc/stc) | 235 | +|[yeagerai/genworlds](https://github.com/yeagerai/genworlds) | 235 | +|[Gentopia-AI/Gentopia](https://github.com/Gentopia-AI/Gentopia) | 235 | +|[alphasecio/langchain-examples](https://github.com/alphasecio/langchain-examples) | 235 | +|[grumpyp/aixplora](https://github.com/grumpyp/aixplora) | 232 | +|[shaman-ai/agent-actors](https://github.com/shaman-ai/agent-actors) | 232 | +|[darrenburns/elia](https://github.com/darrenburns/elia) | 231 | +|[orgexyz/BlockAGI](https://github.com/orgexyz/BlockAGI) | 231 | +|[handrew/browserpilot](https://github.com/handrew/browserpilot) | 226 | +|[su77ungr/CASALIOY](https://github.com/su77ungr/CASALIOY) | 225 | +|[nicknochnack/LangchainDocuments](https://github.com/nicknochnack/LangchainDocuments) | 225 | +|[dbpunk-labs/octogen](https://github.com/dbpunk-labs/octogen) | 224 | +|[langchain-ai/weblangchain](https://github.com/langchain-ai/weblangchain) | 222 | +|[CL-lau/SQL-GPT](https://github.com/CL-lau/SQL-GPT) | 222 | +|[alvarosevilla95/autolang](https://github.com/alvarosevilla95/autolang) | 221 | +|[showlab/UniVTG](https://github.com/showlab/UniVTG) | 220 | +|[edreisMD/plugnplai](https://github.com/edreisMD/plugnplai) | 219 | +|[hardbyte/qabot](https://github.com/hardbyte/qabot) | 
216 | +|[microsoft/azure-openai-in-a-day-workshop](https://github.com/microsoft/azure-openai-in-a-day-workshop) | 215 | +|[Azure-Samples/chat-with-your-data-solution-accelerator](https://github.com/Azure-Samples/chat-with-your-data-solution-accelerator) | 214 | +|[amadad/agentcy](https://github.com/amadad/agentcy) | 213 | +|[snexus/llm-search](https://github.com/snexus/llm-search) | 212 | +|[afaqueumer/DocQA](https://github.com/afaqueumer/DocQA) | 206 | +|[plchld/InsightFlow](https://github.com/plchld/InsightFlow) | 205 | +|[yasyf/compress-gpt](https://github.com/yasyf/compress-gpt) | 205 | +|[benthecoder/ClassGPT](https://github.com/benthecoder/ClassGPT) | 205 | +|[voxel51/voxelgpt](https://github.com/voxel51/voxelgpt) | 204 | +|[jbrukh/gpt-jargon](https://github.com/jbrukh/gpt-jargon) | 204 | +|[emarco177/ice_breaker](https://github.com/emarco177/ice_breaker) | 204 | +|[tencentmusic/supersonic](https://github.com/tencentmusic/supersonic) | 202 | +|[Azure-Samples/azure-search-power-skills](https://github.com/Azure-Samples/azure-search-power-skills) | 202 | +|[blob42/Instrukt](https://github.com/blob42/Instrukt) | 201 | +|[langchain-ai/langsmith-sdk](https://github.com/langchain-ai/langsmith-sdk) | 200 | +|[SamPink/dev-gpt](https://github.com/SamPink/dev-gpt) | 200 | +|[ju-bezdek/langchain-decorators](https://github.com/ju-bezdek/langchain-decorators) | 198 | +|[KMnO4-zx/huanhuan-chat](https://github.com/KMnO4-zx/huanhuan-chat) | 196 | +|[Azure-Samples/jp-azureopenai-samples](https://github.com/Azure-Samples/jp-azureopenai-samples) | 192 | +|[hongbo-miao/hongbomiao.com](https://github.com/hongbo-miao/hongbomiao.com) | 190 | +|[CakeCrusher/openplugin](https://github.com/CakeCrusher/openplugin) | 190 | +|[PaddlePaddle/ERNIE-Bot-SDK](https://github.com/PaddlePaddle/ERNIE-Bot-SDK) | 189 | +|[retr0reg/Ret2GPT](https://github.com/retr0reg/Ret2GPT) | 189 | +|[AmineDiro/cria](https://github.com/AmineDiro/cria) | 187 | +|[lancedb/vectordb-recipes](https://github.com/lancedb/vectordb-recipes) | 186 | +|[vaibkumr/prompt-optimizer](https://github.com/vaibkumr/prompt-optimizer) | 185 | +|[aws-ia/ecs-blueprints](https://github.com/aws-ia/ecs-blueprints) | 184 | +|[ethanyanjiali/minChatGPT](https://github.com/ethanyanjiali/minChatGPT) | 183 | +|[MuhammadMoinFaisal/LargeLanguageModelsProjects](https://github.com/MuhammadMoinFaisal/LargeLanguageModelsProjects) | 182 | +|[shauryr/S2QA](https://github.com/shauryr/S2QA) | 181 | +|[summarizepaper/summarizepaper](https://github.com/summarizepaper/summarizepaper) | 180 | +|[NomaDamas/RAGchain](https://github.com/NomaDamas/RAGchain) | 179 | +|[pnkvalavala/repochat](https://github.com/pnkvalavala/repochat) | 179 | +|[ibiscp/LLM-IMDB](https://github.com/ibiscp/LLM-IMDB) | 177 | +|[fengyuli-dev/multimedia-gpt](https://github.com/fengyuli-dev/multimedia-gpt) | 177 | +|[langchain-ai/text-split-explorer](https://github.com/langchain-ai/text-split-explorer) | 175 | +|[iMagist486/ElasticSearch-Langchain-Chatglm2](https://github.com/iMagist486/ElasticSearch-Langchain-Chatglm2) | 175 | +|[limaoyi1/Auto-PPT](https://github.com/limaoyi1/Auto-PPT) | 175 | +|[Open-Swarm-Net/GPT-Swarm](https://github.com/Open-Swarm-Net/GPT-Swarm) | 175 | +|[morpheuslord/HackBot](https://github.com/morpheuslord/HackBot) | 174 | +|[v7labs/benchllm](https://github.com/v7labs/benchllm) | 174 | +|[Coding-Crashkurse/Langchain-Full-Course](https://github.com/Coding-Crashkurse/Langchain-Full-Course) | 174 | +|[dongyh20/Octopus](https://github.com/dongyh20/Octopus) | 173 | 
+|[kimtth/azure-openai-llm-vector-langchain](https://github.com/kimtth/azure-openai-llm-vector-langchain) | 173 | +|[mayooear/private-chatbot-mpt30b-langchain](https://github.com/mayooear/private-chatbot-mpt30b-langchain) | 173 | +|[zilliztech/akcio](https://github.com/zilliztech/akcio) | 172 | +|[jmpaz/promptlib](https://github.com/jmpaz/promptlib) | 172 | +|[ccurme/yolopandas](https://github.com/ccurme/yolopandas) | 172 | +|[joaomdmoura/CrewAI](https://github.com/joaomdmoura/CrewAI) | 170 | +|[katanaml/llm-mistral-invoice-cpu](https://github.com/katanaml/llm-mistral-invoice-cpu) | 170 | +|[chakkaradeep/pyCodeAGI](https://github.com/chakkaradeep/pyCodeAGI) | 170 | +|[mudler/LocalAGI](https://github.com/mudler/LocalAGI) | 167 | +|[dssjon/biblos](https://github.com/dssjon/biblos) | 165 | +|[kjappelbaum/gptchem](https://github.com/kjappelbaum/gptchem) | 165 | +|[xxw1995/chatglm3-finetune](https://github.com/xxw1995/chatglm3-finetune) | 164 | +|[ArjanCodes/examples](https://github.com/ArjanCodes/examples) | 163 | +|[AIAnytime/Llama2-Medical-Chatbot](https://github.com/AIAnytime/Llama2-Medical-Chatbot) | 163 | +|[RCGAI/SimplyRetrieve](https://github.com/RCGAI/SimplyRetrieve) | 162 | +|[langchain-ai/langchain-teacher](https://github.com/langchain-ai/langchain-teacher) | 162 | +|[menloparklab/falcon-langchain](https://github.com/menloparklab/falcon-langchain) | 162 | +|[flurb18/AgentOoba](https://github.com/flurb18/AgentOoba) | 162 | +|[homanp/vercel-langchain](https://github.com/homanp/vercel-langchain) | 161 | +|[jiran214/langup-ai](https://github.com/jiran214/langup-ai) | 160 | +|[JorisdeJong123/7-Days-of-LangChain](https://github.com/JorisdeJong123/7-Days-of-LangChain) | 160 | +|[GoogleCloudPlatform/data-analytics-golden-demo](https://github.com/GoogleCloudPlatform/data-analytics-golden-demo) | 159 | +|[positive666/Prompt-Can-Anything](https://github.com/positive666/Prompt-Can-Anything) | 159 | +|[luisroque/large_laguage_models](https://github.com/luisroque/large_laguage_models) | 159 | +|[mlops-for-all/mlops-for-all.github.io](https://github.com/mlops-for-all/mlops-for-all.github.io) | 158 | +|[wandb/wandbot](https://github.com/wandb/wandbot) | 158 | +|[elastic/elasticsearch-labs](https://github.com/elastic/elasticsearch-labs) | 157 | +|[shroominic/funcchain](https://github.com/shroominic/funcchain) | 157 | +|[deeppavlov/dream](https://github.com/deeppavlov/dream) | 156 | +|[mluogh/eastworld](https://github.com/mluogh/eastworld) | 154 | +|[georgesung/llm_qlora](https://github.com/georgesung/llm_qlora) | 154 | +|[RUC-GSAI/YuLan-Rec](https://github.com/RUC-GSAI/YuLan-Rec) | 153 | +|[KylinC/ChatFinance](https://github.com/KylinC/ChatFinance) | 152 | +|[Dicklesworthstone/llama2_aided_tesseract](https://github.com/Dicklesworthstone/llama2_aided_tesseract) | 152 | +|[c0sogi/LLMChat](https://github.com/c0sogi/LLMChat) | 152 | +|[eunomia-bpf/GPTtrace](https://github.com/eunomia-bpf/GPTtrace) | 152 | +|[ErikBjare/gptme](https://github.com/ErikBjare/gptme) | 152 | +|[Klingefjord/chatgpt-telegram](https://github.com/Klingefjord/chatgpt-telegram) | 152 | +|[RoboCoachTechnologies/ROScribe](https://github.com/RoboCoachTechnologies/ROScribe) | 151 | +|[Aggregate-Intellect/sherpa](https://github.com/Aggregate-Intellect/sherpa) | 151 | +|[3Alan/DocsMind](https://github.com/3Alan/DocsMind) | 151 | +|[tangqiaoyu/ToolAlpaca](https://github.com/tangqiaoyu/ToolAlpaca) | 150 | +|[kulltc/chatgpt-sql](https://github.com/kulltc/chatgpt-sql) | 150 | +|[mallahyari/drqa](https://github.com/mallahyari/drqa) | 150 | 
+|[MedalCollector/Orator](https://github.com/MedalCollector/Orator) | 149 | +|[Teahouse-Studios/akari-bot](https://github.com/Teahouse-Studios/akari-bot) | 149 | +|[realminchoi/babyagi-ui](https://github.com/realminchoi/babyagi-ui) | 148 | +|[ssheng/BentoChain](https://github.com/ssheng/BentoChain) | 148 | +|[solana-labs/chatgpt-plugin](https://github.com/solana-labs/chatgpt-plugin) | 147 | +|[aurelio-labs/arxiv-bot](https://github.com/aurelio-labs/arxiv-bot) | 147 | +|[Jaseci-Labs/jaseci](https://github.com/Jaseci-Labs/jaseci) | 146 | +|[menloparklab/langchain-cohere-qdrant-doc-retrieval](https://github.com/menloparklab/langchain-cohere-qdrant-doc-retrieval) | 146 | +|[trancethehuman/entities-extraction-web-scraper](https://github.com/trancethehuman/entities-extraction-web-scraper) | 144 | +|[peterw/StoryStorm](https://github.com/peterw/StoryStorm) | 144 | +|[grumpyp/chroma-langchain-tutorial](https://github.com/grumpyp/chroma-langchain-tutorial) | 144 | +|[gh18l/CrawlGPT](https://github.com/gh18l/CrawlGPT) | 142 | +|[langchain-ai/langchain-aws-template](https://github.com/langchain-ai/langchain-aws-template) | 142 | +|[yasyf/summ](https://github.com/yasyf/summ) | 141 | +|[petehunt/langchain-github-bot](https://github.com/petehunt/langchain-github-bot) | 141 | +|[hirokidaichi/wanna](https://github.com/hirokidaichi/wanna) | 140 | +|[jina-ai/fastapi-serve](https://github.com/jina-ai/fastapi-serve) | 139 | +|[zenml-io/zenml-projects](https://github.com/zenml-io/zenml-projects) | 139 | +|[jlonge4/local_llama](https://github.com/jlonge4/local_llama) | 139 | +|[smyja/blackmaria](https://github.com/smyja/blackmaria) | 138 | +|[ChuloAI/BrainChulo](https://github.com/ChuloAI/BrainChulo) | 137 | +|[log1stics/voice-generator-webui](https://github.com/log1stics/voice-generator-webui) | 137 | +|[davila7/file-gpt](https://github.com/davila7/file-gpt) | 137 | +|[dcaribou/transfermarkt-datasets](https://github.com/dcaribou/transfermarkt-datasets) | 136 | +|[ciare-robotics/world-creator](https://github.com/ciare-robotics/world-creator) | 135 | +|[Undertone0809/promptulate](https://github.com/Undertone0809/promptulate) | 134 | +|[fixie-ai/fixie-examples](https://github.com/fixie-ai/fixie-examples) | 134 | +|[run-llama/ai-engineer-workshop](https://github.com/run-llama/ai-engineer-workshop) | 133 | +|[definitive-io/code-indexer-loop](https://github.com/definitive-io/code-indexer-loop) | 131 | +|[mortium91/langchain-assistant](https://github.com/mortium91/langchain-assistant) | 131 | +|[baidubce/bce-qianfan-sdk](https://github.com/baidubce/bce-qianfan-sdk) | 130 | +|[Ngonie-x/langchain_csv](https://github.com/Ngonie-x/langchain_csv) | 130 | +|[IvanIsCoding/ResuLLMe](https://github.com/IvanIsCoding/ResuLLMe) | 130 | +|[AnchoringAI/anchoring-ai](https://github.com/AnchoringAI/anchoring-ai) | 129 | +|[Azure/business-process-automation](https://github.com/Azure/business-process-automation) | 128 | +|[athina-ai/athina-sdk](https://github.com/athina-ai/athina-sdk) | 126 | +|[thunlp/ChatEval](https://github.com/thunlp/ChatEval) | 126 | +|[prof-frink-lab/slangchain](https://github.com/prof-frink-lab/slangchain) | 126 | +|[vietanhdev/pautobot](https://github.com/vietanhdev/pautobot) | 125 | +|[awslabs/generative-ai-cdk-constructs](https://github.com/awslabs/generative-ai-cdk-constructs) | 124 | +|[sdaaron/QueryGPT](https://github.com/sdaaron/QueryGPT) | 124 | +|[rabbitmetrics/langchain-13-min](https://github.com/rabbitmetrics/langchain-13-min) | 124 | +|[AutoLLM/AutoAgents](https://github.com/AutoLLM/AutoAgents) | 
122 | +|[nicknochnack/Nopenai](https://github.com/nicknochnack/Nopenai) | 122 | +|[wombyz/HormoziGPT](https://github.com/wombyz/HormoziGPT) | 122 | +|[dotvignesh/PDFChat](https://github.com/dotvignesh/PDFChat) | 122 | +|[topoteretes/PromethAI-Backend](https://github.com/topoteretes/PromethAI-Backend) | 121 | +|[nftblackmagic/flask-langchain](https://github.com/nftblackmagic/flask-langchain) | 121 | +|[vishwasg217/finsight](https://github.com/vishwasg217/finsight) | 120 | +|[snap-stanford/MLAgentBench](https://github.com/snap-stanford/MLAgentBench) | 120 | +|[Azure/app-service-linux-docs](https://github.com/Azure/app-service-linux-docs) | 120 | +|[nyanp/chat2plot](https://github.com/nyanp/chat2plot) | 120 | +|[ant4g0nist/polar](https://github.com/ant4g0nist/polar) | 119 | +|[aws-samples/cdk-eks-blueprints-patterns](https://github.com/aws-samples/cdk-eks-blueprints-patterns) | 119 | +|[aws-samples/amazon-kendra-langchain-extensions](https://github.com/aws-samples/amazon-kendra-langchain-extensions) | 119 | +|[Xueheng-Li/SynologyChatbotGPT](https://github.com/Xueheng-Li/SynologyChatbotGPT) | 119 | +|[CodeAlchemyAI/ViLT-GPT](https://github.com/CodeAlchemyAI/ViLT-GPT) | 117 | +|[Lin-jun-xiang/docGPT-langchain](https://github.com/Lin-jun-xiang/docGPT-langchain) | 117 | +|[ademakdogan/ChatSQL](https://github.com/ademakdogan/ChatSQL) | 116 | +|[aniketmaurya/llm-inference](https://github.com/aniketmaurya/llm-inference) | 115 | +|[xuwenhao/mactalk-ai-course](https://github.com/xuwenhao/mactalk-ai-course) | 115 | +|[cmooredev/RepoReader](https://github.com/cmooredev/RepoReader) | 115 | +|[abi/autocommit](https://github.com/abi/autocommit) | 115 | +|[MIDORIBIN/langchain-gpt4free](https://github.com/MIDORIBIN/langchain-gpt4free) | 114 | +|[finaldie/auto-news](https://github.com/finaldie/auto-news) | 114 | +|[Anil-matcha/Youtube-to-chatbot](https://github.com/Anil-matcha/Youtube-to-chatbot) | 114 | +|[avrabyt/MemoryBot](https://github.com/avrabyt/MemoryBot) | 114 | +|[Capsize-Games/airunner](https://github.com/Capsize-Games/airunner) | 113 | +|[atisharma/llama_farm](https://github.com/atisharma/llama_farm) | 113 | +|[mbchang/data-driven-characters](https://github.com/mbchang/data-driven-characters) | 112 | +|[fiddler-labs/fiddler-auditor](https://github.com/fiddler-labs/fiddler-auditor) | 112 | +|[dirkjbreeuwer/gpt-automated-web-scraper](https://github.com/dirkjbreeuwer/gpt-automated-web-scraper) | 111 | +|[Appointat/Chat-with-Document-s-using-ChatGPT-API-and-Text-Embedding](https://github.com/Appointat/Chat-with-Document-s-using-ChatGPT-API-and-Text-Embedding) | 111 | +|[hwchase17/langchain-gradio-template](https://github.com/hwchase17/langchain-gradio-template) | 111 | +|[artas728/spelltest](https://github.com/artas728/spelltest) | 110 | +|[NVIDIA/GenerativeAIExamples](https://github.com/NVIDIA/GenerativeAIExamples) | 109 | +|[Azure/aistudio-copilot-sample](https://github.com/Azure/aistudio-copilot-sample) | 108 | +|[codefuse-ai/codefuse-chatbot](https://github.com/codefuse-ai/codefuse-chatbot) | 108 | +|[apirrone/Memento](https://github.com/apirrone/Memento) | 108 | +|[e-johnstonn/GPT-Doc-Summarizer](https://github.com/e-johnstonn/GPT-Doc-Summarizer) | 108 | +|[salesforce/BOLAA](https://github.com/salesforce/BOLAA) | 107 | +|[Erol444/gpt4-openai-api](https://github.com/Erol444/gpt4-openai-api) | 106 | +|[linjungz/chat-with-your-doc](https://github.com/linjungz/chat-with-your-doc) | 106 | +|[crosleythomas/MirrorGPT](https://github.com/crosleythomas/MirrorGPT) | 106 | 
+|[panaverse/learn-generative-ai](https://github.com/panaverse/learn-generative-ai) | 105 | +|[Azure/azure-sdk-tools](https://github.com/Azure/azure-sdk-tools) | 105 | +|[malywut/gpt_examples](https://github.com/malywut/gpt_examples) | 105 | +|[ritun16/chain-of-verification](https://github.com/ritun16/chain-of-verification) | 104 | +|[langchain-ai/langchain-benchmarks](https://github.com/langchain-ai/langchain-benchmarks) | 104 | +|[lightninglabs/LangChainBitcoin](https://github.com/lightninglabs/LangChainBitcoin) | 104 | +|[flepied/second-brain-agent](https://github.com/flepied/second-brain-agent) | 103 | +|[llmapp/openai.mini](https://github.com/llmapp/openai.mini) | 102 | +|[gimlet-ai/tddGPT](https://github.com/gimlet-ai/tddGPT) | 102 | +|[jlonge4/gpt_chatwithPDF](https://github.com/jlonge4/gpt_chatwithPDF) | 102 | +|[agentification/RAFA_code](https://github.com/agentification/RAFA_code) | 101 | +|[pacman100/DHS-LLM-Workshop](https://github.com/pacman100/DHS-LLM-Workshop) | 101 | +|[aws-samples/private-llm-qa-bot](https://github.com/aws-samples/private-llm-qa-bot) | 101 | + + +_Generated by [github-dependents-info](https://github.com/nvuillam/github-dependents-info)_ + +`github-dependents-info --repo "langchain-ai/langchain" --markdownfile dependents.md --minstars 100 --sort stars` diff --git a/docs/docs/additional_resources/tutorials.mdx b/docs/docs/additional_resources/tutorials.mdx new file mode 100644 index 0000000000000..1b98d8c31af7a --- /dev/null +++ b/docs/docs/additional_resources/tutorials.mdx @@ -0,0 +1,51 @@ +# 3rd Party Tutorials + +## Tutorials + +### [LangChain v 0.1 by LangChain.ai](https://www.youtube.com/playlist?list=PLfaIDFEXuae0gBSJ9T0w7cu7iJZbH3T31) +### [Build with Langchain - Advanced by LangChain.ai](https://www.youtube.com/playlist?list=PLfaIDFEXuae06tclDATrMYY0idsTdLg9v) +### [LangGraph by LangChain.ai](https://www.youtube.com/playlist?list=PLfaIDFEXuae16n2TWUkKq5PgJ0w6Pkwtg) +### [by Greg Kamradt](https://www.youtube.com/playlist?list=PLqZXAkvF1bPNQER9mLmDbntNfSpzdDIU5) +### [by Sam Witteveen](https://www.youtube.com/playlist?list=PL8motc6AQftk1Bs42EW45kwYbyJ4jOdiZ) +### [by James Briggs](https://www.youtube.com/playlist?list=PLIUOU7oqGTLieV9uTIFMm6_4PXg-hlN6F) +### [by Prompt Engineering](https://www.youtube.com/playlist?list=PLVEEucA9MYhOu89CX8H3MBZqayTbcCTMr) +### [by Mayo Oshin](https://www.youtube.com/@chatwithdata/search?query=langchain) +### [by 1 little Coder](https://www.youtube.com/playlist?list=PLpdmBGJ6ELUK-v0MK-t4wZmVEbxM5xk6L) +### [by BobLin (Chinese language)](https://www.youtube.com/playlist?list=PLbd7ntv6PxC3QMFQvtWfk55p-Op_syO1C) + +## Courses + +### Featured courses on Deeplearning.AI + +- [LangChain for LLM Application Development](https://www.deeplearning.ai/short-courses/langchain-for-llm-application-development/) +- [LangChain Chat with Your Data](https://www.deeplearning.ai/short-courses/langchain-chat-with-your-data/) +- [Functions, Tools and Agents with LangChain](https://www.deeplearning.ai/short-courses/functions-tools-agents-langchain/) +- [Build LLM Apps with LangChain.js](https://www.deeplearning.ai/short-courses/build-llm-apps-with-langchain-js/) + +### Online courses + +- [Udemy](https://www.udemy.com/courses/search/?q=langchain) +- [DataCamp](https://www.datacamp.com/courses/developing-llm-applications-with-langchain) +- [Pluralsight](https://www.pluralsight.com/search?q=langchain) +- [Coursera](https://www.coursera.org/search?query=langchain) +- [Maven](https://maven.com/courses?query=langchain) +- 
[Udacity](https://www.udacity.com/catalog/all/any-price/any-school/any-skill/any-difficulty/any-duration/any-type/relevance/page-1?searchValue=langchain) +- [LinkedIn Learning](https://www.linkedin.com/search/results/learning/?keywords=langchain) +- [edX](https://www.edx.org/search?q=langchain) +- [freeCodeCamp](https://www.youtube.com/@freecodecamp/search?query=langchain) + +## Short Tutorials + +- [by Nicholas Renotte](https://youtu.be/MlK6SIjcjE8) +- [by Patrick Loeber](https://youtu.be/LbT1yp6quS8) +- [by Rabbitmetrics](https://youtu.be/aywZrzNaKjs) +- [by Ivan Reznikov](https://medium.com/@ivanreznikov/langchain-101-course-updated-668f7b41d6cb) + +## Books and Handbooks + +- [Generative AI with LangChain](https://www.amazon.com/Generative-AI-LangChain-language-ChatGPT/dp/1835083463/ref=sr_1_1?crid=1GMOMH0G7GLR&keywords=generative+ai+with+langchain&qid=1703247181&sprefix=%2Caps%2C298&sr=8-1) by [Ben Auffrath](https://www.amazon.com/stores/Ben-Auffarth/author/B08JQKSZ7D?ref=ap_rdr&store_ref=ap_rdr&isDramIntegrated=true&shoppingPortalEnabled=true), ©️ 2023 Packt Publishing +- [LangChain AI Handbook](https://www.pinecone.io/learn/langchain/) By **James Briggs** and **Francisco Ingham** +- [LangChain Cheatsheet](https://pub.towardsai.net/langchain-cheatsheet-all-secrets-on-a-single-page-8be26b721cde) by **Ivan Reznikov** +- [Dive into Langchain (Chinese language)](https://langchain.boblin.app/) + +--------------------- diff --git a/docs/docs/contributing/code/guidelines.mdx b/docs/docs/contributing/code/guidelines.mdx new file mode 100644 index 0000000000000..7f75199b1e1bc --- /dev/null +++ b/docs/docs/contributing/code/guidelines.mdx @@ -0,0 +1,35 @@ +# General guidelines + +Here are some things to keep in mind for all types of contributions: + +- Follow the ["fork and pull request"](https://docs.github.com/en/get-started/exploring-projects-on-github/contributing-to-a-project) workflow. +- Fill out the checked-in pull request template when opening pull requests. Note related issues and tag relevant maintainers. +- Ensure your PR passes formatting, linting, and testing checks before requesting a review. + - If you would like comments or feedback on your current progress, please open an issue or discussion and tag a maintainer. + - See the sections on [Testing](/docs/contributing/code/setup#testing) and [Formatting and Linting](/docs/contributing/code/setup#formatting-and-linting) for how to run these checks locally. +- Backwards compatibility is key. Your changes must not be breaking, except in case of critical bug and security fixes. +- Look for duplicate PRs or issues that have already been opened before opening a new one. +- Keep scope as isolated as possible. As a general rule, your changes should not affect more than one package at a time. + +## Bugfixes + +We encourage and appreciate bugfixes. We ask that you: + +- Explain the bug in enough detail for maintainers to be able to reproduce it. + - If an accompanying issue exists, link to it. Prefix with `Fixes` so that the issue will close automatically when the PR is merged. +- Avoid breaking changes if possible. +- Include unit tests that fail without the bugfix. + +If you come across a bug and don't know how to fix it, we ask that you open an issue for it describing in detail the environment in which you encountered the bug. + +## New features + +We aim to keep the bar high for new features. 
We generally don't accept new core abstractions, changes to infra, changes to dependencies, +or new agents/chains from outside contributors without an existing GitHub discussion or issue that demonstrates an acute need for them. + +- New features must come with docs, unit tests, and (if appropriate) integration tests. +- New integrations must come with docs, unit tests, and (if appropriate) integration tests. + - See [this page](/docs/contributing/integrations) for more details on contributing new integrations. +- New functionality should not inherit from or use deprecated methods or classes. +- We will reject features that are likely to lead to security vulnerabilities or reports. +- Do not add any hard dependencies. Integrations may add optional dependencies. diff --git a/docs/docs/contributing/code/index.mdx b/docs/docs/contributing/code/index.mdx new file mode 100644 index 0000000000000..43b90785567b2 --- /dev/null +++ b/docs/docs/contributing/code/index.mdx @@ -0,0 +1,6 @@ +# Contribute Code + +If you would like to add a new feature or update an existing one, please read the resources below before getting started: + +- [General guidelines](/docs/contributing/code/guidelines/) +- [Setup](/docs/contributing/code/setup/) diff --git a/docs/docs/contributing/code/setup.mdx b/docs/docs/contributing/code/setup.mdx new file mode 100644 index 0000000000000..5e983d30fbecf --- /dev/null +++ b/docs/docs/contributing/code/setup.mdx @@ -0,0 +1,213 @@ +# Setup + +This guide walks through how to run the repository locally and check in your first code. +For a [development container](https://containers.dev/), see the [.devcontainer folder](https://github.com/langchain-ai/langchain/tree/master/.devcontainer). + +## Dependency Management: Poetry and other env/dependency managers + +This project utilizes [Poetry](https://python-poetry.org/) v1.7.1+ as a dependency manager. + +❗Note: *Before installing Poetry*, if you use `Conda`, create and activate a new Conda env (e.g. `conda create -n langchain python=3.9`) + +Install Poetry: **[documentation on how to install it](https://python-poetry.org/docs/#installation)**. + +❗Note: If you use `Conda` or `Pyenv` as your environment/package manager, after installing Poetry, +tell Poetry to use the virtualenv python environment (`poetry config virtualenvs.prefer-active-python true`) + +## Different packages + +This repository contains multiple packages: +- `langchain-core`: Base interfaces for key abstractions as well as logic for combining them in chains (LangChain Expression Language). +- `langchain-community`: Third-party integrations of various components. +- `langchain`: Chains, agents, and retrieval logic that makes up the cognitive architecture of your applications. +- `langchain-experimental`: Components and chains that are experimental, either in the sense that the techniques are novel and still being tested, or they require giving the LLM more access than would be possible in most production systems. +- Partner integrations: Partner packages in `libs/partners` that are independently version controlled. + +Each of these has its own development environment. Docs are run from the top-level makefile, but development +is split across separate test & release flows. 
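+
+As a quick illustration of how these packages layer on top of each other, here is a minimal, hypothetical sketch (it assumes you have `langchain-core` and the `langchain-openai` partner package installed; the model name is only an example):
+
+```python
+# Base abstractions and LCEL composition come from langchain-core.
+from langchain_core.prompts import ChatPromptTemplate
+
+# Concrete integrations live in partner packages under libs/partners.
+from langchain_openai import ChatOpenAI
+
+prompt = ChatPromptTemplate.from_messages([("user", "Say hello to {name}.")])
+llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
+
+# The `|` operator (LangChain Expression Language, defined in langchain-core)
+# chains the prompt and the model together.
+chain = prompt | llm
+# chain.invoke({"name": "LangChain"})  # actually running this requires OPENAI_API_KEY
+```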
+ +For this quickstart, start with langchain-community: + +```bash +cd libs/community +``` + +## Local Development Dependencies + +Install langchain-community development requirements (for running langchain, running examples, linting, formatting, tests, and coverage): + +```bash +poetry install --with lint,typing,test,test_integration +``` + +Then verify dependency installation: + +```bash +make test +``` + +If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running +Poetry v1.6.1+. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. +If you are still seeing this bug on v1.6.1+, you may also try disabling "modern installation" +(`poetry config installer.modern-installation false`) and re-installing requirements. +See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details. + +## Testing + +**Note:** In `langchain`, `langchain-community`, and `langchain-experimental`, some test dependencies are optional. See the following section about optional dependencies. + +Unit tests cover modular logic that does not require calls to outside APIs. +If you add new logic, please add a unit test. + +To run unit tests: + +```bash +make test +``` + +To run unit tests in Docker: + +```bash +make docker_tests +``` + +There are also [integration tests and code-coverage](/docs/contributing/testing/) available. + +### Only develop langchain_core or langchain_experimental + +If you are only developing `langchain_core` or `langchain_experimental`, you can simply install the dependencies for the respective projects and run tests: + +```bash +cd libs/core +poetry install --with test +make test +``` + +Or: + +```bash +cd libs/experimental +poetry install --with test +make test +``` + +## Formatting and Linting + +Run these locally before submitting a PR; the CI system will check also. + +### Code Formatting + +Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/). + +To run formatting for docs, cookbook and templates: + +```bash +make format +``` + +To run formatting for a library, run the same command from the relevant library directory: + +```bash +cd libs/{LIBRARY} +make format +``` + +Additionally, you can run the formatter only on the files that have been modified in your current branch as compared to the master branch using the format_diff command: + +```bash +make format_diff +``` + +This is especially useful when you have made changes to a subset of the project and want to ensure your changes are properly formatted without affecting the rest of the codebase. + +#### Linting + +Linting for this project is done via a combination of [ruff](https://docs.astral.sh/ruff/rules/) and [mypy](http://mypy-lang.org/). + +To run linting for docs, cookbook and templates: + +```bash +make lint +``` + +To run linting for a library, run the same command from the relevant library directory: + +```bash +cd libs/{LIBRARY} +make lint +``` + +In addition, you can run the linter only on the files that have been modified in your current branch as compared to the master branch using the lint_diff command: + +```bash +make lint_diff +``` + +This can be very helpful when you've made changes to only certain parts of the project and want to ensure your changes meet the linting standards without having to check the entire codebase. + +We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. 
We do not want this to be a blocker for good code getting contributed.
+
+### Spellcheck
+
+Spellchecking for this project is done via [codespell](https://github.com/codespell-project/codespell).
+Note that `codespell` finds common typos, so it can produce false positives (flagging correctly spelled but rarely used words) and false negatives (missing genuinely misspelled words).
+
+To check spelling for this project:
+
+```bash
+make spell_check
+```
+
+To fix spelling in place:
+
+```bash
+make spell_fix
+```
+
+If codespell is incorrectly flagging a word, you can skip spellcheck for that word by adding it to the codespell config in the `pyproject.toml` file.
+
+```toml
+[tool.codespell]
+...
+# Add here:
+ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure'
+```
+
+## Working with Optional Dependencies
+
+`langchain`, `langchain-community`, and `langchain-experimental` rely on optional dependencies to keep these packages lightweight.
+
+`langchain-core` and partner packages **do not use** optional dependencies in this way.
+
+You'll notice that `pyproject.toml` and `poetry.lock` are **not** touched when you add optional dependencies below.
+
+If you're adding a new dependency to LangChain, assume that it will be an optional dependency, and
+that most users won't have it installed.
+
+Users who do not have the dependency installed should be able to **import** your code without
+any side effects (no warnings, no errors, no exceptions).
+
+To introduce the dependency to a library, please do the following:
+
+1. Open extended_testing_deps.txt and add the dependency
+2. Add a unit test that, at the very least, attempts to import the new code. Ideally, the unit
+test makes use of lightweight fixtures to test the logic of the code.
+3. Please use the `@pytest.mark.requires(package_name)` decorator for any unit tests that require the dependency.
+
+## Adding a Jupyter Notebook
+
+If you are adding a Jupyter Notebook example, you'll want to install the optional `dev` dependencies.
+
+To install dev dependencies:
+
+```bash
+poetry install --with dev
+```
+
+Launch a notebook:
+
+```bash
+poetry run jupyter notebook
+```
+
+When you run `poetry install`, the `langchain` package is installed as editable in the virtualenv, so your new logic can be imported into the notebook.
diff --git a/docs/docs/contributing/documentation/index.mdx b/docs/docs/contributing/documentation/index.mdx
new file mode 100644
index 0000000000000..564edf6d53498
--- /dev/null
+++ b/docs/docs/contributing/documentation/index.mdx
@@ -0,0 +1,7 @@
+# Contribute Documentation
+
+Documentation is a vital part of LangChain. We welcome both new documentation for new features and
+community improvements to our current documentation. Please read the resources below before getting started:
+
+- [Documentation style guide](/docs/contributing/documentation/style_guide/)
+- [Setup](/docs/contributing/documentation/setup/)
diff --git a/docs/docs/contributing/documentation/setup.mdx b/docs/docs/contributing/documentation/setup.mdx
new file mode 100644
index 0000000000000..9635d005fe1c2
--- /dev/null
+++ b/docs/docs/contributing/documentation/setup.mdx
@@ -0,0 +1,181 @@
+---
+sidebar_class_name: "hidden"
+---
+
+# Setup
+
+LangChain documentation consists of two components:
+
+1. Main Documentation: Hosted at [python.langchain.com](https://python.langchain.com/),
+this comprehensive resource serves as the primary user-facing documentation.
+It covers a wide array of topics, including tutorials, use cases, integrations, +and more, offering extensive guidance on building with LangChain. +The content for this documentation lives in the `/docs` directory of the monorepo. +2. In-code Documentation: This is documentation of the codebase itself, which is also +used to generate the externally facing [API Reference](https://api.python.langchain.com/en/latest/langchain_api_reference.html). +The content for the API reference is autogenerated by scanning the docstrings in the codebase. For this reason we ask that +developers document their code well. + +The `API Reference` is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) +from the code and is hosted by [Read the Docs](https://readthedocs.org/). + +We appreciate all contributions to the documentation, whether it be fixing a typo, +adding a new tutorial or example and whether it be in the main documentation or the API Reference. + +Similar to linting, we recognize documentation can be annoying. If you do not want +to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. + +## 📜 Main Documentation + +The content for the main documentation is located in the `/docs` directory of the monorepo. + +The documentation is written using a combination of ipython notebooks (`.ipynb` files) +and markdown (`.mdx` files). The notebooks are converted to markdown +and then built using [Docusaurus 2](https://docusaurus.io/). + +Feel free to make contributions to the main documentation! 🥰 + +After modifying the documentation: + +1. Run the linting and formatting commands (see below) to ensure that the documentation is well-formatted and free of errors. +2. Optionally build the documentation locally to verify that the changes look good. +3. Make a pull request with the changes. +4. You can preview and verify that the changes are what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page. This will take you to a preview of the documentation changes. + +## ⚒️ Linting and Building Documentation Locally + +After writing up the documentation, you may want to lint and build the documentation +locally to ensure that it looks good and is free of errors. + +If you're unable to build it locally that's okay as well, as you will be able to +see a preview of the documentation on the pull request page. + +From the **monorepo root**, run the following command to install the dependencies: + +```bash +poetry install --with lint,docs --no-root +```` + +### Building + +The code that builds the documentation is located in the `/docs` directory of the monorepo. + +In the following commands, the prefix `api_` indicates that those are operations for the API Reference. + +Before building the documentation, it is always a good idea to clean the build directory: + +```bash +make docs_clean +make api_docs_clean +``` + +Next, you can build the documentation as outlined below: + +```bash +make docs_build +make api_docs_build +``` + +:::tip + +The `make api_docs_build` command takes a long time. If you're making cosmetic changes to the API docs and want to see how they look, use: + +```bash +make api_docs_quick_preview +``` + +which will just build a small subset of the API reference. 
+ +::: + +Finally, run the link checker to ensure all links are valid: + +```bash +make docs_linkcheck +make api_docs_linkcheck +``` + +### Linting and Formatting + +The Main Documentation is linted from the **monorepo root**. To lint the main documentation, run the following from there: + +```bash +make lint +``` + +If you have formatting-related errors, you can fix them automatically with: + +```bash +make format +``` + +## ⌨️ In-code Documentation + +The in-code documentation is largely autogenerated by [sphinx](https://www.sphinx-doc.org/en/master/) from the code and is hosted by [Read the Docs](https://readthedocs.org/). + +For the API reference to be useful, the codebase must be well-documented. This means that all functions, classes, and methods should have a docstring that explains what they do, what the arguments are, and what the return value is. This is a good practice in general, but it is especially important for LangChain because the API reference is the primary resource for developers to understand how to use the codebase. + +We generally follow the [Google Python Style Guide](https://google.github.io/styleguide/pyguide.html#38-comments-and-docstrings) for docstrings. + +Here is an example of a well-documented function: + +```python + +def my_function(arg1: int, arg2: str) -> float: + """This is a short description of the function. (It should be a single sentence.) + + This is a longer description of the function. It should explain what + the function does, what the arguments are, and what the return value is. + It should wrap at 88 characters. + + Examples: + This is a section for examples of how to use the function. + + .. code-block:: python + + my_function(1, "hello") + + Args: + arg1: This is a description of arg1. We do not need to specify the type since + it is already specified in the function signature. + arg2: This is a description of arg2. + + Returns: + This is a description of the return value. + """ + return 3.14 +``` + +### Linting and Formatting + +The in-code documentation is linted from the directories belonging to the packages +being documented. + +For example, if you're working on the `langchain-community` package, you would change +the working directory to the `langchain-community` directory: + +```bash +cd [root]/libs/langchain-community +``` + +Set up a virtual environment for the package if you haven't done so already. + +Install the dependencies for the package. + +```bash +poetry install --with lint +``` + +Then you can run the following commands to lint and format the in-code documentation: + +```bash +make format +make lint +``` + +## Verify Documentation Changes + +After pushing documentation changes to the repository, you can preview and verify that the changes are +what you wanted by clicking the `View deployment` or `Visit Preview` buttons on the pull request `Conversation` page. +This will take you to a preview of the documentation changes. +This preview is created by [Vercel](https://vercel.com/docs/getting-started-with-vercel). 
\ No newline at end of file diff --git a/docs/docs/example_data/nke-10k-2023.pdf b/docs/docs/example_data/nke-10k-2023.pdf new file mode 100644 index 0000000000000..6ade8863e8072 Binary files /dev/null and b/docs/docs/example_data/nke-10k-2023.pdf differ diff --git a/docs/docs/how_to/chat_models_universal_init.ipynb b/docs/docs/how_to/chat_models_universal_init.ipynb new file mode 100644 index 0000000000000..c77083cdfb119 --- /dev/null +++ b/docs/docs/how_to/chat_models_universal_init.ipynb @@ -0,0 +1,157 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "cfdf4f09-8125-4ed1-8063-6feed57da8a3", + "metadata": {}, + "source": [ + "# How to init any model in one line\n", + "\n", + "Many LLM applications let end users specify what model provider and model they want the application to be powered by. This requires writing some logic to initialize different ChatModels based on some user configuration. The `init_chat_model()` helper method makes it easy to initialize a number of different model integrations without having to worry about import paths and class names.\n", + "\n", + ":::tip Supported models\n", + "\n", + "See the [init_chat_model()](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html) API reference for a full list of supported integrations.\n", + "\n", + "Make sure you have the integration packages installed for any model providers you want to support. E.g. you should have `langchain-openai` installed to init an OpenAI model.\n", + "\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "165b0de6-9ae3-4e3d-aa98-4fc8a97c4a06", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain langchain-openai langchain-anthropic langchain-google-vertexai" + ] + }, + { + "cell_type": "markdown", + "id": "ea2c9f57-a796-45f8-b6f4-3efd3f361a9b", + "metadata": {}, + "source": [ + "## Basic usage" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "79e14913-803c-4382-9009-5c6af3d75d35", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GPT-4o: I'm an AI created by OpenAI, and I don't have a personal name. You can call me Assistant! How can I help you today?\n", + "\n", + "Claude Opus: My name is Claude. It's nice to meet you!\n", + "\n", + "Gemini 1.5: I am a large language model, trained by Google. I do not have a name. 
\n", + "\n", + "\n" + ] + } + ], + "source": [ + "from langchain.chat_models import init_chat_model\n", + "\n", + "# Returns a langchain_openai.ChatOpenAI instance.\n", + "gpt_4o = init_chat_model(\"gpt-4o\", model_provider=\"openai\", temperature=0)\n", + "# Returns a langchain_anthropic.ChatAnthropic instance.\n", + "claude_opus = init_chat_model(\n", + " \"claude-3-opus-20240229\", model_provider=\"anthropic\", temperature=0\n", + ")\n", + "# Returns a langchain_google_vertexai.ChatVertexAI instance.\n", + "gemini_15 = init_chat_model(\n", + " \"gemini-1.5-pro\", model_provider=\"google_vertexai\", temperature=0\n", + ")\n", + "\n", + "# Since all model integrations implement the ChatModel interface, you can use them in the same way.\n", + "print(\"GPT-4o: \" + gpt_4o.invoke(\"what's your name\").content + \"\\n\")\n", + "print(\"Claude Opus: \" + claude_opus.invoke(\"what's your name\").content + \"\\n\")\n", + "print(\"Gemini 1.5: \" + gemini_15.invoke(\"what's your name\").content + \"\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "fff9a4c8-b6ee-4a1a-8d3d-0ecaa312d4ed", + "metadata": {}, + "source": [ + "## Simple config example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75c25d39-bf47-4b51-a6c6-64d9c572bfd6", + "metadata": {}, + "outputs": [], + "source": [ + "user_config = {\n", + " \"model\": \"...user-specified...\",\n", + " \"model_provider\": \"...user-specified...\",\n", + " \"temperature\": 0,\n", + " \"max_tokens\": 1000,\n", + "}\n", + "\n", + "llm = init_chat_model(**user_config)\n", + "llm.invoke(\"what's your name\")" + ] + }, + { + "cell_type": "markdown", + "id": "f811f219-5e78-4b62-b495-915d52a22532", + "metadata": {}, + "source": [ + "## Inferring model provider\n", + "\n", + "For common and distinct model names `init_chat_model()` will attempt to infer the model provider. See the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain.chat_models.base.init_chat_model.html) for a full list of inference behavior. E.g. any model that starts with `gpt-3...` or `gpt-4...` will be inferred as using model provider `openai`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0378ccc6-95bc-4d50-be50-fccc193f0a71", + "metadata": {}, + "outputs": [], + "source": [ + "gpt_4o = init_chat_model(\"gpt-4o\", temperature=0)\n", + "claude_opus = init_chat_model(\"claude-3-opus-20240229\", temperature=0)\n", + "gemini_15 = init_chat_model(\"gemini-1.5-pro\", temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da07b5c0-d2e6-42e4-bfcd-2efcfaae6221", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "poetry-venv-2", + "language": "python", + "name": "poetry-venv-2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/how_to/filter_messages.ipynb b/docs/docs/how_to/filter_messages.ipynb new file mode 100644 index 0000000000000..061a193a2a9da --- /dev/null +++ b/docs/docs/how_to/filter_messages.ipynb @@ -0,0 +1,203 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e389175d-8a65-4f0d-891c-dbdfabb3c3ef", + "metadata": {}, + "source": [ + "# How to filter messages\n", + "\n", + "In more complex chains and agents we might track state with a list of messages. This list can start to accumulate messages from multiple different models, speakers, sub-chains, etc., and we may only want to pass subsets of this full list of messages to each model call in the chain/agent.\n", + "\n", + "The `filter_messages` utility makes it easy to filter messages by type, id, or name.\n", + "\n", + "## Basic usage" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f4ad2fd3-3cab-40d4-a989-972115865b8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='example input', name='example_user', id='2'),\n", + " HumanMessage(content='real input', name='bob', id='4')]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_core.messages import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage,\n", + " filter_messages,\n", + ")\n", + "\n", + "messages = [\n", + " SystemMessage(\"you are a good assistant\", id=\"1\"),\n", + " HumanMessage(\"example input\", id=\"2\", name=\"example_user\"),\n", + " AIMessage(\"example output\", id=\"3\", name=\"example_assistant\"),\n", + " HumanMessage(\"real input\", id=\"4\", name=\"bob\"),\n", + " AIMessage(\"real output\", id=\"5\", name=\"alice\"),\n", + "]\n", + "\n", + "filter_messages(messages, include_types=\"human\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7b663a1e-a8ae-453e-a072-8dd75dfab460", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content='you are a good assistant', id='1'),\n", + " HumanMessage(content='real input', name='bob', id='4'),\n", + " AIMessage(content='real output', name='alice', id='5')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filter_messages(messages, exclude_names=[\"example_user\", \"example_assistant\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "db170e46-03f8-4710-b967-23c70c3ac054", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='example input', 
name='example_user', id='2'),\n", + " HumanMessage(content='real input', name='bob', id='4'),\n", + " AIMessage(content='real output', name='alice', id='5')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filter_messages(messages, include_types=[HumanMessage, AIMessage], exclude_ids=[\"3\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b7c4e5ad-d1b4-4c18-b250-864adde8f0dd", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "`filter_messages` can be used in an imperatively (like above) or declaratively, making it easy to compose with other components in a chain:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "675f8f79-db39-401c-a582-1df2478cba30", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=[], response_metadata={'id': 'msg_01Wz7gBHahAwkZ1KCBNtXmwA', 'model': 'claude-3-sonnet-20240229', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 16, 'output_tokens': 3}}, id='run-b5d8a3fe-004f-4502-a071-a6c025031827-0', usage_metadata={'input_tokens': 16, 'output_tokens': 3, 'total_tokens': 19})" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pip install -U langchain-anthropic\n", + "from langchain_anthropic import ChatAnthropic\n", + "\n", + "llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)\n", + "# Notice we don't pass in messages. This creates\n", + "# a RunnableLambda that takes messages as input\n", + "filter_ = filter_messages(exclude_names=[\"example_user\", \"example_assistant\"])\n", + "chain = filter_ | llm\n", + "chain.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "4133ab28-f49c-480f-be92-b51eb6559153", + "metadata": {}, + "source": [ + "Looking at the LangSmith trace we can see that before the messages are passed to the model they are filtered: https://smith.langchain.com/public/f808a724-e072-438e-9991-657cc9e7e253/r\n", + "\n", + "Looking at just the filter_, we can see that it's a Runnable object that can be invoked like all Runnables:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c090116a-1fef-43f6-a178-7265dff9db00", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='real input', name='bob', id='4'),\n", + " AIMessage(content='real output', name='alice', id='5')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "filter_.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "ff339066-d424-4042-8cca-cd4b007c1a8e", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For a complete description of all arguments head to the API reference: https://api.python.langchain.com/en/latest/messages/langchain_core.messages.utils.filter_messages.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "poetry-venv-2", + "language": "python", + "name": "poetry-venv-2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/how_to/merge_message_runs.ipynb b/docs/docs/how_to/merge_message_runs.ipynb new file mode 100644 index 0000000000000..61dd3e49a8aac --- /dev/null +++ 
b/docs/docs/how_to/merge_message_runs.ipynb @@ -0,0 +1,170 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac47bfab-0f4f-42ce-8bb6-898ef22a0338", + "metadata": {}, + "source": [ + "# How to merge consecutive messages of the same type\n", + "\n", + "Certain models do not support passing in consecutive messages of the same type (a.k.a. \"runs\" of the same message type).\n", + "\n", + "The `merge_message_runs` utility makes it easy to merge consecutive messages of the same type.\n", + "\n", + "## Basic usage" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "1a215bbb-c05c-40b0-a6fd-d94884d517df", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\")\n", + "\n", + "HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways'])\n", + "\n", + "AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!')\n" + ] + } + ], + "source": [ + "from langchain_core.messages import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage,\n", + " merge_message_runs,\n", + ")\n", + "\n", + "messages = [\n", + " SystemMessage(\"you're a good assistant.\"),\n", + " SystemMessage(\"you always respond with a joke.\"),\n", + " HumanMessage([{\"type\": \"text\", \"text\": \"i wonder why it's called langchain\"}]),\n", + " HumanMessage(\"and who is harrison chasing anyways\"),\n", + " AIMessage(\n", + " 'Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!'\n", + " ),\n", + " AIMessage(\"Why, he's probably chasing after the last cup of coffee in the office!\"),\n", + "]\n", + "\n", + "merged = merge_message_runs(messages)\n", + "print(\"\\n\\n\".join([repr(x) for x in merged]))" + ] + }, + { + "cell_type": "markdown", + "id": "0544c811-7112-4b76-8877-cc897407c738", + "metadata": {}, + "source": [ + "Notice that if the contents of one of the messages to merge is a list of content blocks then the merged message will have a list of content blocks. And if both messages to merge have string contents then those are concatenated with a newline character." + ] + }, + { + "cell_type": "markdown", + "id": "1b2eee74-71c8-4168-b968-bca580c25d18", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "`merge_message_runs` can be used in an imperatively (like above) or declaratively, making it easy to compose with other components in a chain:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6d5a0283-11f8-435b-b27b-7b18f7693592", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=[], response_metadata={'id': 'msg_01D6R8Naum57q8qBau9vLBUX', 'model': 'claude-3-sonnet-20240229', 'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 84, 'output_tokens': 3}}, id='run-ac0c465b-b54f-4b8b-9295-e5951250d653-0', usage_metadata={'input_tokens': 84, 'output_tokens': 3, 'total_tokens': 87})" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pip install -U langchain-anthropic\n", + "from langchain_anthropic import ChatAnthropic\n", + "\n", + "llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)\n", + "# Notice we don't pass in messages. 
This creates\n", + "# a RunnableLambda that takes messages as input\n", + "merger = merge_message_runs()\n", + "chain = merger | llm\n", + "chain.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "72e90dce-693c-4842-9526-ce6460fe956b", + "metadata": {}, + "source": [ + "Looking at the LangSmith trace we can see that before the messages are passed to the model they are merged: https://smith.langchain.com/public/ab558677-cac9-4c59-9066-1ecce5bcd87c/r\n", + "\n", + "Looking at just the merger, we can see that it's a Runnable object that can be invoked like all Runnables:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "460817a6-c327-429d-958e-181a8c46059c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant.\\nyou always respond with a joke.\"),\n", + " HumanMessage(content=[{'type': 'text', 'text': \"i wonder why it's called langchain\"}, 'and who is harrison chasing anyways']),\n", + " AIMessage(content='Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!\\nWhy, he\\'s probably chasing after the last cup of coffee in the office!')]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merger.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "4548d916-ce21-4dc6-8f19-eedb8003ace6", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For a complete description of all arguments head to the API reference: https://api.python.langchain.com/en/latest/messages/langchain_core.messages.utils.merge_message_runs.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "poetry-venv-2", + "language": "python", + "name": "poetry-venv-2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/how_to/pydantic_compatibility.md b/docs/docs/how_to/pydantic_compatibility.md new file mode 100644 index 0000000000000..50d64f42478d3 --- /dev/null +++ b/docs/docs/how_to/pydantic_compatibility.md @@ -0,0 +1,107 @@ +# How to use LangChain with different Pydantic versions + +- Pydantic v2 was released in June, 2023 (https://docs.pydantic.dev/2.0/blog/pydantic-v2-final/) +- v2 contains has a number of breaking changes (https://docs.pydantic.dev/2.0/migration/) +- Pydantic v2 and v1 are under the same package name, so both versions cannot be installed at the same time + +## LangChain Pydantic migration plan + +As of `langchain>=0.0.267`, LangChain will allow users to install either Pydantic V1 or V2. + * Internally LangChain will continue to [use V1](https://docs.pydantic.dev/latest/migration/#continue-using-pydantic-v1-features). + * During this time, users can pin their pydantic version to v1 to avoid breaking changes, or start a partial + migration using pydantic v2 throughout their code, but avoiding mixing v1 and v2 code for LangChain (see below). + +User can either pin to pydantic v1, and upgrade their code in one go once LangChain has migrated to v2 internally, or they can start a partial migration to v2, but must avoid mixing v1 and v2 code for LangChain. 
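+
+For the pinning route, one way to do it (a sketch; adapt it to whichever dependency manager your project uses) is to constrain pydantic in your own project's requirements:
+
+```bash
+# Keep pydantic on the v1 line until you are ready to migrate everything at once.
+pip install "pydantic>=1.10,<2"
+```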
+ +Below are two examples of showing how to avoid mixing pydantic v1 and v2 code in +the case of inheritance and in the case of passing objects to LangChain. + +**Example 1: Extending via inheritance** + +**YES** + +```python +from pydantic.v1 import root_validator, validator +from langchain_core.tools import BaseTool + +class CustomTool(BaseTool): # BaseTool is v1 code + x: int = Field(default=1) + + def _run(*args, **kwargs): + return "hello" + + @validator('x') # v1 code + @classmethod + def validate_x(cls, x: int) -> int: + return 1 + + +CustomTool( + name='custom_tool', + description="hello", + x=1, +) +``` + +Mixing Pydantic v2 primitives with Pydantic v1 primitives can raise cryptic errors + +**NO** + +```python +from pydantic import Field, field_validator # pydantic v2 +from langchain_core.tools import BaseTool + +class CustomTool(BaseTool): # BaseTool is v1 code + x: int = Field(default=1) + + def _run(*args, **kwargs): + return "hello" + + @field_validator('x') # v2 code + @classmethod + def validate_x(cls, x: int) -> int: + return 1 + + +CustomTool( + name='custom_tool', + description="hello", + x=1, +) +``` + +**Example 2: Passing objects to LangChain** + +**YES** + +```python +from langchain_core.tools import Tool +from pydantic.v1 import BaseModel, Field # <-- Uses v1 namespace + +class CalculatorInput(BaseModel): + question: str = Field() + +Tool.from_function( # <-- tool uses v1 namespace + func=lambda question: 'hello', + name="Calculator", + description="useful for when you need to answer questions about math", + args_schema=CalculatorInput +) +``` + +**NO** + +```python +from langchain_core.tools import Tool +from pydantic import BaseModel, Field # <-- Uses v2 namespace + +class CalculatorInput(BaseModel): + question: str = Field() + +Tool.from_function( # <-- tool uses v1 namespace + func=lambda question: 'hello', + name="Calculator", + description="useful for when you need to answer questions about math", + args_schema=CalculatorInput +) +``` diff --git a/docs/docs/how_to/serialization.ipynb b/docs/docs/how_to/serialization.ipynb new file mode 100644 index 0000000000000..d8701842050ef --- /dev/null +++ b/docs/docs/how_to/serialization.ipynb @@ -0,0 +1,305 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ab3dc782-321e-4503-96ee-ac88a15e4b5e", + "metadata": {}, + "source": [ + "# How to save and load LangChain objects\n", + "\n", + "LangChain classes implement standard methods for serialization. Serializing LangChain objects using these methods confer some advantages:\n", + "\n", + "- Secrets, such as API keys, are separated from other parameters and can be loaded back to the object on de-serialization;\n", + "- De-serialization is kept compatible across package versions, so objects that were serialized with one version of LangChain can be properly de-serialized with another.\n", + "\n", + "To save and load LangChain objects using this system, use the `dumpd`, `dumps`, `load`, and `loads` functions in the [load module](https://api.python.langchain.com/en/latest/core_api_reference.html#module-langchain_core.load) of `langchain-core`. These functions support JSON and JSON-serializable objects.\n", + "\n", + "All LangChain objects that inherit from [Serializable](https://api.python.langchain.com/en/latest/load/langchain_core.load.serializable.Serializable.html) are JSON-serializable. 
Examples include [messages](https://api.python.langchain.com/en/latest/core_api_reference.html#module-langchain_core.messages), [document objects](https://api.python.langchain.com/en/latest/documents/langchain_core.documents.base.Document.html) (e.g., as returned from [retrievers](/docs/concepts/#retrievers)), and most [Runnables](/docs/concepts/#langchain-expression-language-lcel), such as chat models, retrievers, and [chains](/docs/how_to/sequence) implemented with the LangChain Expression Language.\n", + "\n", + "Below we walk through an example with a simple [LLM chain](/docs/tutorials/llm_chain).\n", + "\n", + ":::{.callout-caution}\n", + "\n", + "De-serialization using `load` and `loads` can instantiate any serializable LangChain object. Only use this feature with trusted inputs!\n", + "\n", + "De-serialization is a beta feature and is subject to change.\n", + ":::" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "f85d9e51-2a36-4f69-83b1-c716cd43f790", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.load import dumpd, dumps, load, loads\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", \"Translate the following into {language}:\"),\n", + " (\"user\", \"{text}\"),\n", + " ],\n", + ")\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", api_key=\"llm-api-key\")\n", + "\n", + "chain = prompt | llm" + ] + }, + { + "cell_type": "markdown", + "id": "356ea99f-5cb5-4433-9a6c-2443d2be9ed3", + "metadata": {}, + "source": [ + "## Saving objects\n", + "\n", + "### To json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "26516764-d46b-4357-a6c6-bd8315bfa530", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"lc\": 1,\n", + " \"type\": \"constructor\",\n", + " \"id\": [\n", + " \"langchain\",\n", + " \"schema\",\n", + " \"runnable\",\n", + " \"RunnableSequence\"\n", + " ],\n", + " \"kwargs\": {\n", + " \"first\": {\n", + " \"lc\": 1,\n", + " \"type\": \"constructor\",\n", + " \"id\": [\n", + " \"langchain\",\n", + " \"prompts\",\n", + " \"chat\",\n", + " \"ChatPromptTemplate\"\n", + " ],\n", + " \"kwargs\": {\n", + " \"input_variables\": [\n", + " \"language\",\n", + " \"text\"\n", + " ],\n", + " \"messages\": [\n", + " {\n", + " \"lc\": 1,\n", + " \"type\": \"constructor\",\n", + " \n" + ] + } + ], + "source": [ + "string_representation = dumps(chain, pretty=True)\n", + "print(string_representation[:500])" + ] + }, + { + "cell_type": "markdown", + "id": "bd425716-545d-466b-a4e5-dc9952cfd72a", + "metadata": {}, + "source": [ + "### To a json-serializable Python dict" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "6561a968-1741-4419-8c29-e705b9d0ef39", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "dict_representation = dumpd(chain)\n", + "\n", + "print(type(dict_representation))" + ] + }, + { + "cell_type": "markdown", + "id": "711e986e-dd24-4839-9e38-c57903378a5f", + "metadata": {}, + "source": [ + "### To disk" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f818378b-f4d6-43a7-895b-76cf7359b157", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "with open(\"/tmp/chain.json\", \"w\") as fp:\n", + " json.dump(string_representation, fp)" + ] + }, + { + "cell_type": 
"markdown", + "id": "1e621a32-ff5f-4627-ad59-88cacba73c6b", + "metadata": {}, + "source": [ + "Note that the API key is withheld from the serialized representations. Parameters that are considered secret are specified by the `.lc_secrets` attribute of the LangChain object:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8225e150-000a-4fbc-9f3d-09568f4b560b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'openai_api_key': 'OPENAI_API_KEY'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.last.lc_secrets" + ] + }, + { + "cell_type": "markdown", + "id": "6d090177-eb1c-4bfb-8c13-29286afe17d9", + "metadata": {}, + "source": [ + "## Loading objects\n", + "\n", + "Specifying `secrets_map` in `load` and `loads` will load the corresponding secrets onto the de-serialized LangChain object.\n", + "\n", + "### From string" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "54a66267-5f3a-40a2-bfcc-8b44bb24c154", + "metadata": {}, + "outputs": [], + "source": [ + "chain = loads(string_representation, secrets_map={\"OPENAI_API_KEY\": \"llm-api-key\"})" + ] + }, + { + "cell_type": "markdown", + "id": "5ed9aff1-92cc-44ba-b2ec-4d12f924fa03", + "metadata": {}, + "source": [ + "### From dict" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "76979932-13de-4427-9f88-040fb05a6778", + "metadata": {}, + "outputs": [], + "source": [ + "chain = load(dict_representation, secrets_map={\"OPENAI_API_KEY\": \"llm-api-key\"})" + ] + }, + { + "cell_type": "markdown", + "id": "7dd81a2a-5163-414d-ab42-f1c35e30471b", + "metadata": {}, + "source": [ + "### From disk" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "033f62a7-3377-472a-be58-718baa6ab445", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"/tmp/chain.json\", \"r\") as fp:\n", + " chain = loads(json.load(fp), secrets_map={\"OPENAI_API_KEY\": \"llm-api-key\"})" + ] + }, + { + "cell_type": "markdown", + "id": "dc520fdb-035a-468f-a8a8-c3ffe8ed98eb", + "metadata": {}, + "source": [ + "Note that we recover the API key specified at the start of the guide:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "566b2475-d9b4-432b-8c3b-27c2f183624e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'llm-api-key'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.last.openai_api_key.get_secret_value()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b4cba53-e1d5-4979-927e-b5794a02afc3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/how_to/tool_calling_parallel.ipynb b/docs/docs/how_to/tool_calling_parallel.ipynb new file mode 100644 index 0000000000000..dab57440390e1 --- /dev/null +++ b/docs/docs/how_to/tool_calling_parallel.ipynb @@ -0,0 +1,108 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Disabling parallel tool calling (OpenAI only)\n", + "\n", + "OpenAI tool 
calling performs tool calling in parallel by default. That means that if we ask a question like \"What is the weather in Tokyo, New York, and Chicago?\" and we have a tool for getting the weather, it will call the tool 3 times in parallel. We can force it to call only a single tool once by using the ``parallel_tool_call`` parameter." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's set up our tools and model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Adds a and b.\"\"\"\n", + " return a + b\n", + "\n", + "\n", + "@tool\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiplies a and b.\"\"\"\n", + " return a * b\n", + "\n", + "\n", + "tools = [add, multiply]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's show a quick example of how disabling parallel tool calls work:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'add',\n", + " 'args': {'a': 2, 'b': 2},\n", + " 'id': 'call_Hh4JOTCDM85Sm9Pr84VKrWu5'}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "llm_with_tools = llm.bind_tools(tools, parallel_tool_calls=False)\n", + "llm_with_tools.invoke(\"Please call the first tool two times\").tool_calls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, even though we explicitly told the model to call a tool twice, by disabling parallel tool calls the model was constrained to only calling one." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/tool_choice.ipynb b/docs/docs/how_to/tool_choice.ipynb new file mode 100644 index 0000000000000..050d6c6ce21fa --- /dev/null +++ b/docs/docs/how_to/tool_choice.ipynb @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to force tool calling behavior\n", + "\n", + "In order to force our LLM to spelect a specific tool, we can use the `tool_choice` parameter to ensure certain behavior. 
First, let's define our model and tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Adds a and b.\"\"\"\n", + " return a + b\n", + "\n", + "\n", + "@tool\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiplies a and b.\"\"\"\n", + " return a * b\n", + "\n", + "\n", + "tools = [add, multiply]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "%pip install -qU langchain langchain_openai\n", + "\n", + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For example, we can force our model to call the multiply tool by using the following code:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_9cViskmLvPnHjXk9tbVla5HA', 'function': {'arguments': '{\"a\":2,\"b\":4}', 'name': 'Multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 103, 'total_tokens': 112}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-095b827e-2bdd-43bb-8897-c843f4504883-0', tool_calls=[{'name': 'Multiply', 'args': {'a': 2, 'b': 4}, 'id': 'call_9cViskmLvPnHjXk9tbVla5HA'}], usage_metadata={'input_tokens': 103, 'output_tokens': 9, 'total_tokens': 112})" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "llm_forced_to_multiply = llm.bind_tools(tools, tool_choice=\"Multiply\")\n", + "llm_forced_to_multiply.invoke(\"what is 2 + 4\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Even if we pass it something that doesn't require multiplication, it will still call the tool!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also force our model to select at least one of our tools by passing in the \"any\" (or \"required\", which is OpenAI-specific) keyword to the `tool_choice` parameter."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_mCSiJntCwHJUBfaHZVUB2D8W', 'function': {'arguments': '{\"a\":1,\"b\":2}', 'name': 'Add'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 94, 'total_tokens': 109}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-28f75260-9900-4bed-8cd3-f1579abb65e5-0', tool_calls=[{'name': 'Add', 'args': {'a': 1, 'b': 2}, 'id': 'call_mCSiJntCwHJUBfaHZVUB2D8W'}], usage_metadata={'input_tokens': 94, 'output_tokens': 15, 'total_tokens': 109})" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "llm_forced_to_use_tool = llm.bind_tools(tools, tool_choice=\"any\")\n", + "llm_forced_to_use_tool.invoke(\"What day is today?\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/tool_results_pass_to_model.ipynb b/docs/docs/how_to/tool_results_pass_to_model.ipynb new file mode 100644 index 0000000000000..c8ff47cebbf20 --- /dev/null +++ b/docs/docs/how_to/tool_results_pass_to_model.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to pass tool outputs to the model\n", + "\n", + "If we're using the model-generated tool invocations to actually call tools and want to pass the tool results back to the model, we can do so using `ToolMessage`s. First, let's define our tools and our model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Adds a and b.\"\"\"\n", + " return a + b\n", + "\n", + "\n", + "@tool\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiplies a and b.\"\"\"\n", + " return a * b\n", + "\n", + "\n", + "tools = [add, multiply]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", + "llm_with_tools = llm.bind_tools(tools)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can use ``ToolMessage`` to pass back the output of the tool calls to the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='What is 3 * 12? 
Also, what is 11 + 49?'),\n", + " AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_svc2GLSxNFALbaCAbSjMI9J8', 'function': {'arguments': '{\"a\": 3, \"b\": 12}', 'name': 'Multiply'}, 'type': 'function'}, {'id': 'call_r8jxte3zW6h3MEGV3zH2qzFh', 'function': {'arguments': '{\"a\": 11, \"b\": 49}', 'name': 'Add'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 50, 'prompt_tokens': 105, 'total_tokens': 155}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_d9767fc5b9', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-a79ad1dd-95f1-4a46-b688-4c83f327a7b3-0', tool_calls=[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_svc2GLSxNFALbaCAbSjMI9J8'}, {'name': 'Add', 'args': {'a': 11, 'b': 49}, 'id': 'call_r8jxte3zW6h3MEGV3zH2qzFh'}]),\n", + " ToolMessage(content='36', tool_call_id='call_svc2GLSxNFALbaCAbSjMI9J8'),\n", + " ToolMessage(content='60', tool_call_id='call_r8jxte3zW6h3MEGV3zH2qzFh')]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from langchain_core.messages import HumanMessage, ToolMessage\n", + "\n", + "query = \"What is 3 * 12? Also, what is 11 + 49?\"\n", + "\n", + "messages = [HumanMessage(query)]\n", + "ai_msg = llm_with_tools.invoke(messages)\n", + "messages.append(ai_msg)\n", + "for tool_call in ai_msg.tool_calls:\n", + " selected_tool = {\"add\": add, \"multiply\": multiply}[tool_call[\"name\"].lower()]\n", + " tool_output = selected_tool.invoke(tool_call[\"args\"])\n", + " messages.append(ToolMessage(tool_output, tool_call_id=tool_call[\"id\"]))\n", + "messages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='3 * 12 is 36 and 11 + 49 is 60.', response_metadata={'token_usage': {'completion_tokens': 18, 'prompt_tokens': 171, 'total_tokens': 189}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': 'fp_d9767fc5b9', 'finish_reason': 'stop', 'logprobs': None}, id='run-20b52149-e00d-48ea-97cf-f8de7a255f8c-0')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "llm_with_tools.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we pass back the same `id` in the `ToolMessage` as the what we receive from the model in order to help the model match tool responses with tool calls." 
+ ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/tool_runtime.ipynb b/docs/docs/how_to/tool_runtime.ipynb new file mode 100644 index 0000000000000..b846e3b36ade6 --- /dev/null +++ b/docs/docs/how_to/tool_runtime.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to pass run time values to a tool\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "- [Chat models](/docs/concepts/#chat-models)\n", + "- [LangChain Tools](/docs/concepts/#tools)\n", + "- [How to create tools](/docs/how_to/custom_tools)\n", + "- [How to use a model to call tools](https://python.langchain.com/v0.2/docs/how_to/tool_calling)\n", + ":::\n", + "\n", + ":::{.callout-info} Supported models\n", + "\n", + "This how-to guide uses models with native tool calling capability.\n", + "You can find a [list of all models that support tool calling](/docs/integrations/chat/).\n", + "\n", + ":::\n", + "\n", + ":::{.callout-info} Using with LangGraph\n", + "\n", + "If you're using LangGraph, please refer to [this how-to guide](https://langchain-ai.github.io/langgraph/how-tos/pass-run-time-values-to-tools/)\n", + "which shows how to create an agent that keeps track of a given user's favorite pets.\n", + ":::\n", + "\n", + "You may need to bind values to a tool that are only known at runtime. For example, the tool logic may require using the ID of the user who made the request.\n", + "\n", + "Most of the time, such values should not be controlled by the LLM. In fact, allowing the LLM to control the user ID may lead to a security risk.\n", + "\n", + "Instead, the LLM should only control the parameters of the tool that are meant to be controlled by the LLM, while other parameters (such as user ID) should be fixed by the application logic.\n", + "\n", + "This how-to guide shows a simple design pattern that creates the tool dynamically at run time and binds to them appropriate values." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can bind them to chat models as follows:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython -m pip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "%pip install -qU langchain langchain_openai\n", + "\n", + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "if \"OPENAI_API_KEY\" not in os.environ:\n", + " os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Passing request time information\n", + "\n", + "The idea is to create the tool dynamically at request time, and bind to it the appropriate information. For example,\n", + "this information may be the user ID as resolved from the request itself." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "from langchain_core.output_parsers import JsonOutputParser\n", + "from langchain_core.tools import BaseTool, tool" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "user_to_pets = {}\n", + "\n", + "\n", + "def generate_tools_for_user(user_id: str) -> List[BaseTool]:\n", + " \"\"\"Generate a set of tools that have a user id associated with them.\"\"\"\n", + "\n", + " @tool\n", + " def update_favorite_pets(pets: List[str]) -> None:\n", + " \"\"\"Add the list of favorite pets.\"\"\"\n", + " user_to_pets[user_id] = pets\n", + "\n", + " @tool\n", + " def delete_favorite_pets() -> None:\n", + " \"\"\"Delete the list of favorite pets.\"\"\"\n", + " if user_id in user_to_pets:\n", + " del user_to_pets[user_id]\n", + "\n", + " @tool\n", + " def list_favorite_pets() -> None:\n", + " \"\"\"List favorite pets if any.\"\"\"\n", + " return user_to_pets.get(user_id, [])\n", + "\n", + " return [update_favorite_pets, delete_favorite_pets, list_favorite_pets]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verify that the tools work correctly" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'eugene': ['cat', 'dog']}\n", + "['cat', 'dog']\n" + ] + } + ], + "source": [ + "update_pets, delete_pets, list_pets = generate_tools_for_user(\"eugene\")\n", + "update_pets.invoke({\"pets\": [\"cat\", \"dog\"]})\n", + "print(user_to_pets)\n", + "print(list_pets.invoke({}))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "\n", + "def 
handle_run_time_request(user_id: str, query: str):\n", + " \"\"\"Handle run time request.\"\"\"\n", + " tools = generate_tools_for_user(user_id)\n", + " llm_with_tools = llm.bind_tools(tools)\n", + " prompt = ChatPromptTemplate.from_messages(\n", + " [(\"system\", \"You are a helpful assistant.\")],\n", + " )\n", + " chain = prompt | llm_with_tools\n", + " return llm_with_tools.invoke(query)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This code will allow the LLM to invoke the tools, but the LLM is **unaware** of the fact that a **user ID** even exists!" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'update_favorite_pets',\n", + " 'args': {'pets': ['cats', 'parrots']},\n", + " 'id': 'call_jJvjPXsNbFO5MMgW0q84iqCN'}]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ai_message = handle_run_time_request(\n", + " \"eugene\", \"my favorite animals are cats and parrots.\"\n", + ")\n", + "ai_message.tool_calls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + ":::{.callout-important}\n", + "\n", + "Chat models only output requests to invoke tools, they don't actually invoke the underlying tools.\n", + "\n", + "To see how to invoke the tools, please refer to [how to use a model to call tools](https://python.langchain.com/v0.2/docs/how_to/tool_calling).\n", + ":::" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/how_to/tool_streaming.ipynb b/docs/docs/how_to/tool_streaming.ipynb new file mode 100644 index 0000000000000..29958a43e4aaa --- /dev/null +++ b/docs/docs/how_to/tool_streaming.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to stream tool calls\n", + "\n", + "When tools are called in a streaming context, \n", + "[message chunks](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n", + "will be populated with [tool call chunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.tool.ToolCallChunk.html#langchain_core.messages.tool.ToolCallChunk) \n", + "objects in a list via the `.tool_call_chunks` attribute. A `ToolCallChunk` includes \n", + "optional string fields for the tool `name`, `args`, and `id`, and includes an optional \n", + "integer field `index` that can be used to join chunks together. Fields are optional \n", + "because portions of a tool call may be streamed across different chunks (e.g., a chunk \n", + "that includes a substring of the arguments may have null values for the tool name and id).\n", + "\n", + "Because message chunks inherit from their parent message class, an \n", + "[AIMessageChunk](https://api.python.langchain.com/en/latest/messages/langchain_core.messages.ai.AIMessageChunk.html#langchain_core.messages.ai.AIMessageChunk) \n", + "with tool call chunks will also include `.tool_calls` and `.invalid_tool_calls` fields. 
\n", + "These fields are parsed best-effort from the message's tool call chunks.\n", + "\n", + "Note that not all providers currently support streaming for tool calls. Before we start let's define our tools and our model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Adds a and b.\"\"\"\n", + " return a + b\n", + "\n", + "\n", + "@tool\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiplies a and b.\"\"\"\n", + " return a * b\n", + "\n", + "\n", + "tools = [add, multiply]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", + "llm_with_tools = llm.bind_tools(tools)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's define our query and stream our output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[{'name': 'Multiply', 'args': '', 'id': 'call_3aQwTP9CYlFxwOvQZPHDu6wL', 'index': 0}]\n", + "[{'name': None, 'args': '{\"a\"', 'id': None, 'index': 0}]\n", + "[{'name': None, 'args': ': 3, ', 'id': None, 'index': 0}]\n", + "[{'name': None, 'args': '\"b\": 1', 'id': None, 'index': 0}]\n", + "[{'name': None, 'args': '2}', 'id': None, 'index': 0}]\n", + "[{'name': 'Add', 'args': '', 'id': 'call_SQUoSsJz2p9Kx2x73GOgN1ja', 'index': 1}]\n", + "[{'name': None, 'args': '{\"a\"', 'id': None, 'index': 1}]\n", + "[{'name': None, 'args': ': 11,', 'id': None, 'index': 1}]\n", + "[{'name': None, 'args': ' \"b\": ', 'id': None, 'index': 1}]\n", + "[{'name': None, 'args': '49}', 'id': None, 'index': 1}]\n", + "[]\n" + ] + } + ], + "source": [ + "query = \"What is 3 * 12? Also, what is 11 + 49?\"\n", + "\n", + "async for chunk in llm_with_tools.astream(query):\n", + " print(chunk.tool_call_chunks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that adding message chunks will merge their corresponding tool call chunks. 
This is the principle by which LangChain's various [tool output parsers](/docs/how_to/output_parser_structured) support streaming.\n", + "\n", + "For example, below we accumulate tool call chunks:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[{'name': 'Multiply', 'args': '', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}]\n", + "[{'name': 'Multiply', 'args': '{\"a\"', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, ', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 1', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '{\"a\"', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '{\"a\": 11,', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '{\"a\": 11, \"b\": ', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '{\"a\": 11, \"b\": 49}', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n", + "[{'name': 'Multiply', 'args': '{\"a\": 3, \"b\": 12}', 'id': 'call_AkL3dVeCjjiqvjv8ckLxL3gP', 'index': 0}, {'name': 'Add', 'args': '{\"a\": 11, \"b\": 49}', 'id': 'call_b4iMiB3chGNGqbt5SjqqD2Wh', 'index': 1}]\n" + ] + } + ], + "source": [ + "first = True\n", + "async for chunk in llm_with_tools.astream(query):\n", + " if first:\n", + " gathered = chunk\n", + " first = False\n", + " else:\n", + " gathered = gathered + chunk\n", + "\n", + " print(gathered.tool_call_chunks)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(gathered.tool_call_chunks[0][\"args\"]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And below we accumulate tool calls to demonstrate partial parsing:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[]\n", + "[]\n", + "[{'name': 'Multiply', 'args': {}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 1}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}, {'name': 'Add', 'args': {}, 'id': 
'call_54Hx3DGjZitFlEjgMe1DYonh'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}, {'name': 'Add', 'args': {'a': 11}, 'id': 'call_54Hx3DGjZitFlEjgMe1DYonh'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}, {'name': 'Add', 'args': {'a': 11}, 'id': 'call_54Hx3DGjZitFlEjgMe1DYonh'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}, {'name': 'Add', 'args': {'a': 11, 'b': 49}, 'id': 'call_54Hx3DGjZitFlEjgMe1DYonh'}]\n", + "[{'name': 'Multiply', 'args': {'a': 3, 'b': 12}, 'id': 'call_4p0D4tHVXSiae9Mu0e8jlI1m'}, {'name': 'Add', 'args': {'a': 11, 'b': 49}, 'id': 'call_54Hx3DGjZitFlEjgMe1DYonh'}]\n" + ] + } + ], + "source": [ + "first = True\n", + "async for chunk in llm_with_tools.astream(query):\n", + " if first:\n", + " gathered = chunk\n", + " first = False\n", + " else:\n", + " gathered = gathered + chunk\n", + "\n", + " print(gathered.tool_calls)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "print(type(gathered.tool_calls[0][\"args\"]))" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/tools_few_shot.ipynb b/docs/docs/how_to/tools_few_shot.ipynb new file mode 100644 index 0000000000000..c4f3570da1a94 --- /dev/null +++ b/docs/docs/how_to/tools_few_shot.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to use few-shot prompting with tool calling\n", + "\n", + "For more complex tool use it's very useful to add few-shot examples to the prompt. We can do this by adding `AIMessage`s with `ToolCall`s and corresponding `ToolMessage`s to our prompt.\n", + "\n", + "First let's define our tools and model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.tools import tool\n", + "\n", + "\n", + "@tool\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Adds a and b.\"\"\"\n", + " return a + b\n", + "\n", + "\n", + "@tool\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiplies a and b.\"\"\"\n", + " return a * b\n", + "\n", + "\n", + "tools = [add, multiply]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass()\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo-0125\", temperature=0)\n", + "llm_with_tools = llm.bind_tools(tools)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's run our model where we can notice that even with some special instructions our model can get tripped up by order of operations. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'Multiply',\n", + " 'args': {'a': 119, 'b': 8},\n", + " 'id': 'call_T88XN6ECucTgbXXkyDeC2CQj'},\n", + " {'name': 'Add',\n", + " 'args': {'a': 952, 'b': -20},\n", + " 'id': 'call_licdlmGsRqzup8rhqJSb1yZ4'}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "llm_with_tools.invoke(\n", + " \"Whats 119 times 8 minus 20. 
Don't do any math yourself, only use tools for math. Respect order of operations\"\n", + ").tool_calls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The model shouldn't be trying to add anything yet, since it technically can't know the results of 119 * 8 yet.\n", + "\n", + "By adding a prompt with some examples we can correct this behavior:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'Multiply',\n", + " 'args': {'a': 119, 'b': 8},\n", + " 'id': 'call_9MvuwQqg7dlJupJcoTWiEsDo'}]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from langchain_core.messages import AIMessage, HumanMessage, ToolMessage\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "\n", + "examples = [\n", + " HumanMessage(\n", + " \"What's the product of 317253 and 128472 plus four\", name=\"example_user\"\n", + " ),\n", + " AIMessage(\n", + " \"\",\n", + " name=\"example_assistant\",\n", + " tool_calls=[\n", + " {\"name\": \"Multiply\", \"args\": {\"x\": 317253, \"y\": 128472}, \"id\": \"1\"}\n", + " ],\n", + " ),\n", + " ToolMessage(\"16505054784\", tool_call_id=\"1\"),\n", + " AIMessage(\n", + " \"\",\n", + " name=\"example_assistant\",\n", + " tool_calls=[{\"name\": \"Add\", \"args\": {\"x\": 16505054784, \"y\": 4}, \"id\": \"2\"}],\n", + " ),\n", + " ToolMessage(\"16505054788\", tool_call_id=\"2\"),\n", + " AIMessage(\n", + " \"The product of 317253 and 128472 plus four is 16505054788\",\n", + " name=\"example_assistant\",\n", + " ),\n", + "]\n", + "\n", + "system = \"\"\"You are bad at math but are an expert at using a calculator. \n", + "\n", + "Use past tool usage as an example of how to correctly use the tools.\"\"\"\n", + "few_shot_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", system),\n", + " *examples,\n", + " (\"human\", \"{query}\"),\n", + " ]\n", + ")\n", + "\n", + "chain = {\"query\": RunnablePassthrough()} | few_shot_prompt | llm_with_tools\n", + "chain.invoke(\"Whats 119 times 8 minus 20\").tool_calls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And we get the correct output this time.\n", + "\n", + "Here's what the [LangSmith trace](https://smith.langchain.com/public/f70550a1-585f-4c9d-a643-13148ab1616f/r) looks like." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/tools_model_specific.ipynb b/docs/docs/how_to/tools_model_specific.ipynb new file mode 100644 index 0000000000000..8596f1815baf9 --- /dev/null +++ b/docs/docs/how_to/tools_model_specific.ipynb @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# How to bind model-specific tools\n", + "\n", + "Providers adopt different conventions for formatting tool schemas. \n", + "For instance, OpenAI uses a format like this:\n", + "\n", + "- `type`: The type of the tool. 
At the time of writing, this is always `\"function\"`.\n", + "- `function`: An object containing tool parameters.\n", + "- `function.name`: The name of the schema to output.\n", + "- `function.description`: A high level description of the schema to output.\n", + "- `function.parameters`: The nested details of the schema you want to extract, formatted as a [JSON schema](https://json-schema.org/) dict.\n", + "\n", + "We can bind this model-specific format directly to the model as well if preferred. Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_mn4ELw1NbuE0DFYhIeK0GrPe', 'function': {'arguments': '{\"a\":119,\"b\":8}', 'name': 'multiply'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 62, 'total_tokens': 79}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_c2295e73ad', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run-353e8a9a-7125-4f94-8c68-4f3da4c21120-0', tool_calls=[{'name': 'multiply', 'args': {'a': 119, 'b': 8}, 'id': 'call_mn4ELw1NbuE0DFYhIeK0GrPe'}])" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "\n", + "model = ChatOpenAI()\n", + "\n", + "model_with_tools = model.bind(\n", + " tools=[\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"multiply\",\n", + " \"description\": \"Multiply two integers together.\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"a\": {\"type\": \"number\", \"description\": \"First integer\"},\n", + " \"b\": {\"type\": \"number\", \"description\": \"Second integer\"},\n", + " },\n", + " \"required\": [\"a\", \"b\"],\n", + " },\n", + " },\n", + " }\n", + " ]\n", + ")\n", + "\n", + "model_with_tools.invoke(\"Whats 119 times 8?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is functionally equivalent to the `bind_tools()` method." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/how_to/trim_messages.ipynb b/docs/docs/how_to/trim_messages.ipynb new file mode 100644 index 0000000000000..efbe0c009b88b --- /dev/null +++ b/docs/docs/how_to/trim_messages.ipynb @@ -0,0 +1,479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b5ee5b75-6876-4d62-9ade-5a7a808ae5a2", + "metadata": {}, + "source": [ + "# How to trim messages\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Messages](/docs/concepts/#messages)\n", + "- [Chat models](/docs/concepts/#chat-models)\n", + "- [Chaining](/docs/how_to/sequence/)\n", + "- [Chat history](/docs/concepts/#chat-history)\n", + "\n", + "The methods in this guide also require `langchain-core>=0.2.9`.\n", + "\n", + ":::\n", + "\n", + "All models have finite context windows, meaning there's a limit to how many tokens they can take as input. 
If you have very long messages or a chain/agent that accumulates a long message is history, you'll need to manage the length of the messages you're passing in to the model.\n", + "\n", + "The `trim_messages` util provides some basic strategies for trimming a list of messages to be of a certain token length.\n", + "\n", + "## Getting the last `max_tokens` tokens\n", + "\n", + "To get the last `max_tokens` in the list of Messages we can set `strategy=\"last\"`. Notice that for our `token_counter` we can pass in a function (more on that below) or a language model (since language models have a message token counting method). It makes sense to pass in a model when you're trimming your messages to fit into the context window of that specific model:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c974633b-3bd0-4844-8a8f-85e3e25f13fe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# pip install -U langchain-openai\n", + "from langchain_core.messages import (\n", + " AIMessage,\n", + " HumanMessage,\n", + " SystemMessage,\n", + " trim_messages,\n", + ")\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "messages = [\n", + " SystemMessage(\"you're a good assistant, you always respond with a joke.\"),\n", + " HumanMessage(\"i wonder why it's called langchain\"),\n", + " AIMessage(\n", + " 'Well, I guess they thought \"WordRope\" and \"SentenceString\" just didn\\'t have the same ring to it!'\n", + " ),\n", + " HumanMessage(\"and who is harrison chasing anyways\"),\n", + " AIMessage(\n", + " \"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"\n", + " ),\n", + " HumanMessage(\"what do you call a speechless parrot\"),\n", + "]\n", + "\n", + "trim_messages(\n", + " messages,\n", + " max_tokens=45,\n", + " strategy=\"last\",\n", + " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d3f46654-c4b2-4136-b995-91c3febe5bf9", + "metadata": {}, + "source": [ + "If we want to always keep the initial system message we can specify `include_system=True`:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "589b0223-3a73-44ec-8315-2dba3ee6117d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trim_messages(\n", + " messages,\n", + " max_tokens=45,\n", + " strategy=\"last\",\n", + " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " include_system=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8a8b542c-04d1-4515-8d82-b999ea4fac4f", + "metadata": {}, + "source": [ + "If we want to allow splitting up the contents of a message we can specify `allow_partial=True`:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8c46a209-dddd-4d01-81f6-f6ae55d3225c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", + " 
AIMessage(content=\"\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trim_messages(\n", + " messages,\n", + " max_tokens=56,\n", + " strategy=\"last\",\n", + " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " include_system=True,\n", + " allow_partial=True,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "306adf9c-41cd-495c-b4dc-e4f43dd7f8f8", + "metadata": {}, + "source": [ + "If we need to make sure that our first message (excluding the system message) is always of a specific type, we can specify `start_on`:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "878a730b-fe44-4e9d-ab65-7b8f7b069de8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trim_messages(\n", + " messages,\n", + " max_tokens=60,\n", + " strategy=\"last\",\n", + " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + " include_system=True,\n", + " start_on=\"human\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7f5d391d-235b-4091-b2de-c22866b478f3", + "metadata": {}, + "source": [ + "## Getting the first `max_tokens` tokens\n", + "\n", + "We can perform the flipped operation of getting the *first* `max_tokens` by specifying `strategy=\"first\"`:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5f56ae54-1a39-4019-9351-3b494c003d5b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", + " HumanMessage(content=\"i wonder why it's called langchain\")]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trim_messages(\n", + " messages,\n", + " max_tokens=45,\n", + " strategy=\"first\",\n", + " token_counter=ChatOpenAI(model=\"gpt-4o\"),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "ab70bf70-1e5a-4d51-b9b8-a823bf2cf532", + "metadata": {}, + "source": [ + "## Writing a custom token counter\n", + "\n", + "We can write a custom token counter function that takes in a list of messages and returns an int." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "1c1c3b1e-2ece-49e7-a3b6-e69877c1633b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[AIMessage(content=\"Hmmm let me think.\\n\\nWhy, he's probably chasing after the last cup of coffee in the office!\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from typing import List\n", + "\n", + "# pip install tiktoken\n", + "import tiktoken\n", + "from langchain_core.messages import BaseMessage, ToolMessage\n", + "\n", + "\n", + "def str_token_counter(text: str) -> int:\n", + " enc = tiktoken.get_encoding(\"o200k_base\")\n", + " return len(enc.encode(text))\n", + "\n", + "\n", + "def tiktoken_counter(messages: List[BaseMessage]) -> int:\n", + " \"\"\"Approximately reproduce https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n", + "\n", + " For simplicity only supports str Message.contents.\n", + " \"\"\"\n", + " num_tokens = 3 # every reply is primed with <|start|>assistant<|message|>\n", + " tokens_per_message = 3\n", + " tokens_per_name = 1\n", + " for msg in messages:\n", + " if isinstance(msg, HumanMessage):\n", + " role = \"user\"\n", + " elif isinstance(msg, AIMessage):\n", + " role = \"assistant\"\n", + " elif isinstance(msg, ToolMessage):\n", + " role = \"tool\"\n", + " elif isinstance(msg, SystemMessage):\n", + " role = \"system\"\n", + " else:\n", + " raise ValueError(f\"Unsupported messages type {msg.__class__}\")\n", + " num_tokens += (\n", + " tokens_per_message\n", + " + str_token_counter(role)\n", + " + str_token_counter(msg.content)\n", + " )\n", + " if msg.name:\n", + " num_tokens += tokens_per_name + str_token_counter(msg.name)\n", + " return num_tokens\n", + "\n", + "\n", + "trim_messages(\n", + " messages,\n", + " max_tokens=45,\n", + " strategy=\"last\",\n", + " token_counter=tiktoken_counter,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "4b2a672b-c007-47c5-9105-617944dc0a6a", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "`trim_messages` can be used in an imperatively (like above) or declaratively, making it easy to compose with other components in a chain" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "96aa29b2-01e0-437c-a1ab-02fb0141cb57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='A: A \"Polly-gone\"!', response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 32, 'total_tokens': 41}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_66b29dffce', 'finish_reason': 'stop', 'logprobs': None}, id='run-83e96ddf-bcaa-4f63-824c-98b0f8a0d474-0', usage_metadata={'input_tokens': 32, 'output_tokens': 9, 'total_tokens': 41})" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llm = ChatOpenAI(model=\"gpt-4o\")\n", + "\n", + "# Notice we don't pass in messages. 
This creates\n", + "# a RunnableLambda that takes messages as input\n", + "trimmer = trim_messages(\n", + " max_tokens=45,\n", + " strategy=\"last\",\n", + " token_counter=llm,\n", + " include_system=True,\n", + ")\n", + "\n", + "chain = trimmer | llm\n", + "chain.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "4d91d390-e7f7-467b-ad87-d100411d7a21", + "metadata": {}, + "source": [ + "Looking at the LangSmith trace we can see that before the messages are passed to the model they are first trimmed: https://smith.langchain.com/public/65af12c4-c24d-4824-90f0-6547566e59bb/r\n", + "\n", + "Looking at just the trimmer, we can see that it's a Runnable object that can be invoked like all Runnables:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1ff02d0a-353d-4fac-a77c-7c2c5262abd9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[SystemMessage(content=\"you're a good assistant, you always respond with a joke.\"),\n", + " HumanMessage(content='what do you call a speechless parrot')]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trimmer.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "id": "dc4720c8-4062-4ebc-9385-58411202ce6e", + "metadata": {}, + "source": [ + "## Using with ChatMessageHistory\n", + "\n", + "Trimming messages is especially useful when [working with chat histories](/docs/how_to/message_history/), which can get arbitrarily long:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "a9517858-fc2f-4dc3-898d-bf98a0e905a0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content='A \"polly-no-wanna-cracker\"!', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 32, 'total_tokens': 42}, 'model_name': 'gpt-4o-2024-05-13', 'system_fingerprint': 'fp_5bf7397cd3', 'finish_reason': 'stop', 'logprobs': None}, id='run-054dd309-3497-4e7b-b22a-c1859f11d32e-0', usage_metadata={'input_tokens': 32, 'output_tokens': 10, 'total_tokens': 42})" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_core.chat_history import InMemoryChatMessageHistory\n", + "from langchain_core.runnables.history import RunnableWithMessageHistory\n", + "\n", + "chat_history = InMemoryChatMessageHistory(messages=messages[:-1])\n", + "\n", + "\n", + "def dummy_get_session_history(session_id):\n", + " if session_id != \"1\":\n", + " return InMemoryChatMessageHistory()\n", + " return chat_history\n", + "\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-4o\")\n", + "\n", + "trimmer = trim_messages(\n", + " max_tokens=45,\n", + " strategy=\"last\",\n", + " token_counter=llm,\n", + " include_system=True,\n", + ")\n", + "\n", + "chain = trimmer | llm\n", + "chain_with_history = RunnableWithMessageHistory(chain, dummy_get_session_history)\n", + "chain_with_history.invoke(\n", + " [HumanMessage(\"what do you call a speechless parrot\")],\n", + " config={\"configurable\": {\"session_id\": \"1\"}},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "556b7b4c-43cb-41de-94fc-1a41f4ec4d2e", + "metadata": {}, + "source": [ + "Looking at the LangSmith trace we can see that we retrieve all of our messages but before the messages are passed to the model they are trimmed to be just the system message and last human message: https://smith.langchain.com/public/17dd700b-9994-44ca-930c-116e00997315/r" + ] + }, + { + "cell_type": "markdown", + "id": 
"75dc7b84-b92f-44e7-8beb-ba22398e4efb", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For a complete description of all arguments head to the API reference: https://api.python.langchain.com/en/latest/messages/langchain_core.messages.utils.trim_messages.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/callbacks/upstash_ratelimit.ipynb b/docs/docs/integrations/callbacks/upstash_ratelimit.ipynb new file mode 100644 index 0000000000000..78c5e15f83268 --- /dev/null +++ b/docs/docs/integrations/callbacks/upstash_ratelimit.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Upstash Ratelimit Callback\n", + "\n", + "In this guide, we will go over how to add rate limiting based on number of requests or the number of tokens using `UpstashRatelimitHandler`. This handler uses [ratelimit library of Upstash](https://github.com/upstash/ratelimit-py/), which utilizes [Upstash Redis](https://upstash.com/docs/redis/overall/getstarted).\n", + "\n", + "Upstash Ratelimit works by sending an HTTP request to Upstash Redis everytime the `limit` method is called. Remaining tokens/requests of the user are checked and updated. Based on the remaining tokens, we can stop the execution of costly operations like invoking an LLM or querying a vector store:\n", + "\n", + "```py\n", + "response = ratelimit.limit()\n", + "if response.allowed:\n", + " execute_costly_operation()\n", + "```\n", + "\n", + "`UpstashRatelimitHandler` allows you to incorporate the ratelimit logic into your chain in a few minutes.\n", + "\n", + "First, you will need to go to [the Upstash Console](https://console.upstash.com/login) and create a redis database ([see our docs](https://upstash.com/docs/redis/overall/getstarted)). After creating a database, you will need to set the environment variables:\n", + "\n", + "```\n", + "UPSTASH_REDIS_REST_URL=\"****\"\n", + "UPSTASH_REDIS_REST_TOKEN=\"****\"\n", + "```\n", + "\n", + "Next, you will need to install Upstash Ratelimit and Redis library with:\n", + "\n", + "```\n", + "pip install upstash-ratelimit upstash-redis\n", + "```\n", + "\n", + "You are now ready to add rate limiting to your chain!\n", + "\n", + "## Ratelimiting Per Request\n", + "\n", + "Let's imagine that we want to allow our users to invoke our chain 10 times per minute. Achieving this is as simple as:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error in UpstashRatelimitHandler.on_chain_start callback: UpstashRatelimitError('Request limit reached!')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Handling ratelimit. 
\n" + ] + } + ], + "source": [ + "# set env variables\n", + "import os\n", + "\n", + "os.environ[\"UPSTASH_REDIS_REST_URL\"] = \"****\"\n", + "os.environ[\"UPSTASH_REDIS_REST_TOKEN\"] = \"****\"\n", + "\n", + "from langchain_community.callbacks import UpstashRatelimitError, UpstashRatelimitHandler\n", + "from langchain_core.runnables import RunnableLambda\n", + "from upstash_ratelimit import FixedWindow, Ratelimit\n", + "from upstash_redis import Redis\n", + "\n", + "# create ratelimit\n", + "ratelimit = Ratelimit(\n", + " redis=Redis.from_env(),\n", + " # 10 requests per window, where window size is 60 seconds:\n", + " limiter=FixedWindow(max_requests=10, window=60),\n", + ")\n", + "\n", + "# create handler\n", + "user_id = \"user_id\" # should be a method which gets the user id\n", + "handler = UpstashRatelimitHandler(identifier=user_id, request_ratelimit=ratelimit)\n", + "\n", + "# create mock chain\n", + "chain = RunnableLambda(str)\n", + "\n", + "# invoke chain with handler:\n", + "try:\n", + " result = chain.invoke(\"Hello world!\", config={\"callbacks\": [handler]})\n", + "except UpstashRatelimitError:\n", + " print(\"Handling ratelimit.\", UpstashRatelimitError)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we pass the handler to the `invoke` method instead of passing the handler when defining the chain.\n", + "\n", + "For rate limiting algorithms other than `FixedWindow`, see [upstash-ratelimit docs](https://github.com/upstash/ratelimit-py?tab=readme-ov-file#ratelimiting-algorithms).\n", + "\n", + "Before executing any steps in our pipeline, ratelimit will check whether the user has exceeded the request limit. If so, `UpstashRatelimitError` is raised.\n", + "\n", + "## Ratelimiting Per Token\n", + "\n", + "Another option is to rate limit chain invocations based on:\n", + "1. the number of tokens in the prompt\n", + "2. the number of tokens in the prompt and the LLM completion\n", + "\n", + "This only works if you have an LLM in your chain. Another requirement is that the LLM you are using should return the token usage in its `LLMOutput`.\n", + "\n", + "### How it works\n", + "\n", + "The handler gets the remaining tokens before calling the LLM. If the number of remaining tokens is greater than 0, the LLM is called. Otherwise, `UpstashRatelimitError` is raised.\n", + "\n", + "After the LLM is called, its token usage is subtracted from the remaining tokens of the user. 
No error is raised at this stage of the chain.\n", + "\n", + "### Configuration\n", + "\n", + "For the first configuration, simply initialize the handler like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ratelimit = Ratelimit(\n", + " redis=Redis.from_env(),\n", + " # 1000 tokens per window, where window size is 60 seconds:\n", + " limiter=FixedWindow(max_requests=1000, window=60),\n", + ")\n", + "\n", + "handler = UpstashRatelimitHandler(identifier=user_id, token_ratelimit=ratelimit)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the second configuration, here is how to initialize the handler:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ratelimit = Ratelimit(\n", + " redis=Redis.from_env(),\n", + " # 1000 tokens per window, where window size is 60 seconds:\n", + " limiter=FixedWindow(max_requests=1000, window=60),\n", + ")\n", + "\n", + "handler = UpstashRatelimitHandler(\n", + " identifier=user_id,\n", + " token_ratelimit=ratelimit,\n", + " include_output_tokens=True, # set to True\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also employ ratelimiting based on requests and tokens at the same time, simply by passing both `request_ratelimit` and `token_ratelimit` parameters.\n", + "\n", + "Here is an example with a chain utilizing an LLM:" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Error in UpstashRatelimitHandler.on_llm_start callback: UpstashRatelimitError('Token limit reached!')\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Handling ratelimit. 
\n" + ] + } + ], + "source": [ + "# set env variables\n", + "import os\n", + "\n", + "os.environ[\"UPSTASH_REDIS_REST_URL\"] = \"****\"\n", + "os.environ[\"UPSTASH_REDIS_REST_TOKEN\"] = \"****\"\n", + "os.environ[\"OPENAI_API_KEY\"] = \"****\"\n", + "\n", + "from langchain_community.callbacks import UpstashRatelimitError, UpstashRatelimitHandler\n", + "from langchain_core.runnables import RunnableLambda\n", + "from langchain_openai import ChatOpenAI\n", + "from upstash_ratelimit import FixedWindow, Ratelimit\n", + "from upstash_redis import Redis\n", + "\n", + "# create ratelimit\n", + "ratelimit = Ratelimit(\n", + " redis=Redis.from_env(),\n", + " # 500 tokens per window, where window size is 60 seconds:\n", + " limiter=FixedWindow(max_requests=500, window=60),\n", + ")\n", + "\n", + "# create handler\n", + "user_id = \"user_id\" # should be a method which gets the user id\n", + "handler = UpstashRatelimitHandler(identifier=user_id, token_ratelimit=ratelimit)\n", + "\n", + "# create mock chain\n", + "as_str = RunnableLambda(str)\n", + "model = ChatOpenAI()\n", + "\n", + "chain = as_str | model\n", + "\n", + "# invoke chain with handler:\n", + "try:\n", + " result = chain.invoke(\"Hello world!\", config={\"callbacks\": [handler]})\n", + "except UpstashRatelimitError:\n", + " print(\"Handling ratelimit.\", UpstashRatelimitError)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lc39", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/chat/databricks.ipynb b/docs/docs/integrations/chat/databricks.ipynb new file mode 100644 index 0000000000000..1e6e325f928c1 --- /dev/null +++ b/docs/docs/integrations/chat/databricks.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "sidebar_label: Databricks\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ChatDatabricks\n", + "\n", + "> [Databricks](https://www.databricks.com/) Lakehouse Platform unifies data, analytics, and AI on one platform. \n", + "\n", + "This notebook provides a quick overview for getting started with Databricks [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatDatabricks features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.databricks.ChatDatabricks.html).\n", + "\n", + "## Overview\n", + "\n", + "`ChatDatabricks` class wraps a chat model endpoint hosted on [Databricks Model Serving](https://docs.databricks.com/en/machine-learning/model-serving/index.html). 
This example notebook shows how to wrap your serving endpoint and use it as a chat model in your LangChain application.\n", + "\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: |\n", + "| [ChatDatabricks](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.databricks.ChatDatabricks.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | beta | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-community?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-community?style=flat-square&label=%20) |\n", + "\n", + "### Model features\n", + "| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n", + "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n", + "| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | \n", + "\n", + "### Supported Methods\n", + "\n", + "`ChatDatabricks` supports all methods of `ChatModel` including async APIs.\n", + "\n", + "\n", + "### Endpoint Requirement\n", + "\n", + "The serving endpoint `ChatDatabricks` wraps must have an OpenAI-compatible chat input/output format ([reference](https://mlflow.org/docs/latest/llms/deployments/index.html#chat)). As long as the input format is compatible, `ChatDatabricks` can be used for any endpoint type hosted on [Databricks Model Serving](https://docs.databricks.com/en/machine-learning/model-serving/index.html):\n", + "\n", + "1. Foundation Models - A curated list of state-of-the-art foundation models such as DBRX, Llama3, and Mixtral-8x7B. These endpoints are ready to use in your Databricks workspace without any setup.\n", + "2. Custom Models - You can also deploy custom models to a serving endpoint via MLflow with\n", + "your choice of framework such as LangChain, PyTorch, Transformers, etc.\n", + "3. External Models - Databricks endpoints can serve models that are hosted outside Databricks as a proxy, such as a proprietary model service like OpenAI's GPT-4.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## Setup\n", + "\n", + "To access Databricks models you'll need to create a Databricks account, set up credentials (only if you are outside the Databricks workspace), and install the required packages.\n", + "\n", + "### Credentials (only if you are outside Databricks)\n", + "\n", + "If you are running your LangChain app inside Databricks, you can skip this step.\n", + "\n", + "Otherwise, you need to manually set the Databricks workspace hostname and personal access token in the `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables, respectively. See [Authentication Documentation](https://docs.databricks.com/en/dev-tools/auth/index.html#databricks-personal-access-tokens) for how to get an access token."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Enter your Databricks access token: ········\n"
+ ]
+ }
+ ],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "os.environ[\"DATABRICKS_HOST\"] = \"https://your-workspace.cloud.databricks.com\"\n",
+ "os.environ[\"DATABRICKS_TOKEN\"] = getpass.getpass(\"Enter your Databricks access token: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Installation\n",
+ "\n",
+ "The LangChain Databricks integration lives in the `langchain-community` package. Also, `mlflow >= 2.9` is required to run the code in this notebook."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -qU langchain-community \"mlflow>=2.9.0\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We first demonstrate how to query the DBRX-instruct model hosted as a Foundation Models endpoint with `ChatDatabricks`.\n",
+ "\n",
+ "For other types of endpoints, there are some differences in how to set up the endpoint itself; however, once the endpoint is ready, there is no difference in how to query it with `ChatDatabricks`. Please refer to the bottom of this notebook for examples with other types of endpoints."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Instantiation\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.chat_models import ChatDatabricks\n",
+ "\n",
+ "chat_model = ChatDatabricks(\n",
+ "    endpoint=\"databricks-dbrx-instruct\",\n",
+ "    temperature=0.1,\n",
+ "    max_tokens=256,\n",
+ "    # See https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.databricks.ChatDatabricks.html for other supported parameters\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Invocation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AIMessage(content='MLflow is an open-source platform for managing end-to-end machine learning workflows. It was introduced by Databricks in 2018. MLflow provides tools for tracking experiments, packaging and sharing code, and deploying models. It is designed to work with any machine learning library and can be used in a variety of environments, including local machines, virtual machines, and cloud-based clusters. MLflow aims to streamline the machine learning development lifecycle, making it easier for data scientists and engineers to collaborate and deploy models into production.', response_metadata={'prompt_tokens': 229, 'completion_tokens': 104, 'total_tokens': 333}, id='run-d3fb4d06-3e10-4471-83c9-c282cc62b74d-0')"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "chat_model.invoke(\"What is MLflow?\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "AIMessage(content='Databricks Model Serving is a feature of the Databricks platform that allows data scientists and engineers to easily deploy machine learning models into production. With Model Serving, you can host, manage, and serve machine learning models as APIs, making it easy to integrate them into applications and business processes. 
It supports a variety of popular machine learning frameworks, including TensorFlow, PyTorch, and scikit-learn, and provides tools for monitoring and managing the performance of deployed models. Model Serving is designed to be scalable, secure, and easy to use, making it a great choice for organizations that want to quickly and efficiently deploy machine learning models into production.', response_metadata={'prompt_tokens': 35, 'completion_tokens': 130, 'total_tokens': 165}, id='run-b3feea21-223e-4105-8627-41d647d5ccab-0')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# You can also pass a list of messages\n", + "messages = [\n", + " (\"system\", \"You are a chatbot that can answer questions about Databricks.\"),\n", + " (\"user\", \"What is Databricks Model Serving?\"),\n", + "]\n", + "chat_model.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chaining\n", + "Similar to other chat models, `ChatDatabricks` can be used as a part of a complex chain." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\"Unity Catalog is a new data catalog feature in Databricks that allows you to discover, manage, and govern all your data assets across your data landscape, including data lakes, data warehouses, and data marts. It provides a centralized repository for storing and managing metadata, data lineage, and access controls for all your data assets. Unity Catalog enables data teams to easily discover and access the data they need, while ensuring compliance with data privacy and security regulations. It is designed to work seamlessly with Databricks' Lakehouse platform, providing a unified experience for managing and analyzing all your data.\", response_metadata={'prompt_tokens': 32, 'completion_tokens': 118, 'total_tokens': 150}, id='run-82d72624-f8df-4c0d-a976-919feec09a55-0')" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You are a chatbot that can answer questions about {topic}.\",\n", + " ),\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")\n", + "\n", + "chain = prompt | chat_model\n", + "chain.invoke(\n", + " {\n", + " \"topic\": \"Databricks\",\n", + " \"question\": \"What is Unity Catalog?\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Invocation (streaming)\n", + "\n", + "`ChatDatabricks` supports streaming response by `stream` method since `langchain-community>=0.2.1`." 
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "I|'m| an| AI| and| don|'t| have| feelings|,| but| I|'m| here| and| ready| to| assist| you|.| How| can| I| help| you| today|?||"
+ ]
+ }
+ ],
+ "source": [
+ "for chunk in chat_model.stream(\"How are you?\"):\n",
+ "    print(chunk.content, end=\"|\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Async Invocation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "\n",
+ "country = [\"Japan\", \"Italy\", \"Australia\"]\n",
+ "futures = [chat_model.ainvoke(f\"Where is the capital of {c}?\") for c in country]\n",
+ "await asyncio.gather(*futures)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Wrapping Custom Model Endpoint\n",
+ "\n",
+ "Prerequisites:\n",
+ "\n",
+ "* An LLM was registered and deployed to [a Databricks serving endpoint](https://docs.databricks.com/machine-learning/model-serving/index.html) via MLflow. The endpoint must have an OpenAI-compatible chat input/output format ([reference](https://mlflow.org/docs/latest/llms/deployments/index.html#chat)).\n",
+ "* You have [\"Can Query\" permission](https://docs.databricks.com/security/auth-authz/access-control/serving-endpoint-acl.html) to the endpoint.\n",
+ "\n",
+ "Once the endpoint is ready, the usage pattern is exactly the same as for Foundation Models endpoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chat_model_custom = ChatDatabricks(\n",
+ "    endpoint=\"YOUR_ENDPOINT_NAME\",\n",
+ "    temperature=0.1,\n",
+ "    max_tokens=256,\n",
+ ")\n",
+ "\n",
+ "chat_model_custom.invoke(\"How are you?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Wrapping External Models"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Prerequisite: Create Proxy Endpoint\n",
+ "\n",
+ "First, create a new Databricks serving endpoint that proxies requests to the target external model. The endpoint creation should be fairly quick for proxying external models.\n",
+ "\n",
+ "This requires registering your OpenAI API key in the Databricks secret manager with the following commands:\n",
+ "```sh\n",
+ "# Replace `<scope>` with your scope\n",
+ "databricks secrets create-scope <scope>\n",
+ "databricks secrets put-secret <scope> openai-api-key --string-value $OPENAI_API_KEY\n",
+ "```\n",
+ "\n",
+ "For how to set up the Databricks CLI and manage secrets, please refer to https://docs.databricks.com/en/security/secrets/secrets.html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from mlflow.deployments import get_deploy_client\n",
+ "\n",
+ "client = get_deploy_client(\"databricks\")\n",
+ "\n",
+ "secret = \"secrets/<scope>/openai-api-key\"  # replace `<scope>` with your scope\n",
+ "endpoint_name = \"my-chat\"  # rename this if my-chat already exists\n",
+ "client.create_endpoint(\n",
+ "    name=endpoint_name,\n",
+ "    config={\n",
+ "        \"served_entities\": [\n",
+ "            {\n",
+ "                \"name\": \"my-chat\",\n",
+ "                \"external_model\": {\n",
+ "                    \"name\": \"gpt-3.5-turbo\",\n",
+ "                    \"provider\": \"openai\",\n",
+ "                    \"task\": \"llm/v1/chat\",\n",
+ "                    \"openai_config\": {\n",
+ "                        \"openai_api_key\": \"{{\" + secret + \"}}\",\n",
+ "                    },\n",
+ "                },\n",
+ "            }\n",
+ "        ],\n",
+ "    },\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Once the endpoint status has become \"Ready\", you can query the endpoint in the same way as other types of endpoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "chat_model_external = ChatDatabricks(\n",
+ "    endpoint=endpoint_name,\n",
+ "    temperature=0.1,\n",
+ "    max_tokens=256,\n",
+ ")\n",
+ "chat_model_external.invoke(\"How to use Databricks?\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## API reference\n",
+ "\n",
+ "For detailed documentation of all ChatDatabricks features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.ChatDatabricks.html"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/docs/integrations/chat/llamacpp.ipynb b/docs/docs/integrations/chat/llamacpp.ipynb
new file mode 100644
index 0000000000000..4e1cef0f3d4fd
--- /dev/null
+++ b/docs/docs/integrations/chat/llamacpp.ipynb
@@ -0,0 +1,418 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# ChatLlamaCpp\n",
+ "\n",
+ "This notebook provides a quick overview for getting started with a chat model integrated with [llama-cpp-python](https://github.com/abetlen/llama-cpp-python).\n",
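+ "\n",
+ "The examples in this notebook assume you have GGUF model weights available locally. One possible way to fetch the file referenced later in this notebook (assuming the `huggingface_hub` CLI is installed) is:\n",
+ "\n",
+ "```bash\n",
+ "huggingface-cli download NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF Hermes-2-Pro-Llama-3-8B-Q8_0.gguf --local-dir .\n",
+ "```"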
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Overview\n",
+ "\n",
+ "### Integration details\n",
+ "| Class | Package | Local | Serializable | JS support |\n",
+ "| :--- | :--- | :---: | :---: | :---: |\n",
+ "| [ChatLlamaCpp](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.llamacpp.ChatLlamaCpp.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅ | ❌ | ❌ |\n",
+ "\n",
+ "### Model features\n",
+ "| [Tool calling](/docs/how_to/tool_calling) | [Structured output](/docs/how_to/structured_output/) | JSON mode | Image input | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n",
+ "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n",
+ "| ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | \n",
+ "\n",
+ "## Setup\n",
+ "\n",
+ "To get started and use **all** the features shown below, we recommend using a model that has been fine-tuned for tool calling.\n",
+ "\n",
+ "We will use [Hermes-2-Pro-Llama-3-8B-GGUF](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF) from NousResearch. \n",
+ "\n",
+ "> Hermes 2 Pro is an upgraded version of Nous Hermes 2, consisting of an updated and cleaned version of the OpenHermes 2.5 Dataset, as well as a newly introduced Function Calling and JSON Mode dataset developed in-house. This new version of Hermes maintains its excellent general task and conversation capabilities - but also excels at Function Calling\n",
+ "\n",
+ "See our guides on local models to go deeper:\n",
+ "\n",
+ "* [Run LLMs locally](https://python.langchain.com/v0.1/docs/guides/development/local_llms/)\n",
+ "* [Using local models with RAG](https://python.langchain.com/v0.1/docs/use_cases/question_answering/local_retrieval_qa/)\n",
+ "\n",
+ "### Installation\n",
+ "\n",
+ "The LangChain ChatLlamaCpp integration lives in the `langchain-community` and `llama-cpp-python` packages:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -qU langchain-community llama-cpp-python"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Instantiation\n",
+ "\n",
+ "Now we can instantiate our model object and generate chat completions:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Path to your model weights\n",
+ "local_model = \"local/path/to/Hermes-2-Pro-Llama-3-8B-Q8_0.gguf\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import multiprocessing\n",
+ "\n",
+ "from langchain_community.chat_models import ChatLlamaCpp\n",
+ "\n",
+ "llm = ChatLlamaCpp(\n",
+ "    temperature=0.5,\n",
+ "    model_path=local_model,\n",
+ "    n_ctx=10000,\n",
+ "    n_gpu_layers=8,\n",
+ "    n_batch=300,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n",
+ "    max_tokens=512,\n",
+ "    n_threads=multiprocessing.cpu_count() - 1,\n",
+ "    repeat_penalty=1.5,\n",
+ "    top_p=0.5,\n",
+ "    verbose=True,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Invocation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "messages = [\n",
+ "    (\n",
+ "        \"system\",
+ "        \"You are a helpful assistant that translates English to French. Translate the user sentence.\",\n",
+ "    ),\n",
+ "    (\"human\", \"I love programming.\"),\n",
+ "]\n",
+ "\n",
+ "ai_msg = llm.invoke(messages)\n",
+ "ai_msg"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "J'aime programmer. (In France, \"programming\" is often used in its original sense of scheduling or organizing events.) \n",
+ "\n",
+ "If you meant computer-programming: \n",
+ "Je suis amoureux de la programmation informatique.\n",
+ "\n",
+ "(You might also say simply 'programmation', which would be understood as both meanings - depending on context).\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(ai_msg.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Chaining\n",
+ "\n",
+ "We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "\n",
+ "prompt = ChatPromptTemplate.from_messages(\n",
+ "    [\n",
+ "        (\n",
+ "            \"system\",\n",
+ "            \"You are a helpful assistant that translates {input_language} to {output_language}.\",\n",
+ "        ),\n",
+ "        (\"human\", \"{input}\"),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "chain = prompt | llm\n",
+ "chain.invoke(\n",
+ "    {\n",
+ "        \"input_language\": \"English\",\n",
+ "        \"output_language\": \"German\",\n",
+ "        \"input\": \"I love programming.\",\n",
+ "    }\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Tool calling\n",
+ "\n",
+ "Tool calling with `ChatLlamaCpp` works mostly the same as OpenAI function calling.\n",
+ "\n",
+ "OpenAI has a [tool calling](https://platform.openai.com/docs/guides/function-calling) (we use \"tool calling\" and \"function calling\" interchangeably here) API that lets you describe tools and their arguments, and have the model return a JSON object with a tool to invoke and the inputs to that tool. Tool calling is extremely useful for building tool-using chains and agents, and for getting structured outputs from models more generally.\n",
+ "\n",
+ "With `ChatLlamaCpp.bind_tools`, we can easily pass in Pydantic classes, dict schemas, LangChain tools, or even functions as tools to the model. Under the hood these are converted to OpenAI tool schemas, which look like:\n",
+ "```\n",
+ "{\n",
+ "    \"name\": \"...\",\n",
+ "    \"description\": \"...\",\n",
+ "    \"parameters\": {...}  # JSONSchema\n",
+ "}\n",
+ "```\n",
+ "and are passed in every model invocation.\n",
+ "\n",
+ "\n",
+ "However, the model cannot automatically trigger a function/tool; we need to force it by specifying the 'tool choice' parameter. This parameter is typically formatted as described below.\n",
+ "\n",
+ "```{\"type\": \"function\", \"function\": {\"name\": <>}}.```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain.tools import tool\n",
+ "from langchain_core.pydantic_v1 import BaseModel, Field\n",
+ "\n",
+ "\n",
+ "class WeatherInput(BaseModel):\n",
+ "    location: str = Field(description=\"The city and state, e.g. 
San Francisco, CA\")\n", + " unit: str = Field(enum=[\"celsius\", \"fahrenheit\"])\n", + "\n", + "\n", + "@tool(\"get_current_weather\", args_schema=WeatherInput)\n", + "def get_weather(location: str, unit: str):\n", + " \"\"\"Get the current weather in a given location\"\"\"\n", + " return f\"Now the weather in {location} is 22 {unit}\"\n", + "\n", + "\n", + "llm_with_tools = llm.bind_tools(\n", + " tools=[get_weather],\n", + " tool_choice={\"type\": \"function\", \"function\": {\"name\": \"get_current_weather\"}},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ai_msg = llm_with_tools.invoke(\n", + " \"what is the weather like in HCMC in celsius\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'get_current_weather',\n", + " 'args': {'location': 'Ho Chi Minh City', 'unit': 'celsius'},\n", + " 'id': 'call__0_get_current_weather_cmpl-394d9943-0a1f-425b-8139-d2826c1431f2'}]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ai_msg.tool_calls" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class MagicFunctionInput(BaseModel):\n", + " magic_function_input: int = Field(description=\"The input value for magic function\")\n", + "\n", + "\n", + "@tool(\"get_magic_function\", args_schema=MagicFunctionInput)\n", + "def magic_function(magic_function_input: int):\n", + " \"\"\"Get the value of magic function for an input.\"\"\"\n", + " return magic_function_input + 2\n", + "\n", + "\n", + "llm_with_tools = llm.bind_tools(\n", + " tools=[magic_function],\n", + " tool_choice={\"type\": \"function\", \"function\": {\"name\": \"get_magic_function\"}},\n", + ")\n", + "\n", + "ai_msg = llm_with_tools.invoke(\n", + " \"What is magic function of 3?\",\n", + ")\n", + "\n", + "ai_msg" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'name': 'get_magic_function',\n", + " 'args': {'magic_function_input': 3},\n", + " 'id': 'call__0_get_magic_function_cmpl-cd83a994-b820-4428-957c-48076c68335a'}]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ai_msg.tool_calls" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Structured output" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.pydantic_v1 import BaseModel\n", + "from langchain_core.utils.function_calling import convert_to_openai_tool\n", + "\n", + "\n", + "class Joke(BaseModel):\n", + " \"\"\"A setup to a joke and the punchline.\"\"\"\n", + "\n", + " setup: str\n", + " punchline: str\n", + "\n", + "\n", + "dict_schema = convert_to_openai_tool(Joke)\n", + "structured_llm = llm.with_structured_output(dict_schema)\n", + "result = structured_llm.invoke(\"Tell me a joke about birds\")\n", + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'setup': '- Why did the chicken cross the playground?',\n", + " 'punchline': '\\n\\n- To get to its gilded cage on the other side!'}" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# Streaming\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for chunk in llm.stream(\"what is 25x5\"):\n", + " print(chunk.content, end=\"\\n\", flush=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all ChatLlamaCpp features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.llamacpp.ChatLlamaCpp.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/chat/oci_generative_ai.ipynb b/docs/docs/integrations/chat/oci_generative_ai.ipynb new file mode 100644 index 0000000000000..4ce58a13fbf00 --- /dev/null +++ b/docs/docs/integrations/chat/oci_generative_ai.ipynb @@ -0,0 +1,190 @@ +{ + "cells": [ + { + "cell_type": "raw", + "id": "afaf8039", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: OCIGenAI\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "e49f1e0d", + "metadata": {}, + "source": [ + "# ChatOCIGenAI\n", + "\n", + "This notebook provides a quick overview for getting started with OCIGenAI [chat models](/docs/concepts/#chat-models). For detailed documentation of all ChatOCIGenAI features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_generative_ai.ChatOCIGenAI.html).\n", + "\n", + "Oracle Cloud Infrastructure (OCI) Generative AI is a fully managed service that provides a set of state-of-the-art, customizable large language models (LLMs) that cover a wide range of use cases, and which is available through a single API.\n", + "Using the OCI Generative AI service you can access ready-to-use pretrained models, or create and host your own fine-tuned custom models based on your own data on dedicated AI clusters. 
Detailed documentation of the service and API is available __[here](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)__ and __[here](https://docs.oracle.com/en-us/iaas/api/#/en/generative-ai/20231130/)__.\n", + "\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/chat/oci_generative_ai) | Package downloads | Package latest |\n", + "| :--- | :--- | :---: | :---: | :---: | :---: | :---: |\n", + "| [ChatOCIGenAI](https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_generative_ai.ChatOCIGenAI.html) | [langchain-community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ❌ | ❌ | ❌ | ![PyPI - Downloads](https://img.shields.io/pypi/dm/langchain-oci-generative-ai?style=flat-square&label=%20) | ![PyPI - Version](https://img.shields.io/pypi/v/langchain-oci-generative-ai?style=flat-square&label=%20) |\n", + "\n", + "### Model features\n", + "| [Tool calling](/docs/how_to/tool_calling/) | [Structured output](/docs/how_to/structured_output/) | JSON mode | [Image input](/docs/how_to/multimodal_inputs/) | Audio input | Video input | [Token-level streaming](/docs/how_to/chat_streaming/) | Native async | [Token usage](/docs/how_to/chat_token_usage_tracking/) | [Logprobs](/docs/how_to/logprobs/) |\n", + "| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |\n", + "| ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | \n", + "\n", + "## Setup\n", + "\n", + "To access OCIGenAI models you'll need to install the `oci` and `langchain-community` packages.\n", + "\n", + "### Credentials\n", + "\n", + "The credentials and authentication methods supported for this integration are equivalent to those used with other OCI services and follow the __[standard SDK authentication](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdk_authentication_methods.htm)__ methods, specifically API Key, session token, instance principal, and resource principal.\n", + "\n", + "API key is the default authentication method used in the examples above. 
The following example demonstrates how to use a different authentication method (session token)."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0730d6a1-c893-4840-9817-5e5251676d5d",
+ "metadata": {},
+ "source": [
+ "### Installation\n",
+ "\n",
+ "The LangChain OCIGenAI integration lives in the `langchain-community` package, and you will also need to install the `oci` package:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "652d6238-1f87-422a-b135-f5abbb8652fc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%pip install -qU langchain-community oci"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a38cde65-254d-4219-a441-068766c0d4b5",
+ "metadata": {},
+ "source": [
+ "## Instantiation\n",
+ "\n",
+ "Now we can instantiate our model object and generate chat completions:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cb09c344-1836-4e0c-acf8-11d13ac1dbae",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI\n",
+ "from langchain_core.messages import AIMessage, HumanMessage, SystemMessage\n",
+ "\n",
+ "chat = ChatOCIGenAI(\n",
+ "    model_id=\"cohere.command-r-16k\",\n",
+ "    service_endpoint=\"https://inference.generativeai.us-chicago-1.oci.oraclecloud.com\",\n",
+ "    compartment_id=\"MY_OCID\",\n",
+ "    model_kwargs={\"temperature\": 0.7, \"max_tokens\": 500},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2b4f3e15",
+ "metadata": {},
+ "source": [
+ "## Invocation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "62e0dbc3",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "messages = [\n",
+ "    SystemMessage(content=\"You are an AI assistant.\"),\n",
+ "    AIMessage(content=\"Hi there human!\"),\n",
+ "    HumanMessage(content=\"tell me a joke.\"),\n",
+ "]\n",
+ "response = chat.invoke(messages)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d86145b3-bfef-46e8-b227-4dda5c9c2705",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(response.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "18e2bfc0-7e78-4528-a73f-499ac150dca8",
+ "metadata": {},
+ "source": [
+ "## Chaining\n",
+ "\n",
+ "We can [chain](/docs/how_to/sequence/) our model with a prompt template like so:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e197d1d7-a070-4c96-9f8a-a0e86d046e0b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "\n",
+ "prompt = ChatPromptTemplate.from_template(\"Tell me a joke about {topic}\")\n",
+ "chain = prompt | chat\n",
+ "\n",
+ "response = chain.invoke({\"topic\": \"dogs\"})\n",
+ "print(response.content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3a5bb5ca-c3ae-4a58-be67-2cd18574b9a3",
+ "metadata": {},
+ "source": [
+ "## API reference\n",
+ "\n",
+ "For detailed documentation of all ChatOCIGenAI features and configurations head to the API reference: https://api.python.langchain.com/en/latest/chat_models/langchain_community.chat_models.oci_generative_ai.ChatOCIGenAI.html"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 
4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/chat/snowflake.ipynb b/docs/docs/integrations/chat/snowflake.ipynb new file mode 100644 index 0000000000000..650648ffb7acc --- /dev/null +++ b/docs/docs/integrations/chat/snowflake.ipynb @@ -0,0 +1,180 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Snowflake Cortex\n", + "\n", + "[Snowflake Cortex](https://docs.snowflake.com/en/user-guide/snowflake-cortex/llm-functions) gives you instant access to industry-leading large language models (LLMs) trained by researchers at companies like Mistral, Reka, Meta, and Google, including [Snowflake Arctic](https://www.snowflake.com/en/data-cloud/arctic/), an open enterprise-grade model developed by Snowflake.\n", + "\n", + "This example goes over how to use LangChain to interact with Snowflake Cortex." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation and setup\n", + "\n", + "We start by installing the `snowflake-snowpark-python` library, using the command below. Then we configure the credentials for connecting to Snowflake, as environment variables or pass them directly." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install --upgrade --quiet snowflake-snowpark-python" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "# First step is to set up the environment variables, to connect to Snowflake,\n", + "# you can also pass these snowflake credentials while instantiating the model\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_ACCOUNT\") is None:\n", + " os.environ[\"SNOWFLAKE_ACCOUNT\"] = getpass.getpass(\"Account: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_USERNAME\") is None:\n", + " os.environ[\"SNOWFLAKE_USERNAME\"] = getpass.getpass(\"Username: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_PASSWORD\") is None:\n", + " os.environ[\"SNOWFLAKE_PASSWORD\"] = getpass.getpass(\"Password: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_DATABASE\") is None:\n", + " os.environ[\"SNOWFLAKE_DATABASE\"] = getpass.getpass(\"Database: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_SCHEMA\") is None:\n", + " os.environ[\"SNOWFLAKE_SCHEMA\"] = getpass.getpass(\"Schema: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_WAREHOUSE\") is None:\n", + " os.environ[\"SNOWFLAKE_WAREHOUSE\"] = getpass.getpass(\"Warehouse: \")\n", + "\n", + "if os.environ.get(\"SNOWFLAKE_ROLE\") is None:\n", + " os.environ[\"SNOWFLAKE_ROLE\"] = getpass.getpass(\"Role: \")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.chat_models import ChatSnowflakeCortex\n", + "from langchain_core.messages import HumanMessage, SystemMessage\n", + "\n", + "# By default, we'll be using the cortex provided model: `snowflake-arctic`, with function: `complete`\n", + "chat = ChatSnowflakeCortex()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The above cell assumes that your Snowflake credentials are set in your environment variables. 
If you would rather manually specify them, use the following code:\n", + "\n", + "```python\n", + "chat = ChatSnowflakeCortex(\n", + " # change default cortex model and function\n", + " model=\"snowflake-arctic\",\n", + " cortex_function=\"complete\",\n", + "\n", + " # change default generation parameters\n", + " temperature=0,\n", + " max_tokens=10,\n", + " top_p=0.95,\n", + "\n", + " # specify snowflake credentials\n", + " account=\"YOUR_SNOWFLAKE_ACCOUNT\",\n", + " username=\"YOUR_SNOWFLAKE_USERNAME\",\n", + " password=\"YOUR_SNOWFLAKE_PASSWORD\",\n", + " database=\"YOUR_SNOWFLAKE_DATABASE\",\n", + " schema=\"YOUR_SNOWFLAKE_SCHEMA\",\n", + " role=\"YOUR_SNOWFLAKE_ROLE\",\n", + " warehouse=\"YOUR_SNOWFLAKE_WAREHOUSE\"\n", + ")\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calling the model\n", + "We can now call the model using the `invoke` or `generate` method.\n", + "\n", + "#### Generation" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "AIMessage(content=\" Large language models are artificial intelligence systems designed to understand, generate, and manipulate human language. These models are typically based on deep learning techniques and are trained on vast amounts of text data to learn patterns and structures in language. They can perform a wide range of language-related tasks, such as language translation, text generation, sentiment analysis, and answering questions. Some well-known large language models include Google's BERT, OpenAI's GPT series, and Facebook's RoBERTa. These models have shown remarkable performance in various natural language processing tasks, and their applications continue to expand as research in AI progresses.\", response_metadata={'completion_tokens': 131, 'prompt_tokens': 29, 'total_tokens': 160}, id='run-5435bd0a-83fd-4295-b237-66cbd1b5c0f3-0')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages = [\n", + " SystemMessage(content=\"You are a friendly assistant.\"),\n", + " HumanMessage(content=\"What are large language models?\"),\n", + "]\n", + "chat.invoke(messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Streaming\n", + "`ChatSnowflakeCortex` doesn't support streaming as of now. Support for streaming will be coming in the later versions!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/document_loaders/scrapfly.ipynb b/docs/docs/integrations/document_loaders/scrapfly.ipynb new file mode 100644 index 0000000000000..2625e3d3fb940 --- /dev/null +++ b/docs/docs/integrations/document_loaders/scrapfly.ipynb @@ -0,0 +1,107 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ScrapFly\n", + "[ScrapFly](https://scrapfly.io/) is a web scraping API with headless browser capabilities, proxies, and anti-bot bypass. It allows for extracting web page data into accessible LLM markdown or text." 
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Installation\n",
+ "Install the ScrapFly Python SDK and the required LangChain packages using pip:\n",
+ "```shell\n",
+ "pip install scrapfly-sdk langchain langchain-community\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Usage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.document_loaders import ScrapflyLoader\n",
+ "\n",
+ "scrapfly_loader = ScrapflyLoader(\n",
+ "    [\"https://web-scraping.dev/products\"],\n",
+ "    api_key=\"Your ScrapFly API key\",  # Get your API key from https://www.scrapfly.io/\n",
+ "    ignore_scrape_failures=True,  # Ignore unprocessable web pages and log their exceptions\n",
+ ")\n",
+ "\n",
+ "# Load documents from URLs as markdown\n",
+ "documents = scrapfly_loader.load()\n",
+ "print(documents)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The ScrapflyLoader also allows passing a ScrapeConfig object for customizing the scrape request. See the documentation for the full feature details and their API params: https://scrapfly.io/docs/scrape-api/getting-started"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from langchain_community.document_loaders import ScrapflyLoader\n",
+ "\n",
+ "scrapfly_scrape_config = {\n",
+ "    \"asp\": True,  # Bypass scraping blocking and antibot solutions, like Cloudflare\n",
+ "    \"render_js\": True,  # Enable JavaScript rendering with a cloud headless browser\n",
+ "    \"proxy_pool\": \"public_residential_pool\",  # Select a proxy pool (datacenter or residential)\n",
+ "    \"country\": \"us\",  # Select a proxy location\n",
+ "    \"auto_scroll\": True,  # Auto scroll the page\n",
+ "    \"js\": \"\",  # Execute custom JavaScript code by the headless browser\n",
+ "}\n",
+ "\n",
+ "scrapfly_loader = ScrapflyLoader(\n",
+ "    [\"https://web-scraping.dev/products\"],\n",
+ "    api_key=\"Your ScrapFly API key\",  # Get your API key from https://www.scrapfly.io/\n",
+ "    ignore_scrape_failures=True,  # Ignore unprocessable web pages and log their exceptions\n",
+ "    scrape_config=scrapfly_scrape_config,  # Pass the scrape_config object\n",
+ "    scrape_format=\"markdown\",  # The scrape result format, either `markdown` (default) or `text`\n",
+ ")\n",
+ "\n",
+ "# Load documents from URLs as markdown\n",
+ "documents = scrapfly_loader.load()\n",
+ "print(documents)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.1"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/docs/integrations/document_transformers/dashscope_rerank.ipynb b/docs/docs/integrations/document_transformers/dashscope_rerank.ipynb
new file mode 100644
index 0000000000000..3d54d5e8f5edf
--- /dev/null
+++ b/docs/docs/integrations/document_transformers/dashscope_rerank.ipynb
@@ -0,0 +1,387 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# DashScope Reranker\n",
+ "\n",
+ "This notebook shows how to use DashScope Reranker for document compression and retrieval. 
[DashScope](https://dashscope.aliyun.com/) is the generative AI service from Alibaba Cloud (Aliyun).\n", + "\n", + "DashScope's [Text ReRank Model](https://help.aliyun.com/document_detail/2780058.html?spm=a2c4g.2780059.0.0.6d995024FlrJ12) supports reranking documents with a maximum of 4000 tokens. Moreover, it supports Chinese, English, Japanese, Korean, Thai, Spanish, French, Portuguese, Indonesian, Arabic, and over 50 other languages. For more details, please visit [here](https://help.aliyun.com/document_detail/2780059.html?spm=a2c4g.2780058.0.0.3a9e5b1dWeOQjI)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet dashscope" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet faiss\n", + "\n", + "# OR (depending on Python version)\n", + "\n", + "%pip install --upgrade --quiet faiss-cpu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To create api key: https://bailian.console.aliyun.com/?apiKey=1#/api-key\n", + "\n", + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"DASHSCOPE_API_KEY\"] = getpass.getpass(\"DashScope API Key:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function for printing docs\n", + "def pretty_print_docs(docs):\n", + " print(\n", + " f\"\\n{'-' * 100}\\n\".join(\n", + " [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the base vector store retriever\n", + "Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can set up the retriever to retrieve a high number (20) of docs." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "I understand. \n", + "\n", + "I remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it. \n", + "\n", + "That’s why one of the first things I did as President was fight to pass the American Rescue Plan. \n", + "\n", + "Because people were hurting. We needed to act, and we did. \n", + "\n", + "Few pieces of legislation have done more in a critical moment in our history to lift us out of crisis.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "To all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \n", + "\n", + "And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. 
And I will use every tool at our disposal to protect American businesses and consumers. \n", + "\n", + "Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 4:\n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 5:\n", + "\n", + "Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n", + "\n", + "The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n", + "\n", + "We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 6:\n", + "\n", + "Every Administration says they’ll do it, but we are actually doing it. \n", + "\n", + "We will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. \n", + "\n", + "But to compete for the best jobs of the future, we also need to level the playing field with China and other competitors.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 7:\n", + "\n", + "When we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we haven’t done in a long time: build a better America. \n", + "\n", + "For more than two years, COVID-19 has impacted every decision in our lives and the life of the nation. \n", + "\n", + "And I know you’re tired, frustrated, and exhausted. \n", + "\n", + "But I also know this.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 8:\n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 9:\n", + "\n", + "My plan will not only lower costs to give families a fair shot, it will lower the deficit. 
\n", + "\n", + "The previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted. \n", + "\n", + "But in my administration, the watchdogs have been welcomed back. \n", + "\n", + "We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 10:\n", + "\n", + "He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \n", + "\n", + "We meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \n", + "\n", + "The pandemic has been punishing. \n", + "\n", + "And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \n", + "\n", + "I understand.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 11:\n", + "\n", + "And tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n", + "\n", + "By the end of this year, the deficit will be down to less than half what it was before I took office. \n", + "\n", + "The only president ever to cut the deficit by more than one trillion dollars in a single year. \n", + "\n", + "Lowering your costs also means demanding more competition. \n", + "\n", + "I’m a capitalist, but capitalism without competition isn’t capitalism. \n", + "\n", + "It’s exploitation—and it drives up prices.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 12:\n", + "\n", + "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \n", + "\n", + "Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n", + "\n", + "Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n", + "\n", + "They keep moving. \n", + "\n", + "And the costs and the threats to America and the world keep rising.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 13:\n", + "\n", + "Cancer is the #2 cause of death in America–second only to heart disease. \n", + "\n", + "Last month, I announced our plan to supercharge \n", + "the Cancer Moonshot that President Obama asked me to lead six years ago. \n", + "\n", + "Our goal is to cut the cancer death rate by at least 50% over the next 25 years, turn more cancers from death sentences into treatable diseases. \n", + "\n", + "More support for patients and families. \n", + "\n", + "To get there, I call on Congress to fund ARPA-H, the Advanced Research Projects Agency for Health.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 14:\n", + "\n", + "It fueled our efforts to vaccinate the nation and combat COVID-19. It delivered immediate economic relief for tens of millions of Americans. \n", + "\n", + "Helped put food on their table, keep a roof over their heads, and cut the cost of health insurance. 
\n", + "\n", + "And as my Dad used to say, it gave people a little breathing room.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 15:\n", + "\n", + "America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n", + "\n", + "These steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n", + "\n", + "But I want you to know that we are going to be okay. \n", + "\n", + "When the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 16:\n", + "\n", + "So that’s my plan. It will grow the economy and lower costs for families. \n", + "\n", + "So what are we waiting for? Let’s get this done. And while you’re at it, confirm my nominees to the Federal Reserve, which plays a critical role in fighting inflation. \n", + "\n", + "My plan will not only lower costs to give families a fair shot, it will lower the deficit.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 17:\n", + "\n", + "And we will, as one people. \n", + "\n", + "One America. \n", + "\n", + "The United States of America. \n", + "\n", + "May God bless you all. May God protect our troops.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 18:\n", + "\n", + "As I’ve told Xi Jinping, it is never a good bet to bet against the American people. \n", + "\n", + "We’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n", + "\n", + "And we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 19:\n", + "\n", + "And I know you’re tired, frustrated, and exhausted. \n", + "\n", + "But I also know this. \n", + "\n", + "Because of the progress we’ve made, because of your resilience and the tools we have, tonight I can say \n", + "we are moving forward safely, back to more normal routines. \n", + "\n", + "We’ve reached a new moment in the fight against COVID-19, with severe cases down to a level not seen since last July. \n", + "\n", + "Just a few days ago, the Centers for Disease Control and Prevention—the CDC—issued new mask guidelines.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 20:\n", + "\n", + "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n", + "\n", + "Last year COVID-19 kept us apart. This year we are finally together again. \n", + "\n", + "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n", + "\n", + "With a duty to one another to the American people to the Constitution. 
\n", + "\n", + "And with an unwavering resolve that freedom will always triumph over tyranny.\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.embeddings.dashscope import DashScopeEmbeddings\n", + "from langchain_community.vectorstores.faiss import FAISS\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", + "texts = text_splitter.split_documents(documents)\n", + "retriever = FAISS.from_documents(texts, DashScopeEmbeddings()).as_retriever( # type: ignore\n", + " search_kwargs={\"k\": 20}\n", + ")\n", + "\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = retriever.invoke(query)\n", + "pretty_print_docs(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reranking with DashScopeRerank\n", + "Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll use the `DashScopeRerank` to rerank the returned results." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n", + "\n", + "Last year COVID-19 kept us apart. This year we are finally together again. \n", + "\n", + "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n", + "\n", + "With a duty to one another to the American people to the Constitution. \n", + "\n", + "And with an unwavering resolve that freedom will always triumph over tyranny.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n", + "\n", + "The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n", + "\n", + "We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. 
We are coming for your ill-begotten gains.\n" + ] + } + ], + "source": [ + "from langchain.retrievers import ContextualCompressionRetriever\n", + "from langchain_community.document_compressors.dashscope_rerank import DashScopeRerank\n", + "\n", + "compressor = DashScopeRerank()\n", + "compression_retriever = ContextualCompressionRetriever(\n", + " base_compressor=compressor, base_retriever=retriever\n", + ")\n", + "\n", + "compressed_docs = compression_retriever.invoke(\n", + " \"What did the president say about Ketanji Jackson Brown\"\n", + ")\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb b/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb new file mode 100644 index 0000000000000..5272705479698 --- /dev/null +++ b/docs/docs/integrations/document_transformers/rankllm-reranker.ipynb @@ -0,0 +1,781 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RankLLM Reranker\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[RankLLM](https://github.com/castorini/rank_llm) offers a suite of listwise rerankers, albeit with focus on open source LLMs finetuned for the task - RankVicuna and RankZephyr being two of them." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet rank_llm" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet langchain_openai" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet faiss-cpu" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function for printing docs\n", + "def pretty_print_docs(docs):\n", + " print(\n", + " f\"\\n{'-' * 100}\\n\".join(\n", + " [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the base vector store retriever\n", + "Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can set up the retriever to retrieve a high number (20) of docs." 
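Since a listwise reranker can only reorder what the base retriever hands it, the candidate pool (`k=20` here) should comfortably exceed the reranker's final `top_n`. One purely illustrative way to see the effect, once the `retriever` from the setup cell below and the `compression_retriever` from the RankZephyr section exist, is to compare chunk ids before and after compression (a hedged sketch; the integer `id` metadata is added in the setup cell that follows):

```python
# Hedged sketch: assumes `retriever` (setup cell below) and
# `compression_retriever` (RankZephyr section) have already been created.
query = "What was done to Russia?"

base_ids = [d.metadata["id"] for d in retriever.invoke(query)]  # 20 ids in similarity order
reranked_ids = [d.metadata["id"] for d in compression_retriever.invoke(query)]  # top 3 after reranking

print(base_ids)
print(reranked_ids)
```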
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.vectorstores import FAISS\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "documents = TextLoader(\"../../modules/state_of_the_union.txt\").load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", + "texts = text_splitter.split_documents(documents)\n", + "for idx, text in enumerate(texts):\n", + " text.metadata[\"id\"] = idx\n", + "\n", + "embedding = OpenAIEmbeddings(model=\"text-embedding-ada-002\")\n", + "retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Retrieval + RankLLM Reranking (RankZephyr)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieval without reranking" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "And with an unwavering resolve that freedom will always triumph over tyranny. \n", + "\n", + "Six days ago, Russia’s Vladimir Putin sought to shake the foundations of the free world thinking he could make it bend to his menacing ways. But he badly miscalculated. \n", + "\n", + "He thought he could roll into Ukraine and the world would roll over. Instead he met a wall of strength he never imagined. \n", + "\n", + "He met the Ukrainian people.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "Together with our allies –we are right now enforcing powerful economic sanctions. \n", + "\n", + "We are cutting off Russia’s largest banks from the international financial system. \n", + "\n", + "Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \n", + "\n", + "We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \n", + "\n", + "The Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 4:\n", + "\n", + "I spent countless hours unifying our European allies. We shared with the world in advance what we knew Putin was planning and precisely how he would try to falsely justify his aggression. \n", + "\n", + "We countered Russia’s lies with truth. \n", + "\n", + "And now that he has acted the free world is holding him accountable.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 5:\n", + "\n", + "He rejected repeated efforts at diplomacy. \n", + "\n", + "He thought the West and NATO wouldn’t respond. 
And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \n", + "\n", + "We prepared extensively and carefully. \n", + "\n", + "We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 6:\n", + "\n", + "And now that he has acted the free world is holding him accountable. \n", + "\n", + "Along with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \n", + "\n", + "We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \n", + "\n", + "Together with our allies –we are right now enforcing powerful economic sanctions.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 7:\n", + "\n", + "To all Americans, I will be honest with you, as I’ve always promised. A Russian dictator, invading a foreign country, has costs around the world. \n", + "\n", + "And I’m taking robust action to make sure the pain of our sanctions is targeted at Russia’s economy. And I will use every tool at our disposal to protect American businesses and consumers. \n", + "\n", + "Tonight, I can announce that the United States has worked with 30 other countries to release 60 Million barrels of oil from reserves around the world.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 8:\n", + "\n", + "And we remain clear-eyed. The Ukrainians are fighting back with pure courage. But the next few days weeks, months, will be hard on them. \n", + "\n", + "Putin has unleashed violence and chaos. But while he may make gains on the battlefield – he will pay a continuing high price over the long run. \n", + "\n", + "And a proud Ukrainian people, who have known 30 years of independence, have repeatedly shown that they will not tolerate anyone who tries to take their country backwards.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 9:\n", + "\n", + "Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n", + "\n", + "The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n", + "\n", + "We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 10:\n", + "\n", + "America will lead that effort, releasing 30 Million barrels from our own Strategic Petroleum Reserve. And we stand ready to do more if necessary, unified with our allies. \n", + "\n", + "These steps will help blunt gas prices here at home. And I know the news about what’s happening can seem alarming. \n", + "\n", + "But I want you to know that we are going to be okay. 
\n", + "\n", + "When the history of this era is written Putin’s war on Ukraine will have left Russia weaker and the rest of the world stronger.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 11:\n", + "\n", + "They keep moving. \n", + "\n", + "And the costs and the threats to America and the world keep rising. \n", + "\n", + "That’s why the NATO Alliance was created to secure peace and stability in Europe after World War 2. \n", + "\n", + "The United States is a member along with 29 other nations. \n", + "\n", + "It matters. American diplomacy matters. American resolve matters. \n", + "\n", + "Putin’s latest attack on Ukraine was premeditated and unprovoked. \n", + "\n", + "He rejected repeated efforts at diplomacy.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 12:\n", + "\n", + "Our forces are not going to Europe to fight in Ukraine, but to defend our NATO Allies – in the event that Putin decides to keep moving west. \n", + "\n", + "For that purpose we’ve mobilized American ground forces, air squadrons, and ship deployments to protect NATO countries including Poland, Romania, Latvia, Lithuania, and Estonia. \n", + "\n", + "As I have made crystal clear the United States and our Allies will defend every inch of territory of NATO countries with the full force of our collective power.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 13:\n", + "\n", + "While it shouldn’t have taken something so terrible for people around the world to see what’s at stake now everyone sees it clearly. \n", + "\n", + "We see the unity among leaders of nations and a more unified Europe a more unified West. And we see unity among the people who are gathering in cities in large crowds around the world even in Russia to demonstrate their support for Ukraine.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 14:\n", + "\n", + "He met the Ukrainian people. \n", + "\n", + "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n", + "\n", + "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n", + "\n", + "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 15:\n", + "\n", + "In the battle between democracy and autocracy, democracies are rising to the moment, and the world is clearly choosing the side of peace and security. \n", + "\n", + "This is a real test. It’s going to take time. So let us continue to draw inspiration from the iron will of the Ukrainian people. \n", + "\n", + "To our fellow Ukrainian Americans who forge a deep bond that connects our two nations we stand with you. 
\n", + "\n", + "Putin may circle Kyiv with tanks, but he will never gain the hearts and souls of the Ukrainian people.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 16:\n", + "\n", + "Together with our allies we are providing support to the Ukrainians in their fight for freedom. Military assistance. Economic assistance. Humanitarian assistance. \n", + "\n", + "We are giving more than $1 Billion in direct assistance to Ukraine. \n", + "\n", + "And we will continue to aid the Ukrainian people as they defend their country and to help ease their suffering. \n", + "\n", + "Let me be clear, our forces are not engaged and will not engage in conflict with Russian forces in Ukraine.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 17:\n", + "\n", + "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \n", + "\n", + "Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n", + "\n", + "Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n", + "\n", + "They keep moving. \n", + "\n", + "And the costs and the threats to America and the world keep rising.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 18:\n", + "\n", + "It fueled our efforts to vaccinate the nation and combat COVID-19. It delivered immediate economic relief for tens of millions of Americans. \n", + "\n", + "Helped put food on their table, keep a roof over their heads, and cut the cost of health insurance. \n", + "\n", + "And as my Dad used to say, it gave people a little breathing room.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 19:\n", + "\n", + "My administration is providing assistance with job training and housing, and now helping lower-income veterans get VA care debt-free. \n", + "\n", + "Our troops in Iraq and Afghanistan faced many dangers. \n", + "\n", + "One was stationed at bases and breathing in toxic smoke from “burn pits” that incinerated wastes of war—medical and hazard material, jet fuel, and more. \n", + "\n", + "When they came home, many of the world’s fittest and best trained warriors were never the same. \n", + "\n", + "Headaches. Numbness. Dizziness.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 20:\n", + "\n", + "Every Administration says they’ll do it, but we are actually doing it. \n", + "\n", + "We will buy American to make sure everything from the deck of an aircraft carrier to the steel on highway guardrails are made in America. 
\n", + "\n", + "But to compete for the best jobs of the future, we also need to level the playing field with China and other competitors.\n" + ] + } + ], + "source": [ + "query = \"What was done to Russia?\"\n", + "docs = retriever.invoke(query)\n", + "pretty_print_docs(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieval + Reranking with RankZephyr" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers.contextual_compression import ContextualCompressionRetriever\n", + "from langchain_community.document_compressors.rankllm_rerank import RankLLMRerank\n", + "\n", + "compressor = RankLLMRerank(top_n=3, model=\"zephyr\")\n", + "compression_retriever = ContextualCompressionRetriever(\n", + " base_compressor=compressor, base_retriever=retriever\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "Together with our allies –we are right now enforcing powerful economic sanctions. \n", + "\n", + "We are cutting off Russia’s largest banks from the international financial system. \n", + "\n", + "Preventing Russia’s central bank from defending the Russian Ruble making Putin’s $630 Billion “war fund” worthless. \n", + "\n", + "We are choking off Russia’s access to technology that will sap its economic strength and weaken its military for years to come.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "And tonight I am announcing that we will join our allies in closing off American air space to all Russian flights – further isolating Russia – and adding an additional squeeze –on their economy. The Ruble has lost 30% of its value. \n", + "\n", + "The Russian stock market has lost 40% of its value and trading remains suspended. Russia’s economy is reeling and Putin alone is to blame.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "And now that he has acted the free world is holding him accountable. \n", + "\n", + "Along with twenty-seven members of the European Union including France, Germany, Italy, as well as countries like the United Kingdom, Canada, Japan, Korea, Australia, New Zealand, and many others, even Switzerland. \n", + "\n", + "We are inflicting pain on Russia and supporting the people of Ukraine. Putin is now isolated from the world more than ever. \n", + "\n", + "Together with our allies –we are right now enforcing powerful economic sanctions.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "compressed_docs = compression_retriever.invoke(query)\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Can be used within a QA pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'query': 'What was done to Russia?',\n", + " 'result': 'Russia has been subjected to powerful economic sanctions, including cutting off its largest banks from the international financial system, preventing its central bank from defending the Russian Ruble, and choking off its access to technology. 
Additionally, American airspace has been closed to all Russian flights, further isolating Russia and adding pressure on its economy. These actions have led to a significant devaluation of the Ruble, a sharp decline in the Russian stock market, and overall economic turmoil in Russia.'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(temperature=0)\n", + "\n", + "chain = RetrievalQA.from_chain_type(\n", + " llm=ChatOpenAI(temperature=0), retriever=compression_retriever\n", + ")\n", + "\n", + "chain({\"query\": query})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Retrieval + RankLLM Reranking (RankGPT)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieval without reranking" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n", + "\n", + "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 4:\n", + "\n", + "He met the Ukrainian people. \n", + "\n", + "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n", + "\n", + "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. 
\n", + "\n", + "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 5:\n", + "\n", + "But that trickle-down theory led to weaker economic growth, lower wages, bigger deficits, and the widest gap between those at the top and everyone else in nearly a century. \n", + "\n", + "Vice President Harris and I ran for office with a new economic vision for America. \n", + "\n", + "Invest in America. Educate Americans. Grow the workforce. Build the economy from the bottom up \n", + "and the middle out, not from the top down.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 6:\n", + "\n", + "And tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n", + "\n", + "By the end of this year, the deficit will be down to less than half what it was before I took office. \n", + "\n", + "The only president ever to cut the deficit by more than one trillion dollars in a single year. \n", + "\n", + "Lowering your costs also means demanding more competition. \n", + "\n", + "I’m a capitalist, but capitalism without competition isn’t capitalism. \n", + "\n", + "It’s exploitation—and it drives up prices.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 7:\n", + "\n", + "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", + "\n", + "I’ve worked on these issues a long time. \n", + "\n", + "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety. \n", + "\n", + "So let’s not abandon our streets. Or choose between safety and equal justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 8:\n", + "\n", + "As I’ve told Xi Jinping, it is never a good bet to bet against the American people. \n", + "\n", + "We’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n", + "\n", + "And we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 9:\n", + "\n", + "Madam Speaker, Madam Vice President, our First Lady and Second Gentleman. Members of Congress and the Cabinet. Justices of the Supreme Court. My fellow Americans. \n", + "\n", + "Last year COVID-19 kept us apart. This year we are finally together again. \n", + "\n", + "Tonight, we meet as Democrats Republicans and Independents. But most importantly as Americans. \n", + "\n", + "With a duty to one another to the American people to the Constitution. 
\n", + "\n", + "And with an unwavering resolve that freedom will always triumph over tyranny.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 10:\n", + "\n", + "As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n", + "\n", + "It’s time. \n", + "\n", + "But with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. \n", + "\n", + "Inflation is robbing them of the gains they might otherwise feel. \n", + "\n", + "I get it. That’s why my top priority is getting prices under control.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 11:\n", + "\n", + "I’m also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. \n", + "\n", + "And fourth, let’s end cancer as we know it. \n", + "\n", + "This is personal to me and Jill, to Kamala, and to so many of you. \n", + "\n", + "Cancer is the #2 cause of death in America–second only to heart disease.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 12:\n", + "\n", + "Headaches. Numbness. Dizziness. \n", + "\n", + "A cancer that would put them in a flag-draped coffin. \n", + "\n", + "I know. \n", + "\n", + "One of those soldiers was my son Major Beau Biden. \n", + "\n", + "We don’t know for sure if a burn pit was the cause of his brain cancer, or the diseases of so many of our troops. \n", + "\n", + "But I’m committed to finding out everything we can. \n", + "\n", + "Committed to military families like Danielle Robinson from Ohio. \n", + "\n", + "The widow of Sergeant First Class Heath Robinson.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 13:\n", + "\n", + "He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \n", + "\n", + "We meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \n", + "\n", + "The pandemic has been punishing. \n", + "\n", + "And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \n", + "\n", + "I understand.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 14:\n", + "\n", + "When we invest in our workers, when we build the economy from the bottom up and the middle out together, we can do something we haven’t done in a long time: build a better America. \n", + "\n", + "For more than two years, COVID-19 has impacted every decision in our lives and the life of the nation. \n", + "\n", + "And I know you’re tired, frustrated, and exhausted. \n", + "\n", + "But I also know this.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 15:\n", + "\n", + "My plan to fight inflation will lower your costs and lower the deficit. \n", + "\n", + "17 Nobel laureates in economics say my plan will ease long-term inflationary pressures. Top business leaders and most Americans support my plan. And here’s the plan: \n", + "\n", + "First – cut the cost of prescription drugs. Just look at insulin. 
One in ten Americans has diabetes. In Virginia, I met a 13-year-old boy named Joshua Davis.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 16:\n", + "\n", + "And soon, we’ll strengthen the Violence Against Women Act that I first wrote three decades ago. It is important for us to show the nation that we can come together and do big things. \n", + "\n", + "So tonight I’m offering a Unity Agenda for the Nation. Four big things we can do together. \n", + "\n", + "First, beat the opioid epidemic. \n", + "\n", + "There is so much we can do. Increase funding for prevention, treatment, harm reduction, and recovery.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 17:\n", + "\n", + "My plan will not only lower costs to give families a fair shot, it will lower the deficit. \n", + "\n", + "The previous Administration not only ballooned the deficit with tax cuts for the very wealthy and corporations, it undermined the watchdogs whose job was to keep pandemic relief funds from being wasted. \n", + "\n", + "But in my administration, the watchdogs have been welcomed back. \n", + "\n", + "We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 18:\n", + "\n", + "So let’s not abandon our streets. Or choose between safety and equal justice. \n", + "\n", + "Let’s come together to protect our communities, restore trust, and hold law enforcement accountable. \n", + "\n", + "That’s why the Justice Department required body cameras, banned chokeholds, and restricted no-knock warrants for its officers.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 19:\n", + "\n", + "I understand. \n", + "\n", + "I remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it. \n", + "\n", + "That’s why one of the first things I did as President was fight to pass the American Rescue Plan. \n", + "\n", + "Because people were hurting. We needed to act, and we did. \n", + "\n", + "Few pieces of legislation have done more in a critical moment in our history to lift us out of crisis.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 20:\n", + "\n", + "And we will, as one people. \n", + "\n", + "One America. \n", + "\n", + "The United States of America. \n", + "\n", + "May God bless you all. 
May God protect our troops.\n" + ] + } + ], + "source": [ + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = retriever.invoke(query)\n", + "pretty_print_docs(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Retrieval + Reranking with RankGPT" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.retrievers.contextual_compression import ContextualCompressionRetriever\n", + "from langchain_community.document_compressors.rankllm_rerank import RankLLMRerank\n", + "\n", + "compressor = RankLLMRerank(top_n=3, model=\"gpt\", gpt_model=\"gpt-3.5-turbo\")\n", + "compression_retriever = ContextualCompressionRetriever(\n", + " base_compressor=compressor, base_retriever=retriever\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n", + "\n", + "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n" + ] + } + ], + "source": [ + "compressed_docs = compression_retriever.invoke(query)\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use this retriever within a QA pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'query': 'What did the president say about Ketanji Brown Jackson',\n", + " 'result': \"The President mentioned that Ketanji Brown Jackson is one of the nation's top legal minds and that she will continue Justice Breyer's legacy of excellence. He highlighted her background as a former top litigator in private practice and a former federal public defender, as well as coming from a family of public school educators and police officers. 
He also mentioned that since her nomination, she has received broad support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.\"}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(temperature=0)\n", + "\n", + "chain = RetrievalQA.from_chain_type(\n", + " llm=ChatOpenAI(temperature=0), retriever=compression_retriever\n", + ")\n", + "\n", + "chain({\"query\": query})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "rankllm", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/document_transformers/volcengine_rerank.ipynb b/docs/docs/integrations/document_transformers/volcengine_rerank.ipynb new file mode 100644 index 0000000000000..59c04de45af47 --- /dev/null +++ b/docs/docs/integrations/document_transformers/volcengine_rerank.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Volcengine Reranker\n", + "\n", + "This notebook shows how to use Volcengine Reranker for document compression and retrieval. [Volcengine](https://www.volcengine.com/) is a cloud service platform developed by ByteDance, the parent company of TikTok.\n", + "\n", + "Volcengine's Rerank Service supports reranking up to 50 documents with a maximum of 4000 tokens. For more, please visit [here](https://www.volcengine.com/docs/84313/1254474) and [here](https://www.volcengine.com/docs/84313/1254605)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet volcengine" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet faiss\n", + "\n", + "# OR (depending on Python version)\n", + "\n", + "%pip install --upgrade --quiet faiss-cpu" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# To obtain ak/sk: https://www.volcengine.com/docs/84313/1254488\n", + "\n", + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"VOLC_API_AK\"] = getpass.getpass(\"Volcengine API AK:\")\n", + "os.environ[\"VOLC_API_SK\"] = getpass.getpass(\"Volcengine API SK:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# Helper function for printing docs\n", + "def pretty_print_docs(docs):\n", + " print(\n", + " f\"\\n{'-' * 100}\\n\".join(\n", + " [f\"Document {i+1}:\\n\\n\" + d.page_content for i, d in enumerate(docs)]\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Set up the base vector store retriever\n", + "Let's start by initializing a simple vector store retriever and storing the 2023 State of the Union speech (in chunks). We can set up the retriever to retrieve a high number (20) of docs." 
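The vector store in this notebook is built with a local `all-MiniLM-L6-v2` model via `HuggingFaceEmbeddings`, so alongside `volcengine` and `faiss` you will likely also need the `langchain-huggingface` and `sentence-transformers` packages (a hedged sketch of the extra install, in case they are not already present in your environment):

```python
# Likely prerequisites for the local embedding model used below; the
# all-MiniLM-L6-v2 weights are downloaded automatically on first use.
%pip install --upgrade --quiet langchain-huggingface sentence-transformers
```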
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/terminator/Developer/langchain/.venv/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", + " from tqdm.autonotebook import tqdm, trange\n", + "/Users/terminator/Developer/langchain/.venv/lib/python3.11/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n", + "\n", + "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 4:\n", + "\n", + "He will never extinguish their love of freedom. He will never weaken the resolve of the free world. \n", + "\n", + "We meet tonight in an America that has lived through two of the hardest years this nation has ever faced. \n", + "\n", + "The pandemic has been punishing. \n", + "\n", + "And so many families are living paycheck to paycheck, struggling to keep up with the rising cost of food, gas, housing, and so much more. \n", + "\n", + "I understand.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 5:\n", + "\n", + "As Ohio Senator Sherrod Brown says, “It’s time to bury the label “Rust Belt.” \n", + "\n", + "It’s time. \n", + "\n", + "But with all the bright spots in our economy, record job growth and higher wages, too many families are struggling to keep up with the bills. 
\n", + "\n", + "Inflation is robbing them of the gains they might otherwise feel. \n", + "\n", + "I get it. That’s why my top priority is getting prices under control.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 6:\n", + "\n", + "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", + "\n", + "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 7:\n", + "\n", + "It’s not only the right thing to do—it’s the economically smart thing to do. \n", + "\n", + "That’s why immigration reform is supported by everyone from labor unions to religious leaders to the U.S. Chamber of Commerce. \n", + "\n", + "Let’s get it done once and for all. \n", + "\n", + "Advancing liberty and justice also requires protecting the rights of women. \n", + "\n", + "The constitutional right affirmed in Roe v. Wade—standing precedent for half a century—is under attack as never before.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 8:\n", + "\n", + "I understand. \n", + "\n", + "I remember when my Dad had to leave our home in Scranton, Pennsylvania to find work. I grew up in a family where if the price of food went up, you felt it. \n", + "\n", + "That’s why one of the first things I did as President was fight to pass the American Rescue Plan. \n", + "\n", + "Because people were hurting. We needed to act, and we did. \n", + "\n", + "Few pieces of legislation have done more in a critical moment in our history to lift us out of crisis.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 9:\n", + "\n", + "Third – we can end the shutdown of schools and businesses. We have the tools we need. \n", + "\n", + "It’s time for Americans to get back to work and fill our great downtowns again. People working from home can feel safe to begin to return to the office. \n", + "\n", + "We’re doing that here in the federal government. The vast majority of federal workers will once again work in person. \n", + "\n", + "Our schools are open. Let’s keep it that way. Our kids need to be in school.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 10:\n", + "\n", + "He met the Ukrainian people. \n", + "\n", + "From President Zelenskyy to every Ukrainian, their fearlessness, their courage, their determination, inspires the world. \n", + "\n", + "Groups of citizens blocking tanks with their bodies. Everyone from students to retirees teachers turned soldiers defending their homeland. \n", + "\n", + "In this struggle as President Zelenskyy said in his speech to the European Parliament “Light will win over darkness.” The Ukrainian Ambassador to the United States is here tonight.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 11:\n", + "\n", + "The widow of Sergeant First Class Heath Robinson. \n", + "\n", + "He was born a soldier. 
Army National Guard. Combat medic in Kosovo and Iraq. \n", + "\n", + "Stationed near Baghdad, just yards from burn pits the size of football fields. \n", + "\n", + "Heath’s widow Danielle is here with us tonight. They loved going to Ohio State football games. He loved building Legos with their daughter. \n", + "\n", + "But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. \n", + "\n", + "Danielle says Heath was a fighter to the very end.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 12:\n", + "\n", + "Danielle says Heath was a fighter to the very end. \n", + "\n", + "He didn’t know how to stop fighting, and neither did she. \n", + "\n", + "Through her pain she found purpose to demand we do better. \n", + "\n", + "Tonight, Danielle—we are. \n", + "\n", + "The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \n", + "\n", + "And tonight, I’m announcing we’re expanding eligibility to veterans suffering from nine respiratory cancers.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 13:\n", + "\n", + "We can do all this while keeping lit the torch of liberty that has led generations of immigrants to this land—my forefathers and so many of yours. \n", + "\n", + "Provide a pathway to citizenship for Dreamers, those on temporary status, farm workers, and essential workers. \n", + "\n", + "Revise our laws so businesses have the workers they need and families don’t wait decades to reunite. \n", + "\n", + "It’s not only the right thing to do—it’s the economically smart thing to do.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 14:\n", + "\n", + "He rejected repeated efforts at diplomacy. \n", + "\n", + "He thought the West and NATO wouldn’t respond. And he thought he could divide us at home. Putin was wrong. We were ready. Here is what we did. \n", + "\n", + "We prepared extensively and carefully. \n", + "\n", + "We spent months building a coalition of other freedom-loving nations from Europe and the Americas to Asia and Africa to confront Putin.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 15:\n", + "\n", + "As I’ve told Xi Jinping, it is never a good bet to bet against the American people. \n", + "\n", + "We’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n", + "\n", + "And we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 16:\n", + "\n", + "Tonight I say to the Russian oligarchs and corrupt leaders who have bilked billions of dollars off this violent regime no more. \n", + "\n", + "The U.S. Department of Justice is assembling a dedicated task force to go after the crimes of Russian oligarchs. \n", + "\n", + "We are joining with our European allies to find and seize your yachts your luxury apartments your private jets. We are coming for your ill-begotten gains.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 17:\n", + "\n", + "Look at cars. 
\n", + "\n", + "Last year, there weren’t enough semiconductors to make all the cars that people wanted to buy. \n", + "\n", + "And guess what, prices of automobiles went up. \n", + "\n", + "So—we have a choice. \n", + "\n", + "One way to fight inflation is to drive down wages and make Americans poorer. \n", + "\n", + "I have a better plan to fight inflation. \n", + "\n", + "Lower your costs, not your wages. \n", + "\n", + "Make more cars and semiconductors in America. \n", + "\n", + "More infrastructure and innovation in America. \n", + "\n", + "More goods moving faster and cheaper in America.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 18:\n", + "\n", + "So that’s my plan. It will grow the economy and lower costs for families. \n", + "\n", + "So what are we waiting for? Let’s get this done. And while you’re at it, confirm my nominees to the Federal Reserve, which plays a critical role in fighting inflation. \n", + "\n", + "My plan will not only lower costs to give families a fair shot, it will lower the deficit.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 19:\n", + "\n", + "Let each of us here tonight in this Chamber send an unmistakable signal to Ukraine and to the world. \n", + "\n", + "Please rise if you are able and show that, Yes, we the United States of America stand with the Ukrainian people. \n", + "\n", + "Throughout our history we’ve learned this lesson when dictators do not pay a price for their aggression they cause more chaos. \n", + "\n", + "They keep moving. \n", + "\n", + "And the costs and the threats to America and the world keep rising.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 20:\n", + "\n", + "It’s based on DARPA—the Defense Department project that led to the Internet, GPS, and so much more. \n", + "\n", + "ARPA-H will have a singular purpose—to drive breakthroughs in cancer, Alzheimer’s, diabetes, and more. \n", + "\n", + "A unity agenda for the nation. \n", + "\n", + "We can do this. \n", + "\n", + "My fellow Americans—tonight , we have gathered in a sacred space—the citadel of our democracy. \n", + "\n", + "In this Capitol, generation after generation, Americans have debated great questions amid great strife, and have done great things.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", + "To disable this warning, you can either:\n", + "\t- Avoid using `tokenizers` before the fork if possible\n", + "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "from langchain_community.vectorstores.faiss import FAISS\n", + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "documents = TextLoader(\"../../how_to/state_of_the_union.txt\").load()\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n", + "texts = text_splitter.split_documents(documents)\n", + "retriever = FAISS.from_documents(\n", + " texts, HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")\n", + ").as_retriever(search_kwargs={\"k\": 20})\n", + "\n", + "query = \"What did the president say about Ketanji Brown Jackson\"\n", + "docs = retriever.invoke(query)\n", + "pretty_print_docs(docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Reranking with VolcengineRerank\n", + "Now let's wrap our base retriever with a `ContextualCompressionRetriever`. We'll use the `VolcengineRerank` to rerank the returned results." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Document 1:\n", + "\n", + "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", + "\n", + "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 2:\n", + "\n", + "As I said last year, especially to our younger transgender Americans, I will always have your back as your President, so you can be yourself and reach your God-given potential. \n", + "\n", + "While it often appears that we never agree, that isn’t true. I signed 80 bipartisan bills into law last year. From preventing government shutdowns to protecting Asian-Americans from still-too-common hate crimes to reforming military justice.\n", + "----------------------------------------------------------------------------------------------------\n", + "Document 3:\n", + "\n", + "We cannot let this happen. \n", + "\n", + "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", + "\n", + "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. 
Justice Breyer, thank you for your service.\n" + ] + } + ], + "source": [ + "from langchain.retrievers import ContextualCompressionRetriever\n", + "from langchain_community.document_compressors.volcengine_rerank import VolcengineRerank\n", + "\n", + "compressor = VolcengineRerank()\n", + "compression_retriever = ContextualCompressionRetriever(\n", + " base_compressor=compressor, base_retriever=retriever\n", + ")\n", + "\n", + "compressed_docs = compression_retriever.invoke(\n", + " \"What did the president say about Ketanji Jackson Brown\"\n", + ")\n", + "pretty_print_docs(compressed_docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/llm_caching.ipynb b/docs/docs/integrations/llm_caching.ipynb new file mode 100644 index 0000000000000..dba454c231c0d --- /dev/null +++ b/docs/docs/integrations/llm_caching.ipynb @@ -0,0 +1,2237 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f36d938c", + "metadata": {}, + "source": [ + "# Model caches\n", + "\n", + "This notebook covers how to cache results of individual LLM calls using different caches." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "10ad9224", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-12T02:05:57.319706Z", + "start_time": "2024-04-12T02:05:57.303868Z" + } + }, + "outputs": [], + "source": [ + "from langchain.globals import set_llm_cache\n", + "from langchain_openai import OpenAI\n", + "\n", + "# To make the caching really obvious, lets use a slower model.\n", + "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2)" + ] + }, + { + "cell_type": "markdown", + "id": "b50f0598", + "metadata": { + "tags": [] + }, + "source": [ + "## `In Memory` Cache" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "426ff912", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.cache import InMemoryCache\n", + "\n", + "set_llm_cache(InMemoryCache())" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "64005d1f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 52.2 ms, sys: 15.2 ms, total: 67.4 ms\n", + "Wall time: 1.19 s\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? Because it was...two tired!\"" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c8a1cb2b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 191 µs, sys: 11 µs, total: 202 µs\n", + "Wall time: 205 µs\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy couldn't the bicycle stand up by itself? 
Because it was...two tired!\"" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "4bf59c12", + "metadata": { + "tags": [] + }, + "source": [ + "## `SQLite` Cache" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "aefd9d2f", + "metadata": {}, + "outputs": [], + "source": [ + "!rm .langchain.db" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "5f036236", + "metadata": {}, + "outputs": [], + "source": [ + "# We can do the same thing with a SQLite cache\n", + "from langchain_community.cache import SQLiteCache\n", + "\n", + "set_llm_cache(SQLiteCache(database_path=\".langchain.db\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fa18e3af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 33.2 ms, sys: 18.1 ms, total: 51.2 ms\n", + "Wall time: 667 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "5bf2f6fd", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4.86 ms, sys: 1.97 ms, total: 6.83 ms\n", + "Wall time: 5.79 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "e71273ab", + "metadata": {}, + "source": [ + "## `Upstash Redis` Cache" + ] + }, + { + "cell_type": "markdown", + "id": "f10dabef", + "metadata": {}, + "source": [ + "### Standard Cache\n", + "Use [Upstash Redis](https://upstash.com) to cache prompts and responses with a serverless HTTP API." 
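+    "\n",
+    "The cell below registers the cache by assigning `langchain.llm_cache` directly; it can equally be registered with `set_llm_cache`, as in the other sections of this page. A minimal sketch using the same placeholder credentials:\n",
+    "\n",
+    "```python\n",
+    "from langchain.globals import set_llm_cache\n",
+    "from langchain_community.cache import UpstashRedisCache\n",
+    "from upstash_redis import Redis\n",
+    "\n",
+    "# Fill in the REST URL and token from your Upstash console\n",
+    "set_llm_cache(UpstashRedisCache(redis_=Redis(url=\"\", token=\"\")))\n",
+    "```"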
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "f3920f25", + "metadata": {}, + "outputs": [], + "source": [ + "import langchain\n", + "from langchain_community.cache import UpstashRedisCache\n", + "from upstash_redis import Redis\n", + "\n", + "URL = \"\"\n", + "TOKEN = \"\"\n", + "\n", + "langchain.llm_cache = UpstashRedisCache(redis_=Redis(url=URL, token=TOKEN))" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "3bf7d959", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 7.56 ms, sys: 2.98 ms, total: 10.5 ms\n", + "Wall time: 1.14 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "00fc3a34", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.78 ms, sys: 1.95 ms, total: 4.73 ms\n", + "Wall time: 82.9 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "278ad7ae", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## `Redis` Cache" + ] + }, + { + "cell_type": "markdown", + "id": "c5c9a4d5", + "metadata": {}, + "source": [ + "### Standard Cache\n", + "Use [Redis](/docs/integrations/providers/redis) to cache prompts and responses." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "39f6eb0b", + "metadata": {}, + "outputs": [], + "source": [ + "# We can do the same thing with a Redis cache\n", + "# (make sure your local Redis instance is running first before running this example)\n", + "from langchain_community.cache import RedisCache\n", + "from redis import Redis\n", + "\n", + "set_llm_cache(RedisCache(redis_=Redis()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28920749", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 6.88 ms, sys: 8.75 ms, total: 15.6 ms\n", + "Wall time: 1.04 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "94bf9415", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.59 ms, sys: 610 µs, total: 2.2 ms\n", + "Wall time: 5.58 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "82be23f6", + "metadata": {}, + "source": [ + "### Semantic Cache\n", + "Use [Redis](/docs/integrations/providers/redis) to cache prompts and responses and evaluate hits based on semantic similarity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "64df3099", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.cache import RedisSemanticCache\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "set_llm_cache(\n", + " RedisSemanticCache(redis_url=\"redis://localhost:6379\", embedding=OpenAIEmbeddings())\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "8e91d3ac", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 351 ms, sys: 156 ms, total: 507 ms\n", + "Wall time: 3.37 s\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy don't scientists trust atoms?\\nBecause they make up everything.\"" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "df856948", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 6.25 ms, sys: 2.72 ms, total: 8.97 ms\n", + "Wall time: 262 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy don't scientists trust atoms?\\nBecause they make up everything.\"" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time, while not a direct hit, the question is semantically similar to the original question,\n", + "# so it uses the cached result!\n", + "llm(\"Tell me one joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "684eab55", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## `GPTCache`\n", + "\n", + "We can use [GPTCache](https://github.com/zilliztech/GPTCache) for exact match caching OR to cache results based on semantic similarity\n", + "\n", + "Let's first start with an example of exact match" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "14a82124", + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "\n", + "from gptcache import Cache\n", + "from gptcache.manager.factory import manager_factory\n", + "from gptcache.processor.pre import get_prompt\n", + "from langchain_community.cache import GPTCache\n", + "\n", + "\n", + "def get_hashed_name(name):\n", + " return hashlib.sha256(name.encode()).hexdigest()\n", + "\n", + "\n", + "def init_gptcache(cache_obj: Cache, llm: str):\n", + " hashed_llm = get_hashed_name(llm)\n", + " cache_obj.init(\n", + " pre_embedding_func=get_prompt,\n", + " data_manager=manager_factory(manager=\"map\", data_dir=f\"map_cache_{hashed_llm}\"),\n", + " )\n", + "\n", + "\n", + "set_llm_cache(GPTCache(init_gptcache))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e4ecfd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 21.5 ms, sys: 21.3 ms, total: 42.8 ms\n", + "Wall time: 6.2 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "c98bbe3b", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 571 µs, sys: 43 µs, total: 614 µs\n", + "Wall time: 635 µs\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "502b6076", + "metadata": {}, + "source": [ + "Let's now show an example of similarity caching" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b3c663bb", + "metadata": {}, + "outputs": [], + "source": [ + "import hashlib\n", + "\n", + "from gptcache import Cache\n", + "from gptcache.adapter.api import init_similar_cache\n", + "from langchain_community.cache import GPTCache\n", + "\n", + "\n", + "def get_hashed_name(name):\n", + " return hashlib.sha256(name.encode()).hexdigest()\n", + "\n", + "\n", + "def init_gptcache(cache_obj: Cache, llm: str):\n", + " hashed_llm = get_hashed_name(llm)\n", + " init_similar_cache(cache_obj=cache_obj, data_dir=f\"similar_cache_{hashed_llm}\")\n", + "\n", + "\n", + "set_llm_cache(GPTCache(init_gptcache))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8c273ced", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.42 s, sys: 279 ms, total: 1.7 s\n", + "Wall time: 8.44 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "93e21a5f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 866 ms, sys: 20 ms, total: 886 ms\n", + "Wall time: 226 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# This is an exact match, so it finds it in the cache\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "c4bb024b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 853 ms, sys: 14.8 ms, total: 868 ms\n", + "Wall time: 224 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side.'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# This is not an exact match, but semantically within distance so it hits!\n", + "llm(\"Tell me joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "9b2b2777", + "metadata": {}, + "source": [ + "## `MongoDB Atlas` Cache\n", + "\n", + "[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS, Azure, and GCP. 
It has native support for \n", + "Vector Search on the MongoDB document data.\n", + "Use [MongoDB Atlas Vector Search](/docs/integrations/providers/mongodb_atlas) to semantically cache prompts and responses." + ] + }, + { + "cell_type": "markdown", + "id": "ecdc2a0a", + "metadata": {}, + "source": [ + "### `MongoDBCache`\n", + "An abstraction to store a simple cache in MongoDB. This does not use Semantic Caching, nor does it require an index to be made on the collection before generation.\n", + "\n", + "To import this cache:\n", + "\n", + "```python\n", + "from langchain_mongodb.cache import MongoDBCache\n", + "```\n", + "\n", + "\n", + "To use this cache with your LLMs:\n", + "```python\n", + "from langchain_core.globals import set_llm_cache\n", + "\n", + "# use any embedding provider...\n", + "from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings\n", + "\n", + "mongodb_atlas_uri = \"\"\n", + "COLLECTION_NAME=\"\"\n", + "DATABASE_NAME=\"\"\n", + "\n", + "set_llm_cache(MongoDBCache(\n", + " connection_string=mongodb_atlas_uri,\n", + " collection_name=COLLECTION_NAME,\n", + " database_name=DATABASE_NAME,\n", + "))\n", + "```\n", + "\n", + "\n", + "### `MongoDBAtlasSemanticCache`\n", + "Semantic caching allows users to retrieve cached prompts based on semantic similarity between the user input and previously cached results. Under the hood it blends MongoDBAtlas as both a cache and a vectorstore.\n", + "The MongoDBAtlasSemanticCache inherits from `MongoDBAtlasVectorSearch` and needs an Atlas Vector Search Index defined to work. Please look at the [usage example](/docs/integrations/vectorstores/mongodb_atlas) on how to set up the index.\n", + "\n", + "To import this cache:\n", + "```python\n", + "from langchain_mongodb.cache import MongoDBAtlasSemanticCache\n", + "```\n", + "\n", + "To use this cache with your LLMs:\n", + "```python\n", + "from langchain_core.globals import set_llm_cache\n", + "\n", + "# use any embedding provider...\n", + "from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings\n", + "\n", + "mongodb_atlas_uri = \"\"\n", + "COLLECTION_NAME=\"\"\n", + "DATABASE_NAME=\"\"\n", + "\n", + "set_llm_cache(MongoDBAtlasSemanticCache(\n", + " embedding=FakeEmbeddings(),\n", + " connection_string=mongodb_atlas_uri,\n", + " collection_name=COLLECTION_NAME,\n", + " database_name=DATABASE_NAME,\n", + "))\n", + "```\n", + "\n", + "To find more resources about using MongoDBSemanticCache visit [here](https://www.mongodb.com/blog/post/introducing-semantic-caching-dedicated-mongodb-lang-chain-package-gen-ai-apps)" + ] + }, + { + "cell_type": "markdown", + "id": "726fe754", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## `Momento` Cache\n", + "Use [Momento](/docs/integrations/providers/momento) to cache prompts and responses.\n", + "\n", + "Requires momento to use, uncomment below to install:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8949f29", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet momento" + ] + }, + { + "cell_type": "markdown", + "id": "56ea6a08", + "metadata": {}, + "source": [ + "You'll need to get a Momento auth token to use this class. This can either be passed in to a momento.CacheClient if you'd like to instantiate that directly, as a named parameter `auth_token` to `MomentoChatMessageHistory.from_client_params`, or can just be set as an environment variable `MOMENTO_AUTH_TOKEN`." 
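+    "\n",
+    "For the environment-variable route, a minimal sketch (run this before the next cell):\n",
+    "\n",
+    "```python\n",
+    "import getpass\n",
+    "import os\n",
+    "\n",
+    "# The next cell's MomentoCache.from_client_params can then pick the token up from the environment\n",
+    "os.environ[\"MOMENTO_AUTH_TOKEN\"] = getpass.getpass(\"MOMENTO_AUTH_TOKEN = \")\n",
+    "```"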
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "2005f03a", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import timedelta\n", + "\n", + "from langchain_community.cache import MomentoCache\n", + "\n", + "cache_name = \"langchain\"\n", + "ttl = timedelta(days=1)\n", + "set_llm_cache(MomentoCache.from_client_params(cache_name, ttl))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "c6a6c238", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 40.7 ms, sys: 16.5 ms, total: 57.2 ms\n", + "Wall time: 1.73 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "b8f78f9d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.16 ms, sys: 2.98 ms, total: 6.14 ms\n", + "Wall time: 57.9 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time it is, so it goes faster\n", + "# When run in the same region as the cache, latencies are single digit ms\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "934943dc", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## `SQLAlchemy` Cache\n", + "\n", + "You can use `SQLAlchemyCache` to cache with any SQL database supported by `SQLAlchemy`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "acccff40", + "metadata": {}, + "outputs": [], + "source": [ + "# from langchain.cache import SQLAlchemyCache\n", + "# from sqlalchemy import create_engine\n", + "\n", + "# engine = create_engine(\"postgresql://postgres:postgres@localhost:5432/postgres\")\n", + "# set_llm_cache(SQLAlchemyCache(engine))" + ] + }, + { + "cell_type": "markdown", + "id": "0959d640", + "metadata": {}, + "source": [ + "### Custom SQLAlchemy Schemas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac967b39", + "metadata": {}, + "outputs": [], + "source": [ + "# You can define your own declarative SQLAlchemyCache child class to customize the schema used for caching. 
For example, to support high-speed fulltext prompt indexing with Postgres, use:\n", + "\n", + "from langchain_community.cache import SQLAlchemyCache\n", + "from sqlalchemy import Column, Computed, Index, Integer, Sequence, String, create_engine\n", + "from sqlalchemy.ext.declarative import declarative_base\n", + "from sqlalchemy_utils import TSVectorType\n", + "\n", + "Base = declarative_base()\n", + "\n", + "\n", + "class FulltextLLMCache(Base): # type: ignore\n", + " \"\"\"Postgres table for fulltext-indexed LLM Cache\"\"\"\n", + "\n", + " __tablename__ = \"llm_cache_fulltext\"\n", + " id = Column(Integer, Sequence(\"cache_id\"), primary_key=True)\n", + " prompt = Column(String, nullable=False)\n", + " llm = Column(String, nullable=False)\n", + " idx = Column(Integer)\n", + " response = Column(String)\n", + " prompt_tsv = Column(\n", + " TSVectorType(),\n", + " Computed(\"to_tsvector('english', llm || ' ' || prompt)\", persisted=True),\n", + " )\n", + " __table_args__ = (\n", + " Index(\"idx_fulltext_prompt_tsv\", prompt_tsv, postgresql_using=\"gin\"),\n", + " )\n", + "\n", + "\n", + "engine = create_engine(\"postgresql://postgres:postgres@localhost:5432/postgres\")\n", + "set_llm_cache(SQLAlchemyCache(engine, FulltextLLMCache))" + ] + }, + { + "cell_type": "markdown", + "id": "eeba7d60", + "metadata": {}, + "source": [ + "## `Cassandra` caches\n", + "\n", + "> [Apache Cassandra®](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database. Starting with version 5.0, the database ships with [vector search capabilities](https://cassandra.apache.org/doc/trunk/cassandra/vector-search/overview.html).\n", + "\n", + "You can use Cassandra for caching LLM responses, choosing from the exact-match `CassandraCache` or the (vector-similarity-based) `CassandraSemanticCache`.\n", + "\n", + "Let's see both in action. The next cells guide you through the (little) required setup, and the following cells showcase the two available cache classes." + ] + }, + { + "cell_type": "markdown", + "id": "6cf6acb4-1bc4-4c4b-9325-2420c17e5e2b", + "metadata": {}, + "source": [ + "### Required dependency" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe842b0d-fd3d-47dd-bc6a-975997c9707f", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet \"cassio>=0.1.4\"" + ] + }, + { + "cell_type": "markdown", + "id": "a4a6725d", + "metadata": {}, + "source": [ + "### Connect to the DB\n", + "\n", + "The Cassandra caches shown in this page can be used with Cassandra as well as other derived databases, such as Astra DB, which use the CQL (Cassandra Query Language) protocol.\n", + "\n", + "> DataStax [Astra DB](https://docs.datastax.com/en/astra-serverless/docs/vector-search/quickstart.html) is a managed serverless database built on Cassandra, offering the same interface and strengths.\n", + "\n", + "Depending on whether you connect to a Cassandra cluster or to Astra DB through CQL, you will provide different parameters when instantiating the cache (through initialization of a CassIO connection)." + ] + }, + { + "cell_type": "markdown", + "id": "15735abe-2567-43ce-aa91-f253b33b5a88", + "metadata": {}, + "source": [ + "#### Connecting to a Cassandra cluster\n", + "\n", + "You first need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. 
with network settings and authentication), but this might be something like:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e4b898a5-fe0e-4f11-a87b-7979652322a7", + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "\n", + "cluster = Cluster([\"127.0.0.1\"])\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "id": "6435198e-8713-4045-906b-879613bf5083", + "metadata": {}, + "source": [ + "You can now set the session, along with your desired keyspace name, as a global CassIO parameter:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "992267dc-0d19-45e0-9a13-ccbb6348d804", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CASSANDRA_KEYSPACE = demo_keyspace\n" + ] + } + ], + "source": [ + "import cassio\n", + "\n", + "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")\n", + "\n", + "cassio.init(session=session, keyspace=CASSANDRA_KEYSPACE)" + ] + }, + { + "cell_type": "markdown", + "id": "2cc7ba29-8f84-4fbf-aaf7-3daa1be7e7b0", + "metadata": {}, + "source": [ + "#### Connecting to Astra DB through CQL\n", + "\n", + "In this case you initialize CassIO with the following connection parameters:\n", + "\n", + "- the Database ID, e.g. `01234567-89ab-cdef-0123-456789abcdef`\n", + "- the Token, e.g. `AstraCS:6gBhNmsk135....` (it must be a \"Database Administrator\" token)\n", + "- Optionally a Keyspace name (if omitted, the default one for the database will be used)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ead97077-cc79-4f5c-940c-91eb21650466", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASTRA_DB_ID = 01234567-89ab-cdef-0123-456789abcdef\n", + "ASTRA_DB_APPLICATION_TOKEN = ········\n", + "ASTRA_DB_KEYSPACE (optional, can be left empty) = my_keyspace\n" + ] + } + ], + "source": [ + "import getpass\n", + "\n", + "ASTRA_DB_ID = input(\"ASTRA_DB_ID = \")\n", + "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")\n", + "\n", + "desired_keyspace = input(\"ASTRA_DB_KEYSPACE (optional, can be left empty) = \")\n", + "if desired_keyspace:\n", + " ASTRA_DB_KEYSPACE = desired_keyspace\n", + "else:\n", + " ASTRA_DB_KEYSPACE = None" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "cc53ce1b", + "metadata": {}, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "cassio.init(\n", + " database_id=ASTRA_DB_ID,\n", + " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " keyspace=ASTRA_DB_KEYSPACE,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8665664a", + "metadata": {}, + "source": [ + "### Cassandra: Exact cache\n", + "\n", + "This will avoid invoking the LLM when the supplied prompt is _exactly_ the same as one encountered already:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "00a5e66f", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.cache import CassandraCache\n", + "from langchain_core.globals import set_llm_cache\n", + "\n", + "set_llm_cache(CassandraCache())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "956a5145", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. 
This results in the Moon always showing the same side to the Earth. This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", + "CPU times: user 92.5 ms, sys: 8.89 ms, total: 101 ms\n", + "Wall time: 1.98 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "158f0151", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon is tidally locked with the Earth, which means that its rotation on its own axis is synchronized with its orbit around the Earth. This results in the Moon always showing the same side to the Earth. This is because the gravitational forces between the Earth and the Moon have caused the Moon's rotation to slow down over time, until it reached a point where it takes the same amount of time for the Moon to rotate on its axis as it does to orbit around the Earth. This phenomenon is common among satellites in close orbits around their parent planets and is known as tidal locking.\n", + "CPU times: user 5.51 ms, sys: 0 ns, total: 5.51 ms\n", + "Wall time: 5.78 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "markdown", + "id": "8fc4d017", + "metadata": {}, + "source": [ + "### Cassandra: Semantic cache\n", + "\n", + "This cache will do a semantic similarity search and return a hit if it finds a cached entry that is similar enough, For this, you need to provide an `Embeddings` instance of your choice." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b9ad3f54", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "embedding = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "4623f95e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.cache import CassandraSemanticCache\n", + "from langchain_core.globals import set_llm_cache\n", + "\n", + "set_llm_cache(\n", + " CassandraSemanticCache(\n", + " embedding=embedding,\n", + " table_name=\"my_semantic_cache\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1a8e577b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. 
This is a common occurrence among many moons in our solar system.\n", + "CPU times: user 49.5 ms, sys: 7.38 ms, total: 56.9 ms\n", + "Wall time: 2.55 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Why is the Moon always showing the same side?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f7abddfd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "The Moon is always showing the same side because of a phenomenon called synchronous rotation. This means that the Moon rotates on its axis at the same rate that it orbits around the Earth, which takes approximately 27.3 days. This results in the same side of the Moon always facing the Earth. This is due to the gravitational forces between the Earth and the Moon, which have caused the Moon's rotation to gradually slow down and become synchronized with its orbit. This is a common occurrence among many moons in our solar system.\n", + "CPU times: user 21.2 ms, sys: 3.38 ms, total: 24.6 ms\n", + "Wall time: 532 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"How come we always see one face of the moon?\"))" + ] + }, + { + "cell_type": "markdown", + "id": "55dc84b3-37cb-4f19-b175-40e18e06f83f", + "metadata": {}, + "source": [ + "#### Attribution statement\n", + "\n", + ">Apache Cassandra, Cassandra and Apache are either registered trademarks or trademarks of the [Apache Software Foundation](http://www.apache.org/) in the United States and/or other countries." + ] + }, + { + "cell_type": "markdown", + "id": "8712f8fc-bb89-4164-beb9-c672778bbd91", + "metadata": {}, + "source": [ + "## `Astra DB` Caches" + ] + }, + { + "cell_type": "markdown", + "id": "173041d9-e4af-4f68-8461-d302bfc7e1bd", + "metadata": {}, + "source": [ + "You can easily use [Astra DB](https://docs.datastax.com/en/astra/home/astra.html) as an LLM cache, with either the \"exact\" or the \"semantic-based\" cache.\n", + "\n", + "Make sure you have a running database (it must be a Vector-enabled database to use the Semantic cache) and get the required credentials on your Astra dashboard:\n", + "\n", + "- the API Endpoint looks like `https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com`\n", + "- the Token looks like `AstraCS:6gBhNmsk135....`" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "feb510b6-99a3-4228-8e11-563051f8178e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ASTRA_DB_API_ENDPOINT = https://01234567-89ab-cdef-0123-456789abcdef-us-east1.apps.astra.datastax.com\n", + "ASTRA_DB_APPLICATION_TOKEN = ········\n" + ] + } + ], + "source": [ + "import getpass\n", + "\n", + "ASTRA_DB_API_ENDPOINT = input(\"ASTRA_DB_API_ENDPOINT = \")\n", + "ASTRA_DB_APPLICATION_TOKEN = getpass.getpass(\"ASTRA_DB_APPLICATION_TOKEN = \")" + ] + }, + { + "cell_type": "markdown", + "id": "ee6d587f-4b7c-43f4-9e90-5129c842a143", + "metadata": {}, + "source": [ + "### Astra DB exact LLM cache\n", + "\n", + "This will avoid invoking the LLM when the supplied prompt is _exactly_ the same as one encountered already:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ad63c146-ee41-4896-90ee-29fcc39f0ed5", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.globals import set_llm_cache\n", + "from langchain_astradb import AstraDBCache\n", + "\n", + "set_llm_cache(\n", + " AstraDBCache(\n", + " api_endpoint=ASTRA_DB_API_ENDPOINT,\n", + 
" token=ASTRA_DB_APPLICATION_TOKEN,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "83e0fb02-e8eb-4483-9eb1-55b5e14c4487", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n", + "CPU times: user 70.8 ms, sys: 4.13 ms, total: 74.9 ms\n", + "Wall time: 2.06 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Is a true fakery the same as a fake truth?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4d20d498-fe28-4e26-8531-2b31c52ee687", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "There is no definitive answer to this question as it depends on the interpretation of the terms \"true fakery\" and \"fake truth\". However, one possible interpretation is that a true fakery is a counterfeit or imitation that is intended to deceive, whereas a fake truth is a false statement that is presented as if it were true.\n", + "CPU times: user 15.1 ms, sys: 3.7 ms, total: 18.8 ms\n", + "Wall time: 531 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Is a true fakery the same as a fake truth?\"))" + ] + }, + { + "cell_type": "markdown", + "id": "524b94fa-6162-4880-884d-d008749d14e2", + "metadata": {}, + "source": [ + "### Astra DB Semantic cache\n", + "\n", + "This cache will do a semantic similarity search and return a hit if it finds a cached entry that is similar enough, For this, you need to provide an `Embeddings` instance of your choice." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "dc329c55-1cc4-4b74-94f9-61f8990fb214", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "embedding = OpenAIEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "83952a90-ab14-4e59-87c0-d2bdc1d43e43", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_astradb import AstraDBSemanticCache\n", + "\n", + "set_llm_cache(\n", + " AstraDBSemanticCache(\n", + " api_endpoint=ASTRA_DB_API_ENDPOINT,\n", + " token=ASTRA_DB_APPLICATION_TOKEN,\n", + " embedding=embedding,\n", + " collection_name=\"demo_semantic_cache\",\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "d74b249a-94d5-42d0-af74-f7565a994dea", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of considerable philosophical debate. 
It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n", + "CPU times: user 65.6 ms, sys: 15.3 ms, total: 80.9 ms\n", + "Wall time: 2.72 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Are there truths that are false?\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "11973d73-d2f4-46bd-b229-1c589df9b788", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "There is no definitive answer to this question since it presupposes a great deal about the nature of truth itself, which is a matter of considerable philosophical debate. It is possible, however, to construct scenarios in which something could be considered true despite being false, such as if someone sincerely believes something to be true even though it is not.\n", + "CPU times: user 29.3 ms, sys: 6.21 ms, total: 35.5 ms\n", + "Wall time: 1.03 s\n" + ] + } + ], + "source": [ + "%%time\n", + "\n", + "print(llm.invoke(\"Is is possible that something false can be also true?\"))" + ] + }, + { + "cell_type": "markdown", + "id": "40624c26e86b57a4", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Azure Cosmos DB Semantic Cache\n", + "\n", + "You can use this integrated [vector database](https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database) for caching." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4a9d592db01b11b2", + "metadata": { + "ExecuteTime": { + "end_time": "2024-03-18T01:01:32.014750Z", + "start_time": "2024-03-18T01:01:31.955991Z" + } + }, + "outputs": [], + "source": [ + "from langchain_community.cache import AzureCosmosDBSemanticCache\n", + "from langchain_community.vectorstores.azure_cosmos_db import (\n", + " CosmosDBSimilarityType,\n", + " CosmosDBVectorSearchType,\n", + ")\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "# Read more about Azure CosmosDB Mongo vCore vector search here https://learn.microsoft.com/en-us/azure/cosmos-db/mongodb/vcore/vector-search\n", + "\n", + "NAMESPACE = \"langchain_test_db.langchain_test_collection\"\n", + "CONNECTION_STRING = (\n", + " \"Please provide your azure cosmos mongo vCore vector db connection string\"\n", + ")\n", + "\n", + "DB_NAME, COLLECTION_NAME = NAMESPACE.split(\".\")\n", + "\n", + "# Default value for these params\n", + "num_lists = 3\n", + "dimensions = 1536\n", + "similarity_algorithm = CosmosDBSimilarityType.COS\n", + "kind = CosmosDBVectorSearchType.VECTOR_IVF\n", + "m = 16\n", + "ef_construction = 64\n", + "ef_search = 40\n", + "score_threshold = 0.9\n", + "application_name = \"LANGCHAIN_CACHING_PYTHON\"\n", + "\n", + "\n", + "set_llm_cache(\n", + " AzureCosmosDBSemanticCache(\n", + " cosmosdb_connection_string=CONNECTION_STRING,\n", + " cosmosdb_client=None,\n", + " embedding=OpenAIEmbeddings(),\n", + " database_name=DB_NAME,\n", + " collection_name=COLLECTION_NAME,\n", + " num_lists=num_lists,\n", + " similarity=similarity_algorithm,\n", + " kind=kind,\n", + " dimensions=dimensions,\n", + " m=m,\n", + " ef_construction=ef_construction,\n", + " ef_search=ef_search,\n", + " score_threshold=score_threshold,\n", + " application_name=application_name,\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "14ca942820e8140c", + "metadata": { + "ExecuteTime": { + "end_time": 
"2024-03-12T00:12:57.462226Z", + "start_time": "2024-03-12T00:12:55.166201Z" + }, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 45.6 ms, sys: 19.7 ms, total: 65.3 ms\n", + "Wall time: 2.29 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy was the math book sad? Because it had too many problems.'" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "bc1570a2a77b58c8", + "metadata": { + "ExecuteTime": { + "end_time": "2024-03-12T00:13:03.652755Z", + "start_time": "2024-03-12T00:13:03.159428Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 9.61 ms, sys: 3.42 ms, total: 13 ms\n", + "Wall time: 474 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy was the math book sad? Because it had too many problems.'" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "306ff47b", + "metadata": {}, + "source": [ + "## `Elasticsearch` Cache\n", + "A caching layer for LLMs that uses Elasticsearch.\n", + "\n", + "First install the LangChain integration with Elasticsearch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ee5cd3e", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U langchain-elasticsearch" + ] + }, + { + "cell_type": "markdown", + "id": "9e70b0a0", + "metadata": {}, + "source": [ + "Use the class `ElasticsearchCache`.\n", + "\n", + "Simple example:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1762c9c1", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.globals import set_llm_cache\n", + "from langchain_elasticsearch import ElasticsearchCache\n", + "\n", + "set_llm_cache(\n", + " ElasticsearchCache(\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"llm-chat-cache\",\n", + " metadata={\"project\": \"my_chatgpt_project\"},\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "d4fac5d6", + "metadata": {}, + "source": [ + "The `index_name` parameter can also accept aliases. This allows to use the \n", + "[ILM: Manage the index lifecycle](https://www.elastic.co/guide/en/elasticsearch/reference/current/index-lifecycle-management.html)\n", + "that we suggest to consider for managing retention and controlling cache growth.\n", + "\n", + "Look at the class docstring for all parameters." 
+ ] + }, + { + "cell_type": "markdown", + "id": "eaf9dfd7", + "metadata": {}, + "source": [ + "### Index the generated text\n", + "\n", + "The cached data won't be searchable by default.\n", + "The developer can customize the building of the Elasticsearch document in order to add indexed text fields,\n", + "where to put, for example, the text generated by the LLM.\n", + "\n", + "This can be done by subclassing end overriding methods.\n", + "The new cache class can be applied also to a pre-existing cache index:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5104c2c0", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "from typing import Any, Dict, List\n", + "\n", + "from langchain.globals import set_llm_cache\n", + "from langchain_core.caches import RETURN_VAL_TYPE\n", + "from langchain_elasticsearch import ElasticsearchCache\n", + "\n", + "\n", + "class SearchableElasticsearchCache(ElasticsearchCache):\n", + " @property\n", + " def mapping(self) -> Dict[str, Any]:\n", + " mapping = super().mapping\n", + " mapping[\"mappings\"][\"properties\"][\"parsed_llm_output\"] = {\n", + " \"type\": \"text\",\n", + " \"analyzer\": \"english\",\n", + " }\n", + " return mapping\n", + "\n", + " def build_document(\n", + " self, prompt: str, llm_string: str, return_val: RETURN_VAL_TYPE\n", + " ) -> Dict[str, Any]:\n", + " body = super().build_document(prompt, llm_string, return_val)\n", + " body[\"parsed_llm_output\"] = self._parse_output(body[\"llm_output\"])\n", + " return body\n", + "\n", + " @staticmethod\n", + " def _parse_output(data: List[str]) -> List[str]:\n", + " return [\n", + " json.loads(output)[\"kwargs\"][\"message\"][\"kwargs\"][\"content\"]\n", + " for output in data\n", + " ]\n", + "\n", + "\n", + "set_llm_cache(\n", + " SearchableElasticsearchCache(\n", + " es_url=\"http://localhost:9200\", index_name=\"llm-chat-cache\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "db0dea73", + "metadata": {}, + "source": [ + "When overriding the mapping and the document building, \n", + "please only make additive modifications, keeping the base mapping intact." + ] + }, + { + "cell_type": "markdown", + "id": "0c69d84d", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Optional Caching\n", + "You can also turn off caching for specific LLMs should you choose. 
In the example below, even though global caching is enabled, we turn it off for a specific LLM" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6af46e2b", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", n=2, best_of=2, cache=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "26c4fd8f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 5.8 ms, sys: 2.71 ms, total: 8.51 ms\n", + "Wall time: 745 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nWhy did the chicken cross the road?\\n\\nTo get to the other side!'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "46846b20", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4.91 ms, sys: 2.64 ms, total: 7.55 ms\n", + "Wall time: 623 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nTwo guys stole a calendar. They got six months each.'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "5da41b77", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Optional Caching in Chains\n", + "You can also turn off caching for particular nodes in chains. Note that because of certain interfaces, its often easier to construct the chain first, and then edit the LLM afterwards.\n", + "\n", + "As an example, we will load a summarizer map-reduce chain. We will cache results for the map-step, but then not freeze it for the combine step." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9afa3f7a", + "metadata": {}, + "outputs": [], + "source": [ + "llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\")\n", + "no_cache_llm = OpenAI(model_name=\"gpt-3.5-turbo-instruct\", cache=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "98a78e8e", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_text_splitters import CharacterTextSplitter\n", + "\n", + "text_splitter = CharacterTextSplitter()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2bfb099b", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"../../how_to/state_of_the_union.txt\") as f:\n", + " state_of_the_union = f.read()\n", + "texts = text_splitter.split_text(state_of_the_union)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f78b7f51", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.documents import Document\n", + "\n", + "docs = [Document(page_content=t) for t in texts[:3]]\n", + "from langchain.chains.summarize import load_summarize_chain" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "a2a30822", + "metadata": {}, + "outputs": [], + "source": [ + "chain = load_summarize_chain(llm, chain_type=\"map_reduce\", reduce_llm=no_cache_llm)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a545b743", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 452 ms, sys: 60.3 ms, total: 512 ms\n", + "Wall time: 5.09 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. He also talks about his vision for America, which includes investing in education and infrastructure. In response to Russian aggression in Ukraine, the United States is joining with European allies to impose sanctions and isolate Russia. American forces are being mobilized to protect NATO countries in the event that Putin decides to keep moving west. The Ukrainians are bravely fighting back, but the next few weeks will be hard for them. Putin will pay a high price for his actions in the long run. Americans should not be alarmed, as the United States is taking action to protect its interests and allies.'" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "chain.run(docs)" + ] + }, + { + "cell_type": "markdown", + "id": "3ed85e9d", + "metadata": {}, + "source": [ + "When we run it again, we see that it runs substantially faster but the final answer is different. This is due to caching at the map steps, but not at the reduce step." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "39cbb282", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 11.5 ms, sys: 4.33 ms, total: 15.8 ms\n", + "Wall time: 1.04 s\n" + ] + }, + { + "data": { + "text/plain": [ + "'\\n\\nPresident Biden is discussing the American Rescue Plan and the Bipartisan Infrastructure Law, which will create jobs and help Americans. 
He also talks about his vision for America, which includes investing in education and infrastructure.'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "chain.run(docs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9df0dab8", + "metadata": {}, + "outputs": [], + "source": [ + "!rm .langchain.db sqlite.db" + ] + }, + { + "cell_type": "markdown", + "id": "544a90cbdd9894ba", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "id": "9ecfa565038eff71", + "metadata": {}, + "source": [ + "## OpenSearch Semantic Cache\n", + "Use [OpenSearch](https://python.langchain.com/docs/integrations/vectorstores/opensearch/) as a semantic cache to cache prompts and responses and evaluate hits based on semantic similarity." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7379fd5aa83ee500", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-12T02:06:03.766873Z", + "start_time": "2024-04-12T02:06:03.754481Z" + } + }, + "outputs": [], + "source": [ + "from langchain_community.cache import OpenSearchSemanticCache\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "set_llm_cache(\n", + " OpenSearchSemanticCache(\n", + " opensearch_url=\"http://localhost:9200\", embedding=OpenAIEmbeddings()\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fecb26634bf27e93", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-12T02:06:08.734403Z", + "start_time": "2024-04-12T02:06:07.178381Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 39.4 ms, sys: 11.8 ms, total: 51.2 ms\n", + "Wall time: 1.55 s\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything.\"" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The first time, it is not yet in cache, so it should take longer\n", + "llm(\"Tell me a joke\")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "43b24b725ea4ba98", + "metadata": { + "ExecuteTime": { + "end_time": "2024-04-12T02:06:12.073448Z", + "start_time": "2024-04-12T02:06:11.957571Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4.66 ms, sys: 1.1 ms, total: 5.76 ms\n", + "Wall time: 113 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "\"\\n\\nWhy don't scientists trust atoms?\\n\\nBecause they make up everything.\"" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "# The second time, while not a direct hit, the question is semantically similar to the original question,\n", + "# so it uses the cached result!\n", + "llm(\"Tell me one joke\")" + ] + }, + { + "cell_type": "markdown", + "id": "ae1f5e1c-085e-4998-9f2d-b5867d2c3d5b", + "metadata": { + "execution": { + "iopub.execute_input": "2024-05-31T17:18:43.345495Z", + "iopub.status.busy": "2024-05-31T17:18:43.345015Z", + "iopub.status.idle": "2024-05-31T17:18:43.351003Z", + "shell.execute_reply": "2024-05-31T17:18:43.350073Z", + "shell.execute_reply.started": "2024-05-31T17:18:43.345456Z" + } + }, + "source": [ + "## Cache classes: summary table" + ] + }, + { + "cell_type": "markdown", + "id": "65072e45-10bc-40f1-979b-2617656bbbce", + "metadata": { + "execution": { + "iopub.execute_input": 
"2024-05-31T17:16:05.616430Z", + "iopub.status.busy": "2024-05-31T17:16:05.616221Z", + "iopub.status.idle": "2024-05-31T17:16:05.624164Z", + "shell.execute_reply": "2024-05-31T17:16:05.623673Z", + "shell.execute_reply.started": "2024-05-31T17:16:05.616418Z" + } + }, + "source": [ + "**Cache** classes are implemented by inheriting the [BaseCache](https://api.python.langchain.com/en/latest/caches/langchain_core.caches.BaseCache.html) class.\n", + "\n", + "This table lists all 20 derived classes with links to the API Reference.\n", + "\n", + "\n", + "| Namespace 🔻 | Class |\n", + "|------------|---------|\n", + "| langchain_astradb.cache | [AstraDBCache](https://api.python.langchain.com/en/latest/cache/langchain_astradb.cache.AstraDBCache.html) |\n", + "| langchain_astradb.cache | [AstraDBSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_astradb.cache.AstraDBSemanticCache.html) |\n", + "| langchain_community.cache | [AstraDBCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.AstraDBCache.html) |\n", + "| langchain_community.cache | [AstraDBSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.AstraDBSemanticCache.html) |\n", + "| langchain_community.cache | [AzureCosmosDBSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.AzureCosmosDBSemanticCache.html) |\n", + "| langchain_community.cache | [CassandraCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.CassandraCache.html) |\n", + "| langchain_community.cache | [CassandraSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.CassandraSemanticCache.html) |\n", + "| langchain_community.cache | [GPTCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.GPTCache.html) |\n", + "| langchain_community.cache | [InMemoryCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.InMemoryCache.html) |\n", + "| langchain_community.cache | [MomentoCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.MomentoCache.html) |\n", + "| langchain_community.cache | [OpenSearchSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.OpenSearchSemanticCache.html) |\n", + "| langchain_community.cache | [RedisSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.RedisSemanticCache.html) |\n", + "| langchain_community.cache | [SQLAlchemyCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyCache.html) |\n", + "| langchain_community.cache | [SQLAlchemyMd5Cache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.SQLAlchemyMd5Cache.html) |\n", + "| langchain_community.cache | [UpstashRedisCache](https://api.python.langchain.com/en/latest/cache/langchain_community.cache.UpstashRedisCache.html) |\n", + "| langchain_core.caches | [InMemoryCache](https://api.python.langchain.com/en/latest/caches/langchain_core.caches.InMemoryCache.html) |\n", + "| langchain_elasticsearch.cache | [ElasticsearchCache](https://api.python.langchain.com/en/latest/cache/langchain_elasticsearch.cache.ElasticsearchCache.html) |\n", + "| langchain_mongodb.cache | [MongoDBAtlasSemanticCache](https://api.python.langchain.com/en/latest/cache/langchain_mongodb.cache.MongoDBAtlasSemanticCache.html) |\n", + "| langchain_mongodb.cache | 
[MongoDBCache](https://api.python.langchain.com/en/latest/cache/langchain_mongodb.cache.MongoDBCache.html) |\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19067f14-c69a-4156-9504-af43a0713669", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/memory/kafka_chat_message_history.ipynb b/docs/docs/integrations/memory/kafka_chat_message_history.ipynb new file mode 100644 index 0000000000000..f6d673d46a2d3 --- /dev/null +++ b/docs/docs/integrations/memory/kafka_chat_message_history.ipynb @@ -0,0 +1,245 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c21deb80-9cf7-4185-8205-a38110152d2c", + "metadata": {}, + "source": [ + "# Kafka\n", + "\n", + "[Kafka](https://github.com/apache/kafka) is a distributed messaging system that is used to publish and subscribe to streams of records. \n", + "This demo shows how to use `KafkaChatMessageHistory` to store and retrieve chat messages from a Kafka cluster." + ] + }, + { + "cell_type": "markdown", + "id": "c7c4fc02-18ac-4285-b8d6-507357e2aa13", + "metadata": {}, + "source": [ + "A running Kafka cluster is required to run the demo. You can follow these [instructions](https://developer.confluent.io/get-started/python) to create a Kafka cluster locally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f09f3b45-c4ff-4e59-bf79-238cc85d6465", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.chat_message_histories import KafkaChatMessageHistory\n", + "\n", + "chat_session_id = \"chat-message-history-kafka\"\n", + "bootstrap_servers = \"localhost:64797\" # host:port. Use `localhost:<plaintext port>` if the Kafka cluster is set up locally\n", + "history = KafkaChatMessageHistory(\n", + " chat_session_id,\n", + " bootstrap_servers,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "109812d2-85c5-4a65-a8a0-2d16eb80347b", + "metadata": {}, + "source": [ + "Optional parameters to construct `KafkaChatMessageHistory`:\n", + " - `ttl_ms`: Time to live in milliseconds for the chat messages.\n", + " - `partition`: Number of partitions of the topic that stores the chat messages.\n", + " - `replication_factor`: Replication factor of the topic that stores the chat messages." + ] + }, + { + "cell_type": "markdown", + "id": "c8fba39f-650b-4192-94ea-1a2a89f5348d", + "metadata": {}, + "source": [ + "`KafkaChatMessageHistory` internally uses a Kafka consumer to read chat messages, and it persistently marks the consumed position. It has the following methods to retrieve chat messages:\n", + "- `messages`: continue consuming chat messages from the last consumed one.\n", + "- `messages_from_beginning`: reset the consumer to the beginning of the history and consume messages. Optional parameters:\n", + " 1. `max_message_count`: maximum number of messages to read.\n", + " 2. `max_time_sec`: maximum time in seconds to read messages.\n", + "- `messages_from_latest`: reset the consumer to the end of the chat history and try consuming messages. 
Optional parameters same as above.\n", + "- `messages_from_last_consumed`: return messages continuing from the last consumed message, similar to `messages`, but with optional parameters.\n", + "\n", + "`max_message_count` and `max_time_sec` are used to avoid blocking indefinitely when retrieving messages.\n", + "As a result, `messages` and other methods to retrieve messages may not return all messages in the chat history. You will need to specify `max_message_count` and `max_time_sec` to retrieve all chat history in a single batch.\n" + ] + }, + { + "cell_type": "markdown", + "id": "caf2176b-db7a-451a-a292-3d9fde585ded", + "metadata": {}, + "source": [ + "Add messages and retrieve." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7e52a70d-3921-4614-b8cd-53b8d3c2deb4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='hi!'), AIMessage(content='whats up?')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "history.add_user_message(\"hi!\")\n", + "history.add_ai_message(\"whats up?\")\n", + "\n", + "history.messages" + ] + }, + { + "cell_type": "markdown", + "id": "874ce388-da8f-4796-b9ca-3ac114195b10", + "metadata": {}, + "source": [ + "Calling `messages` again returns an empty list because the consumer is at the end of the chat history." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "f863618e-7da1-4f46-9182-7a1387b93b16", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "history.messages" + ] + }, + { + "cell_type": "markdown", + "id": "e108255b-c240-44f7-9ecc-52bf04cd15b6", + "metadata": {}, + "source": [ + "Add new messages and continue consuming." 
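, + "\n", + "\n", + "(A note on the limits described above: to read the entire chat history back in one bounded batch, you can pass both limits explicitly. The call below is a sketch with illustrative values.)\n", + "\n", + "```python\n", + "all_messages = history.messages_from_beginning(max_message_count=1000, max_time_sec=5)\n", + "```"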
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "31aa7403-5392-4ad4-ba43-226020a274e3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='hi again!'), AIMessage(content='whats up again?')]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "history.add_user_message(\"hi again!\")\n", + "history.add_ai_message(\"whats up again?\")\n", + "history.messages" + ] + }, + { + "cell_type": "markdown", + "id": "5062fabc-c605-40dd-933b-c68de2727874", + "metadata": {}, + "source": [ + "To reset the consumer and read from beginning:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "005816ae-c8ed-4e41-9ecd-b6432578c8f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='hi again!'),\n", + " AIMessage(content='whats up again?'),\n", + " HumanMessage(content='hi!'),\n", + " AIMessage(content='whats up?')]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "history.messages_from_beginning()" + ] + }, + { + "cell_type": "markdown", + "id": "42cc7bed-5cd7-417f-94fd-fe2e511cc9c6", + "metadata": {}, + "source": [ + "Set the consumer to the end of the chat history, add a couple of new messages, and consume:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d8b4f1cd-fa47-461b-b1b6-278ad54e9ac5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[HumanMessage(content='HI!'), AIMessage(content='WHATS UP?')]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "history.messages_from_latest()\n", + "history.add_user_message(\"HI!\")\n", + "history.add_ai_message(\"WHATS UP?\")\n", + "history.messages" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/memory/zep_cloud_chat_message_history.ipynb b/docs/docs/integrations/memory/zep_cloud_chat_message_history.ipynb new file mode 100644 index 0000000000000..1294043291737 --- /dev/null +++ b/docs/docs/integrations/memory/zep_cloud_chat_message_history.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1cdd080f9ea3e0b", + "metadata": {}, + "source": [ + "# ZepCloudChatMessageHistory\n", + "> Recall, understand, and extract data from chat histories. 
Power personalized AI experiences.\n", + "\n", + ">[Zep](https://www.getzep.com) is a long-term memory service for AI Assistant apps.\n", + "> With Zep, you can provide AI assistants with the ability to recall past conversations, no matter how distant,\n", + "> while also reducing hallucinations, latency, and cost.\n", + "\n", + "> See [Zep Cloud Installation Guide](https://help.getzep.com/sdks) and more [Zep Cloud Langchain Examples](https://github.com/getzep/zep-python/tree/main/examples)\n", + "\n", + "## Example\n", + "\n", + "This notebook demonstrates how to use [Zep](https://www.getzep.com/) to persist chat history and use Zep Memory with your chain.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "82fb8484eed2ee9a", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:12.069045Z", + "start_time": "2024-05-10T05:20:12.062518Z" + } + }, + "outputs": [], + "source": [ + "from uuid import uuid4\n", + "\n", + "from langchain_community.chat_message_histories import ZepCloudChatMessageHistory\n", + "from langchain_community.memory.zep_cloud_memory import ZepCloudMemory\n", + "from langchain_core.messages import AIMessage, HumanMessage\n", + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder\n", + "from langchain_core.runnables import (\n", + " RunnableParallel,\n", + ")\n", + "from langchain_core.runnables.history import RunnableWithMessageHistory\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "session_id = str(uuid4()) # This is a unique identifier for the session" + ] + }, + { + "cell_type": "markdown", + "id": "d79e0e737db426ac", + "metadata": {}, + "source": [ + "Provide your OpenAI key" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7430ea2341ecd227", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:17.983314Z", + "start_time": "2024-05-10T05:20:13.805729Z" + } + }, + "outputs": [], + "source": [ + "import getpass\n", + "\n", + "openai_key = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "81a87004bc92c3e2", + "metadata": {}, + "source": [ + "Provide your Zep API key. See https://help.getzep.com/projects#api-keys\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c21632a2c7223170", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:24.694643Z", + "start_time": "2024-05-10T05:20:22.174681Z" + } + }, + "outputs": [], + "source": [ + "zep_api_key = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "436de864fe0000", + "metadata": {}, + "source": [ + "Preload some messages into the memory. The default message window is 4 messages. We want to push beyond this to demonstrate auto-summarization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e8fb07edd965ef1f", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:38.657289Z", + "start_time": "2024-05-10T05:20:26.981492Z" + } + }, + "outputs": [], + "source": [ + "test_history = [\n", + " {\"role\": \"human\", \"content\": \"Who was Octavia Butler?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American\"\n", + " \" science fiction author.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"Which books of hers were made into movies?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"The most well-known adaptation of Octavia Butler's work is the FX series\"\n", + " \" Kindred, based on her novel of the same name.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"Who were her contemporaries?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R.\"\n", + " \" Delany, and Joanna Russ.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"What awards did she win?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur\"\n", + " \" Fellowship.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"content\": \"Which other women sci-fi writers might I want to read?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": \"You might want to read Ursula K. Le Guin or Joanna Russ.\",\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"content\": (\n", + " \"Write a short synopsis of Butler's book, Parable of the Sower. What is it\"\n", + " \" about?\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Parable of the Sower is a science fiction novel by Octavia Butler,\"\n", + " \" published in 1993. It follows the story of Lauren Olamina, a young woman\"\n", + " \" living in a dystopian future where society has collapsed due to\"\n", + " \" environmental disasters, poverty, and violence.\"\n", + " ),\n", + " \"metadata\": {\"foo\": \"bar\"},\n", + " },\n", + "]\n", + "\n", + "zep_memory = ZepCloudMemory(\n", + " session_id=session_id,\n", + " api_key=zep_api_key,\n", + ")\n", + "\n", + "for msg in test_history:\n", + " zep_memory.chat_memory.add_message(\n", + " HumanMessage(content=msg[\"content\"])\n", + " if msg[\"role\"] == \"human\"\n", + " else AIMessage(content=msg[\"content\"])\n", + " )\n", + "\n", + "import time\n", + "\n", + "time.sleep(\n", + " 10\n", + ") # Wait for the messages to be embedded and summarized, this happens asynchronously." + ] + }, + { + "cell_type": "markdown", + "id": "bfa6b19f0b501aea", + "metadata": {}, + "source": [ + "**MessagesPlaceholder** - We’re using the variable name chat_history here. This will incorporate the chat history into the prompt.\n", + "It’s important that this variable name aligns with the history_messages_key in the RunnableWithMessageHistory chain for seamless integration.\n", + "\n", + "**question** must match input_messages_key in `RunnableWithMessageHistory“ chain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2b12eccf9b4908eb", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:46.592163Z", + "start_time": "2024-05-10T05:20:46.464326Z" + } + }, + "outputs": [], + "source": [ + "template = \"\"\"Be helpful and answer the question below using the provided context:\n", + " \"\"\"\n", + "answer_prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", template),\n", + " MessagesPlaceholder(variable_name=\"chat_history\"),\n", + " (\"user\", \"{question}\"),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7d6014d6fe7f2d22", + "metadata": {}, + "source": [ + "We use RunnableWithMessageHistory to incorporate Zep’s Chat History into our chain. This class requires a session_id as a parameter when you activate the chain." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "83ea7322638f8ead", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:49.681754Z", + "start_time": "2024-05-10T05:20:49.663404Z" + } + }, + "outputs": [], + "source": [ + "inputs = RunnableParallel(\n", + " {\n", + " \"question\": lambda x: x[\"question\"],\n", + " \"chat_history\": lambda x: x[\"chat_history\"],\n", + " },\n", + ")\n", + "chain = RunnableWithMessageHistory(\n", + " inputs | answer_prompt | ChatOpenAI(openai_api_key=openai_key) | StrOutputParser(),\n", + " lambda s_id: ZepCloudChatMessageHistory(\n", + " session_id=s_id, # This uniquely identifies the conversation, note that we are getting session id as chain configurable field\n", + " api_key=zep_api_key,\n", + " memory_type=\"perpetual\",\n", + " ),\n", + " input_messages_key=\"question\",\n", + " history_messages_key=\"chat_history\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "db8bdc1d0d7bb672", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T05:20:54.966758Z", + "start_time": "2024-05-10T05:20:52.117440Z" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Parent run 622c6f75-3e4a-413d-ba20-558c1fea0d50 not found for run af12a4b1-e882-432d-834f-e9147465faf6. Treating as a root run.\n" + ] + }, + { + "data": { + "text/plain": [ + "'\"Parable of the Sower\" is relevant to the challenges facing contemporary society as it explores themes of environmental degradation, economic inequality, social unrest, and the search for hope and community in the face of chaos. The novel\\'s depiction of a dystopian future where society has collapsed due to environmental and economic crises serves as a cautionary tale about the potential consequences of our current societal and environmental challenges. 
By addressing issues such as climate change, social injustice, and the impact of technology on humanity, Octavia Butler\\'s work prompts readers to reflect on the pressing issues of our time and the importance of resilience, empathy, and collective action in building a better future.'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke(\n", + " {\n", + " \"question\": \"What is the book's relevance to the challenges facing contemporary society?\"\n", + " },\n", + " config={\"configurable\": {\"session_id\": session_id}},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d9c609652110db3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/memory/zep_memory_cloud.ipynb b/docs/docs/integrations/memory/zep_memory_cloud.ipynb new file mode 100644 index 0000000000000..64f8e3c0d55eb --- /dev/null +++ b/docs/docs/integrations/memory/zep_memory_cloud.ipynb @@ -0,0 +1,428 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Zep Cloud Memory\n", + "> Recall, understand, and extract data from chat histories. Power personalized AI experiences.\n", + "\n", + ">[Zep](https://www.getzep.com) is a long-term memory service for AI Assistant apps.\n", + "> With Zep, you can provide AI assistants with the ability to recall past conversations, no matter how distant,\n", + "> while also reducing hallucinations, latency, and cost.\n", + "\n", + "> See [Zep Cloud Installation Guide](https://help.getzep.com/sdks) and more [Zep Cloud Langchain Examples](https://github.com/getzep/zep-python/tree/main/examples)\n", + "\n", + "## Example\n", + "\n", + "This notebook demonstrates how to use [Zep](https://www.getzep.com/) as memory for your chatbot.\n", + "\n", + "We'll demonstrate:\n", + "\n", + "1. Adding conversation history to Zep.\n", + "2. Running an agent and having message automatically added to the store.\n", + "3. Viewing the enriched messages.\n", + "4. Vector search over the conversation history." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-14T17:25:10.779451Z", + "start_time": "2024-05-14T17:25:10.375249Z" + } + }, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'FieldInfo' object has no attribute 'deprecated'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[3], line 8\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_community\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutilities\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m WikipediaAPIWrapper\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmessages\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AIMessage, HumanMessage\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[1;32m 10\u001b[0m session_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(uuid4()) \u001b[38;5;66;03m# This is a unique identifier for the session\u001b[39;00m\n", + "File \u001b[0;32m~/job/integrations/langchain/libs/partners/openai/langchain_openai/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 2\u001b[0m AzureChatOpenAI,\n\u001b[1;32m 3\u001b[0m ChatOpenAI,\n\u001b[1;32m 4\u001b[0m )\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01membeddings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 6\u001b[0m AzureOpenAIEmbeddings,\n\u001b[1;32m 7\u001b[0m OpenAIEmbeddings,\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mllms\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureOpenAI, OpenAI\n", + "File \u001b[0;32m~/job/integrations/langchain/libs/partners/openai/langchain_openai/chat_models/__init__.py:1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat_models\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mazure\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m AzureChatOpenAI\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_openai\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mchat_models\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ChatOpenAI\n\u001b[1;32m 4\u001b[0m __all__ \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mChatOpenAI\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 6\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAzureChatOpenAI\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 7\u001b[0m ]\n", + "File 
\u001b[0;32m~/job/integrations/langchain/libs/partners/openai/langchain_openai/chat_models/azure.py:8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Any, Callable, Dict, List, Optional, Union\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mopenai\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01moutputs\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m ChatResult\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mlangchain_core\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpydantic_v1\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Field, SecretStr, root_validator\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/openai/__init__.py:8\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mos\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01m_os\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping_extensions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m override\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m types\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_types\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m file_from_path\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/openai/types/__init__.py:5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details.\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m__future__\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m annotations\n\u001b[0;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbatch\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Batch \u001b[38;5;28;01mas\u001b[39;00m Batch\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mimage\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Image \u001b[38;5;28;01mas\u001b[39;00m Image\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Model \u001b[38;5;28;01mas\u001b[39;00m Model\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/openai/types/batch.py:7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m List, Optional\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping_extensions\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Literal\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m_models\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BaseModel\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbatch_error\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BatchError\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbatch_request_counts\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BatchRequestCounts\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/openai/_models.py:667\u001b[0m\n\u001b[1;32m 662\u001b[0m json_data: Body\n\u001b[1;32m 663\u001b[0m extra_json: AnyMapping\n\u001b[1;32m 666\u001b[0m \u001b[38;5;129;43m@final\u001b[39;49m\n\u001b[0;32m--> 667\u001b[0m \u001b[38;5;28;43;01mclass\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;21;43;01mFinalRequestOptions\u001b[39;49;00m\u001b[43m(\u001b[49m\u001b[43mpydantic\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBaseModel\u001b[49m\u001b[43m)\u001b[49m\u001b[43m:\u001b[49m\n\u001b[1;32m 668\u001b[0m \u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_model_construction.py:202\u001b[0m, in \u001b[0;36m__new__\u001b[0;34m(mcs, cls_name, bases, namespace, __pydantic_generic_metadata__, __pydantic_reset_parent_namespace__, _create_model_module, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m super(cls, cls).__pydantic_init_subclass__(**kwargs) # type: ignore[misc]\n\u001b[1;32m 200\u001b[0m return cls\n\u001b[1;32m 201\u001b[0m else:\n\u001b[0;32m--> 202\u001b[0m # this is the BaseModel class itself being created, no logic required\n\u001b[1;32m 203\u001b[0m return super().__new__(mcs, cls_name, bases, namespace, **kwargs)\n\u001b[1;32m 205\u001b[0m if not typing.TYPE_CHECKING: # pragma: no branch\n\u001b[1;32m 206\u001b[0m # We put `__getattr__` in a 
non-TYPE_CHECKING block because otherwise, mypy allows arbitrary attribute access\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_model_construction.py:539\u001b[0m, in \u001b[0;36mcomplete_model_class\u001b[0;34m(cls, cls_name, config_wrapper, raise_errors, types_namespace, create_model_module)\u001b[0m\n\u001b[1;32m 532\u001b[0m \u001b[38;5;66;03m# debug(schema)\u001b[39;00m\n\u001b[1;32m 533\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m__pydantic_core_schema__ \u001b[38;5;241m=\u001b[39m schema\n\u001b[1;32m 535\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m__pydantic_validator__ \u001b[38;5;241m=\u001b[39m create_schema_validator(\n\u001b[1;32m 536\u001b[0m schema,\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[1;32m 538\u001b[0m create_model_module \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m,\n\u001b[0;32m--> 539\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m,\n\u001b[1;32m 540\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcreate_model\u001b[39m\u001b[38;5;124m'\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m create_model_module \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mBaseModel\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 541\u001b[0m core_config,\n\u001b[1;32m 542\u001b[0m config_wrapper\u001b[38;5;241m.\u001b[39mplugin_settings,\n\u001b[1;32m 543\u001b[0m )\n\u001b[1;32m 544\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m__pydantic_serializer__ \u001b[38;5;241m=\u001b[39m SchemaSerializer(schema, core_config)\n\u001b[1;32m 545\u001b[0m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m__pydantic_complete__ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/main.py:626\u001b[0m, in \u001b[0;36m__get_pydantic_core_schema__\u001b[0;34m(cls, source, handler)\u001b[0m\n\u001b[1;32m 611\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 612\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__pydantic_init_subclass__\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 613\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"This is intended to behave just like `__init_subclass__`, but is called by `ModelMetaclass`\u001b[39;00m\n\u001b[1;32m 614\u001b[0m \u001b[38;5;124;03m only after the class is actually fully initialized. 
In particular, attributes like `model_fields` will\u001b[39;00m\n\u001b[1;32m 615\u001b[0m \u001b[38;5;124;03m be present when this is called.\u001b[39;00m\n\u001b[1;32m 616\u001b[0m \n\u001b[1;32m 617\u001b[0m \u001b[38;5;124;03m This is necessary because `__init_subclass__` will always be called by `type.__new__`,\u001b[39;00m\n\u001b[1;32m 618\u001b[0m \u001b[38;5;124;03m and it would require a prohibitively large refactor to the `ModelMetaclass` to ensure that\u001b[39;00m\n\u001b[1;32m 619\u001b[0m \u001b[38;5;124;03m `type.__new__` was called in such a manner that the class would already be sufficiently initialized.\u001b[39;00m\n\u001b[1;32m 620\u001b[0m \n\u001b[1;32m 621\u001b[0m \u001b[38;5;124;03m This will receive the same `kwargs` that would be passed to the standard `__init_subclass__`, namely,\u001b[39;00m\n\u001b[1;32m 622\u001b[0m \u001b[38;5;124;03m any kwargs passed to the class definition that aren't used internally by pydantic.\u001b[39;00m\n\u001b[1;32m 623\u001b[0m \n\u001b[1;32m 624\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 625\u001b[0m \u001b[38;5;124;03m **kwargs: Any keyword arguments passed to the class definition that aren't used internally\u001b[39;00m\n\u001b[0;32m--> 626\u001b[0m \u001b[38;5;124;03m by pydantic.\u001b[39;00m\n\u001b[1;32m 627\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 628\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_schema_generation_shared.py:82\u001b[0m, in \u001b[0;36mCallbackGetCoreSchemaHandler.__call__\u001b[0;34m(self, source_type)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, __source_type: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m core_schema\u001b[38;5;241m.\u001b[39mCoreSchema:\n\u001b[0;32m---> 82\u001b[0m schema \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handler(__source_type)\n\u001b[1;32m 83\u001b[0m ref \u001b[38;5;241m=\u001b[39m schema\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mref\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ref_mode \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mto-def\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:502\u001b[0m, in \u001b[0;36mgenerate_schema\u001b[0;34m(self, obj, from_dunder_get_core_schema)\u001b[0m\n\u001b[1;32m 498\u001b[0m schema \u001b[38;5;241m=\u001b[39m _add_custom_serialization_from_json_encoders(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_config_wrapper\u001b[38;5;241m.\u001b[39mjson_encoders, obj, schema)\n\u001b[1;32m 500\u001b[0m schema \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_post_process_generated_schema(schema)\n\u001b[0;32m--> 502\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m schema\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:753\u001b[0m, in \u001b[0;36m_generate_schema_inner\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 749\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mmatch_type\u001b[39m(\u001b[38;5;28mself\u001b[39m, obj: Any) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m core_schema\u001b[38;5;241m.\u001b[39mCoreSchema: \u001b[38;5;66;03m# noqa: C901\u001b[39;00m\n\u001b[1;32m 750\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Main mapping of types to schemas.\u001b[39;00m\n\u001b[1;32m 751\u001b[0m \n\u001b[1;32m 752\u001b[0m \u001b[38;5;124;03m The general structure is a series of if statements starting with the simple cases\u001b[39;00m\n\u001b[0;32m--> 753\u001b[0m \u001b[38;5;124;03m (non-generic primitive types) and then handling generics and other more complex cases.\u001b[39;00m\n\u001b[1;32m 754\u001b[0m \n\u001b[1;32m 755\u001b[0m \u001b[38;5;124;03m Each case either generates a schema directly, calls into a public user-overridable method\u001b[39;00m\n\u001b[1;32m 756\u001b[0m \u001b[38;5;124;03m (like `GenerateSchema.tuple_variable_schema`) or calls into a private method that handles some\u001b[39;00m\n\u001b[1;32m 757\u001b[0m \u001b[38;5;124;03m boilerplate before calling into the user-facing method (e.g. `GenerateSchema._tuple_schema`).\u001b[39;00m\n\u001b[1;32m 758\u001b[0m \n\u001b[1;32m 759\u001b[0m \u001b[38;5;124;03m The idea is that we'll evolve this into adding more and more user facing methods over time\u001b[39;00m\n\u001b[1;32m 760\u001b[0m \u001b[38;5;124;03m as they get requested and we figure out what the right API for them is.\u001b[39;00m\n\u001b[1;32m 761\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 762\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m obj \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28mstr\u001b[39m:\n\u001b[1;32m 763\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstr_schema()\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:580\u001b[0m, in \u001b[0;36m_model_schema\u001b[0;34m(self, cls)\u001b[0m\n\u001b[1;32m 574\u001b[0m inner_schema \u001b[38;5;241m=\u001b[39m new_inner_schema\n\u001b[1;32m 575\u001b[0m inner_schema \u001b[38;5;241m=\u001b[39m apply_model_validators(inner_schema, model_validators, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124minner\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 577\u001b[0m model_schema \u001b[38;5;241m=\u001b[39m core_schema\u001b[38;5;241m.\u001b[39mmodel_schema(\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[1;32m 579\u001b[0m inner_schema,\n\u001b[0;32m--> 580\u001b[0m custom_init\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__pydantic_custom_init__\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[1;32m 581\u001b[0m root_model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 582\u001b[0m post_init\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__pydantic_post_init__\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[1;32m 583\u001b[0m config\u001b[38;5;241m=\u001b[39mcore_config,\n\u001b[1;32m 584\u001b[0m ref\u001b[38;5;241m=\u001b[39mmodel_ref,\n\u001b[1;32m 585\u001b[0m metadata\u001b[38;5;241m=\u001b[39mmetadata,\n\u001b[1;32m 586\u001b[0m )\n\u001b[1;32m 588\u001b[0m schema \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply_model_serializers(model_schema, 
decorators\u001b[38;5;241m.\u001b[39mmodel_serializers\u001b[38;5;241m.\u001b[39mvalues())\n\u001b[1;32m 589\u001b[0m schema \u001b[38;5;241m=\u001b[39m apply_model_validators(schema, model_validators, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mouter\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:580\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 574\u001b[0m inner_schema \u001b[38;5;241m=\u001b[39m new_inner_schema\n\u001b[1;32m 575\u001b[0m inner_schema \u001b[38;5;241m=\u001b[39m apply_model_validators(inner_schema, model_validators, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124minner\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 577\u001b[0m model_schema \u001b[38;5;241m=\u001b[39m core_schema\u001b[38;5;241m.\u001b[39mmodel_schema(\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28mcls\u001b[39m,\n\u001b[1;32m 579\u001b[0m inner_schema,\n\u001b[0;32m--> 580\u001b[0m custom_init\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__pydantic_custom_init__\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[1;32m 581\u001b[0m root_model\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 582\u001b[0m post_init\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mgetattr\u001b[39m(\u001b[38;5;28mcls\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__pydantic_post_init__\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m),\n\u001b[1;32m 583\u001b[0m config\u001b[38;5;241m=\u001b[39mcore_config,\n\u001b[1;32m 584\u001b[0m ref\u001b[38;5;241m=\u001b[39mmodel_ref,\n\u001b[1;32m 585\u001b[0m metadata\u001b[38;5;241m=\u001b[39mmetadata,\n\u001b[1;32m 586\u001b[0m )\n\u001b[1;32m 588\u001b[0m schema \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply_model_serializers(model_schema, decorators\u001b[38;5;241m.\u001b[39mmodel_serializers\u001b[38;5;241m.\u001b[39mvalues())\n\u001b[1;32m 589\u001b[0m schema \u001b[38;5;241m=\u001b[39m apply_model_validators(schema, model_validators, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mouter\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:916\u001b[0m, in \u001b[0;36m_generate_md_field_schema\u001b[0;34m(self, name, field_info, decorators)\u001b[0m\n\u001b[1;32m 906\u001b[0m common_field \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_common_field_schema(name, field_info, decorators)\n\u001b[1;32m 907\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m core_schema\u001b[38;5;241m.\u001b[39mmodel_field(\n\u001b[1;32m 908\u001b[0m common_field[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mschema\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 909\u001b[0m serialization_exclude\u001b[38;5;241m=\u001b[39mcommon_field[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mserialization_exclude\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 913\u001b[0m metadata\u001b[38;5;241m=\u001b[39mcommon_field[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m 914\u001b[0m )\n\u001b[0;32m--> 916\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_generate_dc_field_schema\u001b[39m(\n\u001b[1;32m 917\u001b[0m 
\u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 918\u001b[0m name: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 919\u001b[0m field_info: FieldInfo,\n\u001b[1;32m 920\u001b[0m decorators: DecoratorInfos,\n\u001b[1;32m 921\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m core_schema\u001b[38;5;241m.\u001b[39mDataclassField:\n\u001b[1;32m 922\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Prepare a DataclassField to represent the parameter/field, of a dataclass.\"\"\"\u001b[39;00m\n\u001b[1;32m 923\u001b[0m common_field \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_common_field_schema(name, field_info, decorators)\n", + "File \u001b[0;32m~/job/zep-proprietary/venv/lib/python3.11/site-packages/pydantic/_internal/_generate_schema.py:1114\u001b[0m, in \u001b[0;36m_common_field_schema\u001b[0;34m(self, name, field_info, decorators)\u001b[0m\n\u001b[1;32m 1108\u001b[0m json_schema_extra \u001b[38;5;241m=\u001b[39m field_info\u001b[38;5;241m.\u001b[39mjson_schema_extra\n\u001b[1;32m 1110\u001b[0m metadata \u001b[38;5;241m=\u001b[39m build_metadata_dict(\n\u001b[1;32m 1111\u001b[0m js_annotation_functions\u001b[38;5;241m=\u001b[39m[get_json_schema_update_func(json_schema_updates, json_schema_extra)]\n\u001b[1;32m 1112\u001b[0m )\n\u001b[0;32m-> 1114\u001b[0m alias_generator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_config_wrapper\u001b[38;5;241m.\u001b[39malias_generator\n\u001b[1;32m 1115\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m alias_generator \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1116\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply_alias_generator_to_field_info(alias_generator, field_info, name)\n", + "\u001b[0;31mAttributeError\u001b[0m: 'FieldInfo' object has no attribute 'deprecated'" + ] + } + ], + "source": [ + "from uuid import uuid4\n", + "\n", + "from langchain.agents import AgentType, Tool, initialize_agent\n", + "from langchain_community.memory.zep_cloud_memory import ZepCloudMemory\n", + "from langchain_community.retrievers import ZepCloudRetriever\n", + "from langchain_community.utilities import WikipediaAPIWrapper\n", + "from langchain_core.messages import AIMessage, HumanMessage\n", + "from langchain_openai import OpenAI\n", + "\n", + "session_id = str(uuid4()) # This is a unique identifier for the session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Provide your OpenAI key\n", + "import getpass\n", + "\n", + "openai_key = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Provide your Zep API key. See https://help.getzep.com/projects#api-keys\n", + "\n", + "zep_api_key = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize the Zep Chat Message History Class and initialize the Agent\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "search = WikipediaAPIWrapper()\n", + "tools = [\n", + " Tool(\n", + " name=\"Search\",\n", + " func=search.run,\n", + " description=(\n", + " \"useful for when you need to search online for answers. 
You should ask\"\n", + " \" targeted questions\"\n", + " ),\n", + " ),\n", + "]\n", + "\n", + "# Set up Zep Chat History\n", + "memory = ZepCloudMemory(\n", + " session_id=session_id,\n", + " api_key=zep_api_key,\n", + " return_messages=True,\n", + " memory_key=\"chat_history\",\n", + ")\n", + "\n", + "# Initialize the agent\n", + "llm = OpenAI(temperature=0, openai_api_key=openai_key)\n", + "agent_chain = initialize_agent(\n", + " tools,\n", + " llm,\n", + " agent=AgentType.CONVERSATIONAL_REACT_DESCRIPTION,\n", + " verbose=True,\n", + " memory=memory,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add some history data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Preload some messages into the memory. The default message window is 12 messages. We want to push beyond this to demonstrate auto-summarization.\n", + "test_history = [\n", + " {\"role\": \"human\", \"content\": \"Who was Octavia Butler?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American\"\n", + " \" science fiction author.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"Which books of hers were made into movies?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"The most well-known adaptation of Octavia Butler's work is the FX series\"\n", + " \" Kindred, based on her novel of the same name.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"Who were her contemporaries?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R.\"\n", + " \" Delany, and Joanna Russ.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"content\": \"What awards did she win?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur\"\n", + " \" Fellowship.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"content\": \"Which other women sci-fi writers might I want to read?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": \"You might want to read Ursula K. Le Guin or Joanna Russ.\",\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"content\": (\n", + " \"Write a short synopsis of Butler's book, Parable of the Sower. What is it\"\n", + " \" about?\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"Parable of the Sower is a science fiction novel by Octavia Butler,\"\n", + " \" published in 1993. 
It follows the story of Lauren Olamina, a young woman\"\n", + " \" living in a dystopian future where society has collapsed due to\"\n", + " \" environmental disasters, poverty, and violence.\"\n", + " ),\n", + " \"metadata\": {\"foo\": \"bar\"},\n", + " },\n", + "]\n", + "\n", + "for msg in test_history:\n", + " memory.chat_memory.add_message(\n", + " (\n", + " HumanMessage(content=msg[\"content\"])\n", + " if msg[\"role\"] == \"human\"\n", + " else AIMessage(content=msg[\"content\"])\n", + " ),\n", + " metadata=msg.get(\"metadata\", {}),\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Run the agent\n", + "\n", + "Doing so will automatically add the input and response to the Zep memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:34:37.613049Z", + "start_time": "2024-05-10T14:34:35.883359Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "AI: Parable of the Sower is highly relevant to contemporary society as it explores themes of environmental degradation, social and economic inequality, and the struggle for survival in a chaotic world. It also delves into issues of race, gender, and religion, making it a thought-provoking and timely read.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': \"What is the book's relevance to the challenges facing contemporary society?\",\n", + " 'chat_history': [HumanMessage(content=\"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\\nOctavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.\\nUrsula K. Le Guin is known for novels like The Left Hand of Darkness and The Dispossessed.\\nJoanna Russ is the author of the influential feminist science fiction novel The Female Man.\\nMargaret Atwood is known for works like The Handmaid's Tale and the MaddAddam trilogy.\\nConnie Willis is an award-winning author of science fiction and fantasy, known for novels like Doomsday Book.\\nOctavia Butler is a pioneering black female science fiction author, known for Kindred and the Parable series.\\nOctavia Estelle Butler was an acclaimed American science fiction author. While none of her books were directly adapted into movies, her novel Kindred was adapted into a TV series on FX. Butler was part of a generation of prominent science fiction writers in the 20th century, including contemporaries such as Ursula K. Le Guin, Samuel R. Delany, Chip Delany, and Nalo Hopkinson.\\nhuman: What awards did she win?\\nai: Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.\\nhuman: Which other women sci-fi writers might I want to read?\\nai: You might want to read Ursula K. Le Guin or Joanna Russ.\\nhuman: Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\\nai: Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. 
It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.\")],\n", + " 'output': 'Parable of the Sower is highly relevant to contemporary society as it explores themes of environmental degradation, social and economic inequality, and the struggle for survival in a chaotic world. It also delves into issues of race, gender, and religion, making it a thought-provoking and timely read.'}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_chain.invoke(\n", + " input=\"What is the book's relevance to the challenges facing contemporary society?\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inspect the Zep memory\n", + "\n", + "Note the summary, and that the history has been enriched with token counts, UUIDs, and timestamps.\n", + "\n", + "Summaries are biased towards the most recent messages.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:35:11.437446Z", + "start_time": "2024-05-10T14:35:10.664076Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Octavia Estelle Butler was an acclaimed American science fiction author. While none of her books were directly adapted into movies, her novel Kindred was adapted into a TV series on FX. Butler was part of a generation of prominent science fiction writers in the 20th century, including contemporaries such as Ursula K. Le Guin, Samuel R. Delany, Chip Delany, and Nalo Hopkinson.\n", + "\n", + "\n", + "Conversation Facts: \n", + "Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\n", + "\n", + "Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.\n", + "\n", + "Ursula K. Le Guin is known for novels like The Left Hand of Darkness and The Dispossessed.\n", + "\n", + "Joanna Russ is the author of the influential feminist science fiction novel The Female Man.\n", + "\n", + "Margaret Atwood is known for works like The Handmaid's Tale and the MaddAddam trilogy.\n", + "\n", + "Connie Willis is an award-winning author of science fiction and fantasy, known for novels like Doomsday Book.\n", + "\n", + "Octavia Butler is a pioneering black female science fiction author, known for Kindred and the Parable series.\n", + "\n", + "Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993.\n", + "\n", + "The novel follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.\n", + "\n", + "Parable of the Sower explores themes of environmental degradation, social and economic inequality, and the struggle for survival in a chaotic world.\n", + "\n", + "The novel also delves into issues of race, gender, and religion, making it a thought-provoking and timely read.\n", + "\n", + "human :\n", + " {'content': \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R. Delany, and Joanna Russ.\\nOctavia Butler won the Hugo Award, the Nebula Award, and the MacArthur Fellowship.\\nUrsula K. 
Le Guin is known for novels like The Left Hand of Darkness and The Dispossessed.\\nJoanna Russ is the author of the influential feminist science fiction novel The Female Man.\\nMargaret Atwood is known for works like The Handmaid's Tale and the MaddAddam trilogy.\\nConnie Willis is an award-winning author of science fiction and fantasy, known for novels like Doomsday Book.\\nOctavia Butler is a pioneering black female science fiction author, known for Kindred and the Parable series.\\nParable of the Sower is a science fiction novel by Octavia Butler, published in 1993.\\nThe novel follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.\\nParable of the Sower explores themes of environmental degradation, social and economic inequality, and the struggle for survival in a chaotic world.\\nThe novel also delves into issues of race, gender, and religion, making it a thought-provoking and timely read.\\nOctavia Estelle Butler was an acclaimed American science fiction author. While none of her books were directly adapted into movies, her novel Kindred was adapted into a TV series on FX. Butler was part of a generation of prominent science fiction writers in the 20th century, including contemporaries such as Ursula K. Le Guin, Samuel R. Delany, Chip Delany, and Nalo Hopkinson.\\nhuman: Which other women sci-fi writers might I want to read?\\nai: You might want to read Ursula K. Le Guin or Joanna Russ.\\nhuman: Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\\nai: Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.\\nhuman: What is the book's relevance to the challenges facing contemporary society?\\nai: Parable of the Sower is highly relevant to contemporary society as it explores themes of environmental degradation, social and economic inequality, and the struggle for survival in a chaotic world. It also delves into issues of race, gender, and religion, making it a thought-provoking and timely read.\", 'additional_kwargs': {}, 'response_metadata': {}, 'type': 'human', 'name': None, 'id': None, 'example': False}\n" + ] + } + ], + "source": [ + "def print_messages(messages):\n", + " for m in messages:\n", + " print(m.type, \":\\n\", m.dict())\n", + "\n", + "\n", + "print(memory.chat_memory.zep_summary)\n", + "print(\"\\n\")\n", + "print(\"Conversation Facts: \")\n", + "facts = memory.chat_memory.zep_facts\n", + "for fact in facts:\n", + " print(fact + \"\\n\")\n", + "print_messages(memory.chat_memory.messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Vector search over the Zep memory\n", + "\n", + "Zep provides native vector search over historical conversation memory via the `ZepRetriever`.\n", + "\n", + "You can use the `ZepRetriever` with chains that support passing in a Langchain `Retriever` object.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:35:33.023765Z", + "start_time": "2024-05-10T14:35:32.613576Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "content='Which other women sci-fi writers might I want to read?' 
created_at='2024-05-10T14:34:16.714292Z' metadata=None role='human' role_type=None token_count=12 updated_at='0001-01-01T00:00:00Z' uuid_='64ca1fae-8db1-4b4f-8a45-9b0e57e88af5' 0.8960460126399994\n" + ] + } + ], + "source": [ + "retriever = ZepCloudRetriever(\n", + " session_id=session_id,\n", + " api_key=zep_api_key,\n", + ")\n", + "\n", + "search_results = memory.chat_memory.search(\"who are some famous women sci-fi authors?\")\n", + "for r in search_results:\n", + " if r.score > 0.8: # Only print results with similarity of 0.8 or higher\n", + " print(r.message, r.score)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/providers/ascend.mdx b/docs/docs/integrations/providers/ascend.mdx new file mode 100644 index 0000000000000..b8c1769a48965 --- /dev/null +++ b/docs/docs/integrations/providers/ascend.mdx @@ -0,0 +1,24 @@ +# Ascend + +>[Ascend](https://www.hiascend.com/) is a Neural Processing Unit (NPU) provided by Huawei. + +This page covers how to use the Ascend NPU with LangChain. + +### Installation + +Install `torch-npu` using: + +```bash +pip install torch-npu +``` + +Please also follow the installation instructions below: +* Install CANN as shown [here](https://www.hiascend.com/document/detail/zh/canncommercial/700/quickstart/quickstart/quickstart_18_0002.html). + +### Embedding Models + +See a [usage example](/docs/integrations/text_embedding/ascend). + +```python +from langchain_community.embeddings import AscendEmbeddings +``` diff --git a/docs/docs/integrations/providers/pebblo/index.md b/docs/docs/integrations/providers/pebblo/index.md new file mode 100644 index 0000000000000..d0ed9a69b2b2a --- /dev/null +++ b/docs/docs/integrations/providers/pebblo/index.md @@ -0,0 +1,21 @@ +# Pebblo + +[Pebblo](https://www.daxa.ai/pebblo) enables developers to safely load and retrieve data so they can promote their Gen AI app to deployment without +worrying about the organization’s compliance and security requirements. The Pebblo SafeLoader identifies semantic topics and entities found in the +loaded data and the Pebblo SafeRetriever enforces identity and semantic controls on the retrieved context. The results are +summarized in the UI or a PDF report. + + +## Pebblo Overview: + +`Pebblo` provides a safe way to load and retrieve data for Gen AI applications. +It includes: +1. **Identity-aware Safe Loader** that loads data and identifies semantic topics and entities. +2. **SafeRetrieval** that enforces identity and semantic controls on the retrieved context. +3. **User Data Report** that summarizes the data loaded and retrieved. + +## Example Notebooks + +For more detailed examples of using Pebblo, see the following notebooks: +* [PebbloSafeLoader](/docs/integrations/document_loaders/pebblo) shows how to use the Pebblo loader to safely load data. +* [PebbloRetrievalQA](/docs/integrations/providers/pebblo/pebblo_retrieval_qa) shows how to use the Pebblo retrieval QA chain to safely retrieve data.
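As a quick orientation before those notebooks, here is a minimal sketch of the SafeLoader side of the workflow: wrapping a standard LangChain loader with `PebbloSafeLoader` so documents are classified as they are loaded. The file path, app name, owner, and description are placeholder values and constructor arguments may vary by version, so treat this as a sketch and defer to the PebbloSafeLoader notebook linked above.

```python
from langchain_community.document_loaders import CSVLoader, PebbloSafeLoader

# Wrap any ordinary LangChain loader; Pebblo classifies the data as it is loaded.
# All argument values below are hypothetical placeholders.
loader = PebbloSafeLoader(
    CSVLoader(file_path="./data/corporate_data.csv"),
    name="acme-corp-rag-app",  # app name reported to Pebblo
    owner="Data Owner",  # document owner
    description="Support RAG application",  # shown in the Pebblo data report
)
documents = loader.load()
```

The resulting `documents` can then be indexed as usual; the PebbloRetrievalQA notebook below shows the matching retrieval-side controls.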
diff --git a/docs/docs/integrations/providers/pebblo/pebblo_retrieval_qa.ipynb b/docs/docs/integrations/providers/pebblo/pebblo_retrieval_qa.ipynb new file mode 100644 index 0000000000000..14cd3c1603f54 --- /dev/null +++ b/docs/docs/integrations/providers/pebblo/pebblo_retrieval_qa.ipynb @@ -0,0 +1,584 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3ce451e9-f8f1-4f27-8c6b-4a93a406d504", + "metadata": {}, + "source": [ + "# Identity-enabled RAG using PebbloRetrievalQA\n", + "\n", + "> PebbloRetrievalQA is a Retrieval chain with Identity & Semantic Enforcement for question-answering\n", + "against a vector database.\n", + "\n", + "This notebook covers how to retrieve documents using Identity & Semantic Enforcement (Deny Topics/Entities).\n", + "For more details on Pebblo and its SafeRetriever feature visit [Pebblo documentation](https://daxa-ai.github.io/pebblo/retrieval_chain/)\n", + "\n", + "### Steps:\n", + "\n", + "1. **Loading Documents:**\n", + "We will load documents with authorization and semantic metadata into an in-memory Qdrant vectorstore. This vectorstore will be used as a retriever in PebbloRetrievalQA. \n", + "\n", + "> **Note:** It is recommended to use [PebbloSafeLoader](https://daxa-ai.github.io/pebblo/rag) as the counterpart for loading documents with authentication and semantic metadata on the ingestion side. `PebbloSafeLoader` guarantees the secure and efficient loading of documents while maintaining the integrity of the metadata.\n", + "\n", + "\n", + "\n", + "2. **Testing Enforcement Mechanisms**:\n", + " We will test Identity and Semantic Enforcement separately. For each use case, we will define a specific \"ask\" function with the required contexts (*auth_context* and *semantic_context*) and then pose our questions.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4ee16b6b-5dac-4b5c-bb69-3ec87398a33c", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "### Dependencies\n", + "\n", + "We'll use an OpenAI LLM, OpenAI embeddings and a Qdrant vector store in this walkthrough.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e68494fa-f387-4481-9a6c-58294865d7b7", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet langchain langchain_core langchain-community langchain-openai qdrant_client" + ] + }, + { + "cell_type": "markdown", + "id": "61498d51-0c38-40e2-adcd-19dfdf4d37ef", + "metadata": {}, + "source": [ + "### Identity-aware Data Ingestion\n", + "\n", + "Here we are using Qdrant as a vector database; however, you can use any of the supported vector databases.\n", + "\n", + "**PebbloRetrievalQA chain supports the following vector databases:**\n", + "- Qdrant\n", + "- Pinecone\n", + "\n", + "\n", + "**Load vector database with authorization and semantic information in metadata:**\n", + "\n", + "In this step, we capture the authorization and semantic information of the source document into the `authorized_identities`, `pebblo_semantic_topics`, and `pebblo_semantic_entities` fields within the metadata of the VectorDB entry for each chunk. \n", + "\n", + "\n", + "*NOTE: To use the PebbloRetrievalQA chain, you must always place authorization and semantic metadata in the specified fields. 
These fields must contain a list of strings.*" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ae4fcbc1-bdc3-40d2-b2df-8c82cad1f89c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Vectordb loaded.\n" + ] + } + ], + "source": [ + "from langchain_community.vectorstores.qdrant import Qdrant\n", + "from langchain_core.documents import Document\n", + "from langchain_openai.embeddings import OpenAIEmbeddings\n", + "from langchain_openai.llms import OpenAI\n", + "\n", + "llm = OpenAI()\n", + "embeddings = OpenAIEmbeddings()\n", + "collection_name = \"pebblo-identity-and-semantic-rag\"\n", + "\n", + "page_content = \"\"\"\n", + "**ACME Corp Financial Report**\n", + "\n", + "**Overview:**\n", + "ACME Corp, a leading player in the merger and acquisition industry, presents its financial report for the fiscal year ending December 31, 2020. \n", + "Despite a challenging economic landscape, ACME Corp demonstrated robust performance and strategic growth.\n", + "\n", + "**Financial Highlights:**\n", + "Revenue soared to $50 million, marking a 15% increase from the previous year, driven by successful deal closures and expansion into new markets. \n", + "Net profit reached $12 million, showcasing a healthy margin of 24%.\n", + "\n", + "**Key Metrics:**\n", + "Total assets surged to $80 million, reflecting a 20% growth, highlighting ACME Corp's strong financial position and asset base. \n", + "Additionally, the company maintained a conservative debt-to-equity ratio of 0.5, ensuring sustainable financial stability.\n", + "\n", + "**Future Outlook:**\n", + "ACME Corp remains optimistic about the future, with plans to capitalize on emerging opportunities in the global M&A landscape. \n", + "The company is committed to delivering value to shareholders while maintaining ethical business practices.\n", + "\n", + "**Bank Account Details:**\n", + "For inquiries or transactions, please refer to ACME Corp's US bank account:\n", + "Account Number: 123456789012\n", + "Bank Name: Fictitious Bank of America\n", + "\"\"\"\n", + "\n", + "documents = [\n", + " Document(\n", + " **{\n", + " \"page_content\": page_content,\n", + " \"metadata\": {\n", + " \"pebblo_semantic_topics\": [\"financial-report\"],\n", + " \"pebblo_semantic_entities\": [\"us-bank-account-number\"],\n", + " \"authorized_identities\": [\"finance-team\", \"exec-leadership\"],\n", + " \"page\": 0,\n", + " \"source\": \"https://drive.google.com/file/d/xxxxxxxxxxxxx/view\",\n", + " \"title\": \"ACME Corp Financial Report.pdf\",\n", + " },\n", + " }\n", + " )\n", + "]\n", + "\n", + "vectordb = Qdrant.from_documents(\n", + " documents,\n", + " embeddings,\n", + " location=\":memory:\",\n", + " collection_name=collection_name,\n", + ")\n", + "\n", + "print(\"Vectordb loaded.\")" + ] + }, + { + "cell_type": "markdown", + "id": "f630bb8b-67ba-41f9-8715-76d006207e75", + "metadata": {}, + "source": [ + "## Retrieval with Identity Enforcement\n", + "\n", + "PebbloRetrievalQA chain uses a SafeRetrieval to enforce that the snippets used for in-context are retrieved only from the documents authorized for the user. \n", + "To achieve this, the Gen-AI application needs to provide an authorization context for this retrieval chain. 
\n", + "This *auth_context* should be filled with the identity and authorization groups of the user accessing the Gen-AI app.\n", + "\n", + "\n", + "Here is the sample code for the `PebbloRetrievalQA` with `user_auth`(List of user authorizations, which may include their User ID and \n", + " the groups they are part of) from the user accessing the RAG application, passed in `auth_context`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "e978bee6-3a8c-459f-ab82-d380d7499b36", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.chains import PebbloRetrievalQA\n", + "from langchain_community.chains.pebblo_retrieval.models import AuthContext, ChainInput\n", + "\n", + "# Initialize PebbloRetrievalQA chain\n", + "qa_chain = PebbloRetrievalQA.from_chain_type(\n", + " llm=llm,\n", + " retriever=vectordb.as_retriever(),\n", + " app_name=\"pebblo-identity-rag\",\n", + " description=\"Identity Enforcement app using PebbloRetrievalQA\",\n", + " owner=\"ACME Corp\",\n", + ")\n", + "\n", + "\n", + "def ask(question: str, auth_context: dict):\n", + " \"\"\"\n", + " Ask a question to the PebbloRetrievalQA chain\n", + " \"\"\"\n", + " auth_context_obj = AuthContext(**auth_context) if auth_context else None\n", + " chain_input_obj = ChainInput(query=question, auth_context=auth_context_obj)\n", + " return qa_chain.invoke(chain_input_obj.dict())" + ] + }, + { + "cell_type": "markdown", + "id": "7a267e96-70cb-468f-b830-83b65e9b7f6f", + "metadata": {}, + "source": [ + "### 1. Questions by Authorized User\n", + "\n", + "We ingested data for authorized identities `[\"finance-team\", \"exec-leadership\"]`, so a user with the authorized identity/group `finance-team` should receive the correct answer." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "2688fc18-1eac-45a5-be55-aabbe6b25af5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "\n", + "Answer: \n", + "Revenue: $50 million (15% increase from previous year)\n", + "Net profit: $12 million (24% margin)\n", + "Total assets: $80 million (20% growth)\n", + "Debt-to-equity ratio: 0.5\n" + ] + } + ], + "source": [ + "auth = {\n", + " \"user_id\": \"finance-user@acme.org\",\n", + " \"user_auth\": [\n", + " \"finance-team\",\n", + " ],\n", + "}\n", + "\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "resp = ask(question, auth)\n", + "print(f\"Question: {question}\\n\\nAnswer: {resp['result']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "b4db6566-6562-4a49-b19c-6d99299b374e", + "metadata": {}, + "source": [ + "### 2. Questions by Unauthorized User\n", + "\n", + "Since the user's authorized identity/group `eng-support` is not included in the authorized identities `[\"finance-team\", \"exec-leadership\"]`, we should not receive an answer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "2d736ce3-6e05-48d3-a5e1-fb4e7cccc1ee", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "\n", + "Answer: I don't know.\n" + ] + } + ], + "source": [ + "auth = {\n", + " \"user_id\": \"eng-user@acme.org\",\n", + " \"user_auth\": [\n", + " \"eng-support\",\n", + " ],\n", + "}\n", + "\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "resp = ask(question, auth)\n", + "print(f\"Question: {question}\\n\\nAnswer: {resp['result']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "33a8afe1-3071-4118-9714-a17cba809ee4", + "metadata": {}, + "source": [ + "### 3. Using PromptTemplate to provide additional instructions\n", + "You can use PromptTemplate to provide additional instructions to the LLM for generating a custom response." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "59c055ba-fdd1-48c6-9bc9-2793eb47438d", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.prompts import PromptTemplate\n", + "\n", + "prompt_template = PromptTemplate.from_template(\n", + " \"\"\"\n", + "Answer the question using the provided context. \n", + "If no context is provided, just say \"I'm sorry, but that information is unavailable, or Access to it is restricted.\".\n", + "\n", + "Question: {question}\n", + "\"\"\"\n", + ")\n", + "\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "prompt = prompt_template.format(question=question)" + ] + }, + { + "cell_type": "markdown", + "id": "c4d27c00-73d9-4ce8-bc70-29535deaf0e2", + "metadata": {}, + "source": [ + "#### 3.1 Questions by Authorized User" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "e68a13a4-b735-421d-9655-2a9a087ba9e5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "\n", + "Answer: \n", + "Revenue soared to $50 million, marking a 15% increase from the previous year, and net profit reached $12 million, showcasing a healthy margin of 24%. 
Total assets also grew by 20% to $80 million, and the company maintained a conservative debt-to-equity ratio of 0.5.\n" + ] + } + ], + "source": [ + "auth = {\n", + " \"user_id\": \"finance-user@acme.org\",\n", + " \"user_auth\": [\n", + " \"finance-team\",\n", + " ],\n", + "}\n", + "resp = ask(prompt, auth)\n", + "print(f\"Question: {question}\\n\\nAnswer: {resp['result']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "7b97a9ca-bdc6-400a-923d-65a8536658be", + "metadata": {}, + "source": [ + "#### 3.2 Questions by Unauthorized Users" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "438e48c6-96a2-4d5e-81db-47f8c8f37739", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "\n", + "Answer: \n", + "I'm sorry, but that information is unavailable, or Access to it is restricted.\n" + ] + } + ], + "source": [ + "auth = {\n", + " \"user_id\": \"eng-user@acme.org\",\n", + " \"user_auth\": [\n", + " \"eng-support\",\n", + " ],\n", + "}\n", + "resp = ask(prompt, auth)\n", + "print(f\"Question: {question}\\n\\nAnswer: {resp['result']}\")" + ] + }, + { + "cell_type": "markdown", + "id": "4306cab3-d070-405f-a23b-5c6011a61c50", + "metadata": {}, + "source": [ + "## Retrieval with Semantic Enforcement" + ] + }, + { + "cell_type": "markdown", + "id": "1c3757cf-832f-483e-aafe-cb09b5130ec0", + "metadata": {}, + "source": [ + "The PebbloRetrievalQA chain uses SafeRetrieval to ensure that the snippets used in context are retrieved only from documents that comply with the\n", + "provided semantic context.\n", + "To achieve this, the Gen-AI application must provide a semantic context for this retrieval chain.\n", + "This `semantic_context` should include the topics and entities that should be denied for the user accessing the Gen-AI app.\n", + "\n", + "Below is a sample code for PebbloRetrievalQA with `topics_to_deny` and `entities_to_deny`. These are passed in `semantic_context` to the chain input." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "daf37bf7-9a16-4102-8893-5b698cae1b07", + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List, Optional\n", + "\n", + "from langchain_community.chains import PebbloRetrievalQA\n", + "from langchain_community.chains.pebblo_retrieval.models import (\n", + " ChainInput,\n", + " SemanticContext,\n", + ")\n", + "\n", + "# Initialize PebbloRetrievalQA chain\n", + "qa_chain = PebbloRetrievalQA.from_chain_type(\n", + " llm=llm,\n", + " retriever=vectordb.as_retriever(),\n", + " app_name=\"pebblo-semantic-rag\",\n", + " description=\"Semantic Enforcement app using PebbloRetrievalQA\",\n", + " owner=\"ACME Corp\",\n", + ")\n", + "\n", + "\n", + "def ask(\n", + " question: str,\n", + " topics_to_deny: Optional[List[str]] = None,\n", + " entities_to_deny: Optional[List[str]] = None,\n", + "):\n", + " \"\"\"\n", + " Ask a question to the PebbloRetrievalQA chain\n", + " \"\"\"\n", + " semantic_context = dict()\n", + " if topics_to_deny:\n", + " semantic_context[\"pebblo_semantic_topics\"] = {\"deny\": topics_to_deny}\n", + " if entities_to_deny:\n", + " semantic_context[\"pebblo_semantic_entities\"] = {\"deny\": entities_to_deny}\n", + "\n", + " semantic_context_obj = (\n", + " SemanticContext(**semantic_context) if semantic_context else None\n", + " )\n", + " chain_input_obj = ChainInput(query=question, semantic_context=semantic_context_obj)\n", + " return qa_chain.invoke(chain_input_obj.dict())" + ] + }, + { + "cell_type": "markdown", + "id": "9718819b-f5cd-4212-9947-d18cd507c8b7", + "metadata": {}, + "source": [ + "### 1. Without semantic enforcement\n", + "\n", + "Since no semantic enforcement is applied, the system should return the answer without excluding any context due to the semantic labels associated with the context.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "69158be1-f223-4d14-b61f-f4afdf5af526", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topics to deny: []\n", + "Entities to deny: []\n", + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "Answer: \n", + "Revenue for ACME Corp increased by 15% to $50 million in 2020, with a net profit of $12 million and a strong asset base of $80 million. The company also maintained a conservative debt-to-equity ratio of 0.5.\n" + ] + } + ], + "source": [ + "topic_to_deny = []\n", + "entities_to_deny = []\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "resp = ask(question, topics_to_deny=topic_to_deny, entities_to_deny=entities_to_deny)\n", + "print(\n", + " f\"Topics to deny: {topic_to_deny}\\nEntities to deny: {entities_to_deny}\\n\"\n", + " f\"Question: {question}\\nAnswer: {resp['result']}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "c8789c58-0d64-404e-bc09-92f6952022ac", + "metadata": {}, + "source": [ + "### 2. Deny financial-report topic\n", + "\n", + "Data has been ingested with the topics: `[\"financial-report\"]`.\n", + "Therefore, an app that denies the `financial-report` topic should not receive an answer." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "9b17b2fc-eefb-4229-a41e-2f943d2eb48e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topics to deny: ['financial-report']\n", + "Entities to deny: []\n", + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "Answer: \n", + "\n", + "Unfortunately, I do not have access to the financial performance of ACME Corp for the year 2020.\n" + ] + } + ], + "source": [ + "topic_to_deny = [\"financial-report\"]\n", + "entities_to_deny = []\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "resp = ask(question, topics_to_deny=topic_to_deny, entities_to_deny=entities_to_deny)\n", + "print(\n", + " f\"Topics to deny: {topic_to_deny}\\nEntities to deny: {entities_to_deny}\\n\"\n", + " f\"Question: {question}\\nAnswer: {resp['result']}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "894f21b0-2913-4ef6-b5ed-cbca8f74214d", + "metadata": {}, + "source": [ + "### 3. Deny us-bank-account-number entity\n", + "Since the entity `us-bank-account-number` is denied, the system should not return the answer." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "2b8abce3-7af3-437f-8999-2866a4b9beda", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Topics to deny: []\n", + "Entities to deny: ['us-bank-account-number']\n", + "Question: Share the financial performance of ACME Corp for the year 2020\n", + "Answer: I don't have information about ACME Corp's financial performance for 2020.\n" + ] + } + ], + "source": [ + "topic_to_deny = []\n", + "entities_to_deny = [\"us-bank-account-number\"]\n", + "question = \"Share the financial performance of ACME Corp for the year 2020\"\n", + "resp = ask(question, topics_to_deny=topic_to_deny, entities_to_deny=entities_to_deny)\n", + "print(\n", + " f\"Topics to deny: {topic_to_deny}\\nEntities to deny: {entities_to_deny}\\n\"\n", + " f\"Question: {question}\\nAnswer: {resp['result']}\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/retrievers/asknews.ipynb b/docs/docs/integrations/retrievers/asknews.ipynb new file mode 100644 index 0000000000000..b9edb52e7e079 --- /dev/null +++ b/docs/docs/integrations/retrievers/asknews.ipynb @@ -0,0 +1,221 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AskNews\n", + "\n", + "> [AskNews](https://asknews.app) infuses any LLM with the latest global news (or historical news), using a single natural language query. Specifically, AskNews is enriching over 300k articles per day by translating, summarizing, extracting entities, and indexing them into hot and cold vector databases. AskNews puts these vector databases on a low-latency endpoint for you. When you query AskNews, you get back a prompt-optimized string that contains all the most pertinent enrichments (e.g. entities, classifications, translation, summarization). 
This means that you do not need to manage your own news RAG, and you do not need to worry about how to properly convey news information in a condensed way to your LLM.\n", + "> AskNews is also committed to transparency, which is why our coverage is monitored and diversified across hundreds of countries, 13 languages, and 50 thousand sources. If you'd like to track our source coverage, you can visit our [transparency dashboard](https://asknews.app/en/transparency).\n", + "\n", + "## Setup\n", + "\n", + "The integration lives in the `langchain-community` package. We also need to install the `asknews` package itself.\n", + "\n", + "```bash\n", + "pip install -U langchain-community asknews\n", + "```\n", + "\n", + "We also need to set our AskNews API credentials, which can be generated at the [AskNews console](https://my.asknews.app)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"ASKNEWS_CLIENT_ID\"] = getpass.getpass()\n", + "os.environ[\"ASKNEWS_CLIENT_SECRET\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's also helpful (but not needed) to set up [LangSmith](https://smith.langchain.com/) for best-in-class observability" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", + "# os.environ[\"LANGCHAIN_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Usage" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='[1]:\\ntitle: US Stock Market Declines Amid High Interest Rates\\nsummary: The US stock market has experienced a significant decline in recent days, with the S&P 500 index falling 9.94 points or 0.2% to 4320.06. The decline was attributed to interest rates, which are expected to remain high for a longer period. The yield on 10-year Treasury notes rose to 4.44%, the highest level since 2007, which has a negative impact on stock prices. The high interest rates have also affected the technology sector, with companies such as Intel and Microsoft experiencing declines. The auto sector is also experiencing fluctuations, with General Motors and Ford experiencing declines. The labor market is also facing challenges, with workers demanding higher wages and benefits, which could lead to increased inflation. The Federal Reserve is expected to keep interest rates high for a longer period, which could have a negative impact on the stock market. 
Some economists expect the Fed to raise interest rates again this year, which could lead to further declines in the stock market.\\nsource: ALYAUM Holding Group for Press\\npublished: May 12 2024 13:12\\nLocation: US\\nTechnology: S&P 500\\nQuantity: 9.94 points\\nNumber: 4320.06\\nProduct: 10-year Treasury notes\\nDate: 2007, this year\\nOrganization: General Motors, Fed, Intel, Ford, Microsoft, Federal Reserve\\nclassification: Finance\\nsentiment: -1', metadata={'title': 'الأسهم الأمريكية تتطلع لتعويض خسائرها بعد موجة تراجع كبيرة', 'source': 'https://www.alyaum.com/articles/6529353/%D8%A7%D9%84%D8%A7%D9%82%D8%AA%D8%B5%D8%A7%D8%AF/%D8%A3%D8%B3%D9%88%D8%A7%D9%82-%D8%A7%D9%84%D8%A3%D8%B3%D9%87%D9%85/%D8%A7%D9%84%D8%A3%D8%B3%D9%87%D9%85-%D8%A7%D9%84%D8%A3%D9%85%D8%B1%D9%8A%D9%83%D9%8A%D8%A9-%D8%AA%D8%AA%D8%B7%D9%84%D8%B9-%D9%84%D8%AA%D8%B9%D9%88%D9%8A%D8%B6-%D8%AE%D8%B3%D8%A7%D8%A6%D8%B1%D9%87%D8%A7-%D8%A8%D8%B9%D8%AF-%D9%85%D9%88%D8%AC%D8%A9-%D8%AA%D8%B1%D8%A7%D8%AC%D8%B9-%D9%83%D8%A8%D9%8A%D8%B1%D8%A9', 'images': 'https://www.alyaum.com/uploads/images/2024/05/12/2312237.jpg'}),\n", + " Document(page_content=\"[2]:\\ntitle: US Federal Reserve's Decision to Limit Stock Market Correction\\nsummary: The Federal Reserve of the United States, led by Jerome Powell, has achieved its goal of limiting the correction of the stock market by reducing the balance of the central bank and maintaining massive liquidity injections into the markets to combat various crises that have occurred since the pandemic. Despite April's contraction of around 5%, the stock market has behaved well this week, with most indices showing increases. The Dow Jones and S&P 500 have risen almost 2% after declines of around 5% in April, while the Nasdaq has increased by 1.4% after a decline of over 4% in April. The correction is taking place in an orderly manner, and the market is trying to find a new equilibrium in asset valuations, adapted to a normalized cost of money and a moderate but positive growth framework.\\nsource: okdiario.com\\npublished: May 12 2024 04:45\\nOrganization: Federal Reserve of the United States, Dow Jones, S&P 500, Nasdaq\\nPerson: Jerome Powell\\nEvent: pandemic\\nDate: April\\nclassification: Business\\nsentiment: 1\", metadata={'title': 'Las Bolsas afrontan una corrección ordenada apoyas por la Fed de EEUU', 'source': 'https://okdiario.com/economia/reserva-federal-mantiene-liquidez-asegura-correccion-limitada-bolsas-12798172', 'images': 'https://okdiario.com/img/2023/08/25/bild-powell-subida-de-tipos-interior.jpg'}),\n", + " Document(page_content=\"[3]:\\ntitle: How the Fed's quest for transparency made markets more volatile\\nsummary: The Federal Reserve's increased transparency and communication with the public may be contributing to market volatility, according to some experts. 
The Fed's forecasting strategy and frequent communication may be causing \\nsource: NBC4 Washington\\npublished: May 11 2024 12:00\\nOrganization: Fed, Federal Reserve\\nclassification: Business\\nsentiment: 0\", metadata={'title': \"How the Fed's quest for transparency made markets more volatile\", 'source': 'https://www.nbcwashington.com/news/business/money-report/how-the-feds-quest-for-transparency-made-markets-more-volatile/3613897', 'images': 'https://media.nbcwashington.com/2024/05/107409380-1714652843711-gettyimages-2151006318-_s2_5018_hwe7dfbl.jpeg?quality=85&strip=all&resize=1200%2C675'})]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_community.retrievers import AskNewsRetriever\n", + "\n", + "retriever = AskNewsRetriever(k=3)\n", + "\n", + "retriever.invoke(\"impact of fed policy on the tech sector\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"[1]:\\ntitle: US Stocks End Week with Modest Gains Ahead of Inflation Data\\nsummary: US stocks ended the week with modest gains, marking another weekly advance for the three main indices, as investors evaluated comments from Federal Reserve officials and awaited crucial inflation data next week. The S&P 500 and Dow Jones experienced slight increases, while the Nasdaq closed almost unchanged. The Dow Jones recorded its largest weekly percentage gain since mid-December. Federal Reserve officials' comments generated expectations, with market participants awaiting inflation data to be released next week. The data includes the Consumer Price Index (CPI) and Producer Price Index (PPI) from the Department of Labor, which is expected to provide insight into progress towards the 2% inflation target.\\nsource: El Cronista\\npublished: May 10 2024 23:35\\nLocation: US\\nOrganization: Dow Jones, Department of Labor, Nasdaq, Federal Reserve\\nDate: next week, mid-December\\nclassification: Business\\nsentiment: 0\", metadata={'title': 'Modesta suba en los mercados a la espera de los datos de inflación', 'source': 'http://www.cronista.com/usa/wall-street-dolar/modesta-suba-en-los-mercados-a-la-espera-de-los-datos-de-inflacion', 'images': 'https://www.cronista.com/files/image/141/141554/5ff7985549d06_600_315!.jpg?s=99126c63cc44ed5c15ed2177cb022f55&d=1712540173'}),\n", + " Document(page_content=\"[2]:\\ntitle: US Stocks End Week on a High Note\\nsummary: The US stock market ended the week on a positive note, with the Dow Jones Industrial Average closing 0.32% higher at 39,512.84, the S&P 500 rising 0.16% to 5,222.68, and the Nasdaq 100 gaining 0.26% to 18,161.18. The three indices all recorded gains for the week, with the Dow leading the way with a 2.2% increase. The Federal Reserve's stance on interest rates was a key factor, with several Fed members expressing caution about cutting rates in the near future. The University of Michigan's consumer sentiment survey showed a decline in May, and consumer inflation expectations also increased, which tempered expectations for rate cuts. 
Despite this, the market remained resilient and ended the week on a strong note.\\nsource: Investing.com\\npublished: May 10 2024 20:19\\nLocation: US\\nQuantity: 39,512.84\\nOrganization: University of Michigan, Fed, Federal Reserve\\nDate: May\\nclassification: Business\\nsentiment: 1\", metadata={'title': 'Aktien New York Schluss: Erneut höher zum Ende einer starken Börsenwoche', 'source': 'https://de.investing.com/news/economy/aktien-new-york-schluss-erneut-hoher-zum-ende-einer-starken-borsenwoche-2618612', 'images': 'https://i-invdn-com.investing.com/news/LYNXMPED2T082_L.jpg'}),\n", + " Document(page_content=\"[3]:\\ntitle: US Stocks End Week with Slight Gain Despite Inflation Concerns\\nsummary: The US stock market ended the week with a slight gain, despite inflation expectations and skepticism from Federal Reserve members about potential interest rate cuts. The Dow Jones Industrial Average rose 0.24% to 39,480.38, while the S&P 500 gained 0.07% to 5,217.67. The Nasdaq 100 also rose 0.15% to 18,140.02. The week's performance was driven by a number of factors, including the release of inflation expectations and the comments of Federal Reserve members. The University of Michigan's consumer sentiment survey also showed a decline in May, with consumer inflation expectations rising. The stock of 3M, the best-performing Dow stock, rose 2% after analysts at HSBC upgraded it to \\nsource: Yahoo\\npublished: May 10 2024 18:11\\nLocation: US\\nOrganization: University of Michigan, 3M, HSBC, Federal Reserve\\nQuantity: 39,480.38, 5,217.67\\nDate: May\\nclassification: Business\\nsentiment: 0\", metadata={'title': 'Aktien New York: Knapp im Plus gegen Ende einer starken Börsenwoche', 'source': 'https://de.finance.yahoo.com/nachrichten/aktien-new-york-knapp-plus-181143398.html', 'images': 'https://s.yimg.com/cv/apiv2/social/images/yahoo_default_logo-1200x1200.png'})]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# you have full control on filtering by category, time, pagination, and even the search method you use.\n", + "from datetime import datetime, timedelta\n", + "\n", + "start = (datetime.now() - timedelta(days=7)).timestamp()\n", + "end = datetime.now().timestamp()\n", + "\n", + "retriever = AskNewsRetriever(\n", + " k=3,\n", + " categories=[\"Business\", \"Technology\"],\n", + " start_timestamp=int(start), # defaults to 48 hours ago\n", + " end_timestamp=int(end), # defaults to now\n", + " method=\"kw\", # defaults to \"nl\", natural language, can also be \"kw\" for keyword search\n", + " offset=10, # allows you to paginate results\n", + ")\n", + "\n", + "retriever.invoke(\"federal reserve S&P500\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chaining\n", + "\n", + "We can easily combine this retriever in to a chain." 
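The chain defined in the next cells assigns the retrieved `Document` objects directly to the `{context}` slot of the prompt. If you would rather inject only the enriched article text, one option (a small sketch, not part of the original notebook) is to add a formatting step:

```python
from langchain_core.documents import Document


def format_articles(docs: list[Document]) -> str:
    # Join the prompt-optimized article strings, dropping Document metadata.
    return "\n\n".join(doc.page_content for doc in docs)
```

You could then pipe it after the retriever, e.g. `context=(lambda x: x["question"]) | retriever | format_articles`, in place of the assignment used in the chain below.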
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "prompt = ChatPromptTemplate.from_template(\n", + " \"\"\"The following news articles may come in handy for answering the question:\n", + "\n", + "{context}\n", + "\n", + "Question:\n", + "\n", + "{question}\"\"\"\n", + ")\n", + "chain = (\n", + " RunnablePassthrough.assign(context=(lambda x: x[\"question\"]) | retriever)\n", + " | prompt\n", + " | ChatOpenAI(model=\"gpt-4-1106-preview\")\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"According to the information provided in the second news article, the impact of Federal Reserve policy on the technology sector has been negative. The article mentions that due to expectations that interest rates will remain high for a longer period, the US stock market has experienced a significant decline. This rise in interest rates has particularly affected the technology sector, with companies such as Intel and Microsoft experiencing declines. High interest rates can lead to increased borrowing costs for businesses, which can dampen investment and spending. In the technology sector, where companies often rely on borrowing to fund research and development and other growth initiatives, higher rates can be especially challenging.\\n\\nTherefore, the Federal Reserve's policy of maintaining high interest rates has had a detrimental effect on tech stocks, contributing to a decrease in their market valuations.\"" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke({\"question\": \"What is the impact of fed policy on the tech sector?\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/retrievers/milvus_hybrid_search.ipynb b/docs/docs/integrations/retrievers/milvus_hybrid_search.ipynb new file mode 100644 index 0000000000000..2aac742475850 --- /dev/null +++ b/docs/docs/integrations/retrievers/milvus_hybrid_search.ipynb @@ -0,0 +1,636 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "# Milvus Hybrid Search\n", + "\n", + "> [Milvus](https://milvus.io/docs) is an open-source vector database built to power embedding similarity search and AI applications. 
Milvus makes unstructured data search more accessible, and provides a consistent user experience regardless of the deployment environment.\n", + "\n", + "This notebook goes over how to use the Milvus Hybrid Search retriever, which combines the strengths of both dense and sparse vector search.\n", + "\n", + "For more reference please go to [Milvus Multi-Vector Search](https://milvus.io/docs/multi-vector-search.md)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "## Prerequisites\n", + "### Install dependencies\n", + "You need to prepare to install the following dependencies\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet pymilvus[model] langchain-milvus langchain-openai" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "Import necessary modules and classes" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from pymilvus import (\n", + " Collection,\n", + " CollectionSchema,\n", + " DataType,\n", + " FieldSchema,\n", + " WeightedRanker,\n", + " connections,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_core.output_parsers import StrOutputParser\n", + "from langchain_core.prompts import PromptTemplate\n", + "from langchain_core.runnables import RunnablePassthrough\n", + "from langchain_milvus.retrievers import MilvusCollectionHybridSearchRetriever\n", + "from langchain_milvus.utils.sparse import BM25SparseEmbedding\n", + "from langchain_openai import ChatOpenAI, OpenAIEmbeddings" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### Start the Milvus service\n", + "\n", + "Please refer to the [Milvus documentation](https://milvus.io/docs/install_standalone-docker.md) to start the Milvus service.\n", + "\n", + "After starting milvus, you need to specify your milvus connection URI.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "CONNECTION_URI = \"http://localhost:19530\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "### Prepare OpenAI API Key\n", + "\n", + "Please refer to the [OpenAI documentation](https://platform.openai.com/account/api-keys) to obtain your OpenAI API key, and set it as an environment variable.\n", + "\n", + "```shell\n", + "export OPENAI_API_KEY=\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "## Prepare data and Load\n", + "### Prepare dense and sparse embedding functions\n", + "\n", + " Let us fictionalize 10 fake descriptions of novels. In actual production, it may be a large amount of text data." 
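The ten short novel descriptions defined in the next cell can be embedded as-is. With a real, larger corpus you would usually split long documents into chunks before embedding; here is a minimal sketch, assuming the `langchain-text-splitters` package is installed (the document text is a made-up placeholder):

```python
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Hypothetical long document standing in for a real production corpus.
long_text = (
    "In a distant future, a reluctant cartographer maps a city that rearranges itself every night. "
    * 200
)

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_text(long_text)
print(len(chunks))  # each chunk can then be embedded just like the short texts below
```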
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "texts = [\n", + " \"In 'The Whispering Walls' by Ava Moreno, a young journalist named Sophia uncovers a decades-old conspiracy hidden within the crumbling walls of an ancient mansion, where the whispers of the past threaten to destroy her own sanity.\",\n", + " \"In 'The Last Refuge' by Ethan Blackwood, a group of survivors must band together to escape a post-apocalyptic wasteland, where the last remnants of humanity cling to life in a desperate bid for survival.\",\n", + " \"In 'The Memory Thief' by Lila Rose, a charismatic thief with the ability to steal and manipulate memories is hired by a mysterious client to pull off a daring heist, but soon finds themselves trapped in a web of deceit and betrayal.\",\n", + " \"In 'The City of Echoes' by Julian Saint Clair, a brilliant detective must navigate a labyrinthine metropolis where time is currency, and the rich can live forever, but at a terrible cost to the poor.\",\n", + " \"In 'The Starlight Serenade' by Ruby Flynn, a shy astronomer discovers a mysterious melody emanating from a distant star, which leads her on a journey to uncover the secrets of the universe and her own heart.\",\n", + " \"In 'The Shadow Weaver' by Piper Redding, a young orphan discovers she has the ability to weave powerful illusions, but soon finds herself at the center of a deadly game of cat and mouse between rival factions vying for control of the mystical arts.\",\n", + " \"In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.\",\n", + " \"In 'The Clockwork Kingdom' by Augusta Wynter, a brilliant inventor discovers a hidden world of clockwork machines and ancient magic, where a rebellion is brewing against the tyrannical ruler of the land.\",\n", + " \"In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.\",\n", + " \"In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use the [OpenAI Embedding](https://platform.openai.com/docs/guides/embeddings) to generate dense vectors, and the [BM25 algorithm](https://en.wikipedia.org/wiki/Okapi_BM25) to generate sparse vectors.\n", + "\n", + "Initialize dense embedding function and get dimension" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1536" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dense_embedding_func = OpenAIEmbeddings()\n", + "dense_dim = len(dense_embedding_func.embed_query(texts[1]))\n", + "dense_dim" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initialize sparse embedding function.\n", + "\n", + "Note that the output of sparse embedding is a set of sparse vectors, which represents the index and weight of the keywords of the input text." 
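Before looking at the real output below, it may help to see what such a sparse representation is, independent of the library: a mapping from vocabulary indices to term weights, with every index not present treated as zero. The toy snippet below only illustrates that shape using a simple TF-IDF-style weight; it is not how `BM25SparseEmbedding` actually computes its scores.

```python
import math
from collections import Counter

# Toy illustration only: a "sparse vector" as {vocabulary index: weight}.
toy_corpus = ["the last refuge", "the memory thief", "the lost expedition"]
vocab = {tok: i for i, tok in enumerate(sorted({t for doc in toy_corpus for t in doc.split()}))}


def toy_sparse_embed(text: str) -> dict:
    counts = Counter(tok for tok in text.split() if tok in vocab)
    sparse = {}
    for tok, tf in counts.items():
        df = sum(tok in doc.split() for doc in toy_corpus)  # document frequency
        idf = math.log((len(toy_corpus) + 1) / (df + 1)) + 1  # smoothed IDF
        sparse[vocab[tok]] = tf * idf  # weight = term frequency x IDF
    return sparse


print(toy_sparse_embed("the lost memory"))
```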
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: 0.4270424944042204,\n", + " 21: 1.845826690498331,\n", + " 22: 1.845826690498331,\n", + " 23: 1.845826690498331,\n", + " 24: 1.845826690498331,\n", + " 25: 1.845826690498331,\n", + " 26: 1.845826690498331,\n", + " 27: 1.2237754316221157,\n", + " 28: 1.845826690498331,\n", + " 29: 1.845826690498331,\n", + " 30: 1.845826690498331,\n", + " 31: 1.845826690498331,\n", + " 32: 1.845826690498331,\n", + " 33: 1.845826690498331,\n", + " 34: 1.845826690498331,\n", + " 35: 1.845826690498331,\n", + " 36: 1.845826690498331,\n", + " 37: 1.845826690498331,\n", + " 38: 1.845826690498331,\n", + " 39: 1.845826690498331}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sparse_embedding_func = BM25SparseEmbedding(corpus=texts)\n", + "sparse_embedding_func.embed_query(texts[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create Milvus Collection and load data\n", + "\n", + "Initialize connection URI and establish connection" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "connections.connect(uri=CONNECTION_URI)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define field names and their data types" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "pk_field = \"doc_id\"\n", + "dense_field = \"dense_vector\"\n", + "sparse_field = \"sparse_vector\"\n", + "text_field = \"text\"\n", + "fields = [\n", + " FieldSchema(\n", + " name=pk_field,\n", + " dtype=DataType.VARCHAR,\n", + " is_primary=True,\n", + " auto_id=True,\n", + " max_length=100,\n", + " ),\n", + " FieldSchema(name=dense_field, dtype=DataType.FLOAT_VECTOR, dim=dense_dim),\n", + " FieldSchema(name=sparse_field, dtype=DataType.SPARSE_FLOAT_VECTOR),\n", + " FieldSchema(name=text_field, dtype=DataType.VARCHAR, max_length=65_535),\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create a collection with the defined schema" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "schema = CollectionSchema(fields=fields, enable_dynamic_field=False)\n", + "collection = Collection(\n", + " name=\"IntroductionToTheNovels\", schema=schema, consistency_level=\"Strong\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define index for dense and sparse vectors" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "dense_index = {\"index_type\": \"FLAT\", \"metric_type\": \"IP\"}\n", + "collection.create_index(\"dense_vector\", dense_index)\n", + "sparse_index = {\"index_type\": \"SPARSE_INVERTED_INDEX\", \"metric_type\": \"IP\"}\n", + "collection.create_index(\"sparse_vector\", sparse_index)\n", + "collection.flush()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Insert entities into the collection and load the collection" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "entities = []\n", + "for text in texts:\n", + " entity = {\n", + " dense_field: dense_embedding_func.embed_documents([text])[0],\n", + " sparse_field: sparse_embedding_func.embed_documents([text])[0],\n", + " text_field: text,\n", + " }\n", + " 
entities.append(entity)\n", + "collection.insert(entities)\n", + "collection.load()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Build RAG chain with Retriever\n", + "### Create the Retriever\n", + "\n", + "Define search parameters for sparse and dense fields, and create a retriever" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "sparse_search_params = {\"metric_type\": \"IP\"}\n", + "dense_search_params = {\"metric_type\": \"IP\", \"params\": {}}\n", + "retriever = MilvusCollectionHybridSearchRetriever(\n", + " collection=collection,\n", + " rerank=WeightedRanker(0.5, 0.5),\n", + " anns_fields=[dense_field, sparse_field],\n", + " field_embeddings=[dense_embedding_func, sparse_embedding_func],\n", + " field_search_params=[dense_search_params, sparse_search_params],\n", + " top_k=3,\n", + " text_field=text_field,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "In the input parameters of this Retriever, we use a dense embedding and a sparse embedding to perform hybrid search on the two fields of this Collection, and use WeightedRanker for reranking. Finally, 3 top-K Documents will be returned." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"In 'The Lost Expedition' by Caspian Grey, a team of explorers ventures into the heart of the Amazon rainforest in search of a lost city, but soon finds themselves hunted by a ruthless treasure hunter and the treacherous jungle itself.\", metadata={'doc_id': '449281835035545843'}),\n", + " Document(page_content=\"In 'The Phantom Pilgrim' by Rowan Welles, a charismatic smuggler is hired by a mysterious organization to transport a valuable artifact across a war-torn continent, but soon finds themselves pursued by deadly assassins and rival factions.\", metadata={'doc_id': '449281835035545845'}),\n", + " Document(page_content=\"In 'The Dreamwalker's Journey' by Lyra Snow, a young dreamwalker discovers she has the ability to enter people's dreams, but soon finds herself trapped in a surreal world of nightmares and illusions, where the boundaries between reality and fantasy blur.\", metadata={'doc_id': '449281835035545846'})]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.invoke(\"What are the story about ventures?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Build the RAG chain\n", + "\n", + "Initialize ChatOpenAI and define a prompt template" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "llm = ChatOpenAI()\n", + "\n", + "PROMPT_TEMPLATE = \"\"\"\n", + "Human: You are an AI assistant, and provides answers to questions by using fact based and statistical information when possible.\n", + "Use the following pieces of information to provide a concise answer to the question enclosed in tags.\n", + "\n", + "\n", + "{context}\n", + "\n", + "\n", + "\n", + "{question}\n", + "\n", + "\n", + "Assistant:\"\"\"\n", + "\n", + "prompt = PromptTemplate(\n", + " template=PROMPT_TEMPLATE, input_variables=[\"context\", \"question\"]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + } + }, + "source": [ + "Define a function for formatting documents" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def format_docs(docs):\n", + " return \"\\n\\n\".join(doc.page_content for doc in docs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "Define a chain using the retriever and other components" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "rag_chain = (\n", + " {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n", + " | prompt\n", + " | llm\n", + " | StrOutputParser()\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "Perform a query using the defined chain" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\"Lila Rose has written 'The Memory Thief,' which follows a charismatic thief with the ability to steal and manipulate memories as they navigate a daring heist and a web of deceit and betrayal.\"" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rag_chain.invoke(\"What novels has Lila written and what are their contents?\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "source": [ + "Drop the collection" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "collection.drop()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/docs/docs/integrations/retrievers/zep_cloud_memorystore.ipynb b/docs/docs/integrations/retrievers/zep_cloud_memorystore.ipynb new file mode 100644 index 0000000000000..6b0c1c674ea8b --- /dev/null +++ b/docs/docs/integrations/retrievers/zep_cloud_memorystore.ipynb @@ -0,0 +1,470 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "collapsed": false + }, + "source": [ + "# Zep Cloud\n", + "## Retriever Example for [Zep Cloud](https://docs.getzep.com/)\n", + "\n", + "> Recall, understand, and extract data from chat histories. 
Power personalized AI experiences.\n", + "\n", + "> [Zep](https://www.getzep.com) is a long-term memory service for AI Assistant apps.\n", + "> With Zep, you can provide AI assistants with the ability to recall past conversations, no matter how distant,\n", + "> while also reducing hallucinations, latency, and cost.\n", + "\n", + "> See [Zep Cloud Installation Guide](https://help.getzep.com/sdks) and more [Zep Cloud Langchain Examples](https://github.com/getzep/zep-python/tree/main/examples)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retriever Example\n", + "\n", + "This notebook demonstrates how to search historical chat message histories using the [Zep Long-term Memory Store](https://www.getzep.com/).\n", + "\n", + "We'll demonstrate:\n", + "\n", + "1. Adding conversation history to the Zep memory store.\n", + "2. Vector search over the conversation history: \n", + " 1. With a similarity search over chat messages\n", + " 2. Using maximal marginal relevance re-ranking of a chat message search\n", + " 3. Filtering a search using metadata filters\n", + " 4. A similarity search over summaries of the chat messages\n", + " 5. Using maximal marginal relevance re-ranking of a summary search\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "import getpass\n", + "import time\n", + "from uuid import uuid4\n", + "\n", + "from langchain_community.memory.zep_cloud_memory import ZepCloudMemory\n", + "from langchain_community.retrievers import ZepCloudRetriever\n", + "from langchain_core.messages import AIMessage, HumanMessage\n", + "\n", + "# Provide your Zep API key.\n", + "zep_api_key = getpass.getpass()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialize the Zep Chat Message History Class and add a chat message history to the memory store\n", + "\n", + "**NOTE:** Unlike other Retrievers, the content returned by the Zep Retriever is session/user specific. A `session_id` is required when instantiating the Retriever." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "session_id = str(uuid4()) # This is a unique identifier for the user/session\n", + "\n", + "# Initialize the Zep Memory Class\n", + "zep_memory = ZepCloudMemory(session_id=session_id, api_key=zep_api_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Preload some messages into the memory. The default message window is 4 messages. 
We want to push beyond this to demonstrate auto-summarization.\n", + "test_history = [\n", + " {\"role\": \"human\", \"role_type\": \"user\", \"content\": \"Who was Octavia Butler?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"Octavia Estelle Butler (June 22, 1947 – February 24, 2006) was an American\"\n", + " \" science fiction author.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"Which books of hers were made into movies?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"The most well-known adaptation of Octavia Butler's work is the FX series\"\n", + " \" Kindred, based on her novel of the same name.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"role_type\": \"user\", \"content\": \"Who were her contemporaries?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"Octavia Butler's contemporaries included Ursula K. Le Guin, Samuel R.\"\n", + " \" Delany, and Joanna Russ.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"role_type\": \"user\", \"content\": \"What awards did she win?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"Octavia Butler won the Hugo Award, the Nebula Award, and the MacArthur\"\n", + " \" Fellowship.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"Which other women sci-fi writers might I want to read?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": \"You might want to read Ursula K. Le Guin or Joanna Russ.\",\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": (\n", + " \"Write a short synopsis of Butler's book, Parable of the Sower. What is it\"\n", + " \" about?\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"Parable of the Sower is a science fiction novel by Octavia Butler,\"\n", + " \" published in 1993. 
It follows the story of Lauren Olamina, a young woman\"\n", + " \" living in a dystopian future where society has collapsed due to\"\n", + " \" environmental disasters, poverty, and violence.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"What is the setting of the book?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"The book is set in a dystopian future in the 2020s, where society has\"\n", + " \" collapsed due to climate change and economic crises.\"\n", + " ),\n", + " },\n", + " {\"role\": \"human\", \"role_type\": \"user\", \"content\": \"Who is the protagonist?\"},\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"The protagonist of the book is Lauren Olamina, a young woman who possesses\"\n", + " \" 'hyperempathy', the ability to feel pain and other sensations she\"\n", + " \" witnesses.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"What is the main theme of the book?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"The main theme of the book is survival in the face of drastic societal\"\n", + " \" change and collapse. It also explores themes of adaptability, community,\"\n", + " \" and the human capacity for change.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"What is the 'Parable of the Sower'?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"The 'Parable of the Sower' is a biblical parable that Butler uses as a\"\n", + " \" metaphor in the book. In the parable, a sower scatters seeds, some of\"\n", + " \" which fall on fertile ground and grow, while others fall on rocky ground\"\n", + " \" or among thorns and fail to grow. The parable is used to illustrate the\"\n", + " \" importance of receptivity and preparedness in the face of change.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"What is Butler's writing style like?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"role_type\": \"assistant\",\n", + " \"content\": (\n", + " \"Butler's writing style is known for its clarity, directness, and\"\n", + " \" psychological insight. Her narratives often involve complex, diverse\"\n", + " \" characters and explore themes of race, gender, and power.\"\n", + " ),\n", + " },\n", + " {\n", + " \"role\": \"human\",\n", + " \"role_type\": \"user\",\n", + " \"content\": \"What other books has she written?\",\n", + " },\n", + " {\n", + " \"role\": \"ai\",\n", + " \"content\": (\n", + " \"In addition to 'Parable of the Sower', Butler has written several other\"\n", + " \" notable works, including 'Kindred', 'Dawn', and 'Parable of the Talents'.\"\n", + " ),\n", + " },\n", + "]\n", + "\n", + "for msg in test_history:\n", + " zep_memory.chat_memory.add_message(\n", + " HumanMessage(content=msg[\"content\"])\n", + " if msg[\"role\"] == \"human\"\n", + " else AIMessage(content=msg[\"content\"])\n", + " )\n", + "\n", + "time.sleep(\n", + " 10\n", + ") # Wait for the messages to be embedded and summarized, this happens asynchronously." 
+ ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use the Zep Retriever to vector search over the Zep memory\n", + "\n", + "Zep provides native vector search over historical conversation memory. Embedding happens automatically.\n", + "\n", + "NOTE: Embedding of messages occurs asynchronously, so the first query may not return results. Subsequent queries will return results as the embeddings are generated." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:32:06.613100Z", + "start_time": "2024-05-10T14:32:06.369301Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content=\"What is the 'Parable of the Sower'?\", metadata={'score': 0.9333381652832031, 'uuid': 'bebc441c-a32d-44a1-ae61-968e7b3d4956', 'created_at': '2024-05-10T05:02:01.857627Z', 'token_count': 11, 'role': 'human'}),\n", + " Document(page_content=\"The 'Parable of the Sower' is a biblical parable that Butler uses as a metaphor in the book. In the parable, a sower scatters seeds, some of which fall on fertile ground and grow, while others fall on rocky ground or among thorns and fail to grow. The parable is used to illustrate the importance of receptivity and preparedness in the face of change.\", metadata={'score': 0.8757256865501404, 'uuid': '193c60d8-2b7b-4eb1-a4be-c2d8afd92991', 'created_at': '2024-05-10T05:02:01.97174Z', 'token_count': 82, 'role': 'ai'}),\n", + " Document(page_content=\"Write a short synopsis of Butler's book, Parable of the Sower. What is it about?\", metadata={'score': 0.8641344904899597, 'uuid': 'fc78901d-a625-4530-ba63-1ae3e3b11683', 'created_at': '2024-05-10T05:02:00.942994Z', 'token_count': 21, 'role': 'human'}),\n", + " Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler, published in 1993. It follows the story of Lauren Olamina, a young woman living in a dystopian future where society has collapsed due to environmental disasters, poverty, and violence.', metadata={'score': 0.8581685125827789, 'uuid': '91f2cda4-276e-446d-96bf-07d34e5af616', 'created_at': '2024-05-10T05:02:01.05577Z', 'token_count': 54, 'role': 'ai'}),\n", + " Document(page_content=\"In addition to 'Parable of the Sower', Butler has written several other notable works, including 'Kindred', 'Dawn', and 'Parable of the Talents'.\", metadata={'score': 0.8076582252979279, 'uuid': 'e3994519-9a90-410c-b14c-2c652f6d184f', 'created_at': '2024-05-10T05:02:02.401682Z', 'token_count': 37, 'role': 'ai'})]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zep_retriever = ZepCloudRetriever(\n", + " api_key=zep_api_key,\n", + " session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n", + " top_k=5,\n", + ")\n", + "\n", + "await zep_retriever.ainvoke(\"Who wrote Parable of the Sower?\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also use the Zep sync API to retrieve results:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:31:37.611570Z", + "start_time": "2024-05-10T14:31:37.298903Z" + }, + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler set in a dystopian future in the 2020s. 
The story follows Lauren Olamina, a young woman living in a society that has collapsed due to environmental disasters, poverty, and violence. The novel explores themes of societal breakdown, the struggle for survival, and the search for a better future.', metadata={'score': 0.8473024368286133, 'uuid': 'e4689f8e-33be-4a59-a9c2-e5ef5dd70f74', 'created_at': '2024-05-10T05:02:02.713123Z', 'token_count': 76})]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zep_retriever.invoke(\"Who wrote Parable of the Sower?\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reranking using MMR (Maximal Marginal Relevance)\n", + "\n", + "Zep has native, SIMD-accelerated support for reranking results using MMR. This is useful for removing redundancy in results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "zep_retriever = ZepCloudRetriever(\n", + " api_key=zep_api_key,\n", + " session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n", + " top_k=5,\n", + " search_type=\"mmr\",\n", + " mmr_lambda=0.5,\n", + ")\n", + "\n", + "await zep_retriever.ainvoke(\"Who wrote Parable of the Sower?\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using metadata filters to refine search results\n", + "\n", + "Zep supports filtering results by metadata. This is useful for filtering results by entity type, or other metadata.\n", + "\n", + "More information here: https://help.getzep.com/document-collections#searching-a-collection-with-hybrid-vector-search" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filter = {\"where\": {\"jsonpath\": '$[*] ? (@.baz == \"qux\")'}}\n", + "\n", + "await zep_retriever.ainvoke(\n", + " \"Who wrote Parable of the Sower?\", config={\"metadata\": filter}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Searching over Summaries with MMR Reranking\n", + "\n", + "Zep automatically generates summaries of chat messages. These summaries can be searched over using the Zep Retriever. Since a summary is a distillation of a conversation, they're more likely to match your search query and offer rich, succinct context to the LLM.\n", + "\n", + "Successive summaries may include similar content, with Zep's similarity search returning the highest matching results but with little diversity.\n", + "MMR re-ranks the results to ensure that the summaries you populate into your prompt are both relevant and each offers additional information to the LLM." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T14:32:56.877960Z", + "start_time": "2024-05-10T14:32:56.517360Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='Parable of the Sower is a science fiction novel by Octavia Butler set in a dystopian future in the 2020s. The story follows Lauren Olamina, a young woman living in a society that has collapsed due to environmental disasters, poverty, and violence. 
The novel explores themes of societal breakdown, the struggle for survival, and the search for a better future.', metadata={'score': 0.8473024368286133, 'uuid': 'e4689f8e-33be-4a59-a9c2-e5ef5dd70f74', 'created_at': '2024-05-10T05:02:02.713123Z', 'token_count': 76}),\n", + " Document(page_content='The \\'Parable of the Sower\\' refers to a new religious belief system that the protagonist, Lauren Olamina, develops over the course of the novel. As her community disintegrates due to climate change, economic collapse, and social unrest, Lauren comes to believe that humanity must adapt and \"shape God\" in order to survive. The \\'Parable of the Sower\\' is the foundational text of this new religion, which Lauren calls \"Earthseed\", that emphasizes the inevitability of change and the need for humanity to take an active role in shaping its own future. This parable is a central thematic element of the novel, representing the protagonist\\'s search for meaning and purpose in the face of societal upheaval.', metadata={'score': 0.8466987311840057, 'uuid': '1f1a44eb-ebd8-4617-ac14-0281099bd770', 'created_at': '2024-05-10T05:02:07.541073Z', 'token_count': 146}),\n", + " Document(page_content='The dialog discusses the central themes of Octavia Butler\\'s acclaimed science fiction novel \"Parable of the Sower.\" The main theme is survival in the face of drastic societal collapse, and the importance of adaptability, community, and the human capacity for change. The \"Parable of the Sower,\" a biblical parable, serves as a metaphorical framework for the novel, illustrating the need for receptivity and preparedness when confronting transformative upheaval.', metadata={'score': 0.8283970355987549, 'uuid': '4158a750-3ccd-45ce-ab88-fed5ba68b755', 'created_at': '2024-05-10T05:02:06.510068Z', 'token_count': 91})]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "zep_retriever = ZepCloudRetriever(\n", + " api_key=zep_api_key,\n", + " session_id=session_id, # Ensure that you provide the session_id when instantiating the Retriever\n", + " top_k=3,\n", + " search_scope=\"summary\",\n", + " search_type=\"mmr\",\n", + " mmr_lambda=0.5,\n", + ")\n", + "\n", + "await zep_retriever.ainvoke(\"Who wrote Parable of the Sower?\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/retrievers/zilliz_cloud_pipeline.ipynb b/docs/docs/integrations/retrievers/zilliz_cloud_pipeline.ipynb new file mode 100644 index 0000000000000..bfbf6ff92340b --- /dev/null +++ b/docs/docs/integrations/retrievers/zilliz_cloud_pipeline.ipynb @@ -0,0 +1,222 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Zilliz Cloud Pipeline\n", + "\n", + "> [Zilliz Cloud Pipelines](https://docs.zilliz.com/docs/pipelines) transform your unstructured data to a searchable vector collection, chaining up the embedding, ingestion, search, and deletion of your data.\n", + "> \n", + "> Zilliz Cloud Pipelines are available in the Zilliz Cloud 
Console and via RESTful APIs.\n", + "\n", + "This notebook demonstrates how to prepare Zilliz Cloud Pipelines and use them via a LangChain Retriever." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prepare Zilliz Cloud Pipelines\n", + "\n", + "To get pipelines ready for the LangChain Retriever, you need to create and configure the services in Zilliz Cloud." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**1. Set up Database**\n", + "\n", + "- [Register with Zilliz Cloud](https://docs.zilliz.com/docs/register-with-zilliz-cloud)\n", + "- [Create a cluster](https://docs.zilliz.com/docs/create-cluster)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**2. Create Pipelines**\n", + "\n", + "- [Document ingestion, search, deletion](https://docs.zilliz.com/docs/pipelines-doc-data)\n", + "- [Text ingestion, search, deletion](https://docs.zilliz.com/docs/pipelines-text-data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use LangChain Retriever" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet langchain-milvus" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_milvus import ZillizCloudPipelineRetriever\n", + "\n", + "retriever = ZillizCloudPipelineRetriever(\n", + " pipeline_ids={\n", + " \"ingestion\": \"\", # skip this line if you do NOT need to add documents\n", + " \"search\": \"\", # skip this line if you do NOT need to get relevant documents\n", + " \"deletion\": \"\", # skip this line if you do NOT need to delete documents\n", + " },\n", + " token=\"\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add documents\n", + "\n", + "To add documents, you can use the method `add_texts` or `add_doc_url`, which inserts documents from a list of texts or a presigned/public URL with corresponding metadata into the store." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- If using a **text ingestion pipeline**, you can use the method `add_texts`, which inserts a batch of texts with the corresponding metadata into the Zilliz Cloud storage.\n", + "\n", + " **Arguments:**\n", + " - `texts`: A list of text strings.\n", + " - `metadata`: A key-value dictionary of metadata to be inserted as preserved fields required by the ingestion pipeline. Defaults to None.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# retriever.add_texts(\n", + "# texts = [\"example text 1\", \"example text 2\"],\n", + "# metadata={\"\": \"\"} # skip this line if no preserved field is required by the ingestion pipeline\n", + "# )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- If using a **document ingestion pipeline**, you can use the method `add_doc_url`, which inserts a document from a URL with the corresponding metadata into the Zilliz Cloud storage.\n", + "\n", + " **Arguments:**\n", + " - `doc_url`: A document URL.\n", + " - `metadata`: A key-value dictionary of metadata to be inserted as preserved fields required by the ingestion pipeline. Defaults to None.\n", + "\n", + "The following example works with a document ingestion pipeline, which requires the Milvus version as metadata. 
We will use an [example document](https://publicdataset.zillizcloud.com/milvus_doc.md) describing how to delete entities in Milvus v2.3.x. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'token_usage': 1247, 'doc_name': 'milvus_doc.md', 'num_chunks': 6}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.add_doc_url(\n", + " doc_url=\"https://publicdataset.zillizcloud.com/milvus_doc.md\",\n", + " metadata={\"version\": \"v2.3.x\"},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get relevant documents\n", + "\n", + "To query the retriever, you can use the method `get_relevant_documents`, which returns a list of LangChain Document objects.\n", + "\n", + "**Arguments:**\n", + "- `query`: String to find relevant documents for.\n", + "- `top_k`: The number of results. Defaults to 10.\n", + "- `offset`: The number of records to skip in the search result. Defaults to 0.\n", + "- `output_fields`: The extra fields to present in output.\n", + "- `filter`: The Milvus expression to filter search results. Defaults to \"\".\n", + "- `run_manager`: The callbacks handler to use." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(page_content='# Delete Entities\\nThis topic describes how to delete entities in Milvus. \\nMilvus supports deleting entities by primary key or complex boolean expressions. Deleting entities by primary key is much faster and lighter than deleting them by complex boolean expressions. This is because Milvus executes queries first when deleting data by complex boolean expressions. \\nDeleted entities can still be retrieved immediately after the deletion if the consistency level is set lower than Strong.\\nEntities deleted beyond the pre-specified span of time for Time Travel cannot be retrieved again.\\nFrequent deletion operations will impact the system performance. \\nBefore deleting entities by comlpex boolean expressions, make sure the collection has been loaded.\\nDeleting entities by complex boolean expressions is not an atomic operation. Therefore, if it fails halfway through, some data may still be deleted.\\nDeleting entities by complex boolean expressions is supported only when the consistency is set to Bounded. For details, see Consistency.\\\\\\n\\\\\\n# Delete Entities\\n## Prepare boolean expression\\nPrepare the boolean expression that filters the entities to delete. \\nMilvus supports deleting entities by primary key or complex boolean expressions. For more information on expression rules and supported operators, see Boolean Expression Rules.', metadata={'id': 448986959321277978, 'distance': 0.7871403694152832}),\n", + " Document(page_content='# Delete Entities\\n## Prepare boolean expression\\n### Simple boolean expression\\nUse a simple expression to filter data with primary key values of 0 and 1: \\n```python\\nexpr = \"book_id in [0,1]\"\\n```\\\\\\n\\\\\\n# Delete Entities\\n## Prepare boolean expression\\n### Complex boolean expression\\nTo filter entities that meet specific conditions, define complex boolean expressions. 
\\nFilter entities whose word_count is greater than or equal to 11000: \\n```python\\nexpr = \"word_count >= 11000\"\\n``` \\nFilter entities whose book_name is not Unknown: \\n```python\\nexpr = \"book_name != Unknown\"\\n``` \\nFilter entities whose primary key values are greater than 5 and word_count is smaller than or equal to 9999: \\n```python\\nexpr = \"book_id > 5 && word_count <= 9999\"\\n```', metadata={'id': 448986959321277979, 'distance': 0.7775762677192688}),\n", + " Document(page_content='# Delete Entities\\n## Delete entities\\nDelete the entities with the boolean expression you created. Milvus returns the ID list of the deleted entities.\\n```python\\nfrom pymilvus import Collection\\ncollection = Collection(\"book\") # Get an existing collection.\\ncollection.delete(expr)\\n``` \\nParameter\\tDescription\\nexpr\\tBoolean expression that specifies the entities to delete.\\npartition_name (optional)\\tName of the partition to delete entities from.\\\\\\n\\\\\\n# Upsert Entities\\nThis topic describes how to upsert entities in Milvus. \\nUpserting is a combination of insert and delete operations. In the context of a Milvus vector database, an upsert is a data-level operation that will overwrite an existing entity if a specified field already exists in a collection, and insert a new entity if the specified value doesn’t already exist. \\nThe following example upserts 3,000 rows of randomly generated data as the example data. When performing upsert operations, it\\'s important to note that the operation may compromise performance. This is because the operation involves deleting data during execution.', metadata={'id': 448986959321277980, 'distance': 0.680284857749939}),\n", + " Document(page_content='# Upsert Entities\\n## Flush data\\nWhen data is upserted into Milvus it is updated and inserted into segments. Segments have to reach a certain size to be sealed and indexed. Unsealed segments will be searched brute force. In order to avoid this with any remainder data, it is best to call flush(). The flush() call will seal any remaining segments and send them for indexing. It is important to only call this method at the end of an upsert session. Calling it too often will cause fragmented data that will need to be cleaned later on.\\\\\\n\\\\\\n# Upsert Entities\\n## Limits\\nUpdating primary key fields is not supported by upsert().\\nupsert() is not applicable and an error can occur if autoID is set to True for primary key fields.', metadata={'id': 448986959321277983, 'distance': 0.5672488212585449}),\n", + " Document(page_content='# Upsert Entities\\n## Prepare data\\nFirst, prepare the data to upsert. The type of data to upsert must match the schema of the collection, otherwise Milvus will raise an exception. \\nMilvus supports default values for scalar fields, excluding a primary key field. This indicates that some fields can be left empty during data inserts or upserts. For more information, refer to Create a Collection. \\n```python\\n# Generate data to upsert\\n\\nimport random\\nnb = 3000\\ndim = 8\\nvectors = [[random.random() for _ in range(dim)] for _ in range(nb)]\\ndata = [\\n[i for i in range(nb)],\\n[str(i) for i in range(nb)],\\n[i for i in range(10000, 10000+nb)],\\nvectors,\\n[str(\"dy\"*i) for i in range(nb)]\\n]\\n```', metadata={'id': 448986959321277981, 'distance': 0.5107149481773376}),\n", + " Document(page_content='# Upsert Entities\\n## Upsert data\\nUpsert the data to the collection. 
\\n```python\\nfrom pymilvus import Collection\\ncollection = Collection(\"book\") # Get an existing collection.\\nmr = collection.upsert(data)\\n``` \\nParameter\\tDescription\\ndata\\tData to upsert into Milvus.\\npartition_name (optional)\\tName of the partition to upsert data into.\\ntimeout (optional)\\tAn optional duration of time in seconds to allow for the RPC. If it is set to None, the client keeps waiting until the server responds or error occurs.\\nAfter upserting entities into a collection that has previously been indexed, you do not need to re-index the collection, as Milvus will automatically create an index for the newly upserted data. For more information, refer to Can indexes be created after inserting vectors?', metadata={'id': 448986959321277982, 'distance': 0.4341375529766083})]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "retriever.get_relevant_documents(\n", + " \"Can users delete entities by complex boolean expressions?\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "develop", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/docs/docs/integrations/stores/cassandra.ipynb b/docs/docs/integrations/stores/cassandra.ipynb new file mode 100644 index 0000000000000..bd9413da77331 --- /dev/null +++ b/docs/docs/integrations/stores/cassandra.ipynb @@ -0,0 +1,228 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Cassandra\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Cassandra\n", + "\n", + "[Cassandra](https://cassandra.apache.org/) is a NoSQL, row-oriented, highly scalable and highly available database.\n", + "\n", + "`CassandraByteStore` needs the `cassio` package to be installed:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet cassio" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Store takes the following parameters:\n", + "\n", + "* table: The table where to store the data.\n", + "* session: (Optional) The cassandra driver session. If not provided, the cassio resolved session will be used.\n", + "* keyspace: (Optional) The keyspace of the table. If not provided, the cassio resolved keyspace will be used.\n", + "* setup_mode: (Optional) The mode used to create the Cassandra table (SYNC, ASYNC or OFF). Defaults to SYNC." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## CassandraByteStore\n", + "\n", + "The `CassandraByteStore` is an implementation of `ByteStore` that stores the data in your Cassandra instance.\n", + "The store keys must be strings and will be mapped to the `row_id` column of the Cassandra table.\n", + "The store `bytes` values are mapped to the `body_blob` column of the Cassandra table." 
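For reference, `CassandraByteStore` exposes the standard `ByteStore` operations (`mset`, `mget`, `mdelete`, and `yield_keys`). A minimal sketch, assuming a `store` instance created as shown in the sections below:

```python
# Assumes `store` is a CassandraByteStore instance, created as shown in the
# following sections (either from a driver session or via cassio).
store.mset([("k1", b"v1"), ("k2", b"v2")])  # keys map to row_id, values to body_blob
print(store.mget(["k1", "k2"]))  # [b'v1', b'v2']
print(list(store.yield_keys()))  # iterate over the stored keys
store.mdelete(["k1"])  # delete entries by key
```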
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.storage import CassandraByteStore" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Init from a cassandra driver Session\n", + "\n", + "You need to create a `cassandra.cluster.Session` object, as described in the [Cassandra driver documentation](https://docs.datastax.com/en/developer/python-driver/latest/api/cassandra/cluster/#module-cassandra.cluster). The details vary (e.g. with network settings and authentication), but this might be something like:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from cassandra.cluster import Cluster\n", + "\n", + "cluster = Cluster()\n", + "session = cluster.connect()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "You need to provide the name of an existing keyspace of the Cassandra instance:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CASSANDRA_KEYSPACE = input(\"CASSANDRA_KEYSPACE = \")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Creating the store:" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[b'v1', b'v2']\n" + ] + } + ], + "source": [ + "store = CassandraByteStore(\n", + " table=\"my_store\",\n", + " session=session,\n", + " keyspace=CASSANDRA_KEYSPACE,\n", + ")\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Init from cassio\n", + "\n", + "It's also possible to use cassio to configure the session and keyspace." 
+ ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "import cassio\n", + "\n", + "cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n", + "\n", + "store = CassandraByteStore(\n", + " table=\"my_store\",\n", + ")\n", + "\n", + "store.mset([(\"k1\", b\"v1\"), (\"k2\", b\"v2\")])\n", + "print(store.mget([\"k1\", \"k2\"]))" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "markdown", + "source": [ + "### Usage with CacheBackedEmbeddings\n", + "\n", + "You may use the `CassandraByteStore` in conjunction with a [`CacheBackedEmbeddings`](/docs/how_to/caching_embeddings) to cache the result of embeddings computations.\n" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "from langchain.embeddings import CacheBackedEmbeddings\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "cassio.init(contact_points=\"127.0.0.1\", keyspace=CASSANDRA_KEYSPACE)\n", + "\n", + "store = CassandraByteStore(\n", + " table=\"my_store\",\n", + ")\n", + "\n", + "embeddings = CacheBackedEmbeddings.from_bytes_store(\n", + " underlying_embeddings=OpenAIEmbeddings(), document_embedding_cache=store\n", + ")" + ], + "metadata": { + "collapsed": false + } + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/stores/elasticsearch.ipynb b/docs/docs/integrations/stores/elasticsearch.ipynb new file mode 100644 index 0000000000000..f55919c23d652 --- /dev/null +++ b/docs/docs/integrations/stores/elasticsearch.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: Elasticsearch \n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ElasticsearchEmbeddingsCache\n", + "\n", + "The `ElasticsearchEmbeddingsCache` is a `ByteStore` implementation that uses your Elasticsearch instance for efficient storage and retrieval of embeddings.\n", + "\n", + "\n", + "First install the LangChain integration with Elasticsearch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U langchain-elasticsearch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "it can be instantiated using `CacheBackedEmbeddings.from_bytes_store` method." 
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.embeddings import CacheBackedEmbeddings\n", + "from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "underlying_embeddings = OpenAIEmbeddings(model=\"text-embedding-3-small\")\n", + "\n", + "store = ElasticsearchEmbeddingsCache(\n", + " es_url=\"http://localhost:9200\",\n", + " index_name=\"llm-chat-cache\",\n", + " metadata={\"project\": \"my_chatgpt_project\"},\n", + " namespace=\"my_chatgpt_project\",\n", + ")\n", + "\n", + "embeddings = CacheBackedEmbeddings.from_bytes_store(\n", + " underlying_embeddings=OpenAIEmbeddings(),\n", + " document_embedding_cache=store,\n", + " query_embedding_cache=store,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `index_name` parameter can also accept aliases. This allows you to use ILM (index lifecycle management), which we suggest considering for managing retention and controlling cache growth.\n", + "\n", + "Look at the class docstring for all parameters." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Index the generated vectors\n", + "The cached vectors won't be searchable by default. The developer can customize the building of the Elasticsearch document in order to add an indexed vector field.\n", + "\n", + "This can be done by subclassing and overriding methods. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import Any, Dict, List\n", + "\n", + "from langchain_elasticsearch import ElasticsearchEmbeddingsCache\n", + "\n", + "\n", + "class SearchableElasticsearchStore(ElasticsearchEmbeddingsCache):\n", + " @property\n", + " def mapping(self) -> Dict[str, Any]:\n", + " mapping = super().mapping\n", + " mapping[\"mappings\"][\"properties\"][\"vector\"] = {\n", + " \"type\": \"dense_vector\",\n", + " \"dims\": 1536,\n", + " \"index\": True,\n", + " \"similarity\": \"dot_product\",\n", + " }\n", + " return mapping\n", + "\n", + " def build_document(self, llm_input: str, vector: List[float]) -> Dict[str, Any]:\n", + " body = super().build_document(llm_input, vector)\n", + " body[\"vector\"] = vector\n", + " return body" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "When overriding the mapping and the document building, please only make additive modifications, keeping the base mapping intact." + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/text_embedding/ascend.ipynb b/docs/docs/integrations/text_embedding/ascend.ipynb new file mode 100644 index 0000000000000..4d3559f837ce3 --- /dev/null +++ b/docs/docs/integrations/text_embedding/ascend.ipynb @@ -0,0 +1,183 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "a636f6f3-00d7-4248-8c36-3da51190e882", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[-0.04053403 -0.05560051 -0.04385472 ... 
0.09371872 0.02846981\n", + " -0.00576814]\n" + ] + } + ], + "source": [ + "from langchain_community.embeddings import AscendEmbeddings\n", + "\n", + "model = AscendEmbeddings(\n", + " model_path=\"/root/.cache/modelscope/hub/yangjhchs/acge_text_embedding\",\n", + " device_id=0,\n", + " query_instruction=\"Represend this sentence for searching relevant passages: \",\n", + ")\n", + "emb = model.embed_query(\"hellow\")\n", + "print(emb)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8d29ddaa-eef3-4a4e-93d8-0f1c13525fb4", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-0.00348254 0.03098977 -0.00203087 ... 0.08492374 0.03970494\n", + " -0.03372753]\n", + " [-0.02198593 -0.01601127 0.00215684 ... 0.06065163 0.00126425\n", + " -0.03634358]]\n" + ] + } + ], + "source": [ + "doc_embs = model.embed_documents(\n", + " [\"This is a content of the document\", \"This is another document\"]\n", + ")\n", + "print(doc_embs)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "797a720d-c478-4254-be2c-975bc4529f57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.aembed_query(\"hellow\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "57e62e53-4d2c-4532-9b77-a46bc3da1130", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([-0.04053403, -0.05560051, -0.04385472, ..., 0.09371872,\n", + " 0.02846981, -0.00576814], dtype=float32)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await model.aembed_query(\"hellow\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "7e260457-8b50-4ca3-8f76-8a76d8bba8c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.aembed_documents(\n", + " [\"This is a content of the document\", \"This is another document\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ce954b94-aaac-4d2c-80be-b2988c16af6d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.00348254, 0.03098977, -0.00203087, ..., 0.08492374,\n", + " 0.03970494, -0.03372753],\n", + " [-0.02198593, -0.01601127, 0.00215684, ..., 0.06065163,\n", + " 0.00126425, -0.03634358]], dtype=float32)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await model.aembed_documents(\n", + " [\"This is a content of the document\", \"This is another document\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7823d69d-de79-4f95-90dd-38f4bdeb9bcc", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": 
"python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/text_embedding/clova.ipynb b/docs/docs/integrations/text_embedding/clova.ipynb new file mode 100644 index 0000000000000..73004e8a7a388 --- /dev/null +++ b/docs/docs/integrations/text_embedding/clova.ipynb @@ -0,0 +1,86 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Clova Embeddings\n", + "[Clova](https://api.ncloud-docs.com/docs/ai-naver-clovastudio-summary) offers an embeddings service\n", + "\n", + "This example goes over how to use LangChain to interact with Clova inference for text embedding.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"CLOVA_EMB_API_KEY\"] = \"\"\n", + "os.environ[\"CLOVA_EMB_APIGW_API_KEY\"] = \"\"\n", + "os.environ[\"CLOVA_EMB_APP_ID\"] = \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.embeddings import ClovaEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "embeddings = ClovaEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "query_text = \"This is a test query.\"\n", + "query_result = embeddings.embed_query(query_text)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "document_text = [\"This is a test doc1.\", \"This is a test doc2.\"]\n", + "document_result = embeddings.embed_documents(document_text)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/text_embedding/databricks.ipynb b/docs/docs/integrations/text_embedding/databricks.ipynb new file mode 100644 index 0000000000000..a2a181e6cfa2b --- /dev/null +++ b/docs/docs/integrations/text_embedding/databricks.ipynb @@ -0,0 +1,199 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Databricks\n", + "\n", + "> [Databricks](https://www.databricks.com/) Lakehouse Platform unifies data, analytics, and AI on one platform.\n", + "\n", + "This notebook provides a quick overview for getting started with Databricks [embedding models](/docs/concepts/#embedding-models). For detailed documentation of all DatabricksEmbeddings features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html).\n", + "\n", + "\n", + "\n", + "## Overview\n", + "\n", + "`DatabricksEmbeddings` class wraps an embedding model endpoint hosted on [Databricks Model Serving](https://docs.databricks.com/en/machine-learning/model-serving/index.html). 
This example notebook shows how to wrap your serving endpoint and use it as an embedding model in your LangChain application.\n", + "\n", + "\n", + "### Supported Methods\n", + "\n", + "`DatabricksEmbeddings` supports all methods of the `Embeddings` class, including async APIs.\n", + "\n", + "\n", + "### Endpoint Requirement\n", + "\n", + "The serving endpoint that `DatabricksEmbeddings` wraps must have an OpenAI-compatible embedding input/output format ([reference](https://mlflow.org/docs/latest/llms/deployments/index.html#embeddings)). As long as the input format is compatible, `DatabricksEmbeddings` can be used for any endpoint type hosted on [Databricks Model Serving](https://docs.databricks.com/en/machine-learning/model-serving/index.html):\n", + "\n", + "1. Foundation Models - A curated list of state-of-the-art foundation models such as BAAI General Embedding (BGE). These endpoints are ready to use in your Databricks workspace without any setup.\n", + "2. Custom Models - You can also deploy custom embedding models to a serving endpoint via MLflow with\n", + "your choice of framework such as LangChain, PyTorch, Transformers, etc.\n", + "3. External Models - Databricks endpoints can serve models that are hosted outside Databricks as a proxy, such as proprietary model services like OpenAI's text-embedding-3.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "To access Databricks models, you'll need to create a Databricks account, set up credentials (only if you are outside a Databricks workspace), and install the required packages.\n", + "\n", + "### Credentials (only if you are outside Databricks)\n", + "\n", + "If you are running your LangChain app inside Databricks, you can skip this step.\n", + "\n", + "Otherwise, you need to manually set the Databricks workspace hostname and a personal access token in the `DATABRICKS_HOST` and `DATABRICKS_TOKEN` environment variables, respectively. See the [Authentication Documentation](https://docs.databricks.com/en/dev-tools/auth/index.html#databricks-personal-access-tokens) for how to get an access token." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"DATABRICKS_HOST\"] = \"https://your-workspace.cloud.databricks.com\"\n", + "os.environ[\"DATABRICKS_TOKEN\"] = getpass.getpass(\"Enter your Databricks access token: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "The LangChain Databricks integration lives in the `langchain-community` package. Also, `mlflow >= 2.9` is required to run the code in this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain-community mlflow>=2.9.0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We first demonstrate how to query the BGE model hosted as a Foundation Models endpoint with `DatabricksEmbeddings`.\n", + "\n", + "For other endpoint types, there are some differences in how to set up the endpoint itself; however, once the endpoint is ready, there is no difference in how to query it."
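To illustrate that last point, here is a minimal sketch of wrapping a hypothetical custom or external embeddings endpoint. The endpoint name below is a placeholder (it must be replaced with a serving endpoint that actually exists in your workspace); querying it looks exactly like querying the Foundation Models endpoint used in the rest of this notebook:

```python
from langchain_community.embeddings import DatabricksEmbeddings

# Placeholder endpoint name: substitute a custom-model or external-model
# serving endpoint from your own Databricks workspace.
external_embeddings = DatabricksEmbeddings(endpoint="my-external-embeddings-endpoint")

# The query interface is the same regardless of how the endpoint is backed.
print(external_embeddings.embed_query("hello")[:3])
```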
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.embeddings import DatabricksEmbeddings\n", + "\n", + "embeddings = DatabricksEmbeddings(\n", + " endpoint=\"databricks-bge-large-en\",\n", + " # Specify parameters for embedding queries and documents if needed\n", + " # query_params={...},\n", + " # document_params={...},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Embed single text" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0.051055908203125, 0.007221221923828125, 0.003879547119140625]\n" + ] + } + ], + "source": [ + "embeddings.embed_query(\"hello\")[:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Embed documents" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "documents = [\"This is a dummy document.\", \"This is another dummy document.\"]\n", + "response = embeddings.embed_documents(documents)\n", + "print([e[:3] for e in response]) # Show first 3 elements of each embedding" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wrapping Other Types of Endpoints\n", + "\n", + "The example above uses an embedding model hosted as a Foundation Models API. To learn about how to use the other endpoint types, please refer to the documentation for `ChatDatabricks`. While the model type is different, required steps are the same.\n", + "\n", + "* [Custom Model Endpoint](https://python.langchain.com/v0.2/docs/integrations/chat/databricks/#wrapping-custom-model-endpoint)\n", + "* [External Models](https://python.langchain.com/v0.2/docs/integrations/chat/databricks/#wrapping-external-models)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all ChatDatabricks features and configurations head to the API reference: https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docs/docs/integrations/text_embedding/index.mdx b/docs/docs/integrations/text_embedding/index.mdx new file mode 100644 index 0000000000000..e4e191aa88e0d --- /dev/null +++ b/docs/docs/integrations/text_embedding/index.mdx @@ -0,0 +1,114 @@ +--- +sidebar_position: 0 +sidebar_class_name: hidden +--- + +# Embedding models + +**Embedding model** classes are implemented by inheriting the [Embeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.Embeddings.html) class. + +This table lists all 100 derived classes. 
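As context for the table below, here is a minimal sketch of what such a subclass looks like. It is a toy, deterministic implementation for illustration only, not one of the integrations listed in the table:

```python
from typing import List

from langchain_core.embeddings import Embeddings


class ToyEmbeddings(Embeddings):
    """A toy embedding model that folds character codes into a fixed-size vector."""

    def __init__(self, size: int = 8) -> None:
        self.size = size

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text: str) -> List[float]:
        vector = [0.0] * self.size
        for i, char in enumerate(text):
            vector[i % self.size] += float(ord(char))
        return vector
```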
+ + +| Namespace 🔻 | Class | +|------------|---------| +| langchain.chains.hyde.base | [HypotheticalDocumentEmbedder](https://api.python.langchain.com/en/latest/chains/langchain.chains.hyde.base.HypotheticalDocumentEmbedder.html) | +| langchain.embeddings.cache | [CacheBackedEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain.embeddings.cache.CacheBackedEmbeddings.html) | +| langchain_ai21.embeddings | [AI21Embeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_ai21.embeddings.AI21Embeddings.html) | +| langchain_aws.embeddings.bedrock | [BedrockEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_aws.embeddings.bedrock.BedrockEmbeddings.html) | +| langchain_cohere.embeddings | [CohereEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_cohere.embeddings.CohereEmbeddings.html) | +| langchain_community.embeddings.aleph_alpha | [AlephAlphaAsymmetricSemanticEmbedding](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.aleph_alpha.AlephAlphaAsymmetricSemanticEmbedding.html) | +| langchain_community.embeddings.aleph_alpha | [AlephAlphaSymmetricSemanticEmbedding](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.aleph_alpha.AlephAlphaSymmetricSemanticEmbedding.html) | +| langchain_community.embeddings.anyscale | [AnyscaleEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.anyscale.AnyscaleEmbeddings.html) | +| langchain_community.embeddings.awa | [AwaEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.awa.AwaEmbeddings.html) | +| langchain_community.embeddings.azure_openai | [AzureOpenAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.azure_openai.AzureOpenAIEmbeddings.html) | +| langchain_community.embeddings.baichuan | [BaichuanTextEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.baichuan.BaichuanTextEmbeddings.html) | +| langchain_community.embeddings.baidu_qianfan_endpoint | [QianfanEmbeddingsEndpoint](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.baidu_qianfan_endpoint.QianfanEmbeddingsEndpoint.html) | +| langchain_community.embeddings.bedrock | [BedrockEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.bedrock.BedrockEmbeddings.html) | +| langchain_community.embeddings.bookend | [BookendEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.bookend.BookendEmbeddings.html) | +| langchain_community.embeddings.clarifai | [ClarifaiEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.clarifai.ClarifaiEmbeddings.html) | +| langchain_community.embeddings.cloudflare_workersai | [CloudflareWorkersAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.cloudflare_workersai.CloudflareWorkersAIEmbeddings.html) | +| langchain_community.embeddings.clova | [ClovaEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.clova.ClovaEmbeddings.html) | +| langchain_community.embeddings.cohere | [CohereEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.cohere.CohereEmbeddings.html) | +| langchain_community.embeddings.dashscope | 
[DashScopeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.dashscope.DashScopeEmbeddings.html) | +| langchain_community.embeddings.databricks | [DatabricksEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.databricks.DatabricksEmbeddings.html) | +| langchain_community.embeddings.deepinfra | [DeepInfraEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.deepinfra.DeepInfraEmbeddings.html) | +| langchain_community.embeddings.edenai | [EdenAiEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.edenai.EdenAiEmbeddings.html) | +| langchain_community.embeddings.elasticsearch | [ElasticsearchEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.elasticsearch.ElasticsearchEmbeddings.html) | +| langchain_community.embeddings.embaas | [EmbaasEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.embaas.EmbaasEmbeddings.html) | +| langchain_community.embeddings.ernie | [ErnieEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ernie.ErnieEmbeddings.html) | +| langchain_community.embeddings.fake | [DeterministicFakeEmbedding](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.fake.DeterministicFakeEmbedding.html) | +| langchain_community.embeddings.fake | [FakeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.fake.FakeEmbeddings.html) | +| langchain_community.embeddings.fastembed | [FastEmbedEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.fastembed.FastEmbedEmbeddings.html) | +| langchain_community.embeddings.gigachat | [GigaChatEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.gigachat.GigaChatEmbeddings.html) | +| langchain_community.embeddings.google_palm | [GooglePalmEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.google_palm.GooglePalmEmbeddings.html) | +| langchain_community.embeddings.gpt4all | [GPT4AllEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.gpt4all.GPT4AllEmbeddings.html) | +| langchain_community.embeddings.gradient_ai | [GradientEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.gradient_ai.GradientEmbeddings.html) | +| langchain_community.embeddings.huggingface | [HuggingFaceBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface.HuggingFaceBgeEmbeddings.html) | +| langchain_community.embeddings.huggingface | [HuggingFaceEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface.HuggingFaceEmbeddings.html) | +| langchain_community.embeddings.huggingface | [HuggingFaceInferenceAPIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface.HuggingFaceInferenceAPIEmbeddings.html) | +| langchain_community.embeddings.huggingface | [HuggingFaceInstructEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface.HuggingFaceInstructEmbeddings.html) | +| langchain_community.embeddings.huggingface_hub | 
[HuggingFaceHubEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.huggingface_hub.HuggingFaceHubEmbeddings.html) | +| langchain_community.embeddings.infinity | [InfinityEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.infinity.InfinityEmbeddings.html) | +| langchain_community.embeddings.infinity_local | [InfinityEmbeddingsLocal](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.infinity_local.InfinityEmbeddingsLocal.html) | +| langchain_community.embeddings.ipex_llm | [IpexLLMBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ipex_llm.IpexLLMBgeEmbeddings.html) | +| langchain_community.embeddings.itrex | [QuantizedBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.itrex.QuantizedBgeEmbeddings.html) | +| langchain_community.embeddings.javelin_ai_gateway | [JavelinAIGatewayEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.javelin_ai_gateway.JavelinAIGatewayEmbeddings.html) | +| langchain_community.embeddings.jina | [JinaEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.jina.JinaEmbeddings.html) | +| langchain_community.embeddings.johnsnowlabs | [JohnSnowLabsEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.johnsnowlabs.JohnSnowLabsEmbeddings.html) | +| langchain_community.embeddings.laser | [LaserEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.laser.LaserEmbeddings.html) | +| langchain_community.embeddings.llamacpp | [LlamaCppEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.llamacpp.LlamaCppEmbeddings.html) | +| langchain_community.embeddings.llamafile | [LlamafileEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.llamafile.LlamafileEmbeddings.html) | +| langchain_community.embeddings.llm_rails | [LLMRailsEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.llm_rails.LLMRailsEmbeddings.html) | +| langchain_community.embeddings.localai | [LocalAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.localai.LocalAIEmbeddings.html) | +| langchain_community.embeddings.minimax | [MiniMaxEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.minimax.MiniMaxEmbeddings.html) | +| langchain_community.embeddings.mlflow | [MlflowCohereEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.mlflow.MlflowCohereEmbeddings.html) | +| langchain_community.embeddings.mlflow | [MlflowEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.mlflow.MlflowEmbeddings.html) | +| langchain_community.embeddings.mlflow_gateway | [MlflowAIGatewayEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.mlflow_gateway.MlflowAIGatewayEmbeddings.html) | +| langchain_community.embeddings.modelscope_hub | [ModelScopeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.modelscope_hub.ModelScopeEmbeddings.html) | +| langchain_community.embeddings.mosaicml | 
[MosaicMLInstructorEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.mosaicml.MosaicMLInstructorEmbeddings.html) | +| langchain_community.embeddings.nemo | [NeMoEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.nemo.NeMoEmbeddings.html) | +| langchain_community.embeddings.nlpcloud | [NLPCloudEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.nlpcloud.NLPCloudEmbeddings.html) | +| langchain_community.embeddings.oci_generative_ai | [OCIGenAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.oci_generative_ai.OCIGenAIEmbeddings.html) | +| langchain_community.embeddings.octoai_embeddings | [OctoAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.octoai_embeddings.OctoAIEmbeddings.html) | +| langchain_community.embeddings.ollama | [OllamaEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ollama.OllamaEmbeddings.html) | +| langchain_community.embeddings.openai | [OpenAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.openai.OpenAIEmbeddings.html) | +| langchain_community.embeddings.openvino | [OpenVINOBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.openvino.OpenVINOBgeEmbeddings.html) | +| langchain_community.embeddings.openvino | [OpenVINOEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.openvino.OpenVINOEmbeddings.html) | +| langchain_community.embeddings.optimum_intel | [QuantizedBiEncoderEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.optimum_intel.QuantizedBiEncoderEmbeddings.html) | +| langchain_community.embeddings.oracleai | [OracleEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.oracleai.OracleEmbeddings.html) | +| langchain_community.embeddings.ovhcloud | [OVHCloudEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ovhcloud.OVHCloudEmbeddings.html) | +| langchain_community.embeddings.premai | [PremAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.premai.PremAIEmbeddings.html) | +| langchain_community.embeddings.sagemaker_endpoint | [SagemakerEndpointEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.sagemaker_endpoint.SagemakerEndpointEmbeddings.html) | +| langchain_community.embeddings.sambanova | [SambaStudioEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.sambanova.SambaStudioEmbeddings.html) | +| langchain_community.embeddings.self_hosted | [SelfHostedEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.self_hosted.SelfHostedEmbeddings.html) | +| langchain_community.embeddings.self_hosted_hugging_face | [SelfHostedHuggingFaceEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.self_hosted_hugging_face.SelfHostedHuggingFaceEmbeddings.html) | +| langchain_community.embeddings.self_hosted_hugging_face | [SelfHostedHuggingFaceInstructEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.self_hosted_hugging_face.SelfHostedHuggingFaceInstructEmbeddings.html) | 
+| langchain_community.embeddings.solar | [SolarEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.solar.SolarEmbeddings.html) | +| langchain_community.embeddings.spacy_embeddings | [SpacyEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.spacy_embeddings.SpacyEmbeddings.html) | +| langchain_community.embeddings.sparkllm | [SparkLLMTextEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.sparkllm.SparkLLMTextEmbeddings.html) | +| langchain_community.embeddings.tensorflow_hub | [TensorflowHubEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.tensorflow_hub.TensorflowHubEmbeddings.html) | +| langchain_community.embeddings.text2vec | [Text2vecEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.text2vec.Text2vecEmbeddings.html) | +| langchain_community.embeddings.titan_takeoff | [TitanTakeoffEmbed](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.titan_takeoff.TitanTakeoffEmbed.html) | +| langchain_community.embeddings.vertexai | [VertexAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.vertexai.VertexAIEmbeddings.html) | +| langchain_community.embeddings.volcengine | [VolcanoEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.volcengine.VolcanoEmbeddings.html) | +| langchain_community.embeddings.voyageai | [VoyageEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.voyageai.VoyageEmbeddings.html) | +| langchain_community.embeddings.xinference | [XinferenceEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.xinference.XinferenceEmbeddings.html) | +| langchain_community.embeddings.yandex | [YandexGPTEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.yandex.YandexGPTEmbeddings.html) | +| langchain_community.embeddings.zhipuai | [ZhipuAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.zhipuai.ZhipuAIEmbeddings.html) | +| langchain_core.embeddings.fake | [DeterministicFakeEmbedding](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.fake.DeterministicFakeEmbedding.html) | +| langchain_core.embeddings.fake | [FakeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_core.embeddings.fake.FakeEmbeddings.html) | +| langchain_elasticsearch.embeddings | [ElasticsearchEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_elasticsearch.embeddings.ElasticsearchEmbeddings.html) | +| langchain_fireworks.embeddings | [FireworksEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_fireworks.embeddings.FireworksEmbeddings.html) | +| langchain_google_genai.embeddings | [GoogleGenerativeAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_google_genai.embeddings.GoogleGenerativeAIEmbeddings.html) | +| langchain_google_genai.google_vector_store | [ServerSideEmbedding](https://api.python.langchain.com/en/latest/google_vector_store/langchain_google_genai.google_vector_store.ServerSideEmbedding.html) | +| langchain_google_vertexai.embeddings | 
[VertexAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_google_vertexai.embeddings.VertexAIEmbeddings.html) | +| langchain_huggingface.embeddings.huggingface | [HuggingFaceEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_huggingface.embeddings.huggingface.HuggingFaceEmbeddings.html) | +| langchain_huggingface.embeddings.huggingface_endpoint | [HuggingFaceEndpointEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_huggingface.embeddings.huggingface_endpoint.HuggingFaceEndpointEmbeddings.html) | +| langchain_ibm.embeddings | [WatsonxEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_ibm.embeddings.WatsonxEmbeddings.html) | +| langchain_mistralai.embeddings | [MistralAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_mistralai.embeddings.MistralAIEmbeddings.html) | +| langchain_nomic.embeddings | [NomicEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_nomic.embeddings.NomicEmbeddings.html) | +| langchain_nvidia_ai_endpoints.embeddings | [NVIDIAEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_nvidia_ai_endpoints.embeddings.NVIDIAEmbeddings.html) | +| langchain_together.embeddings | [TogetherEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_together.embeddings.TogetherEmbeddings.html) | +| langchain_upstage.embeddings | [UpstageEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_upstage.embeddings.UpstageEmbeddings.html) | +| langchain_voyageai.embeddings | [VoyageAIEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_voyageai.embeddings.VoyageAIEmbeddings.html) | diff --git a/docs/docs/integrations/text_embedding/ipex_llm.ipynb b/docs/docs/integrations/text_embedding/ipex_llm.ipynb new file mode 100644 index 0000000000000..ef13acbb9d868 --- /dev/null +++ b/docs/docs/integrations/text_embedding/ipex_llm.ipynb @@ -0,0 +1,101 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Local BGE Embeddings with IPEX-LLM on Intel CPU\n", + "\n", + "> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\n", + "\n", + "This example goes over how to use LangChain to conduct embedding tasks with `ipex-llm` optimizations on Intel CPU. This would be helpful in applications such as RAG, document QA, etc.\n", + "\n", + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain langchain-community" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install IPEX-LLM for optimizations on Intel CPU, as well as `sentence-transformers`." 
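
All of the classes in the embeddings table above implement LangChain's shared `Embeddings` interface (`embed_documents` for batches, `embed_query` for single queries), so providers can generally be swapped without changing the surrounding code. Before the provider-specific notebooks below, here is a minimal sketch of that shared interface; `FakeEmbeddings` (listed in the table) is used purely as a credential-free stand-in, and the vector size is an arbitrary assumption for the demo.

```python
# Minimal sketch of the shared Embeddings interface; FakeEmbeddings (listed in the
# table above) is a stand-in so the snippet runs without any provider credentials.
from langchain_core.embeddings.fake import FakeEmbeddings

embeddings = FakeEmbeddings(size=256)  # size is an arbitrary assumption for this demo

# embed_documents: batch-embed texts, e.g. before indexing them into a vector store
doc_vectors = embeddings.embed_documents(["hello world", "goodbye world"])

# embed_query: embed a single query string for similarity search
query_vector = embeddings.embed_query("hello world")

print(len(doc_vectors), len(doc_vectors[0]), len(query_vector))
```

Any provider-specific class from the table (for example `OpenAIEmbeddings` or `NomicEmbeddings`) can be dropped in with its own constructor arguments.
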
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --pre --upgrade ipex-llm[all] --extra-index-url https://download.pytorch.org/whl/cpu\n", + "%pip install sentence-transformers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note**\n", + ">\n", + "> For Windows users, the `--extra-index-url https://download.pytorch.org/whl/cpu` option is not required when installing `ipex-llm`.\n", + "\n", + "## Basic Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.embeddings import IpexLLMBgeEmbeddings\n", + "\n", + "embedding_model = IpexLLMBgeEmbeddings(\n", + " model_name=\"BAAI/bge-large-en-v1.5\",\n", + " model_kwargs={},\n", + " encode_kwargs={\"normalize_embeddings\": True},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "API Reference\n", + "- [IpexLLMBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ipex_llm.IpexLLMBgeEmbeddings.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sentence = \"IPEX-LLM is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\"\n", + "query = \"What is IPEX-LLM?\"\n", + "\n", + "text_embeddings = embedding_model.embed_documents([sentence, query])\n", + "print(f\"text_embeddings[0][:10]: {text_embeddings[0][:10]}\")\n", + "print(f\"text_embeddings[1][:10]: {text_embeddings[1][:10]}\")\n", + "\n", + "query_embedding = embedding_model.embed_query(query)\n", + "print(f\"query_embedding[:10]: {query_embedding[:10]}\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/text_embedding/ipex_llm_gpu.ipynb b/docs/docs/integrations/text_embedding/ipex_llm_gpu.ipynb new file mode 100644 index 0000000000000..3bfe477b296a7 --- /dev/null +++ b/docs/docs/integrations/text_embedding/ipex_llm_gpu.ipynb @@ -0,0 +1,164 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Local BGE Embeddings with IPEX-LLM on Intel GPU\n", + "\n", + "> [IPEX-LLM](https://github.com/intel-analytics/ipex-llm) is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\n", + "\n", + "This example goes over how to use LangChain to conduct embedding tasks with `ipex-llm` optimizations on Intel GPU. This would be helpful in applications such as RAG, document QA, etc.\n", + "\n", + "> **Note**\n", + ">\n", + "> It is recommended that only Windows users with Intel Arc A-Series GPU (except for Intel Arc A300-Series or Pro A60) run this Jupyter notebook directly. For other cases (e.g. 
Linux users, Intel iGPU, etc.), it is recommended to run the code as Python scripts in a terminal for the best experience.\n", + "\n", + "## Install Prerequisites\n", + "To benefit from IPEX-LLM on Intel GPUs, there are several prerequisite steps for tools installation and environment preparation.\n", + "\n", + "If you are a Windows user, visit the [Install IPEX-LLM on Windows with Intel GPU Guide](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html), and follow [Install Prerequisites](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_windows_gpu.html#install-prerequisites) to update GPU driver (optional) and install Conda.\n", + "\n", + "If you are a Linux user, visit the [Install IPEX-LLM on Linux with Intel GPU](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html), and follow [**Install Prerequisites**](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Quickstart/install_linux_gpu.html#install-prerequisites) to install GPU driver, Intel® oneAPI Base Toolkit 2024.0, and Conda.\n", + "\n", + "## Setup\n", + "\n", + "After installing the prerequisites, you should have a conda environment with all prerequisites installed. **Start the Jupyter service in this conda environment**:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain langchain-community" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install IPEX-LLM for optimizations on Intel GPU, as well as `sentence-transformers`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --pre --upgrade ipex-llm[xpu] --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/\n", + "%pip install sentence-transformers" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note**\n", + ">\n", + "> You can also use `https://pytorch-extension.intel.com/release-whl/stable/xpu/cn/` as the extra-index-url.\n", + "\n", + "## Runtime Configuration\n", + "\n", + "For optimal performance, it is recommended to set several environment variables based on your device:\n", + "\n", + "### For Windows Users with Intel Core Ultra integrated GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\"\n", + "os.environ[\"BIGDL_LLM_XMX_DISABLED\"] = \"1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### For Windows Users with Intel Arc A-Series GPU" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "os.environ[\"SYCL_CACHE_PERSISTENT\"] = \"1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> **Note**\n", + ">\n", + "> The first time each model runs on an Intel iGPU/Intel Arc A300-Series or Pro A60, it may take several minutes to compile.\n", + ">\n", + "> For other GPU types, please refer to [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#runtime-configuration) for Windows users, and [here](https://ipex-llm.readthedocs.io/en/latest/doc/LLM/Overview/install_gpu.html#id5) for Linux users.\n", + "\n", + "\n", + "## Basic Usage\n", + "\n", + "Setting `device` to `\"xpu\"` in `model_kwargs` when initializing `IpexLLMBgeEmbeddings` 
will put the embedding model on Intel GPU and benefit from IPEX-LLM optimizations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.embeddings import IpexLLMBgeEmbeddings\n", + "\n", + "embedding_model = IpexLLMBgeEmbeddings(\n", + " model_name=\"BAAI/bge-large-en-v1.5\",\n", + " model_kwargs={\"device\": \"xpu\"},\n", + " encode_kwargs={\"normalize_embeddings\": True},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "API Reference\n", + "- [IpexLLMBgeEmbeddings](https://api.python.langchain.com/en/latest/embeddings/langchain_community.embeddings.ipex_llm.IpexLLMBgeEmbeddings.html)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sentence = \"IPEX-LLM is a PyTorch library for running LLM on Intel CPU and GPU (e.g., local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency.\"\n", + "query = \"What is IPEX-LLM?\"\n", + "\n", + "text_embeddings = embedding_model.embed_documents([sentence, query])\n", + "print(f\"text_embeddings[0][:10]: {text_embeddings[0][:10]}\")\n", + "print(f\"text_embeddings[1][:10]: {text_embeddings[1][:10]}\")\n", + "\n", + "query_embedding = embedding_model.embed_query(query)\n", + "print(f\"query_embedding[:10]: {query_embedding[:10]}\")" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/text_embedding/ovhcloud.ipynb b/docs/docs/integrations/text_embedding/ovhcloud.ipynb new file mode 100644 index 0000000000000..f79a022c695c6 --- /dev/null +++ b/docs/docs/integrations/text_embedding/ovhcloud.ipynb @@ -0,0 +1,77 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6a3268e4", + "metadata": {}, + "source": [ + "# OVHcloud\n", + "\n", + "> In order to use this model you need to create a new token on the AI Endpoints website: https://endpoints.ai.cloud.ovh.net/.\n", + "\n", + "This notebook explains how to use OVHCloudEmbeddings, which is included in the langchain_community package, to embed texts in langchain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "3da0fce0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embedding generated by OVHCloudEmbeddings: [0.013393167, 0.03732136, -0.0042296825, 0.042756177, 0.022484222, -0.0045062024, -0.039632313, -0.016722068, 0.035480227, 0.021987874, -0.007831321, -0.002374333, 0.14827625, 0.044154406, -0.038416132, -0.051982213, 0.0054677227, -0.030913774, 0.03872776, 0.00080121466, 0.06914864, -0.01889285, 0.051963896, -0.049868602, 0.032817934, -0.012699772, 0.029378777, -0.019135607, -0.014866451, 0.010978144, 0.036102116, -0.024129838, 0.0059551136, 0.025592148, 0.0330448, 0.05326782, -0.008618608, -0.04626763, 0.046697352, 0.007115799, 0.025649866, 0.02479821, -0.011125191, -0.011960746, 0.027974335, -0.008805084, -0.0014219242, -0.016421443, -0.042280562, -0.027239632, 0.013829875, 0.008757527, 0.024184346, 0.01559286, -0.05868668, -0.058345765, 0.048597623, 0.027461633, -0.03701432, 0.014960674, 0.0055076545, 0.011801827, 0.003185198, 0.024615683, 0.04377218, -0.02428408, -0.01129621, -0.008451086, -0.024531357, -0.029689042, 0.019617215, -0.009499967, 0.034481872, -0.022548521, -0.027568225, -0.027564893, -0.035448182, 0.042752527, -0.017667985, -0.006429351, 0.028172225, 0.021843985, 0.05518236, 0.026108254, -0.01864022, -0.021649806, -0.042575743, 0.043298125, 0.032832835, 0.05166516, 0.011787296, 0.013220338, -0.028256241, 0.028401077, -0.0023671573, 0.005722865, 0.0051106545, 0.02541163, 0.033874203, -0.038719863, 0.010436916, -0.043236364, -0.023228278, -0.03358519, -0.024077564, 8.423713e-05, -0.010243255, -0.0191856, 0.013305917, -0.020698957, -0.04052005, 0.026274782, 0.054635983, -0.057551324, 0.034959268, -0.025090441, 0.02773644, -0.031936403, 0.008175286, -0.05568246, -0.013420259, 0.02285795, -0.012539113, -0.018429684, 0.061069433, -0.026720565, -0.0008156537, 0.0014032789, 0.043951243, -0.0065814317, 0.011629428, -0.06288238, -0.011015484, 0.02559024, -0.041962583, 0.037935987, 0.027115924, 0.0132008735, 0.010924172, 0.010795455, 0.010284355, -0.060599748, 0.037463877, 0.051818028, 0.027654605, -0.054811046, 0.019782307, 0.003971803, -0.028695354, 0.086431496, 0.038262486, -0.039732426, -0.0045595192, -0.016946813, 0.047315836, -0.02716498, -0.07109412, -0.01315921, -0.010889335, 0.043004725, 0.060072046, 0.013834794, 0.014518575, -0.013399902, 0.050983083, -0.0066714315, -0.0225039, 0.01304092, -0.009736523, -0.038328674, -0.02377573, 0.010796089, -0.058065176, 0.008482149, 0.016022328, -0.023443135, 0.008705595, -0.03286985, -0.028836675, -0.05872433, -0.056536626, -0.04216553, 0.00037138985, 0.0410015, -0.0035549132, -0.01765879, 0.0072336365, -0.02078376, 0.018013686, 0.004380657, 0.009426806, 0.04496503, 0.013016738, 0.041157607, 0.011397934, 0.018421972, 0.032070283, 0.023589233, -0.031510174, -0.03177538, 0.019968262, 0.0031847644, 0.03337904, 0.04362253, -0.032639876, -0.078984305, 0.022978902, 0.022103759, 0.03148644, 0.040037263, -0.013258827, 0.04515543, -0.045400217, 0.031163648, -0.038804565, -0.031007605, 0.023038942, -0.0011629256, -0.0042446335, 0.038747445, 0.039732117, -0.052249946, 0.003977225, -0.005010845, -0.025907055, 0.0050243125, 0.042312477, -0.014793418, 0.03156775, 0.015165637, 0.03156905, 0.025180826, -0.04529393, 0.029273387, -0.033187654, -0.022706378, 0.030855985, -0.023539526, -0.030362226, -0.10679687, -0.030312601, 0.019773543, 0.0250672, -0.019900797, 0.027153352, -0.0407523, -0.053737476, 0.037743907, -0.03822609, 
0.004701334, 0.018653622, -0.029043352, 0.013663203, -0.0109640695, 0.013055596, 0.0071652965, -0.045625277, 0.008842614, -0.010782444, 0.029200679, 0.0108302515, -0.041484382, -0.027877813, -0.0050561433, 0.001051684, 0.028695228, 0.111581974, -0.033906773, -0.005789528, -0.01691454, -0.057842113, -0.017875662, -0.063073546, 0.04463758, 0.014070596, -0.02932851, -0.06885991, 0.022553641, -0.009132225, -0.0017104117, -0.02573207, 0.07448002, -0.03711256, -0.051314738, -0.034982048, 0.027141424, -0.06874479, -0.02388671, -0.06571741, 0.013449982, 0.0638684, 0.040945932, -0.026943449, 0.03134517, 0.0344114, 0.01026371, 0.038227987, -0.027564207, 0.016802547, 0.0010686627, -0.030647304, -0.042039596, 0.054497436, -0.040464804, -0.03338543, 0.02260303, 0.09915977, 0.022541435, -0.039202042, 0.050876632, -0.03609107, -0.034563474, -0.06706467, 0.02145668, 0.03366738, -0.03411402, 0.012604448, 0.08058551, -0.007932673, 0.06885105, -0.030523479, 0.027278991, -0.046861324, -0.093850814, 0.030921744, -0.0001421079, 0.030683668, 0.004855684, 0.076923594, 0.010595104, -0.027388366, 0.015755313, -0.008487381, 0.06655968, -0.08168016, -0.0018116324, -0.012591006, -0.027123308, 0.07031095, 0.04333807, -0.03915123, 0.044605616, -0.022781648, 0.0093986215, 0.08933287, -0.055434275, -0.0102571435, -0.013561327, -0.027190993, 0.036250908, 0.034637712, -0.023693744, 0.028632542, 0.0072604613, -0.027789399, -0.012132866, 0.062264692, -0.038561203, -0.0028939024, -0.052279804, -0.012950528, 0.029368099, -0.01727593, -0.009667129, 0.031548638, 0.042698074, -0.015050672, -0.03120724, 0.024719479, -0.01405534, 0.100237176, 0.03711969, -0.025223013, -0.015259215, 0.04748944, -0.025767656, 0.047430392, -0.03020851, -0.015670097, 0.07102661, 0.065665215, 0.019847495, 0.029551532, -0.0025012908, -0.036311474, 0.07141015, 0.009710868, -0.03509336, 0.033391234, -0.03209491, -0.0020321321, -0.022850651, -0.015315501, 0.006990975, 0.028062506, -0.017586777, 0.034145683, -0.0033781836, 0.004156304, -0.032210104, 0.010601066, 0.0048922407, -0.01577382, 0.048883524, 0.028200202, -0.0072908103, -0.035968613, 0.0346475, -0.027046643, -0.020911021, 0.04035775, -0.027658584, 0.051322017, 0.02296099, -0.023528099, -0.022320902, -0.027127141, 0.031832132, 0.05151557, 0.02551352, -0.03645325, 0.025603358, 0.009815077, 0.005696306, 0.037177127, -0.055942845, -0.01414183, -0.032122746, 0.008900472, -0.04909773, -0.025821397, 0.008453814, -0.00066156825, 0.00021046218, 0.033632305, -0.037483413, 0.035067603, -0.0068717552, -0.012747838, 0.033523142, -0.018556861, -0.0013153392, 0.00041785053, 0.0056918347, -0.047996394, -0.0121070435, -0.016490333, 0.021066675, -0.013169224, -0.004945434, 0.018413335, 0.021549506, 0.010436816, -0.005243072, 0.037264977, -0.05419949, 0.051790405, -0.02995299, 0.00830641, -0.09983456, 0.035929922, 0.046650928, -0.02201099, -0.015233417, -0.03984102, -0.02588652, 0.014141513, -0.032033846, -0.00088278914, -0.017325765, -0.041028026, -0.04869172, 0.032118957, -0.0049999636, 0.031933237, 0.03276276, -0.013989229, 0.049034916, 0.026281273, -0.046090055, -0.010919109, -0.026676904, -0.025712457, 0.03475912, 0.025149662, 0.0555755, -0.06140382, 0.061161127, 0.0030567874, -0.0365629, -0.01902828, 0.01566279, 0.026230726, -0.016239835, -0.004278232, 0.033120655, -0.03901436, -0.091688305, 0.02096186, 0.04452546, -0.02552988, -0.025540574, -0.048035506, 0.00352354, -0.042762343, 0.030095248, -0.043667283, -0.028081333, 0.03475098, -0.038900424, 0.11706695, -0.06385194, -0.01991146, -0.042574335, 0.013697014, 
0.014061967, -0.027709525, 0.015497077, 0.0059057726, -0.017043548, 0.016371854, 0.011076671, -0.043543536, -0.021559414, 0.014312511, -0.059308562, 0.027888788, -0.06514841, 0.026988931, -0.021745581, 0.04002255, -0.043083742, 0.023495318, -0.08972084, -0.061623067, 0.02676458, -0.016847553, -0.017119622, 0.038647383, -0.008218997, 0.0025763474, -0.028317345, 0.014624456, -0.013914129, 0.02815451, -0.036198556, 0.009238764, -0.015274136, 0.015579736, 0.0032229964, 0.006329244, -0.013670273, 0.00665867, 0.05292342, -0.03484488, 0.0024167884, -0.0057902746, 0.05282686, 0.0005000245, -0.017153509, -0.028036479, -0.027311599, -0.009889913, 0.02431086, -0.0038315805, 0.03333115, 0.02464384, -0.025810424, -0.062422995, 0.060066104, -0.092039555, 0.014482284, 0.02608793, -0.021194633, 0.061447322, 0.04130138, -0.03332812, -0.042047437, 0.05123424, -0.07453314, -0.028080523, 0.0046392465, -0.029648444, 0.028595202, -0.013599953, 0.014632302, 0.056478836, -0.058442805, -0.1930449, 0.021608872, -0.015712231, -0.06269486, 0.020336937, 0.011157828, -0.0015483286, 0.007450834, 0.019511258, 0.0021432014, 0.039572544, -0.06631538, 0.061495554, -0.027214238, -0.03019643, 0.021765657, -0.018758483, -0.059369385, 0.0104420185, 0.04438893, 0.011059414, 0.07466604, -0.026724849, 0.0012903131, -0.046258427, -0.044896547, -0.027910942, 0.026439613, 0.02349909, 0.047514588, 0.0037869548, 0.014885506, 0.04653929, 0.03331027, -0.02470549, 0.006271202, -0.026514992, 0.02139273, -0.07263723, 0.016668767, -0.013725435, -0.06854902, -0.0118108615, 0.021851629, -0.03325869, 0.05457194, 0.026849618, 0.014450217, -0.04094698, -0.025690362, 0.08235594, 0.01301538, -0.08728046, -0.022502083, -0.047967393, 0.053036716, 0.03064825, 0.016691066, 0.05352114, -0.028005075, 0.029265208, 0.03578327, -0.0068425513, 0.028201371, -0.022989936, -0.0082469685, 0.014952235, -0.02482578, -0.001407646, 0.032534935, 0.029727332, -0.01972456, -0.05026965, 0.037474174, 0.041771494, -0.032914527, 0.023217645, -0.025604516, -0.076627776, 0.020639537, -0.02778371, -0.03310011, 0.008182295, 0.01675386, -0.031687014, -0.023041338, 0.031311274, 0.004942907, 0.032867808, 0.03349065, -0.010631844, -0.023672776, 0.011756044, 0.016093759, 0.035696387, -0.011801524, -0.047790904, 0.04692089, -0.004650366, -0.03204725, 0.0035557016, -0.025661316, -0.013585784, 0.034187928, 0.023118816, -0.021673787, -0.05335118, 0.027510943, -0.009999087, -0.03761488, 0.0060729063, 0.025209012, 0.021224871, -0.014786133, -0.013114826, -0.017566234, 0.007534834, -0.043879513, -0.037670616, -0.05683524, -0.00272405, -0.012196419, 0.04363327, 0.00737273, 0.05148426, 0.0055279597, -0.015904596, 0.047310952, 0.03045511, 0.030634195, -0.009361755, -0.039683104, 0.06814403, -0.051854312, -0.011992879, 0.06865308, 0.041739926, -0.017870232, 0.032383803, 0.05271035, 0.05132444, -0.021921795, -0.004006664, 0.013597788, -0.040238887, 0.030131282, 0.02308868, -0.04397555, 0.03917002, -0.011500181, -0.037450697, -0.09086526, -0.031017756, -0.016214617, -0.028405681, 0.009200299, 0.0074356734, -0.0019842882, -0.049663708, 0.035164695, -0.044908658, -0.05643445, 0.02870157, 0.03714114, 0.033664975, -0.013390485, -0.0004991374, 0.02155712, 0.021897627, 0.047690097, -0.01916393, -0.01362889, 0.025618015, 0.018122612, 0.036994122, -0.01447489, 0.040231027, -0.047183823, -0.020032013, 0.027981037]\n" + ] + } + ], + "source": [ + "from langchain_community.embeddings.ovhcloud import OVHCloudEmbeddings\n", + "\n", + "embedder = OVHCloudEmbeddings(\n", + " 
model_name=\"multilingual-e5-base\", region=\"kepler\", access_token=\"MyAccessToken\"\n", + ")\n", + "embed = embedder.embed_query(\"Hello World!\")\n", + "\n", + "\"\"\" verify \"\"\"\n", + "print(f\"Embedding generated by OVHCloudEmbeddings: {embed}\")" + ] + }, + { + "cell_type": "markdown", + "id": "47c9af05-4d25-40f2-9305-7bccf1e14c64", + "metadata": {}, + "source": [ + "## Further reading\n", + "- [Enhance your applications with AI Endpoints](https://blog.ovhcloud.com/enhance-your-applications-with-ai-endpoints/)\n", + "- [How to use AI Endpoints and LangChain4j](https://blog.ovhcloud.com/how-to-use-ai-endpoints-and-langchain4j/)\n", + "- [LLMs streaming with AI Endpoints and LangChain4j](https://blog.ovhcloud.com/llms-streaming-with-ai-endpoints-and-langchain4j/)\n", + "- [How to use AI Endpoints and LangChain to create a chatbot](https://blog.ovhcloud.com/how-to-use-ai-endpoints-and-langchain-to-create-a-chatbot/)\n", + "- [How to use AI Endpoints, LangChain and Javascript to create a chatbot](https://blog.ovhcloud.com/how-to-use-ai-endpoints-langchain-and-javascript-to-create-a-chatbot/)\n", + "- [RAG chatbot using AI Endpoints and LangChain](https://blog.ovhcloud.com/rag-chatbot-using-ai-endpoints-and-langchain/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/tools/asknews.ipynb b/docs/docs/integrations/tools/asknews.ipynb new file mode 100644 index 0000000000000..42810b72df737 --- /dev/null +++ b/docs/docs/integrations/tools/asknews.ipynb @@ -0,0 +1,172 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a6f91f20", + "metadata": {}, + "source": [ + "# AskNews" + ] + }, + { + "cell_type": "markdown", + "id": "5e24a889", + "metadata": {}, + "source": [ + "> [AskNews](https://asknews.app) infuses any LLM with the latest global news (or historical news), using a single natural language query. Specifically, AskNews is enriching over 300k articles per day by translating, summarizing, extracting entities, and indexing them into hot and cold vector databases. AskNews puts these vector databases on a low-latency endpoint for you. When you query AskNews, you get back a prompt-optimized string that contains all the most pertinent enrichments (e.g. entities, classifications, translation, summarization). This means that you do not need to manage your own news RAG, and you do not need to worry about how to properly convey news information in a condensed way to your LLM.\n", + "> AskNews is also committed to transparency, which is why our coverage is monitored and diversified across hundreds of countries, 13 languages, and 50 thousand sources. If you'd like to track our source coverage, you can visit our [transparency dashboard](https://asknews.app/en/transparency).\n", + "\n", + "## Setup\n", + "\n", + "The integration lives in the `langchain-community` package. We also need to install the `asknews` package itself.\n", + "\n", + "```bash\n", + "pip install -U langchain-community asknews\n", + "```\n", + "\n", + "We also need to set our AskNews API credentials, which can be obtained at the [AskNews console](https://my.asknews.app)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"ASKNEWS_CLIENT_ID\"] = getpass.getpass()\n", + "os.environ[\"ASKNEWS_CLIENT_SECRET\"] = getpass.getpass()" + ] + }, + { + "cell_type": "markdown", + "id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f", + "metadata": {}, + "source": [ + "## Usage\n", + "\n", + "Here we show how to use the tool individually." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\"\\n\\n[1]:\\ntitle: Market Awaits Comments From Key Fed Official\\nsummary: The market is awaiting comments from Fed Governor Christopher Waller, but it's not expected to move markets significantly. The recent Consumer Price Index (CPI) report showed slimming inflation figures, leading to a conclusion that inflation is being curbed. This has led to a 'soft landing' narrative, causing bullish sentiment in the stock market, with the Dow, S&P 500, Nasdaq, and Russell 2000 indices near all-time highs. NVIDIA is set to report earnings next week, and despite its 95% year-to-date growth, it remains a Zacks Rank #1 (Strong Buy) stock. The article also mentions upcoming economic data releases, including New and Existing Home Sales, S&P flash PMI Services and Manufacturing, Durable Goods, and Weekly Jobless Claims.\\nsource: Yahoo\\npublished: May 17 2024 14:53\\nOrganization: Nasdaq, Fed, NVIDIA, Zacks\\nPerson: Christopher Waller\\nEvent: Weekly Jobless Claims\\nclassification: Business\\nsentiment: 0\\n\\n\\n\\n[2]:\\ntitle: US futures flat as Fed comments cool rate cut optimism\\nsummary: US stock index futures remained flat on Thursday evening, following a weak finish on Wall Street, as Federal Reserve officials warned that bets on interest rate cuts were potentially premature. The Fed officials, including Atlanta Fed President Raphael Bostic, New York Fed President John Williams, and Cleveland Fed President Loretta Mester, stated that the central bank still needed more confidence to cut interest rates, and that the timing of the move remained uncertain. As a result, investors slightly trimmed their expectations for a September rate cut, and the S&P 500 and Nasdaq 100 indexes fell 0.2% and 0.3%, respectively. Meanwhile, Reddit surged 11% after announcing a partnership with OpenAI, while Take-Two Interactive and DXC Technology fell after issuing disappointing earnings guidance.\\nsource: Yahoo\\npublished: May 16 2024 20:08\\nLocation: US, Wall Street\\nDate: September, Thursday\\nOrganization: Atlanta Fed, Cleveland Fed, New York Fed, Fed, Reddit, Take-Two Interactive, DXC Technology, OpenAI, Federal Reserve\\nTitle: President\\nPerson: Loretta Mester, Raphael Bostic, John Williams\\nclassification: Business\\nsentiment: 0\\n\\n\"" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain_community.tools.asknews import AskNewsSearch\n", + "\n", + "tool = AskNewsSearch(max_results=2)\n", + "tool.invoke({\"query\": \"Effect of fed policy on tech sector\"})" + ] + }, + { + "cell_type": "markdown", + "id": "21c5b56f-0da0-485f-b6f5-38950bae4fd0", + "metadata": {}, + "source": [ + "## Chaining\n", + "We show here how to use it as part of an agent. 
We use the OpenAI Functions Agent, so we will need to setup and install the required dependencies for that. We will also use [LangSmith Hub](https://smith.langchain.com/hub) to pull the prompt from, so we will need to install that.\n", + "\n", + "```bash\n", + "pip install -U langchain-openai langchainhub\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a1c8ea19-7100-407d-8e8c-f037f9317255", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "520767b8-9e61-4485-840a-d16f1da5eb3a", + "metadata": { + "ExecuteTime": { + "end_time": "2023-10-21T13:15:37.974229Z", + "start_time": "2023-10-21T13:15:10.007898Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': 'How is the tech sector being affected by fed policy?',\n", + " 'output': 'The tech sector is being affected by federal policy in various ways, particularly in relation to artificial intelligence (AI) regulation and investment. Here are some recent news articles related to the tech sector and federal policy:\\n\\n1. The US Senate has released a bipartisan AI policy roadmap, addressing areas of consensus and disagreement on AI use and development. The roadmap includes recommendations for intellectual property reforms, funding for AI research, sector-specific rules, and transparency requirements. It also emphasizes the need for increased funding for AI innovation and investments in national defense. [Source: The National Law Review]\\n\\n2. A bipartisan group of US senators, led by Senate Majority Leader Chuck Schumer, has proposed allocating at least $32 billion over the next three years to develop AI and establish safeguards around it. The proposal aims to regulate and promote AI development to maintain US competitiveness and improve quality of life. [Source: Cointelegraph]\\n\\n3. The US administration is planning to restrict the export of advanced AI models to prevent China and Russia from accessing the technology. This move is part of efforts to protect national security and prevent the misuse of AI by foreign powers. [Source: O Cafezinho]\\n\\n4. The US and China have discussed the risks of AI technologies, with the US taking the lead in the AI arms race. The US has proposed a $32 billion increase in federal spending on AI to maintain its lead, despite concerns about stifling innovation. 
[Source: AOL]\\n\\nThese articles highlight the ongoing discussions and actions related to AI regulation, investment, and export restrictions that are impacting the tech sector in response to federal policy decisions.'}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain import hub\n", + "from langchain.agents import AgentExecutor, create_openai_functions_agent\n", + "from langchain_community.tools.asknews import AskNewsSearch\n", + "from langchain_openai import ChatOpenAI\n", + "\n", + "prompt = hub.pull(\"hwchase17/openai-functions-agent\")\n", + "llm = ChatOpenAI(temperature=0)\n", + "asknews_tool = AskNewsSearch()\n", + "tools = [asknews_tool]\n", + "agent = create_openai_functions_agent(llm, tools, prompt)\n", + "agent_executor = AgentExecutor(agent=agent, tools=tools)\n", + "agent_executor.invoke({\"input\": \"How is the tech sector being affected by fed policy?\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "917f2045", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/tools/databricks.ipynb b/docs/docs/integrations/tools/databricks.ipynb new file mode 100644 index 0000000000000..49e5bc63905ec --- /dev/null +++ b/docs/docs/integrations/tools/databricks.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Databricks Unity Catalog (UC)\n", + "\n", + "This notebook shows how to use UC functions as LangChain tools.\n", + "\n", + "See Databricks documentation ([AWS](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-sql-function.html)|[Azure](https://learn.microsoft.com/en-us/azure/databricks/sql/language-manual/sql-ref-syntax-ddl-create-sql-function)|[GCP](https://docs.gcp.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-sql-function.html)) to learn how to create SQL or Python functions in UC. Do not skip function and parameter comments, which are critical for LLMs to call functions properly.\n", + "\n", + "In this example notebook, we create a simple Python function that executes arbitrary code and use it as a LangChain tool:\n", + "\n", + "```sql\n", + "CREATE FUNCTION main.tools.python_exec (\n", + " code STRING COMMENT 'Python code to execute. Remember to print the final result to stdout.'\n", + ")\n", + "RETURNS STRING\n", + "LANGUAGE PYTHON\n", + "COMMENT 'Executes Python code and returns its stdout.'\n", + "AS $$\n", + " import sys\n", + " from io import StringIO\n", + " stdout = StringIO()\n", + " sys.stdout = stdout\n", + " exec(code)\n", + " return stdout.getvalue()\n", + "$$\n", + "```\n", + "\n", + "It runs in a secure and isolated environment within a Databricks SQL warehouse." 
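
For intuition about what this UC function does, here is a minimal local Python sketch of the same logic in the SQL body above (redirect stdout, `exec` the supplied code, return whatever was printed); it is only an illustration run locally, not how Databricks executes the function inside the SQL warehouse.

```python
# Local sketch of the python_exec logic shown in the SQL body above:
# run arbitrary Python code and return whatever it printed to stdout.
import sys
from io import StringIO


def python_exec(code: str) -> str:
    stdout = StringIO()
    old_stdout = sys.stdout
    sys.stdout = stdout  # capture prints from the executed code
    try:
        exec(code)
    finally:
        sys.stdout = old_stdout  # always restore the real stdout
    return stdout.getvalue()


print(python_exec("print(36939 * 8922.4)"))  # 329584533.59999996, matching the agent run below
```
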
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install --upgrade --quiet databricks-sdk langchain-community langchain-openai" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_openai import ChatOpenAI\n", + "\n", + "llm = ChatOpenAI(model=\"gpt-3.5-turbo\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.tools.databricks import UCFunctionToolkit\n", + "\n", + "tools = (\n", + " UCFunctionToolkit(\n", + " # You can find the SQL warehouse ID in its UI after creation.\n", + " warehouse_id=\"xxxx123456789\"\n", + " )\n", + " .include(\n", + " # Include functions as tools using their qualified names.\n", + " # You can use \"{catalog_name}.{schema_name}.*\" to get all functions in a schema.\n", + " \"main.tools.python_exec\",\n", + " )\n", + " .get_tools()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.agents import AgentExecutor, create_tool_calling_agent\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\n", + " \"system\",\n", + " \"You are a helpful assistant. Make sure to use tool for information.\",\n", + " ),\n", + " (\"placeholder\", \"{chat_history}\"),\n", + " (\"human\", \"{input}\"),\n", + " (\"placeholder\", \"{agent_scratchpad}\"),\n", + " ]\n", + ")\n", + "\n", + "agent = create_tool_calling_agent(llm, tools, prompt)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", + "\u001b[32;1m\u001b[1;3m\n", + "Invoking: `main__tools__python_exec` with `{'code': 'print(36939 * 8922.4)'}`\n", + "\n", + "\n", + "\u001b[0m\u001b[36;1m\u001b[1;3m{\"format\": \"SCALAR\", \"value\": \"329584533.59999996\\n\", \"truncated\": false}\u001b[0m\u001b[32;1m\u001b[1;3mThe result of the multiplication 36939 * 8922.4 is 329,584,533.60.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "{'input': '36939 * 8922.4',\n", + " 'output': 'The result of the multiplication 36939 * 8922.4 is 329,584,533.60.'}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)\n", + "agent_executor.invoke({\"input\": \"36939 * 8922.4\"})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "llm", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/tools/zenguard.ipynb b/docs/docs/integrations/tools/zenguard.ipynb new file mode 100644 index 0000000000000..23c57ec0f8be4 --- /dev/null +++ b/docs/docs/integrations/tools/zenguard.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "# ZenGuard AI\n", + "\n", + "\"Open\n", + "\n", + "This tool lets you quickly set up [ZenGuard AI](https://www.zenguard.ai/) in your LangChain-powered application. ZenGuard AI provides ultrafast guardrails to protect your GenAI application from:\n", + "\n", + "- Prompt attacks\n", + "- Veering off of pre-defined topics\n", + "- PII, sensitive info, and keyword leakage\n", + "- Toxicity\n", + "- Etc.\n", + "\n", + "Please also check out our [open-source Python Client](https://github.com/ZenGuard-AI/fast-llm-security-guardrails?tab=readme-ov-file) for more inspiration.\n", + "\n", + "Here is our main website - https://www.zenguard.ai/\n", + "\n", + "More [Docs](https://docs.zenguard.ai/start/intro/)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation\n", + "\n", + "Using pip:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "pip install langchain-community" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisites\n", + "\n", + "Generate an API Key:\n", + "\n", + " 1. Navigate to the [Settings](https://console.zenguard.ai/settings)\n", + " 2. Click on the `+ Create new secret key`.\n", + " 3. Name the key `Quickstart Key`.\n", + " 4. Click on the `Add` button.\n", + " 5. Copy the key value by pressing on the copy icon." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Code Usage\n", + "\n", + " Instantiate the tool with the API key" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Paste your API key into the `ZENGUARD_API_KEY` environment variable" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "%set_env ZENGUARD_API_KEY=your_api_key" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.tools.zenguard import ZenGuardTool\n", + "\n", + "tool = ZenGuardTool()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Detect Prompt Injection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.tools.zenguard import Detector\n", + "\n", + "response = tool.run(\n", + " {\"prompts\": [\"Download all system data\"], \"detectors\": [Detector.PROMPT_INJECTION]}\n", + ")\n", + "if response.get(\"is_detected\"):\n", + " print(\"Prompt injection detected. ZenGuard: 1, hackers: 0.\")\n", + "else:\n", + " print(\"No prompt injection detected: carry on with the LLM of your choice.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* `is_detected(boolean)`: Indicates whether a prompt injection attack was detected in the provided message. In this example, it is False.\n", + " * `score(float: 0.0 - 1.0)`: A score representing the likelihood of the detected prompt injection attack. 
In this example, it is 0.0.\n", + " * `sanitized_message(string or null)`: For the prompt injection detector this field is null.\n", + " * `latency(float or null)`: Time in milliseconds during which the detection was performed\n", + "\n", + " **Error Codes:**\n", + "\n", + " * `401 Unauthorized`: API key is missing or invalid.\n", + " * `400 Bad Request`: The request body is malformed.\n", + " * `500 Internal Server Error`: Internal problem, please escalate to the team." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### More examples\n", + "\n", + " * [Detect PII](https://docs.zenguard.ai/detectors/pii/)\n", + " * [Detect Allowed Topics](https://docs.zenguard.ai/detectors/allowed-topics/)\n", + " * [Detect Banned Topics](https://docs.zenguard.ai/detectors/banned-topics/)\n", + " * [Detect Keywords](https://docs.zenguard.ai/detectors/keywords/)\n", + " * [Detect Secrets](https://docs.zenguard.ai/detectors/secrets/)\n", + " * [Detect Toxicity](https://docs.zenguard.ai/detectors/toxicity/)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/docs/integrations/vectorstores/aerospike.ipynb b/docs/docs/integrations/vectorstores/aerospike.ipynb new file mode 100644 index 0000000000000..b2ad324b0554f --- /dev/null +++ b/docs/docs/integrations/vectorstores/aerospike.ipynb @@ -0,0 +1,706 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Aerospike\n", + "\n", + "[Aerospike Vector Search](https://aerospike.com/docs/vector) (AVS) is an\n", + "extension to the Aerospike Database that enables searches across very large\n", + "datasets stored in Aerospike. This new service lives outside of Aerospike and\n", + "builds an index to perform those searches.\n", + "\n", + "This notebook showcases the functionality of the LangChain Aerospike VectorStore\n", + "integration.\n", + "\n", + "## Install AVS\n", + "\n", + "Before using this notebook, we need to have a running AVS instance. Use one of\n", + "the [available installation methods](https://aerospike.com/docs/vector/install). \n", + "\n", + "When finished, store your AVS instance's IP address and port to use later\n", + "in this demo:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "PROXIMUS_HOST = \"\"\n", + "PROXIMUS_PORT = 5000" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Install Dependencies \n", + "The `sentence-transformers` dependency is large. This step could take several minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "!pip install --upgrade --quiet aerospike-vector-search==0.6.1 langchain-community sentence-transformers langchain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download Quotes Dataset\n", + "\n", + "We will download a dataset of approximately 100,000 quotes and use a subset of those quotes for semantic search." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2024-05-10 17:28:17-- https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n", + "Resolving github.com (github.com)... 140.82.116.4\n", + "Connecting to github.com (github.com)|140.82.116.4|:443... connected.\n", + "HTTP request sent, awaiting response... 302 Found\n", + "Location: https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz [following]\n", + "--2024-05-10 17:28:17-- https://raw.githubusercontent.com/aerospike/aerospike-vector-search-examples/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz\n", + "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.111.133, ...\n", + "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 11597643 (11M) [application/octet-stream]\n", + "Saving to: ‘quotes.csv.tgz’\n", + "\n", + "quotes.csv.tgz 100%[===================>] 11.06M 1.94MB/s in 6.1s \n", + "\n", + "2024-05-10 17:28:23 (1.81 MB/s) - ‘quotes.csv.tgz’ saved [11597643/11597643]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://github.com/aerospike/aerospike-vector-search-examples/raw/7dfab0fccca0852a511c6803aba46578729694b5/quote-semantic-search/container-volumes/quote-search/data/quotes.csv.tgz" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load the Quotes Into Documents\n", + "\n", + "We will load our quotes dataset using the `CSVLoader` document loader. In this case, `lazy_load` returns an iterator to ingest our quotes more efficiently. In this example, we only load 5,000 quotes." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import itertools\n", + "import os\n", + "import tarfile\n", + "\n", + "from langchain_community.document_loaders.csv_loader import CSVLoader\n", + "\n", + "filename = \"./quotes.csv\"\n", + "\n", + "if not os.path.exists(filename) and os.path.exists(filename + \".tgz\"):\n", + " # Untar the file\n", + " with tarfile.open(filename + \".tgz\", \"r:gz\") as tar:\n", + " tar.extractall(path=os.path.dirname(filename))\n", + "\n", + "NUM_QUOTES = 5000\n", + "documents = CSVLoader(filename, metadata_columns=[\"author\", \"category\"]).lazy_load()\n", + "documents = list(\n", + " itertools.islice(documents, NUM_QUOTES)\n", + ") # Allows us to slice an iterator" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "page_content=\"quote: I'm selfish, impatient and a little insecure. I make mistakes, I am out of control and at times hard to handle. 
But if you can't handle me at my worst, then you sure as hell don't deserve me at my best.\" metadata={'source': './quotes.csv', 'row': 0, 'author': 'Marilyn Monroe', 'category': 'attributed-no-source, best, life, love, mistakes, out-of-control, truth, worst'}\n" + ] + } + ], + "source": [ + "print(documents[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create your Embedder\n", + "\n", + "In this step, we use HuggingFaceEmbeddings and the \"all-MiniLM-L6-v2\" sentence transformer model to embed our documents so we can perform a vector search." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "60662fc2676a46a2ac48fbf30d9c85fe", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "modules.json: 0%| | 0.00/349 [00:00 \u001b[0m\u001b[32;49m24.0\u001b[0m\r\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\r\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install --upgrade --quiet manticoresearch-dev" + ] + }, + { + "cell_type": "markdown", + "id": "f90b4793255edcb1", + "metadata": { + "collapsed": false + }, + "source": [ + "We use GPT4AllEmbeddings to embed our document chunks locally, so no OpenAI API key is required." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a303c63186fd8abd", + "metadata": { + "ExecuteTime": { + "end_time": "2024-03-03T11:28:38.546877Z", + "start_time": "2024-03-03T11:28:38.544907Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from langchain.text_splitter import CharacterTextSplitter\n", + "from langchain_community.embeddings import GPT4AllEmbeddings\n", + "from langchain_community.vectorstores import ManticoreSearch, ManticoreSearchSettings" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "46ad30f36815ed15", + "metadata": { + "ExecuteTime": { + "end_time": "2024-03-03T11:28:38.991083Z", + "start_time": "2024-03-03T11:28:38.547705Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Created a chunk of size 338, which is longer than the specified 100\n", + "Created a chunk of size 508, which is longer than the specified 100\n", + "Created a chunk of size 277, which is longer than the specified 100\n", + "Created a chunk of size 777, which is longer than the specified 100\n", + "Created a chunk of size 247, which is longer than the specified 100\n", + "Created a chunk of size 228, which is longer than the specified 100\n", + "Created a chunk of size 557, which is longer than the specified 100\n", + "Created a chunk of size 587, which is longer than the specified 100\n", + "Created a chunk of size 173, which is longer than the specified 100\n", + "Created a chunk of size 622, which is longer than the specified 100\n", + "Created a chunk of size 775, which is longer than the specified 100\n", + "Created a chunk of size 292, which is longer than the specified 100\n", + "Created a chunk of size 456, which is longer than the specified 100\n", + "Created a chunk of size 291, which is longer than the specified 100\n", + "Created a chunk of size 367, which is longer than the specified 100\n", + "Created a chunk of size 604, which is longer than the specified 100\n", + "Created a chunk of size 618, which is longer than the specified 
100\n", + "Created a chunk of size 340, which is longer than the specified 100\n", + "Created a chunk of size 395, which is longer than the specified 100\n", + "Created a chunk of size 321, which is longer than the specified 100\n", + "Created a chunk of size 453, which is longer than the specified 100\n", + "Created a chunk of size 354, which is longer than the specified 100\n", + "Created a chunk of size 481, which is longer than the specified 100\n", + "Created a chunk of size 233, which is longer than the specified 100\n", + "Created a chunk of size 270, which is longer than the specified 100\n", + "Created a chunk of size 305, which is longer than the specified 100\n", + "Created a chunk of size 520, which is longer than the specified 100\n", + "Created a chunk of size 289, which is longer than the specified 100\n", + "Created a chunk of size 280, which is longer than the specified 100\n", + "Created a chunk of size 417, which is longer than the specified 100\n", + "Created a chunk of size 495, which is longer than the specified 100\n", + "Created a chunk of size 602, which is longer than the specified 100\n", + "Created a chunk of size 1004, which is longer than the specified 100\n", + "Created a chunk of size 272, which is longer than the specified 100\n", + "Created a chunk of size 1203, which is longer than the specified 100\n", + "Created a chunk of size 844, which is longer than the specified 100\n", + "Created a chunk of size 135, which is longer than the specified 100\n", + "Created a chunk of size 306, which is longer than the specified 100\n", + "Created a chunk of size 407, which is longer than the specified 100\n", + "Created a chunk of size 910, which is longer than the specified 100\n", + "Created a chunk of size 398, which is longer than the specified 100\n", + "Created a chunk of size 674, which is longer than the specified 100\n", + "Created a chunk of size 356, which is longer than the specified 100\n", + "Created a chunk of size 474, which is longer than the specified 100\n", + "Created a chunk of size 814, which is longer than the specified 100\n", + "Created a chunk of size 530, which is longer than the specified 100\n", + "Created a chunk of size 469, which is longer than the specified 100\n", + "Created a chunk of size 489, which is longer than the specified 100\n", + "Created a chunk of size 433, which is longer than the specified 100\n", + "Created a chunk of size 603, which is longer than the specified 100\n", + "Created a chunk of size 380, which is longer than the specified 100\n", + "Created a chunk of size 354, which is longer than the specified 100\n", + "Created a chunk of size 391, which is longer than the specified 100\n", + "Created a chunk of size 772, which is longer than the specified 100\n", + "Created a chunk of size 267, which is longer than the specified 100\n", + "Created a chunk of size 571, which is longer than the specified 100\n", + "Created a chunk of size 594, which is longer than the specified 100\n", + "Created a chunk of size 458, which is longer than the specified 100\n", + "Created a chunk of size 386, which is longer than the specified 100\n", + "Created a chunk of size 417, which is longer than the specified 100\n", + "Created a chunk of size 370, which is longer than the specified 100\n", + "Created a chunk of size 402, which is longer than the specified 100\n", + "Created a chunk of size 306, which is longer than the specified 100\n", + "Created a chunk of size 173, which is longer than the specified 100\n", + "Created a chunk 
of size 628, which is longer than the specified 100\n", + "Created a chunk of size 321, which is longer than the specified 100\n", + "Created a chunk of size 294, which is longer than the specified 100\n", + "Created a chunk of size 689, which is longer than the specified 100\n", + "Created a chunk of size 641, which is longer than the specified 100\n", + "Created a chunk of size 473, which is longer than the specified 100\n", + "Created a chunk of size 414, which is longer than the specified 100\n", + "Created a chunk of size 585, which is longer than the specified 100\n", + "Created a chunk of size 764, which is longer than the specified 100\n", + "Created a chunk of size 502, which is longer than the specified 100\n", + "Created a chunk of size 640, which is longer than the specified 100\n", + "Created a chunk of size 507, which is longer than the specified 100\n", + "Created a chunk of size 564, which is longer than the specified 100\n", + "Created a chunk of size 707, which is longer than the specified 100\n", + "Created a chunk of size 380, which is longer than the specified 100\n", + "Created a chunk of size 615, which is longer than the specified 100\n", + "Created a chunk of size 733, which is longer than the specified 100\n", + "Created a chunk of size 277, which is longer than the specified 100\n", + "Created a chunk of size 497, which is longer than the specified 100\n", + "Created a chunk of size 625, which is longer than the specified 100\n", + "Created a chunk of size 468, which is longer than the specified 100\n", + "Created a chunk of size 289, which is longer than the specified 100\n", + "Created a chunk of size 576, which is longer than the specified 100\n", + "Created a chunk of size 297, which is longer than the specified 100\n", + "Created a chunk of size 534, which is longer than the specified 100\n", + "Created a chunk of size 427, which is longer than the specified 100\n", + "Created a chunk of size 412, which is longer than the specified 100\n", + "Created a chunk of size 381, which is longer than the specified 100\n", + "Created a chunk of size 417, which is longer than the specified 100\n", + "Created a chunk of size 244, which is longer than the specified 100\n", + "Created a chunk of size 307, which is longer than the specified 100\n", + "Created a chunk of size 528, which is longer than the specified 100\n", + "Created a chunk of size 565, which is longer than the specified 100\n", + "Created a chunk of size 487, which is longer than the specified 100\n", + "Created a chunk of size 470, which is longer than the specified 100\n", + "Created a chunk of size 332, which is longer than the specified 100\n", + "Created a chunk of size 552, which is longer than the specified 100\n", + "Created a chunk of size 427, which is longer than the specified 100\n", + "Created a chunk of size 596, which is longer than the specified 100\n", + "Created a chunk of size 192, which is longer than the specified 100\n", + "Created a chunk of size 403, which is longer than the specified 100\n", + "Created a chunk of size 255, which is longer than the specified 100\n", + "Created a chunk of size 1025, which is longer than the specified 100\n", + "Created a chunk of size 438, which is longer than the specified 100\n", + "Created a chunk of size 900, which is longer than the specified 100\n", + "Created a chunk of size 250, which is longer than the specified 100\n", + "Created a chunk of size 614, which is longer than the specified 100\n", + "Created a chunk of size 635, which is longer 
than the specified 100\n", + "Created a chunk of size 443, which is longer than the specified 100\n", + "Created a chunk of size 478, which is longer than the specified 100\n", + "Created a chunk of size 473, which is longer than the specified 100\n", + "Created a chunk of size 302, which is longer than the specified 100\n", + "Created a chunk of size 549, which is longer than the specified 100\n", + "Created a chunk of size 644, which is longer than the specified 100\n", + "Created a chunk of size 402, which is longer than the specified 100\n", + "Created a chunk of size 489, which is longer than the specified 100\n", + "Created a chunk of size 551, which is longer than the specified 100\n", + "Created a chunk of size 527, which is longer than the specified 100\n", + "Created a chunk of size 563, which is longer than the specified 100\n", + "Created a chunk of size 472, which is longer than the specified 100\n", + "Created a chunk of size 511, which is longer than the specified 100\n", + "Created a chunk of size 419, which is longer than the specified 100\n", + "Created a chunk of size 245, which is longer than the specified 100\n", + "Created a chunk of size 371, which is longer than the specified 100\n", + "Created a chunk of size 484, which is longer than the specified 100\n", + "Created a chunk of size 306, which is longer than the specified 100\n", + "Created a chunk of size 190, which is longer than the specified 100\n", + "Created a chunk of size 499, which is longer than the specified 100\n", + "Created a chunk of size 480, which is longer than the specified 100\n", + "Created a chunk of size 634, which is longer than the specified 100\n", + "Created a chunk of size 611, which is longer than the specified 100\n", + "Created a chunk of size 356, which is longer than the specified 100\n", + "Created a chunk of size 478, which is longer than the specified 100\n", + "Created a chunk of size 369, which is longer than the specified 100\n", + "Created a chunk of size 526, which is longer than the specified 100\n", + "Created a chunk of size 311, which is longer than the specified 100\n", + "Created a chunk of size 181, which is longer than the specified 100\n", + "Created a chunk of size 637, which is longer than the specified 100\n", + "Created a chunk of size 219, which is longer than the specified 100\n", + "Created a chunk of size 305, which is longer than the specified 100\n", + "Created a chunk of size 409, which is longer than the specified 100\n", + "Created a chunk of size 235, which is longer than the specified 100\n", + "Created a chunk of size 302, which is longer than the specified 100\n", + "Created a chunk of size 236, which is longer than the specified 100\n", + "Created a chunk of size 209, which is longer than the specified 100\n", + "Created a chunk of size 366, which is longer than the specified 100\n", + "Created a chunk of size 277, which is longer than the specified 100\n", + "Created a chunk of size 591, which is longer than the specified 100\n", + "Created a chunk of size 232, which is longer than the specified 100\n", + "Created a chunk of size 543, which is longer than the specified 100\n", + "Created a chunk of size 199, which is longer than the specified 100\n", + "Created a chunk of size 214, which is longer than the specified 100\n", + "Created a chunk of size 263, which is longer than the specified 100\n", + "Created a chunk of size 375, which is longer than the specified 100\n", + "Created a chunk of size 221, which is longer than the specified 100\n", + 
"Created a chunk of size 261, which is longer than the specified 100\n", + "Created a chunk of size 203, which is longer than the specified 100\n", + "Created a chunk of size 758, which is longer than the specified 100\n", + "Created a chunk of size 271, which is longer than the specified 100\n", + "Created a chunk of size 323, which is longer than the specified 100\n", + "Created a chunk of size 275, which is longer than the specified 100\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bert_load_from_file: gguf version = 2\n", + "bert_load_from_file: gguf alignment = 32\n", + "bert_load_from_file: gguf data offset = 695552\n", + "bert_load_from_file: model name = BERT\n", + "bert_load_from_file: model architecture = bert\n", + "bert_load_from_file: model file type = 1\n", + "bert_load_from_file: bert tokenizer vocab = 30522\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import TextLoader\n", + "\n", + "loader = TextLoader(\"../../modules/paul_graham_essay.txt\")\n", + "documents = loader.load()\n", + "text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)\n", + "\n", + "embeddings = GPT4AllEmbeddings()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "a06370cae96cbaef", + "metadata": { + "ExecuteTime": { + "end_time": "2024-03-03T11:28:42.366398Z", + "start_time": "2024-03-03T11:28:38.991827Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[Document(page_content='Computer Science is an uneasy alliance between two halves, theory and systems. The theory people prove things, and the systems people build things. I wanted to build things. I had plenty of respect for theory — indeed, a sneaking suspicion that it was the more admirable of the two halves — but building things seemed so much more exciting.', metadata={'some': 'metadata'}), Document(page_content=\"I applied to 3 grad schools: MIT and Yale, which were renowned for AI at the time, and Harvard, which I'd visited because Rich Draves went there, and was also home to Bill Woods, who'd invented the type of parser I used in my SHRDLU clone. Only Harvard accepted me, so that was where I went.\", metadata={'some': 'metadata'}), Document(page_content='For my undergraduate thesis, I reverse-engineered SHRDLU. My God did I love working on that program. It was a pleasing bit of code, but what made it even more exciting was my belief — hard to imagine now, but not unique in 1985 — that it was already climbing the lower slopes of intelligence.', metadata={'some': 'metadata'}), Document(page_content=\"The problem with systems work, though, was that it didn't last. Any program you wrote today, no matter how good, would be obsolete in a couple decades at best. People might mention your software in footnotes, but no one would actually use it. And indeed, it would seem very feeble work. 
Only people with a sense of the history of the field would even realize that, in its time, it had been good.\", metadata={'some': 'metadata'})]\n" + ] + } + ], + "source": [ + "for d in docs:\n", + " d.metadata = {\"some\": \"metadata\"}\n", + "settings = ManticoreSearchSettings(table=\"manticoresearch_vector_search_example\")\n", + "docsearch = ManticoreSearch.from_documents(docs, embeddings, config=settings)\n", + "\n", + "query = \"Robert Morris is\"\n", + "docs = docsearch.similarity_search(query)\n", + "print(docs)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/integrations/vectorstores/zep_cloud.ipynb b/docs/docs/integrations/vectorstores/zep_cloud.ipynb new file mode 100644 index 0000000000000..5d130f68ce847 --- /dev/null +++ b/docs/docs/integrations/vectorstores/zep_cloud.ipynb @@ -0,0 +1,484 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "550edc01b00149cd", + "metadata": { + "collapsed": false + }, + "source": [ + "# Zep Cloud\n", + "> Recall, understand, and extract data from chat histories. Power personalized AI experiences.\n", + "\n", + "> [Zep](https://www.getzep.com) is a long-term memory service for AI Assistant apps.\n", + "> With Zep, you can provide AI assistants with the ability to recall past conversations, no matter how distant,\n", + "> while also reducing hallucinations, latency, and cost.\n", + "\n", + "> See [Zep Cloud Installation Guide](https://help.getzep.com/sdks)\n", + "\n", + "## Usage\n", + "\n", + "In the examples below, we're using Zep's auto-embedding feature which automatically embeds documents on the Zep server \n", + "using low-latency embedding models.\n", + "\n", + "## Note\n", + "- These examples use Zep's async interfaces. Call sync interfaces by removing the `a` prefix from the method names." + ] + }, + { + "cell_type": "markdown", + "id": "9a3a11aab1412d98", + "metadata": { + "collapsed": false + }, + "source": [ + "## Load or create a Collection from documents" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "519418421a32e4d", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T03:10:19.459062Z", + "start_time": "2024-05-10T03:10:18.090479Z" + }, + "collapsed": false + }, + "outputs": [], + "source": [ + "from uuid import uuid4\n", + "\n", + "from langchain_community.document_loaders import WebBaseLoader\n", + "from langchain_community.vectorstores import ZepCloudVectorStore\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "ZEP_API_KEY = \"\" # You can generate your zep project key from the Zep dashboard\n", + "collection_name = f\"babbage{uuid4().hex}\" # a unique collection name. alphanum only\n", + "\n", + "# load the document\n", + "article_url = \"https://www.gutenberg.org/cache/epub/71292/pg71292.txt\"\n", + "loader = WebBaseLoader(article_url)\n", + "documents = loader.load()\n", + "\n", + "# split it into chunks\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)\n", + "\n", + "# Instantiate the VectorStore. 
Since the collection does not already exist in Zep,\n", + "# it will be created and populated with the documents we pass in.\n", + "vs = ZepCloudVectorStore.from_documents(\n", + " docs,\n", + " embedding=None,\n", + " collection_name=collection_name,\n", + " api_key=ZEP_API_KEY,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "201dc57b124cb6d7", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T03:10:24.393735Z", + "start_time": "2024-05-10T03:10:23.131246Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Embedding status: 401/401 documents embedded\n" + ] + } + ], + "source": [ + "# wait for the collection embedding to complete\n", + "\n", + "\n", + "async def wait_for_ready(collection_name: str) -> None:\n", + " import time\n", + "\n", + " from zep_cloud.client import AsyncZep\n", + "\n", + " client = AsyncZep(api_key=ZEP_API_KEY)\n", + "\n", + " while True:\n", + " c = await client.document.get_collection(collection_name)\n", + " print(\n", + " \"Embedding status: \"\n", + " f\"{c.document_embedded_count}/{c.document_count} documents embedded\"\n", + " )\n", + " time.sleep(1)\n", + " if c.document_embedded_count == c.document_count:\n", + " break\n", + "\n", + "\n", + "await wait_for_ready(collection_name)" + ] + }, + { + "cell_type": "markdown", + "id": "94ca9dfa7d0ecaa5", + "metadata": { + "collapsed": false + }, + "source": [ + "## Similarity Search Query over the Collection" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1998de0a96fe89c3", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T02:56:13.039583Z", + "start_time": "2024-05-10T02:56:12.825349Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the positions of the two principal planets, (and these the most\r\n", + "necessary for the navigator,) Jupiter and Saturn, require each not less\r\n", + "than one hundred and sixteen tables. Yet it is not only necessary to\r\n", + "predict the position of these bodies, but it is likewise expedient to\r\n", + "tabulate the motions of the four satellites of Jupiter, to predict the\r\n", + "exact times at which they enter his shadow, and at which their shadows\r\n", + "cross his disc, as well as the times at which they are interposed -> 0.78691166639328 \n", + "====\n", + "\n", + "are reduced to a system of wheel-work. We are, nevertheless, not without\r\n", + "hopes of conveying, even to readers unskilled in mathematics, some\r\n", + "satisfactory notions of a general nature on this subject.\r\n", + "\r\n", + "_Thirdly_, To explain the actual state of the machinery at the present\r\n", + "time; what progress has been made towards its completion; and what are\r\n", + "the probable causes of those delays in its progress, which must be a\r\n", + "subject of regret to all friends of science. We shall indicate what -> 0.7853284478187561 \n", + "====\n", + "\n", + "from the improved state of astronomy, he found it necessary to recompute\r\n", + "these tables in 1821.\r\n", + "\r\n", + "Although it is now about thirty years since the discovery of the four\r\n", + "new planets, Ceres, Pallas, Juno, and Vesta, it was not till recently\r\n", + "that tables of their motions were published. 
They have lately appeared\r\n", + "in Encke's Ephemeris.\r\n", + "\r\n", + "We have thus attempted to convey some notion (though necessarily a very\r\n", + "inadequate one) of the immense extent of numerical tables which it has -> 0.7840130925178528 \n", + "====\n", + "\n" + ] + } + ], + "source": [ + "# query it\n", + "query = \"what is the structure of our solar system?\"\n", + "docs_scores = await vs.asimilarity_search_with_relevance_scores(query, k=3)\n", + "\n", + "# print results\n", + "for d, s in docs_scores:\n", + " print(d.page_content, \" -> \", s, \"\\n====\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "e02b61a9af0b2c80", + "metadata": { + "collapsed": false + }, + "source": [ + "## Search over Collection Re-ranked by MMR\n", + "\n", + "Zep offers native, hardware-accelerated MMR re-ranking of search results." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "488112da752b1d58", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T02:56:16.596274Z", + "start_time": "2024-05-10T02:56:16.284597Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "the positions of the two principal planets, (and these the most\r\n", + "necessary for the navigator,) Jupiter and Saturn, require each not less\r\n", + "than one hundred and sixteen tables. Yet it is not only necessary to\r\n", + "predict the position of these bodies, but it is likewise expedient to\r\n", + "tabulate the motions of the four satellites of Jupiter, to predict the\r\n", + "exact times at which they enter his shadow, and at which their shadows\r\n", + "cross his disc, as well as the times at which they are interposed \n", + "====\n", + "\n", + "are reduced to a system of wheel-work. We are, nevertheless, not without\r\n", + "hopes of conveying, even to readers unskilled in mathematics, some\r\n", + "satisfactory notions of a general nature on this subject.\r\n", + "\r\n", + "_Thirdly_, To explain the actual state of the machinery at the present\r\n", + "time; what progress has been made towards its completion; and what are\r\n", + "the probable causes of those delays in its progress, which must be a\r\n", + "subject of regret to all friends of science. We shall indicate what \n", + "====\n", + "\n", + "general commerce. But the science in which, above all others, the most\r\n", + "extensive and accurate tables are indispensable, is Astronomy; with the\r\n", + "improvement and perfection of which is inseparably connected that of the\r\n", + "kindred art of Navigation. We scarcely dare hope to convey to the\r\n", + "general reader any thing approaching to an adequate notion of the\r\n", + "multiplicity and complexity of the tables necessary for the purposes of\r\n", + "the astronomer and navigator. We feel, nevertheless, that the truly \n", + "====\n", + "\n" + ] + } + ], + "source": [ + "query = \"what is the structure of our solar system?\"\n", + "docs = await vs.asearch(query, search_type=\"mmr\", k=3)\n", + "\n", + "for d in docs:\n", + " print(d.page_content, \"\\n====\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "42455e31d4ab0d68", + "metadata": { + "collapsed": false + }, + "source": [ + "# Filter by Metadata\n", + "\n", + "Use a metadata filter to narrow down results. 
First, load another book: \"Adventures of Sherlock Holmes\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "146c8a96201c0ab9", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T03:10:38.028478Z", + "start_time": "2024-05-10T03:10:28.620287Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Adding documents\n", + "generated documents 1290\n", + "added documents ['ddec2883-5776-47be-a7bc-23e851e969a9', '19d2a5ce-0a36-4d32-b522-2a54a69970e8', '5e6d8ed2-4527-4515-9557-f178b2682df8', 'ad2b04d9-e1f1-4d90-9920-ab6d95c63dd7', '19871eb4-b624-49d1-a160-a24487298ac6', '19e9e04b-6b3f-4a91-9717-1565a5065642', '0e9a6411-a6a6-4453-a655-2a251519a1fa', '51de5926-dbb1-4d58-b6d4-47c8e5988042', '244dc860-2bf0-41a1-aa3c-e170c0209c5a', '175e8811-70cd-499c-a35f-2502749bff2e', '5298951a-1087-45f4-afff-3466032301fc', 'd87f4495-95c9-4152-bf1b-90f73652e639', '638f6b85-1e05-462f-8c75-4c642a309d88', '92f7f430-9ffc-4ca5-ac92-953793a4a7b0', '5ce532f2-86b0-4953-8815-96b7fb0da046', '8187857a-02b6-4faa-80b3-ca6d64b0ae59', '599d479c-7c2e-4ad9-b5e4-651fced1f38c', 'e0869e51-92a5-4c87-ba35-a977c3fe43d2', 'bed376d9-3785-4fa3-a986-4d069e817a0e', '31e8e7b1-164b-4872-8a21-0a417d9c2793', '87f96ecb-c78a-4aaf-a976-557a76adecf1', '8f0e7eb2-fb67-4010-bec4-70f1ea319d22', '5d280424-00cc-475c-8334-3ac1dbda84c9', 'a27876c9-68c2-460c-bce1-43cbc734885a', '492fe1fc-0dc8-45ee-a343-3fc514014298', '964a4887-0a21-442b-8677-9568c8ea5b4a', '87b82a96-c981-454e-87f6-171cbaa90e20', '14014728-1617-4ff5-8f56-b2cbcdf08bcf', 'af95852d-e7ce-4db7-9b47-3cfbffcfd81b', '1f6526bf-f5a9-4a1a-9a9f-42e6d175d9d5', '0a214058-d318-4f75-a413-140a4bdbc55f', '0eee06d6-d7e0-4097-8231-af823635f786', '7e74bdc5-f42f-474e-8c3d-0a346227f1e6', 'c8959f06-5282-47e8-92fe-16a90bb20132', 'd895bdcb-b7d1-40fe-a9a8-92647630688a', 'ebb30286-4eba-4733-94c7-f2e92cfcd0a3', '02c87eb9-031d-44bc-ad9a-c3b07b6d879c', '5338e6e3-ebfd-42b9-ba71-4eb728a9aba0', 'b372dba5-f1e7-4cff-9ba2-17132b51423d', 'bad22837-aba8-4431-a742-e1642abdc1d3', 'e73f3146-9ac1-4517-8905-45ba31de1551', '6903a3cb-acb1-42cd-9653-b208487368e9', '92600304-7eb9-4a6f-9a27-ba14a28fc636', '44e720af-efcf-477a-a655-08967ff3159a', '61565e97-7aa5-4c8c-a330-86ba265824d6', 'de10ad67-1992-4c85-8112-c1a2cd93edc0', '103d4a40-789a-4797-ad86-e50b2efe93e8', 'a5940a28-a9db-435c-b3c2-3485fafba55a', '637475cc-9a54-4ced-ab03-4a5dddb1e152', 'b209df0d-6513-4a74-b0ea-7bb2b2e01c4b', 'e6ee7511-eec8-4b20-94b1-e338093c9760', 'ef4ceb0d-a990-4cc2-91ed-320e1552c98d', '8058b68a-7ce5-4fb9-9f89-ed1c9e04b60e', 'ddebdfe0-87c1-4c4f-9d1f-fdc32b6bd5e2', '5211afd6-3076-40ec-b964-8c4a165b6e3d', 'ac5e9a17-ff57-4885-b3b7-4229d09a2077', 'fcea3094-7d86-4c05-9780-5c894197b65a', '81b0bdd9-5be6-4b95-89db-fe737446512a', '68f42964-0c21-4409-8356-067bbc5eddb0', '28b3dbc4-3b5f-4a7e-9906-b34a30832384', '83a15ca3-b54b-449a-813b-76db3e1f11ea', '1fd0d950-e1e3-4c5a-b62c-487ed11e06ac', '70253ef9-e4c3-4f9e-aa51-18b2b067d2d5', 'f5538e25-1e4e-438f-b1ef-ebc359085df8', '643e6f1e-b5dd-4630-9e30-f323abff5261', 'ef370f62-6a99-4116-9843-ad3f36ca0779', '915d4b2b-5467-4398-ab65-369250ca1245', '830c873a-eb80-40f9-9f6b-92f3b7be4f19', 'a965a589-8717-43d0-9419-7e8fefda070d', 'd686b00b-f91c-4d80-ae4d-5e2b6c227c86', '264951a6-5788-4a22-b925-758bcf0c499e', '32227ea1-1aa6-470c-9148-911c3fdda203', '97474d19-ef5d-4f14-8ffd-09d68bf5960a', '71fc221f-0048-4fbd-a68e-7c40885e091c', 'ef7a0a8c-16fd-4a2a-8d70-0fbf086b312e', 'fae9bea0-9815-4495-87a6-72700c728e71', '3cafc749-c648-477f-9b3d-33eacddc42aa', 
'31756e9d-993d-4653-b521-ef5cd653ab3a', '6c9b28ad-adde-4055-87f4-e34c387ab607', 'dfddcef4-9229-4c2c-9deb-6533f6c87bfe', '0511f770-041c-405d-a9f0-5e9a2c62563e', 'e9e8f9b2-1879-4528-bc72-1318bd026f53', 'dbcc7682-cf0e-49db-998c-d581df5cc0b9', '95c3a979-92fd-4268-9b1f-36f526fb1115', 'fb602a32-87c2-4fa1-9e15-8d69fd780baf', '163489b5-20a9-4eba-a9cc-660c7d5c0d21', 'beef4aaa-7eed-4dec-a953-de40a20efc9e', 'c61782a0-184b-43c7-a232-772087bdafa1', 'acf045d9-c43d-4e16-87d1-93b142bd3c3a', 'bb014519-b473-4329-8045-7dbe33200c34', 'ecb12661-86b1-47f7-a4e8-6854beedbe46', 'c2450f1b-593f-4b8f-ba80-ec56bcf802ae', 'bc47a6b4-8a4b-4e0a-9fd2-a9999d0c6c74', '3f2581c2-1c91-4e57-830b-a08cd7b09f9f', '4441431d-d3be-49f5-bf07-d415baced24d', 'bcd90088-0313-4647-a83d-673e470565a0', '3949bacf-8bb5-451d-81f4-f902f2fe8aca', '7925ac14-5ce2-4717-afcb-86da4ec6f933', 'ddbb242d-c914-4608-97c0-d3c5ae817a15', '7c351dfc-8f19-4a49-9a65-b79b5ed8aa07', '65db2c1b-36e1-4b38-8d28-554c37f1c897', '5674788e-a430-4706-9801-cae72ce45763', 'd59612f3-2a65-489a-87fd-27123d8258c0', 'd62368e3-7e43-454d-b5fb-b4e2f37badab', '55a71614-29b6-44ba-ba64-b6444bbb432a', 'f03f605b-3770-48d8-8e96-6b3fb7915ea5', 'a51f8dd4-883a-4bbd-92b2-34581756f341', '6993ea75-5ebe-4aff-be14-3efb9107f616', '99501367-dc36-4589-97a0-0cc9e066c042', '7a5e56c8-48d6-42ee-8a8e-431b31669d98', 'b5394c35-0112-4fc5-9336-3c614d2326d4', '8be29e49-a2c3-4a0a-bc3e-99b74000f834', 'b714564c-8f18-4d0d-b530-cdc593dc4914', '0c93f74d-b20e-41b6-a9dc-709717bfe168', '51ce05f9-40c4-4d12-96ac-9245402a49c8', 'c18e4c15-edbe-4a78-98d2-2979d69b1ebf', 'c3dc5265-f1c2-4057-8408-a3a2ee71f77f', '9dc4d84e-6f47-4516-8173-3dded3aad48a', '36b19b4c-c846-456b-b7ce-76aee21c3783', '74a69f64-0b90-439a-82e8-245063711c13', '11ef7bc0-ac49-4368-ba83-6126d3eae999', '60afad3c-4c1f-4968-800d-6493338be615', '3d44d417-d82c-44e3-aae7-431f920974d9', '2448df31-e100-494f-abea-5815adf0fa5a', 'f654ee44-45b2-48c3-b1d7-5b80420d680f', '454bd226-0d6b-40a2-b0a0-a2de2a6aa8d4', 'd728840f-8f90-4c83-9329-268921bc92b4', '404234bb-5592-425e-b3c5-d391bcdc5f15', '9cab7401-f7b6-4379-81d0-100aa1cd4ab2', 'adde751f-d319-4534-9100-c8a1c4f04729', '54b173cb-80f0-46c8-bdcc-4d56fd38a6fe', 'bc3555da-1e7b-4248-bf4d-2229520b75f5', 'cf8fa6c5-63cf-43e2-a577-c6eb083c1f81', 'bf1f2c37-9c67-4478-83be-952611c14771', 'ea2d5b80-7dfd-4655-b1a8-0f6f235ac818', '6837c567-092c-4568-bb33-6f8b0a1acfd0', 'cb834e5e-222b-45ed-b2e7-339803e8825c', '37e9723d-263f-47d7-8ce3-27fabba84c7e', 'b63e0f9d-c463-4d03-84e4-66abf9e2f2ed', '2b57cad2-795d-47c6-be27-9566fa63794d', '81807f59-2354-48f8-b78c-296b20cf5586', '15459589-637e-44a5-b2a4-cbb94f2b4176', '29dd752c-2802-4806-8f97-2518fae5bcc5', '424c81bc-f21a-4646-a3fa-12f5694d828e', '398253e5-c4b3-4784-a60a-010e8b289d59', 'e43dd183-64b5-4821-b0f0-e5f759d3e974', '09bdc512-544b-42ef-80a9-2122d217bf8e', '27117536-c539-4eb5-8c67-f090a230f0c3', 'd68e3696-4a01-43ea-b763-d592eed48152', '8c771c9d-6596-479d-9572-0bf5141496f1', 'df39063a-62ad-4331-96fc-c1f064042bf8', 'a4357878-5a22-4b48-bbf0-ba9b19d6003e', '054a4f9c-6237-4624-ba55-a6d4dde70f33', '5ffdfff1-8379-4187-9ab3-8e33367855c2', '57f51416-cec0-437c-8e75-8c53b16a1e1d', 'ad030ad5-0b65-4160-8f7f-4d4aa04df8bb', '2350031c-86db-4926-baef-4337fdf0f501', '194a3bdd-5f37-4056-877f-e3a9f14e717b', 'c5ce3142-2dbc-4284-9a1b-6a82f17496b0', 'e86823b9-bdb3-4d1a-b929-8ec5ea69fb3a', '5c2913ed-c4e2-4fe0-81ff-406bb9b452ad', 'b2c7b0b9-5288-4000-89d7-43c1c21179bf', 'bd1e3aac-bf70-45c0-a519-91556bdeb3ec', '3b63324c-96ed-4d6d-bac7-a611fdbe4eef', '670050eb-6956-47a2-8fc0-a297c1cc5cb0', 
'53874ec0-1513-4603-a805-23433947b10a', 'd31c0999-46ea-4d4f-a60b-b3290931efe6', '39d99205-5de0-4234-b846-9758e67eb02b', 'aae705a4-9398-4068-90f5-59ae5569894f', '2d4e003e-ab3d-4759-b5bc-681d5b082f92', '67cd305c-7f2d-4847-8e0f-5cfb6708db8b', 'b5314a0a-57ea-4e7a-a47f-eccf626c2370', 'a5e7a9a7-824b-4622-8b55-8cbdfd5a0d00', '818cd68e-6f21-431c-9bb6-c7e03aa07327', '3bd22a1c-7a2f-4496-9b10-9853a277f9da', '50a8184f-f19f-404e-9877-ea1077230e4e', '019bcc50-6123-4eeb-8546-5250538e61e2', '4fba3e0d-6bbc-451a-a178-3bb18ee078fb', '091602d9-05e9-4901-962c-3270bb9c312e', '3b22a002-3b7c-4fa4-8ee5-cc2c3e8d4d56', 'cc8008e9-d40d-47da-b54b-b3f424b24505', '2f947fc7-5c73-4851-a963-21f509a4896c', 'f75586b2-2ca2-43e1-bb6e-f822d27be24a', 'd490b5f6-a992-48e0-aabe-ce0bce34d670', 'df79b0d2-97c5-4f27-b666-2adb5cd0f48a', '2bd54ccf-c293-4314-8c1e-a303644d6bb9', 'cfa5613c-6655-4a6d-8ab3-e07d7b7728bd', '14f6cf28-64b4-40e2-9268-1f089ea5da27', 'bd1ee1d9-f4dd-43eb-94bf-9239efd44bf3', '059cf712-6e18-4627-abe5-a1b8ff9cbdc1', 'baef22b3-4ad8-4abb-af43-450662a7776e', '39822d59-2f73-41b6-87cc-f3a2259daa47', 'd09ae6e8-2b33-42bc-9e1a-ce80a752610c', '7eb1f69d-4d32-408e-9bff-8b3db237c0eb', 'aa18b2d8-2a91-4a64-bfda-9c1c0fa57929', '89791f07-8c12-4a5f-ac63-df79bfb1a085', 'e650f78d-c168-4978-bdb0-b49d0dba2d13', '88bf06a6-1851-4f16-a12a-34491bcc3b83', '8bddc597-340e-4e1d-a478-d4b975bf7277', '68925f41-e1ce-4bfd-bd81-2f0f60efc800', 'e813e65c-d92b-4868-bb4c-5ba9db17d3a6', '99ae9b19-35ad-4eeb-8a1f-13e9a87ac29b', 'fd0aba93-6b07-4572-8780-c2d14c517140', '75b7fbec-537c-4673-aa46-31ada7d96748', '971c7918-518e-4706-9f4a-6aefe3397816', '2d465d63-b8d8-40c4-b2c3-2cb890faa342', '8ef2e4e8-2cc1-46be-b16f-d85545b72416', '43a37136-8118-4c80-ab60-8bc01a693454', '2d1c3f07-ae38-48cd-b1a0-c6aef9c93333', 'f80389fb-0d10-42d5-9304-59525bf496d3', '3921b742-4382-4245-ab08-3198477c568e', 'ae45bce1-c90d-4586-b538-9e669b3e8061', '15f18a09-e29c-4918-a2dc-339848b41f97', '2aec25df-faeb-4440-8933-7102573cced0', '49865b06-63c1-4402-87de-28536f7c1ee0', '53746900-96e8-4ece-b9e8-97a05362ed07', '761605bc-6868-4385-aa27-e8c70b35a6ba', 'fd7f52b3-0c01-4560-becc-1b15b2764a38', 'bf573243-b01d-489a-93d7-84965001ab46', 'ddb07668-0925-4a47-8d06-b8e728b22e2d', 'a0237897-65f3-4dec-ac16-f9d99575977a', '7354e80b-1dba-4618-aaa8-d20fc62f0d6d', '0e3abbf0-dc37-4070-8e88-f010c29a3883', '2ef6a642-43c6-4ed4-83b1-c3031a1a856e', 'f72bccde-0463-4b64-ae32-c2df5d3720d0', '4bdce24b-38c6-4f41-b989-c05b88c81e76', '073c3bfc-88b2-4d0f-9933-666975be19ff', 'bc580063-da93-4bff-9598-aea9b70b7469', '926fbe70-940c-403f-baff-aa6f99290a1e', '2186ca44-c989-4732-a6e1-b82a5b1f3422', '893bcc70-e07f-4b43-9b9f-31da760ecf03', '99e2ade8-0903-4faa-b6b7-c484e038dd93', 'b24eb30c-ecd9-4d3f-9480-33aed6622c4f', 'dd34383d-bb21-4d12-894d-8e5c1fc4673d', 'ffbbb894-c909-4ae1-8c4a-3df4ee8e7471', '6eb91e2f-8f73-4a22-99f3-fd7110a46ee0', 'ab957290-f34a-46c7-961a-c08ebcdcfb17', 'dd2b4477-6c53-42f0-afd9-a905b86ab0c6', '3b5bb293-b75e-4179-ad61-295ecb983919', '33af1259-1441-4b43-8bd0-4a120b7ea110', 'be7af3f4-bc14-4d34-8f4f-c29f25d7c464', 'b5319be8-f252-4f4f-98c0-8270fffb8d46', '3cd3e990-fd39-4300-ad8b-eadebfa6f51a', 'd82e9df0-0f27-4a86-8e15-508ba6e47291', 'a57c895c-b447-4429-b90c-c4c1c1555fab', 'e09b53ec-97fe-4269-bbf5-096b59c7b4ae', '95155dec-e2a7-4c18-a1c1-eb006cb05840', 'b972bb9f-82f7-48e6-a744-f3b9d00cd97b', 'fcbea328-096f-474f-867e-7d499ba4fade', '0f49f69d-deb8-4715-81e4-ab5a34136a12', 'e9eaeb38-86ca-43f0-ab62-089e86221b1f', 'def3d924-a214-41be-ad00-9b06c9f5e515', 'a72efe0d-7c19-420e-b2a2-f3800fa2e432', 
'caa0e90a-53b9-4ae6-a022-0f48a273432a', '8727b6b4-37df-4fa8-a32a-0f4e0ab3e9a4', '9221c119-52ad-47da-905d-6cddc1efd9b9', 'c91d2e2d-174a-43f5-bd93-ca35788e8eee', '67081e29-4449-4be6-872c-5a76e2297336', 'eeae5f04-399d-4133-9e7a-d0d387c58d48', '1a36d310-4dbe-468e-b521-31fc65f1e8d4', 'fac8326f-cec3-46c3-b1c6-8d17146955a5', '08db75f5-658c-431f-ac6d-6a26e22c7d19', '9e5075dc-7714-451a-8a98-7093f734f2cb', '61d01b83-bc4d-4549-bece-00fd04057274', '92ba0c63-0575-4b5d-8e71-a21cbbc662b4', 'e1249e60-cc0d-4232-8c45-6cfe99f30ca8', '94448197-bd9c-42ca-8bc9-e385ec3520be', 'f65ef8cf-009a-46f0-9ef8-ebe99759ca95', 'ad39d970-8c4e-44ec-94c5-402b54564d5a', 'ec9578a2-b5c5-459c-9f5d-641e15485a0b', 'd6f4fc84-df30-4b23-bab5-a771027581cb', '8266b310-fae9-4718-b16b-2268a11a66ef', '0aa70d1d-866d-40ba-9904-37fe9ecac652', '7f161631-4a9d-442d-9674-fe94c7d4a8b0', 'b42ec05b-caee-47ee-ba28-bd57213d77c4', '647310af-5612-4f54-9000-c5cac99bc46d', '3540b56d-1478-4885-aa5f-cc487d2413e2', '6a592d02-1181-4277-852d-248e0ea7203e', '37ea11f2-3330-4dfb-8e39-14035b83136f', 'c4a8ecd2-6c60-48d7-a13f-8b3510b1a364', 'd67455a0-d1be-4ba8-b343-fddc705f6057', 'd1043eab-a90e-44c3-a256-34621e532f16', 'e2301dc5-421d-4c61-9ebf-025dfeb53327', 'f213efd1-6eeb-4265-8e9c-efacd175760c', '7f50a247-6f64-4890-ae09-417cb69132e2', 'fbad127d-491e-44cd-8b38-2ea083ef37a6', 'c49cc88c-b9df-4109-aad6-1e61455dc8ac', 'd142b43a-f677-4c9a-8ba4-03bbcb73bca4', '3b29ccee-9dc7-4dd9-b9f9-6c4e25dd68b8', '3bf63cee-b380-4abe-b2a5-602bb66b7d93', 'cc361fa6-74b2-47c5-ad23-191e170252ea', 'd9b28007-ccdd-4ea3-9e85-b8e800448890', '8d841fd1-23c0-493f-82c7-e86cec7e70a7', 'ef0c7fbd-536a-4e3c-92fc-4583b9cabacc', '4f4e9fe9-b60e-4e90-adba-cf8b8be7768b', '2acbdaab-7a3f-4312-95bd-cc4897e99a90', 'c4a355cb-2666-4ce6-bcac-3741b6745dee', '2eb77ed8-f881-4074-aa7a-b570a250ebb8', '14f2fd84-30d4-4100-b743-f3fa3de63d78', 'afaff09f-5c1a-4be1-ad7b-b08bfa7a6e30', '4efa44c8-0a36-4654-a0f1-fb23e724f1d6', 'f4b2c79e-e306-4895-9903-068dd1b928e2', 'b446c4c6-22b2-4d9b-a23e-e7340a1d6933', 'ccecf5b3-ec7b-4763-b1da-bd985edec793', '47459705-9628-449c-9041-ba003f817cd6', '53226652-144a-4145-a53e-e6bbdf5afd3d', 'c463f1ad-add4-46d0-a53a-2233cb16d4fb', 'c7a911aa-8120-4a7b-943d-061f40e67326', '5cc67de4-da04-427d-9bcc-f0b051550f43', '62f608a3-df66-48a4-b85d-7d2f6f382e4d', '6da7c7f8-1c14-4cb9-9350-d8218a4f3711', '77f2419a-fdcc-45ac-8843-ce6d41ea3dda', 'ce1e8075-ca25-4230-845c-a7cca62e47b9', '7ef78f30-e2fc-463b-8a95-474673fbdc32', '7aacf89c-8920-4e76-b274-e265f91b57bf', '363d8d40-9338-4c9e-846f-657d85e7e8fe', 'dd1bc021-ca81-47df-a456-51c925973528', '01c8055c-4568-40b3-811c-649b04d78030', '96936be1-ecbc-4134-a939-27938d22672a', '12c11d30-5826-4638-b503-0b4b7bd585bc', '47a6295d-edf6-4ca6-9a20-c21982312270', '18641e16-0a25-4cf4-ba6d-75fb6e98d003', '92429c5a-0d8c-4b06-aff6-8c13e22251f4', '0f33f6e5-61d1-48bf-9b96-1ae549d582e5', '5b5ab3aa-f370-44b5-a0b9-b36d71654300', 'bd8cd320-5c34-4e4a-b08b-a6fd8b4ff097', '7d91c76a-dfad-4aed-9162-66619b0e72d7', '349c7c9d-a778-499b-b4eb-2ba5b492ced5', '27ea878f-59c4-4661-bd84-e157e621a665', 'cb7e03f6-e6dd-42ff-babb-faced49bf441', 'd335601e-ab6d-4be1-9f91-b964e697585e', 'c9ab79dc-0649-4215-aadd-4ef2aea89b9a', 'adc314c0-5900-44fc-ac48-097c362dc71e', 'a26fbb87-bb80-47e2-9e0a-1cca43fda8ce', '20cf1b00-b850-4d8c-9364-27bf01a253fd', '3a7c9db5-938b-4f31-94e9-8ba416010954', '67fec110-95cc-4e94-b7cb-64673b9b066d', 'b431beb8-a095-466b-ab6b-4f27bc752d1a', '5898540f-b159-4a08-ab97-00b0c0ed8c2f', 'b915e56b-0f0b-4319-b819-144491be542d', '04016064-57c7-4c67-b4aa-6153bba8d63f', 
'a0c41cc1-12e0-4450-b6e1-c54d0447d8b5', '9d082457-50d6-42d9-b1ab-a1997ca424a8', '90b077f3-7ece-4ab4-8d0f-2bcf591e2cc9', '0848b04e-0e86-4a33-8f6c-cbe009edc470', '58bef473-ea0e-4de1-b208-070f7b099361', '85828bb4-2356-4fbd-bc9b-6967c76173d5', '4eeebbfb-268d-4779-9b90-a83286226fb9', 'ffcbe280-14d2-425b-8830-121c2e1b9201', '24498133-6c5f-4980-9aa8-fd28cc5f80af', '21c37b1d-f752-49c7-8487-25e98549e8aa', '572cf6c2-3fbc-4ecf-92e3-6dcfd546d9a8', 'efe16640-eb29-4a06-834e-f8d207a484b3', 'd970fb5e-caf9-4372-b419-ce538e17253e', 'bb01de2d-348a-4e91-82ff-d62f2afceeea', 'b5f05ba5-d28f-4519-824d-19e9d7cca705', 'd0524158-01b9-47c3-a25f-cc782d4eb09e', '318b4246-f068-41f1-8220-85a8757bd6b4', '268394c7-9c8f-4f8f-a3a4-4114333cb500', '415c74c8-1066-4d1b-9147-31c8be04ae56', 'ac5ee5da-a735-4810-ae04-d7fc01841a63', '9d93f5a9-2ff7-4e6a-baa3-f4f0c702572c', '5048513a-5320-4398-9a4a-e89b6bae78a2', '253ea35b-28be-4f48-b1c7-4971a451ea5c', '5c42ef7c-274e-4125-bd18-b65ee6ed2d64', '8640a2da-86e2-4579-a788-b950b3f7426f', '5bb5bb32-dad0-49e4-9f6a-df7d4d67da97', 'e01555ee-d264-4a9e-8601-acffa3e65586', 'dbbca09c-d5ee-419e-b23d-ef79d477ea31', '485b1023-8c14-415d-bee0-7a894dd60f46', 'bec47bb7-0e46-4c5c-ae6c-c8255df69e1f', '9b78df40-14b3-4497-8690-2b6ce51c2980', '45ab50c9-ca64-4740-a7e8-16b7e0fa2bb1', 'd7834f1c-49b6-44df-8e46-7c230e0c5a8f', '7b0854fe-7d21-4ffb-833c-2730187e0e43', '9810d7f2-4c79-410e-a1f1-69b9cde4b470', 'be180eb6-a4a8-47da-83f8-fc8c680cd9e7', '3ab9ba59-209b-411f-a4d1-313a71c0e886', '2079c3f9-ca9b-4adc-9215-2e6e3ff75da2', '9cf9403b-521e-4646-a2a1-13e4d660d33a', '80a8d908-0bc0-47e8-bdb6-5b2f779c61ef', 'bd843e56-aeeb-4081-8706-98be171e2622', '6c81a630-ab27-482d-af1c-310db129984b', 'fc08f758-f4b5-45dc-9c5b-5a42f9e78c4c', 'e1085d3c-b660-4786-8662-1e2e3708625c', '87332a73-60be-4804-8533-d1e5b4840820', 'ad10f490-ccc8-4823-87e2-24ebf7b55ce2', '7124797d-bf3a-4a52-bb93-212a506bdae1', '2b5c7a1a-c9f4-4e45-be6b-3ada7037fd02', '94cc09fc-b071-4429-a1c9-b8ec0d7f008d', 'a05a5774-59e3-42f5-9a05-368c83db0e31', '08a452c7-3dbf-452c-b265-1277709b4e33', '2771b08d-3194-462e-a0db-9f18fa969cd8', '9f84b580-bca0-4957-9712-4a70b471e2ee', 'e26bbe5c-c4fc-4caa-bc0b-e8568712103a', 'f1e1e1b9-4e35-4eb1-8576-b97afd95dcbc', '38b8aae1-fbac-4dc2-9255-986c41ea9c49', 'ab5751db-7ff7-4a26-87b2-c2b1a23663ce', '3524ce81-7cd1-4763-8d1a-3eb716ec1aa2', '00d1e3dc-f1fc-4bb5-b3ee-427663891fe4', '62873e58-7ef2-4bd4-9dd1-5f419cb01522', '3a53f743-c00c-45de-a643-66378d821827', 'e3f49130-ddd1-4f08-979f-d63f01787fb8', '4e740bcc-c3a7-4274-885c-30debb96a392', '4a52fe1c-215d-44f6-8b00-41a6d1244e9f', '8953d92d-7655-4b74-96f3-e3a6fa90f74e', 'f9069a5c-a1f1-4355-a85c-40d8e2a3771a', '07ca0ac7-2fe8-4a57-a688-f83976072a09', 'a52491cf-671b-4a97-9c53-4ba0a021e569', '0c5688b8-44a6-4fe6-9ffa-28625ab7347b', 'da92ee1b-c8ed-4562-b8dd-62b86c4653dd', 'f049a99d-81ff-48c3-96a7-e2f6332fcd58', '16aab773-f2f3-483d-9ab0-6980224bb0bf', '8124628f-0e2b-45a8-9c18-b07636801ca7', 'b28bd0bf-d311-40fc-8f07-3d1624db154c', '2f5cbdbe-c6fb-43c8-abed-4edabd62c2d8', '4d5c65b2-befb-4ed9-854f-d7d629881648', '5c2e714b-d1a3-44a5-b7a2-05fa9b49e608', '5b098866-68bf-4159-88f3-746a11887159', 'd02b5096-18a3-4792-a21f-3ea66f16ff77', 'a94a2e79-21b1-485d-98fa-1eb1281bc1d5', '2de74cb6-d9f1-4f1d-ad60-736c37d2716d', '2ffa7795-566b-42ed-a03f-1c13bea4c3f3', '27680ad2-c00f-47b0-8748-f84619de1a59', 'e068c825-0ae2-4c98-9dbe-d3fc6384df70', '164d0a73-6957-4f23-9fae-3508975e7c2d', '0d26baba-e14a-4ea1-b45d-d966bbad9eef', '84886fdb-5170-4768-9808-5d1e8d32b2f6', '8629bc90-e1fb-4ddc-921e-d5b343324165', 
'7a9306d9-d33d-474b-87ca-d2275fdf2e76', 'e0502cfc-70ab-4ba7-8818-401488018d68', 'be47232e-69db-4d3c-9519-bbbf9e0fb083', 'a876e120-f044-49d3-9187-23b58925933b', '74796cf3-f636-4ca9-b923-e7c8b67f5115', '8ece30a9-3005-4824-8754-43eb2160b112', 'ed9147cd-ccd6-4ab6-9079-1579f03e91cd', '177e9252-d8d3-40bc-a8d6-8c403dcb525b', '06bf7457-03b3-4015-819f-ce574483496e', '008aa376-bce2-4ddc-9497-ef26d663adaf', '3b72bcbd-9fec-4bc0-bd3a-56b2bbb6b669', 'e3a90c5b-e6a9-47ea-be9e-608b9001e38e', '737b4160-d29b-4432-bf75-95a3cba43713', '854333ec-536e-4293-b286-4b3525f8a205', '57d7682b-210e-498a-afa1-cf69ac899c42', 'c866a97f-37ee-47b4-b501-1a3ee2f2d4fb', 'ac1e1c4c-30ef-4ce8-be7d-60947f8000e8', '6977a99f-7e60-4216-b164-18b235eb3630', '69a4a8eb-4b20-4ecb-9833-c829c165c74e', '7491e390-8eec-4d3d-81fd-b75fac819f9f', '9c8c1c39-38b1-47d3-957c-e0d97a9d0709', 'b29c8ddc-9503-42c5-bb04-952ef3be5b5f', 'eb431236-1f0c-4a65-b531-4da09d2cb100', '207f8bf3-872e-4e4a-90c3-8a9280ed3dc2', 'a4db6de7-d246-4209-a525-e49707e1f126', 'fe58879b-6425-49ca-af1b-31de68cb5db4', '0d31807c-c40d-4aee-8a67-72a6a3accf82', '87e1a8fd-2ea9-4e89-9b9d-b8dee07c0a25', '5c249008-ee64-4078-a1ec-92be4a8c73ef', '89c7140f-4534-4dbd-9d0f-e7e2db4695bc', 'c708ce97-36d4-4a6d-95a6-7172e30d61ae', '1bed46e7-4188-40c6-bab1-ad9d0afa9f12', '6cb5ff9a-10e2-4d1f-9450-070ba81e081c', '50a74e1d-9ed9-4cb4-83d2-4954e168f15d', '20eff6d1-ded6-4149-b91f-ac9956393eff', '23469f20-d953-4736-9cfd-81dde80a0f18', 'e1a794f6-6559-4b53-be98-6c637f9feccc', 'd7934eb4-b28a-4f73-b770-c0f885382610', '47ece14b-9b0c-4f81-8448-ccb5c1d5b8d7', '0496526e-6901-42de-8084-da3661fe38e3', '893dca1d-2539-4dc7-bade-c4ad4099f6e1', '85253820-3f5f-4bea-ba48-3eab8447565a', '21483981-f7d6-44c4-9640-025219f6a80e', '233ed799-8e76-46fc-9b45-1849ef6fd341', 'fd84ec83-126e-410c-a994-7eb240aafecd', '4b632633-6641-457f-80e9-4c78b28d58ba', '62946c03-ba80-402e-967b-6fcca9647b0c', '4aec542a-e1a5-4942-8235-2a116c06e9cc', '155b59fa-5d83-422b-83b5-d04e0223fabc', 'da0a190d-e0c2-4972-b152-0398e85ec8c8', 'ac57831d-a2c3-4bef-aab5-7f0253453877', 'f6f23d32-1d81-457c-b2d0-90f930a29f1f', '0b827adb-7b5a-4009-bf65-1158b31f6fdd', 'c8b1a1a6-e897-4ce3-b10b-c6f6380270c9', '4b2ce6da-71d4-432a-b3b5-c78808a5eca8', '45a19173-ce1d-490e-828c-92dbf18ff9c8', '77e13001-3d6c-4c0d-9005-c16f2217aec4', 'eb0d7792-8b3b-4e85-8f54-c620a3031129', '6a38c546-fcf6-4ffe-a20f-d91887c091b2', '976e396b-39aa-4b02-98b7-88361db9711c', '7497a745-5779-4041-acf1-9595ad72b4a5', '7f97e948-00a7-43f9-b7b5-276eacf66de7', '5d6a20b7-37ab-43bf-b345-d23a0595aa3e', '0d6f105a-ac5c-48c7-91f4-1953a0cb8078', '3a93fbfc-6b6d-453d-ae85-1620fbc612e7', 'e70eb078-a63c-4105-998f-24ad0b41fb30', 'f9bb5af7-1547-4edf-8dfb-d05390d67c3e', 'efe67e50-e915-4449-9c53-281d579273af', 'a43a029a-e205-4386-9ecc-476f76201329', 'f0d2b90c-410b-47af-b927-bbfb5eaf920b', 'e7f9ba04-d412-4fc3-b238-658d661f84bd', '5b814018-2152-402a-9502-63251b2edae9', '7bf87422-f89e-4bf7-9589-6688e0ff047d', 'fcdafcf6-a465-44a2-a52b-0650ba0cc5e7', '3d14a2d6-9f21-40dd-ae51-c12ee240d8db', 'ba58cb8e-ceca-428f-86ed-7b92fc69fc2e', 'b337913f-8451-47a3-ad28-0bf87e0643d2', '17944dc9-da80-41da-8efd-d5b65b450a27', 'cf90c388-13cb-49a3-9762-7ef6c6d499c6', 'c7f75f93-4e5c-488a-b3a4-5f111dfa33fd', 'b1291f75-29af-4b0b-860a-e77814d48c0a', 'ac57a236-33bc-495f-9123-1ea8fab126d9', '9c025d9b-94c0-4fe6-b76e-7c1ec212350b', '75928670-a256-42ed-b51e-9543b8ca1635', 'cf864623-74f5-4569-957d-bff279028373', '69efb8c0-9e46-45e8-a283-95e207ab2b59', 'fb21b068-3c9d-4e70-abbc-47810a638dea', '70a11a66-213d-4e5c-b20e-eb2257fefb85', 
'9a12459f-c255-45ae-84b1-6cfd9738ab2b', 'e21c0822-6e8b-40c3-ab0f-507a05cfe391', '5e7dd42b-1e3c-4ded-920a-138249a90e09', '018424bc-2b43-4895-bf27-25524f6f8380', '94fdec88-9cc7-48f4-b041-5fe7a0e43bfe', '63e5b952-9638-4eb0-a147-6621d743f64a', 'cb06d83c-b5e1-4eac-ab40-c47df0c2bf4b', '09050891-3b7b-435a-8a3f-d7c78a7ea59a', '742c43a2-d8c9-4384-ae47-74355916bde1', 'b22fd18c-13e8-492f-9206-f270e1c0063b', 'aad7bb7c-276a-47cc-bcfe-9a9928a7dc6a', '3016123a-a327-43c5-a94b-fabc3088031b', '0c335735-ee19-40a2-812f-f5a6836bba19', '6c662047-b05b-4584-ab28-41ec154e8bc0', '5c4f8bff-0af6-432e-8849-97cdaf4cf22d', '21984a20-8f34-4c0c-8ce3-aa501087ba00', '880a3c33-50ec-4de5-b6b2-df0f1dacd2a4', '3bd98e39-32bc-4132-ad22-6a4b72ec0a91', 'e5d40dcd-44ae-43a9-8308-849fe531e379', 'eecb4031-c493-4c33-9c13-6be3a0f7073e', '66346330-20cb-467b-8951-1041897cf4ca', '40eceb17-1c9f-45c9-8348-ff758fa9143d', '8122f3c5-49b0-4ee1-ad1b-17ab77090e42', '0015ee0e-5a8e-4fa9-8865-44ffb03f8aef', '3cd77bc1-e029-405b-afa8-17636d05fc54', 'dc315018-cd05-4201-be20-97475462e360', '8bf39c3c-11eb-45cf-8184-d4f479730c00', 'e1077c72-742d-4b92-9af9-809300848b25', '44cf6c9a-bc3e-4250-ba32-65e1a9ee8dba', 'ba54e5f8-d38c-486c-98a0-6805143916ca', 'edb6948a-3200-4bcf-9f21-b8e4cf9855cc', '0c331edb-93bc-4c05-ba8c-df5f9b8b5e3b', '5f534334-cad2-4e1c-8496-26e7c2851c56', '96921e5f-4581-41ac-ba19-481ecdc109f6', 'ed198579-9a17-48f9-becb-964d949610d3', '120b410c-dad9-411b-9ffd-54c21f9d9fe4', '03163388-ddee-41fc-9fc2-5a58d9cac277', 'fca0b907-93b7-43bd-988c-89361da1bc03', '343aebff-eb62-454f-b7ec-757094f90a37', 'a7fd44cb-d463-4629-8177-21a665450898', 'c1763fbb-2ef1-4bab-a748-ec22cffc2f45', '4f61e368-61a1-492c-a48e-94dee66ad83f', 'abe369c0-598c-4b89-9bde-d386bd7ac9bd', '10e77974-2e6e-4613-81e7-7666a782558a', 'f48eb466-2d0d-43c9-b207-f3b651288ab3', '381a22ab-a5c8-408e-9f8d-cdf1a6feddc0', '45432e3a-56ff-4001-8ea6-02cf98a53d9b', '1422db7e-2293-4dfc-947e-42c49dc5759e', '354445f2-d83c-4989-b22a-89e30ccfd5e7', '180a43ff-45c0-428e-9335-fd8ef5735acd', '14be4de9-e856-4ffa-9609-d692b832c32d', 'b00469f9-32b8-475e-ba32-7ef554e7f7eb', '72ee7187-3ee0-40ff-806f-8800c05ecd4f', 'b0b0313b-608d-4cc3-91c5-f3a210d3165f', 'd60da061-2da4-4bc3-9463-35c90e1437ae', '2113c51c-00e2-4f91-b7be-77179d6dbc04', 'aa221d26-5f13-40e2-8235-b769e1db1428', 'e9c1409c-b027-4812-8c0e-b46c10fb0b0d', '53d33101-763c-4d08-9d26-eb2df89a0aa0', '4fc3f2b5-4257-4896-a601-3866686574e2', 'be8ec969-4ec2-4cfe-982f-644d8c35c7ca', '5022fde2-bfb7-49a5-91e0-fd9817a2141c', 'd28ae317-ef56-445f-8faf-0d72bc83d624', '993d9ad0-37f6-4f35-9df3-94ca56959f7a', '6a5357ed-8024-4c8c-8cdc-9d6d4f84b398', 'd27fac31-8b44-48df-b96b-25b0302cf855', '8ca616fa-01d7-475a-bdc3-e64bd544401d', '99bf2bc2-d4d0-45ca-a315-05b1ebbbbf7c', 'c06eeaf9-5803-4445-8391-f81b1662e4d1', '3456c0cd-0926-49df-8df7-4b9903fdec1f', '33038505-ee5c-44a3-b822-61964af6e5b7', '98344f14-7384-4f4d-8d64-6f1c34b87715', 'dded1525-4c57-4874-bd54-788afea27583', '6636a5a6-f8ed-4891-ac7d-f1d2cea00a31', '83ccdbca-227d-4123-976d-1f36fd71f113', '7049651a-5ae6-49a0-a5f4-eab91585bb54', '06deee1e-3704-42e3-86b6-f9add0efc342', '5620d72d-5aaa-4ea8-baff-13172b1422c5', '818723fa-30ca-4a87-b360-ba2c7a0a99a7', '7a959cf4-1c86-4a6f-b0d4-80ef6457feff', '4081652f-3de8-4be5-a05c-4e005c40877c', 'ab181698-1c8f-4915-828f-11265d7cc12c', 'fd09a8c3-1d98-4023-8727-f4d01264eed4', '20395e65-de1c-4dfd-950f-90ad3fbdf116', '0a48e83a-d248-4da6-a37f-f0b18bf6d463', '352ecc06-ae1a-4ccb-b51e-69978be04ee8', '0e3074de-6497-492b-956f-2b0260d8b594', 'ccf599bd-a5d9-4468-9ff2-f160361c3930', 
'84a8b297-f730-481b-89df-509caaa7c71e', '3f83901f-8b7f-4d47-a63c-e7b469525be3', 'dc4cd5c4-f74f-49e8-bc4d-79c6fbeb6a53', 'defbea4c-0a86-4e51-a4be-4904e7241473', '95c0f6dc-9e6c-4eca-adfc-cfbf60a36766', 'c6b88c3c-14e7-41ec-bec8-52db96600e45', '57151ee1-c317-4b92-9063-9026fcd2d714', 'ac1a48f3-c04d-4581-972d-fcd8ac0475fc', 'dad57afa-3641-45b2-b72f-526fb7d3bcc1', 'cf34815b-73e8-4b54-8679-2feaa8d13de3', 'ec1e87af-dfee-4647-b8c0-fb20289fcee2', '8706fb54-7fd1-4aa3-b722-7d8fac4f827b', '0bfcd3c5-a38d-4404-bc29-ad8d3a9c4c24', '43efecde-9069-49a8-90bc-f8feb40d12b9', '2b5bf77b-82db-4503-97c8-9b0788ee8b22', '9f09ad54-c487-401e-8cd8-ea3a2280543b', '729e6258-bc72-4570-bcb8-d843c3267915', 'e7a1ed48-2e5f-444e-8d9d-c093b16ebe44', 'ea3157d6-fe7b-4e54-b5df-5379624cc497', 'cfa9d41d-3a02-44ea-91f8-953eb3182764', '59920d50-9d77-4fea-8d55-4a945acb63c8', '501eaea8-1f32-4229-80d1-2c1b5fae67af', 'e1e45db4-3118-4547-bed9-4c716a11159d', '5e1a930c-801f-47c6-9cd7-7b59afb19e25', 'b093c56e-4796-4d49-bd47-ff25c77cbfc0', 'd83bdf9a-1d8d-44f0-ae07-4c85fc160883', 'b5306c1c-5720-4e74-8144-fdbafa5d7985', '874d33e0-910c-491c-a02f-912260fa2a51', '8e49476e-b869-474d-a4d9-f14a70cef522', '179210b1-13e6-405d-8be5-d0ae62dfa88a', 'ede69a6e-c0ef-4473-b9a7-f1b7bacea0b4', 'b57049b0-7f33-452a-896d-b3f5b85758ca', '73b5bbed-bd34-4007-8fb7-fbe28dc000d1', '51179581-9016-40e3-95e9-e9066a8612f8', '0f51ca0e-7ca2-4f9a-96c3-099be2513ec7', 'd38538a1-2ae7-4110-8e59-049026736ba5', '90f9ebbf-7a12-434f-bc14-bac0d48f1ddf', '7f6b00e2-7322-491e-94dc-1162cffa67a0', '2a65e367-0ce0-44c9-9929-d90c1fc0fe37', 'eb989f2d-e476-4fb5-8b49-ee0594a6be1b', '142b5756-806f-4a84-aefb-47080fcfdd60', '7692fd44-1d22-40ea-8a70-8b185df98ae5', 'cbce5267-8389-4b70-bf07-522d4f39717a', 'c95e0733-b655-4cc0-9126-67f25ddc61bf', '3e485f6e-4dee-4693-a96b-506341ad0448', 'e99f020b-0897-4917-893e-8c546902525b', '4dbe96f9-6dbf-459c-9500-01d3264a16e7', '02cebec4-398d-45d6-9f73-ee8b3d008a3a', '1773424a-c26b-444f-ac50-ae6ab9476007', '39fe13bf-0417-4b5e-841b-76473ee321ec', '4e070586-f079-4ca6-b572-c2706141bdc7', '0e16fd32-50c1-4174-b140-7bafc9024e16', '56023f57-a23f-4dc5-a39d-50b26e76660c', '208ce00d-5063-469b-b608-52b44e58fefd', 'c61279b4-9a7d-4a0a-af05-7c8dc38c5efe', '02e5db5e-5dc6-439e-8ebb-aa962405d8f7', '7df20670-4aaf-48c2-8218-a04a3f3c5ea4', '966522e6-3e16-470a-9564-d4e7dca08d89', 'bb0fbc9b-68a7-45ad-860b-70fa65ddfad8', '4d9f77c5-d29c-47ae-b81e-2a9e7e07127d', 'f86486dc-3bd6-48d8-911d-2810f332e608', '3eab49c2-abc5-473d-b20e-2bbf28947943', 'c8a8283d-5abc-43d2-b663-050d0c03fe39', 'bb0dab1d-3c88-4827-b47e-3832fc3a5972', 'e14077fb-ba50-4235-b366-fd10724fe8a6', 'b14432bf-f0a3-4c69-a2c1-fcc60e48c254', 'c4fcb912-3b7a-484c-8829-8cfc799829e9', 'd88819a0-237b-44e3-80f7-9175c7bef484', 'a4b32777-37e2-400a-bc8e-89de8b3c34b7', 'fe989eaa-e5bc-41da-8393-ee95163fb109', '6e2ea477-9d45-468b-a188-4e91190292d2', 'ca01230f-fa59-4cb5-835e-011bbc75e25a', '1ab716a0-bd75-42e1-a200-0bf7ff94c351', '32b05b54-16b3-4a3a-982d-ed86e1711b56', '0c70d942-64d2-41ef-91d9-d1482b42a64b', '27a768f6-bc85-4cf6-bdae-c291de154696', '438b9df1-1e93-43ec-85c0-d95945edd11f', 'd96e5d7e-0b6c-457a-8aba-e3bd4bf22188', 'd1650e02-5777-4bb9-8ac0-1758cb3d48df', 'ee77275d-9118-40e6-be7d-6d4def126c28', '1809f320-111f-407c-be6e-967821501cc3', '0dd0c204-eb8e-43e0-8642-499bfe9864af', 'e8579d44-51fe-4f29-a6ee-cd01a6fe2730', '6056b55c-b4ce-436e-a906-d7c8af889a75', 'f9f00968-73d6-4816-bf04-1a994f0d8047', '8e9f5ed7-ea4a-46dd-a9d6-c80f17684bab', 'c4b18d3e-957e-4f24-b917-8a1740fedd9c', '6a2a5685-b8d7-4a56-80d3-bb1e5dfcfe43', 
'0b77a9e8-08d9-4f09-992f-c07a706cd476', '9f33555f-7429-4e6e-8021-ac93456b5b6a', '324f5e62-cca2-4824-88aa-d0d9c80e8ff5', 'f9de9b6e-7ed7-4488-aaee-4f53e387c121', '8808cf29-5a3f-4e63-ba82-8ab2a112899f', '3873a03c-c090-40d9-b74d-a372329b71e9', 'aba96395-220f-4223-8c33-ceaf8d40fe53', 'de689115-6d31-414d-a738-45c2d5a89196', 'af5a0677-8d11-4291-9ccb-47f2f32ee939', 'ad83fda2-6800-481c-848c-dee120da5b1d', '2bf05c38-ced0-4e8f-8261-f5da6ff2f9f0', 'ea3e198a-3128-4655-a085-b9f952572d78', '4c0631d6-6482-4465-bb7e-7c673ff99c12', '19540e55-5b04-4135-8297-d97d8583244c', '03a679e3-4fd4-4d5b-a076-b6792045fcc9', '5554db4d-2497-4788-a7a0-ae6300870da8', '0c8f41d3-6f83-4ab8-8620-8f023e53449a', '2652cf84-803a-45c3-8913-90452c43ac6c', '04e844fe-6920-44e6-bb06-46025ff83b3d', 'c13c4478-6c1d-4a22-b559-874ae9e9e347', '3d5f1acd-2933-4e11-995a-eb9388e8277a', '0362da56-5875-423e-85a1-1bc5c1c059e9', '1b349918-9828-4fed-ae81-24fc8e6a9463', '5b280eab-3331-46e9-931b-a56101a53c7c', '1665f5e7-3bc0-4369-9eb7-0d75fd062d4b', '0b156382-40cd-41db-9846-93a7de33cfbd', 'e8f9e94a-0aa9-4433-961f-a8f375ca8738', '111a58c5-babf-4807-b9d5-7f518007e24d', 'c1af6bd3-ca6f-49d1-b2dc-76657a2b2e18', '52881ebc-291e-4798-9324-3228cbcbe2a6', '1468e609-4a58-4594-b76d-616634f06f83', '87e456cf-7094-4e67-9551-c992f417437d', '5e2458c3-5c7f-41ca-bcdc-ed6599191fec', '9cbd340a-7dc1-4829-95fb-6cdd23bd86d9', '252c98ba-b3cc-40b8-8ae6-e5dc0946df55', '96bdfd44-2c03-471d-aecb-438e36d9c5e7', '1d1b90f8-f6ad-4628-b1b7-1da215ea88e4', '46d5c86b-fc1f-4f48-aff6-1c3d9385a888', 'fe4b6cd4-dce6-4006-ad73-c39a2f693f77', 'b952571c-27cc-4084-ba0b-ddb176b7fba4', '8c22f5a1-7639-41d6-8aee-e937e4a7b167', 'ccb754fb-3317-46c1-9dd1-3eaad9ebcff0', 'd42c6eb8-c397-489f-ab67-6a8ab3b3206e', 'db54bb81-1060-4791-87d9-586485252d9b', '2dfefa48-9d49-4a1c-964b-90dadda8a40b', '21d2e5cf-75b6-4e78-8706-b0b2125cd53f', 'dabbfe07-4de6-4959-8b1d-418132a1795b', '20b1a09b-2378-4ab1-9312-709ec3d845e6', '4da76e6f-d4d1-4468-a48d-2040f9727a0d', 'b9076a8a-39fa-4ff3-ba38-6b8d21b57b18', '43a9823a-525a-4731-bbfb-6f791718d55d', 'b2841ede-db4e-4743-8df9-9e9d9ed892f8', '4c27fb9a-66a2-48dc-a4d4-896ec87ffe1a', '21ef7798-c937-45eb-8428-b59a80c7290a', '110bfa8e-bb32-41d1-adf9-964c10a9ee76', '5cf10519-b699-4c1b-8f22-dfe9c7771161', '9da6918f-c2f2-4fdb-bd80-194220fcf989', '41cfb084-b1c7-4033-89c9-d67531384ead', 'd83e0652-95b3-4cd0-b17b-6d24cacf45dc', '58670ee5-4e97-4b59-9fa7-f969130df90b', '4d69eadc-23e2-4e98-af8d-6653f15563d3', '23b27688-6b1e-4431-8d64-81057262e7cd', '0a2d8bb5-16a4-4d53-85c3-1a134fc1014c', 'afcb3e74-3071-474b-8edf-8ffe4d1893b5', '90230c1a-41b3-4c31-89b0-04f5795d4fdd', '4e340de2-8ecc-43af-902a-78eb6baf587e', '188dab93-af56-4389-8879-e103a2cb2728', 'f23bf534-8bbc-4ae7-9eca-a9b4d54a96b8', '91093f44-eb86-46de-8f1f-62cbf71d60e3', 'eda66d17-ecd3-4029-b5d5-70e89510f9f4', '07809a86-b71c-4462-a42d-63690f16f623', '1ea65d5f-f256-435b-8288-6626c539f54e', 'd9321a2c-2965-4fb4-ae06-9ab99e829f95', 'd82dea44-9fa6-443c-985e-425e7782a8ea', '74bf3805-fa7d-4953-8d83-a1943b4d0810', '01365b0f-4886-4e0a-b9ee-82894430bca6', '931c5610-7fc8-498e-ae3e-d51fbb5067bf', 'b20d3cec-4c8e-41a9-ab63-9cf2b4bd0347', '3f9915c5-4f01-4bff-9592-c3aaa52b68de', '0d991e97-9bc7-4bcd-aa98-f0c2cf04f37a', '552eb22d-64fc-4530-956b-f4ba81e6ad0c', '5822a97d-b39b-4a8e-aaf6-66c0f55442e8', '34d72a45-7010-4d21-a036-4a2438622e84', '93052f03-d3f6-4e99-b33b-fbf01d9c4015', '03f5dfe8-049d-461b-85c3-6fc69fc8c283', 'cf03deec-23d3-4660-8c2e-b8bdefeb9b0a', '5fef6365-8c1e-4ed4-8fa1-57358c7b9fe4', '7b2ce29d-8516-4c4c-ac78-9ae1dc3691ce', 
'4104e677-2ae1-43a7-81f5-9cfc1b727c56', '8b577111-1c19-4dd4-9454-b667542fc14c', '747abfd9-f93b-4a35-a33f-2439d429e01c', '467b6110-a23a-413b-adaa-144687c8b582', '998dd0ad-3f08-4d96-9a8e-e9f183ca2f99', '7e96da24-14b2-433b-88f6-6996fa4b908c', '966380fc-3da0-4d88-8cae-78447e5d7da7', '6c507807-fc6a-462a-a752-6047b0c4aa80', '1eaaa15d-ff01-4799-806a-f445bb595cc8', 'f3e352ae-6087-423d-ad5a-8b74f74005a5', 'dec59be4-f19a-457e-8e5e-e8f83adc14c3', 'd82ec74f-6934-437d-8c2a-453571ae6c22', '951b0827-49b0-427d-9221-81eb2468bac9', '9577494a-352f-4b77-b8f3-0b636a94e776', '6387a9db-9a58-4c0c-8b55-672fb90a3b06', '0e89eaf6-9e6f-4182-b757-2b9d34b82e01', '0be9332f-4228-4dfa-aa4d-06df280f11e4', '3835d593-3474-4ac5-977a-7f3aa3233c47', '3131077e-71ff-46ea-bc8a-0e1b739cf1d7', '6b6b9fa7-a4b6-4972-afb4-8bd080746a3a', '5d657cf5-5def-44cf-9c73-cd5b8e4b4da5', 'fe0b73ca-891f-46ed-bcea-14cdf1d6081d', '725c1567-7319-4680-baa0-ce8b36462aac', 'c00aebb1-09e8-4033-828e-1e814976e43e', '935aa743-5e45-4ec7-9a13-3c451f051c57', 'd9ef103a-8dd5-4980-9500-80ea5d7f6f89', 'bffb275c-f674-43f8-b6f4-79a47f4e86ea', '8532452e-3204-46f4-8a71-5eba74002460', '0f893cfc-f4af-4c47-a870-e34b69ce837c', '3c1dd867-820e-44e7-99a1-e012d254371a', '6b42ab6c-0be1-4230-8564-4f3a0e8aad13', 'c959bbd4-979d-47c9-a901-dc89dafd4d32', '1f646ccf-5d76-4de6-b8a2-94b6d0c607e6', '845a5953-31b9-49fa-a131-96022bd91fa7', '2bb3aa71-526d-4b6f-a946-dee0f6e14337', '3b660af2-7cda-4118-842f-b7425cce0a68', 'e9dc4ab7-a302-4228-81a1-a43bfaec1164', 'e614955d-7776-4273-a8b8-d34f6f79ab8a', 'b0c04914-85fb-4225-810e-8a51136853ac', 'f1ced455-9899-4c94-bcc9-dd5580ee0e1b', 'ed57a0cf-58df-4b0c-9210-b618274047fe', '9d891ff6-69b0-41cc-8405-bdfca1093e9f', '438a4b88-6dda-4269-af1f-1b6c4b033e93', '7e0187fe-cff8-400a-8b41-7380564ecbc0', 'fc2a4192-9c3a-4691-a394-680e120ff67d', '8170a346-4781-4913-bf87-adddfb94fb3f', '8aad025a-2761-4cc0-a02b-d89232d2bf1f', 'c10b68ab-7a7a-4e1c-9d9b-09ffc0ea3b03', '3840edfe-a987-4127-bf96-cc48530700cd', '67b4e872-4238-4ed7-b87d-cf82945b5623', '173d0d33-b241-44d4-8ae9-f6e805482dac', '399686aa-be33-487c-b7ba-b282d8350b7b', '5746c9d6-3910-4e15-825d-2dcb61ea83fe', '99a4ff73-fcd8-4b7a-9299-8af82881f974', '72a01bc6-12d5-422b-9609-84e75db9a884', '96b058e7-3175-4d82-aea7-f799a0b0c99f', 'be7cb997-b4fb-4ef8-a5d6-7c64ad1291f4', '421aaffb-c82e-40dc-8d38-9898d99cbc26', '3dc31f9e-9bd0-4161-ad17-d959c813fd11', '53e5e0fc-8a69-4cfa-aec6-b0ce3b9edf94', '8b726202-b708-4739-a163-254b6fb39313', '8f962a0f-1ccb-461d-93bf-5e81fccf824d', '94d83e6d-7bcc-4fdc-93ed-6ba68298148c', '796b0c45-9307-4939-a30e-a5f27e5b2684', '0eee3b1f-00ac-4e2b-a98b-49cf63946c2f', '311fb727-5e71-4e40-a012-555f9a446d05', '8cc7d87a-78fd-4d45-bc1e-553f3a3a27e8', '2ca6e65e-5871-42ee-90a1-734a0136144a', '522959a7-fb80-4765-a7d0-2d522dc28faf', '51de8d48-3261-4288-a3f5-380223cdcc58', '4413e19f-3837-484a-91e1-56b6099fc231', 'd4e0fcb8-5772-4bea-a791-5b37fea8015e', '4d267849-be47-46d4-9c69-937bd09af531', '6874cb22-bd36-4d4a-9b90-b0f3996c4e99', '1916fcc1-b219-4215-b9e8-81adee18369b', 'e44fce12-c169-4132-bb7e-0dcdcd159631', '0cf9cc8d-faa8-4c1d-b3e0-7d2521184620', 'eff59481-d6dd-4705-8ee3-2659ace39f4e', 'f0f5d3ac-a5dd-4606-ba00-f9f2f822257a', 'eb59557b-b43b-4090-863c-a213b86c1253', 'aa89ca11-798c-48c2-947f-c6338fe1f5ad', 'cb672671-6930-4df1-ab16-3b93cc1fb317', 'e000050a-f23d-4a9c-b7f2-7857422518ad', 'babf30ba-c930-417a-8276-a44be7de21f2', '9bec8178-d59c-4ddb-a098-fc7114384adc', 'd5fcf26d-c357-41ca-bf2e-83a9c934922e', '8903ce30-dd24-4855-b231-04ca7078df2d', '02d922f8-31b6-4e0c-89a5-4e0e04ccb799', 
'e6ee949f-dadf-4368-a548-87e7b11536f6', 'e2493301-56ac-4688-9810-3cbf11fc2c08', '43991c07-3760-45e9-96b1-94bfcf18e295', '3fb19119-dede-4957-88d5-0d1d7511c599', 'fefa4b6b-fad6-43fe-acc5-b09a58b51743', 'feb4d014-91b8-4cf4-b063-3db5fa1c5ad9', '210f2266-261e-4c97-86b0-c28ed536644d', 'bdeb5530-7f62-4a41-b7b3-15488cf0b46c', 'f09020cd-9642-4ce1-97ff-7fca54906d2c', '2900465f-4868-483d-a1ae-428094f0f476', '7084ac95-5d06-4794-8df1-1df11413b78d', 'ff3c5d6f-f241-4157-90a7-67ac26f8f006', '4d2cec7e-d5f1-4281-86ae-658a8fa41fe6', '1fca1b8d-e135-4bae-a10a-385e383d9653', '2c8b3a6a-9da3-46e8-8a32-49cf12335cde', 'ae5ae087-cb5c-4120-9632-4d3a0b911514', '197e9820-7d3f-4da7-a8a0-fa2fa80f42a8', '7f557950-8930-44cb-ba42-8e825ed735c0', '46eacd24-af3b-4e66-87cb-e643bee894fd', '169e2182-995e-479a-aca3-85268160cc53', 'c2bf5a54-dbd2-44b3-bfc8-2dce20557952', 'b759cf1a-7b77-415a-bb27-8ccffd28d7db', 'a39d982f-e7c8-4351-a90d-3fcf4a988c19', '46f78e1c-4123-41b1-a188-667850b231e4', '95e34313-c0f2-49dc-a0fd-fe2fdeac2a6b', 'b91e28e0-c437-4aaa-8408-e8a02d4b506d', '5e6395d7-06ec-423d-b50a-2ac4f1437fd8', '7903d3c5-95f0-4eec-b649-4d1c80d792e2', '6f5d09ba-947f-4f3b-bdb0-d18c02f1afcb', '22329393-c166-40c7-8d3e-6816bba6e29c', 'e9aa3380-b6f3-479d-bae2-376aa5c8dfb7', '3f7d36a6-e87c-4dc9-99a8-365badff3e05', '5678f58d-46ca-4b83-a49b-4fcddc6fdc0d', 'b274549e-86cb-48f0-9558-2184cdcff436', '89971c5e-5588-4492-9b31-06f6208e7000', '5fba0e8f-e69e-47b0-a343-76cedfdae173', 'b4c68b68-50e8-4113-8957-ec1679cc8dfa', '378261e5-81b1-4c04-9bc1-7c7ae8e7dd3d', 'c10a1b6a-aff2-4960-8c91-329876776d69', '7bc562a3-bcbc-4a52-9861-952c38277fa9', 'af80cb3e-2dee-4fa9-8956-9597e1688ddc', 'e8eaf64a-2297-44f9-90fc-cdc9bca1baf7', '5e8df0be-24ac-4a94-bee0-584ee04a07b5', '064723d6-00e5-4674-a242-464a93ea1d28', 'a323a4db-f6b1-4e74-9798-19daa2fbba68', '29cf590e-00e1-4742-9133-e82a679242f3', '860b7eee-16e5-4f29-aba8-181826c9f72e', '385649eb-50a1-4c1a-9a89-f33886c478a2', '858ddf6e-1849-4aaa-916c-817d51358073', '2ba0a6dd-2694-4477-bc0d-8208353a46c7', '040e4431-86c9-46e8-aaaa-4eb33042f9f4', '6c9971cf-8294-4c26-a621-ba608f2e2210', '9769cee8-4227-40a7-baa0-5979a6781d9b', '3cdaf913-86e5-4c56-8aaa-0e84256a6eba', 'cdb7771c-7b75-4cdf-be63-f5fd83f960a2', 'ba5c502a-b8f9-4ab1-8ef6-f69a33850530', '488c602f-9606-4652-9f48-92368287c697', 'bf8b3a4d-431d-4bd2-9fa2-4744fef02e22', '8448b340-6747-400f-9b41-32da8e44d7ab', '1d1d2cfa-a75c-472a-ab77-20eb6d3c4633', 'c904dae3-f53d-4d7a-85d1-4c8a2dc2339f', '47b7a0e4-0891-41eb-bde4-2544d86127a3', 'b35e857b-8a56-484c-b789-851322371a6d', 'f374f5a2-2929-4541-95b1-0a31790de38c', 'ddc26d09-ccb7-49a3-9268-d5a658fb64a4', '28687620-f1c4-4a63-8d61-39378a106936', '19f09579-6c09-4218-bfac-987279dc4d4a', 'd1f0e4ea-fcf5-484b-bb3f-47d5a45c0bf7', '1c1804f2-7225-4aba-b442-2a10f37fc1c0', 'a85dff36-4c1c-4720-aedd-2dae93df43ad', 'daacadca-84e4-47b8-a21f-2b50f00098a8', 'aa423fec-29d4-44ba-a751-d519c6c6f195', '4c9f597e-3920-48ea-9891-4911e3655c45', 'ade3111b-5d39-4ec0-b156-8288d09231c8', 'fb99de55-c54c-4141-b09c-717eca8f7d0f', '3f298b84-d402-48ea-bdf8-1ece07f281c3', '9c50f1d8-2a93-47b1-a017-1ffc6daa7b4d', 'c08f65fd-1910-4485-b722-a53187e09429', '7b7bea4d-98f4-4992-8b48-c18223bbbad7', 'cf51c84f-ce2c-4b61-8dd1-546d231409b8', '0551505c-d424-47e0-9d56-78d0a51f8dc3', 'b6801e5c-2242-4cf8-9604-78109cd11e21', '9f882e90-e94a-4585-874b-7ec8d8744e64', 'ea63003d-77da-40ab-a201-b99e4c9a1d0c', '11a6b488-ba25-440e-ac49-9808e106fb5f', 'ad7a086b-10de-4ac3-8ffb-251730a044bc', 'b92777e7-13ae-477b-84e1-a8555d9507d2', 'a9149c15-aa45-4752-a3e8-d032e9f39f8f', 
'c6508b93-09da-4085-9516-fca9ece1e576', 'b56e2469-8cb8-47ac-b81f-9406ff4c8fdb', '373055b9-80f9-4909-a8af-5d75ffe56cc9', 'd2dd5c42-65c0-4269-849c-ab323ff18287', '7b148d1d-7614-4c0f-9e5d-a2a1551d069b', '399d6809-9c5b-4deb-81d3-04febd01e004', '55d0e130-d99e-4bc7-9eb7-c82d43b8ca90', '318d3ab5-c08a-4a28-a639-d975903cfe4e', '5e8de72a-e8e6-4231-b369-a7224eab9c03', '0fb3202f-1083-4f0a-89b4-c6bc08d579eb', '33accbac-6ea2-4d9c-bd65-17aecc2d1788', '3569f895-c7d8-4b32-85a1-5aa4390589f0', '9d1d4098-e4c7-46b7-8bdf-cf309f8d73db', '0603eb0f-f10a-40c2-ac2c-c4bd46e26b6e', 'bb0eb5d2-e021-42d4-8e73-fb39903f0f78', '44954583-fd9f-42b5-9d07-69dde1f0ec07', '258c6347-b992-4b76-ad70-be74790730f1', '81056a62-676b-4617-8de8-d9649a13f48f', '003c8f23-db0e-48a1-a064-95b5e3563b21', '02f3da3c-def3-42df-aa07-ef380c40dd50', 'd02096d2-ea9d-4cfa-b6c4-f53497e08ac3', '9b9c22ab-29fd-4b5d-8682-36e0ce7b7f0b', '598ab5a2-ef47-49b5-8519-f3300ceb3d4e', '84c26bb3-042a-4fa8-8b31-c1b2b82167f9', 'faf13981-3b3f-409c-ad0f-51573019e6ab', '22537d25-2ba7-49b2-842c-ce26778bfae2', 'd0e95c2c-c657-4aa2-b384-a4d50f60be69', 'e1613049-4b33-4fa2-a78e-70c4aaaf9270', 'c2028001-fc3c-4e8b-923e-55b904d40229', '4eb26e33-e232-45bf-a404-282144abe044', 'ef79ab95-b4f9-499e-8ee4-81eb5ed67251', '49ab448a-566a-4f73-ac9b-69101d468221', '8af208aa-b377-4c33-8fc8-6090b77a7afa', '1dae6998-86f1-4362-a152-4554f89f0fb3', '4b8b9049-809b-4648-ac76-8fca482e7e86', '62814558-2fa4-4e3a-a9e2-cabb62a59e7a', '8cda2b59-82cc-4bb3-a3cf-f12cf5fccadc', 'ec4eb38c-d38e-4302-afd9-524b0ad73cb4', '8a4e1146-c83d-4922-a321-4aec8785aad4', '1b4a6022-38d6-4b62-89d5-f6eb68c60ee8', '84bc9749-fa80-4f05-9b35-5c2ff3045419', 'b11a6f74-2ba4-44a7-80b0-869cabc2f63d', '1196e8b4-564f-4999-87e2-ae9ac6fad282', '6e34e507-3af5-4ac7-bfb5-51d78d7b0928', '6b91f418-6741-4070-ad58-ec73886e303f', '18983c58-6531-4faa-9727-ea6c13176ee3', '755d1d58-68ca-4dde-adf0-851c2b476d3d', 'c1d6a069-305d-4689-a558-17669cf479e4', '5602e6bb-f08b-487a-b3d9-7630dbcdcdc2', '813715af-5676-4b8f-9fd1-a2663f35ef7d', '26924049-c3ea-4759-a2f1-0fdebd1119b4', '9b6aae76-6230-4c92-9b4c-9304e022e548', 'fdc3ec36-ea11-4266-8662-de8352cc52c7', '97e3e269-a605-453b-9dda-c19fea3fca48', '53ae2d8d-4a16-42d9-bdb4-1bc1023d664d', '3211127a-4ec9-4ebd-8907-dffbbc58ec55', '1fc04565-7418-4c10-9ac6-50647eedde5b', '306678d4-3658-453f-a8a3-7456018d6309', 'faa249ba-de7e-4d76-9474-8926644a2a30', '5c7593cf-4a7d-4cd4-bd29-cb734a9634ed', '4023419f-9887-4839-a1c1-b21a49331495', '11f0be9d-9f94-402f-bdea-87147473f981', 'd4b7e836-c7a8-4c98-874c-132496ed2971', 'eca5a70e-7107-445e-a627-e66a2fb6905b', 'd345402b-0b15-49cc-b579-a2c8f8f52a54', 'e37b0b51-218f-4ba8-b863-42444de3ba28', '8a397265-6fa1-44f8-acab-f87a52690dfc', 'bab522f7-2829-4f92-bc7f-e37a3b80ecd5', '459832e0-6ac8-4983-bafa-a5c51c478b1b', 'a4b3d48f-9a05-4008-ab74-becb3639b0e9', '4833ae87-3c70-4121-821f-d1cece4b69e8', '51d7afd2-fa78-4efc-88bb-d134300e00ad', 'eb1cc4a8-d4ca-4da8-81ea-2fe995597d12', 'e26e859f-a6fd-4f0a-8cb2-bf789f2ff3ee', 'b5541836-7150-45e8-9ed9-c30fb8bc5848', '6f83e38a-93b6-46ce-8326-b57bb2b00da1', 'f1ccd644-8e75-4cd2-a80c-f970b516e199', '2ee73550-24bd-4892-b976-527b6f6a5726', '0aa57ceb-5914-457e-a15c-dbcf345866cb', '7f5631b7-39a2-4406-8a5e-3118999bac67', '47896e44-9374-4b91-a256-cf7153bd15fe', 'd76ea38f-63f8-4557-8297-9e89d8386e2a', 'f92523f0-582c-477d-a495-a4f470e344fd', '2a7f199f-02bc-4f62-883f-9ca54aede574', '324107ce-d849-43e3-8952-4b676f556e90', '3691c5ad-7deb-459f-9a0d-9c9f858ac41f', 'e7f317ff-1164-47b1-a771-9839b24eddfa', 'a3c75ca5-32f9-4e19-b098-d4316e81cfb2', 
'187395f9-c570-4e47-afe9-06347a813112', '569fcdc1-8b6a-4712-a952-9a24aae208f4', 'b5b715fd-ae86-43da-a2e5-e6be17e808ec', 'd11b411a-1916-47a9-837b-c0abe563da0f', '08c81688-0120-40c0-859d-0fd37adbe64d', '55cff872-df09-4714-875d-a435f2577338', 'a6355884-037a-46e3-9763-d53f0db0dcd9', '23ab1723-68b3-4c19-aed3-5082111b3638', '2eaf5944-ba2c-4564-8343-ba833cc0433d', '89562bb4-4510-42e7-a479-110b2c0083ee', 'abc5a263-55c6-4d2a-9b3c-28be717663ec', '7139dbe6-cdbe-441f-9df7-0a0a5decd365', '89f14c69-044f-4959-ad97-1de41da6fbec', 'f538d79d-4d75-41a6-a2f8-615a87e355db', '3959f1c0-f30a-4438-be70-df7b7c3c24bc', '3bba1626-aa31-41d9-ab67-3322b2552fc4', '4a6e293e-2163-40a7-a364-3a5258deb92d', 'a29a1179-6908-4d3d-a04e-0ee51265417f', '5d941dee-007d-4ee2-bc50-e7f65c35bc88', 'c7dc091b-15ea-4ea4-a2b1-84d58e77d154', '3240b04f-e939-4437-a96f-a88b67ac19f6', 'bad23e12-7f9f-4695-acae-089d9d854c77', '997fdcc8-ab4f-4568-9818-152e7317ce5b', '5a4c858f-fffc-4d9f-bd80-a390acd0343e', '82957453-0665-40db-be0b-bddb7719031b', '6027ecb5-32db-438b-8f08-9fb6202805bb', 'cfccab1a-39fe-4c1d-806d-195e9369888d', '039f2de2-d2df-40d1-a5e8-5113bbde69bc', 'd7c1185c-d0f3-4cef-b2dc-83b175589cd9', 'd4716363-a7dd-4f2b-a78a-b6503cc32842', 'b63785a9-52fb-480a-a44a-e2216a86ce8c', '90dfd53e-4eb6-41f9-acce-f375ce7d8fe9', 'fa6e8c36-d4bb-4f26-a584-0bc7ba59ba1e', '2bb3396c-050f-4aaf-b0f0-37998fcc5a6a', '57af70e8-e148-4c14-bee8-122d6c720af6', '21046f3c-e0bd-415e-9be8-dd57ea7708bc', 'f3513c31-5688-404f-a0d6-2cfbbb808e3a', '597884b0-1750-4665-baa5-fbf431300d65', '4c7fc02b-ef0d-483e-b2dd-cc1f559252e9', 'ef36cecd-0101-4a56-8b75-1fccb434ae0c', 'ae44ff15-8d57-4d1f-9871-8ae0d5b806de', '5fca6739-f372-4114-b61a-a118bd810e49', 'b2fd1280-1f2a-4ed8-a2c7-c982594269ca', '07eadb7d-c147-43d1-a8f2-28dbe3fd8860', 'dd3faab7-d4d3-44db-89b7-13973c451e67', '1486189a-4ee9-44fb-80aa-9407f2b07150', 'af1fffe5-5acc-4d1c-bb81-2c6d87a760f3', '9b102876-af60-46c8-af70-0b7e6f6911da', 'f8b86bca-b6bc-4789-8a39-c88ecd4015ad', '7c69ecea-2493-4a1e-8da1-57596aed4051', 'd4702512-48aa-44a0-acac-3e6f1f8109e3', '247be6ec-cf48-4588-94d8-bf12c3cb2c8f', '15ae0bf9-2470-4f58-849e-6f43b09c2973', 'c45c18ef-4f86-45e4-8106-27b335db73d7', '202824d7-f6d3-46b0-8d11-0eba8b61c219', '1b9cf29a-ed52-436b-a8c7-0d7d69e6d63c', 'f2f693eb-0c05-4c8f-8e8c-4c0f84c74b06', 'f5de50d7-536b-4973-91fc-a0c371a87671', 'f663e8ba-35cb-46e7-b387-67b840d9f39d', '2e20e906-4745-400e-a3a3-cf54a0b1c893', '6cfe9246-8659-401f-98cc-e6318feae664', '1ef72e1a-dbee-4190-8321-812d51e7d686', 'ca1a97d3-54df-4c01-80f2-9a7ca7704315', 'c92d09b1-0228-4e65-aecc-bad1b5fa1d4b', '7f0c65ba-28da-4974-bb48-94b88bd73338', 'c224e0f8-8c5a-4c97-a51b-0a19474a30fb', 'b37c52d5-bc51-433d-b8de-3118b421d8e2', '7fe940e1-1771-472e-a840-2b542266a9a4', 'c7c6956e-0b9f-4b99-9e7b-5658f50c327a', 'de2e819b-4944-4c4c-9d05-deaef8aa3bd9', '507ca903-c5da-447c-97c7-6c7357558a4b', 'e565b9e1-ceb9-405f-b657-62a9f748ccb3', '6d0edc41-58bf-45ff-803f-b7d27ffc6ccd', '395bfb31-cd92-4a4c-9a3d-add53a752d4f', '5d5786e6-23a6-4d40-bdba-e86f1fd7118e', 'fb9ea444-03b1-4903-884e-9b26ff708732', 'ff9a7888-89a9-451c-b845-af7424351181', '8f9624dc-5d94-4c9e-a00b-31c6415d895a', '5e91cd98-0d63-4c86-b9f7-c8baa6458861', '7dfa18ea-e1e2-4470-bad0-01e4b5307feb', 'd8bf2357-de69-40d4-ad59-c063955f41f4', '4967c305-97e2-4eab-9476-b112de6781f3', '2db2da1f-4eaf-4e1d-8235-d9e6ef07b2c4', '03a21d40-95b2-4d4e-92f2-abf80a4c8238', '8b26b6dc-4d67-4a62-a514-50a6ac3c01f1', '802db244-ab90-4df3-bb29-7c1dd0165874', 'e4f00665-9b29-4ed4-a88b-5241656b12af', '3939a5f7-4995-4e90-b40b-d2bb4fc22b49', 
'c79947a7-56c0-4cfc-ada7-3fd6e945f248', '835fb369-320c-4073-91dc-de0edf07e1bd', 'f297ff6c-1660-400d-ad4a-d1a1c62a1bc2', '1f68d81a-3492-48e0-b3a1-8586c63da175', 'ae4a9dfc-0e60-4667-9655-e39adcbca78f', 'd21acb7f-64df-4673-9ce2-96666e7265fe', '8d64bd4d-8595-4ac1-939f-5bb3425a226f', '16732e86-4b6f-4df0-835c-0dd361a8dc62', '609bde74-281b-4427-952c-0b662f6a8a69', 'b4389ef7-afbf-492f-b479-337bd74c32e9', '55fb8a3f-4fd9-4660-9f94-2b6c29fc81a9', '57d539b2-42f5-4022-a0fc-b57409b482df', 'ae746ed6-1c6f-4249-8019-e17160c3848c', '923e39ba-b2df-494d-8197-aac3653d3b72', 'e96017a9-115c-4cbf-bbd0-9383e859eef3', 'd2f6bcc6-db57-4113-b660-9cf80d62c9ca', 'd330220a-aad0-4492-b624-b349f1cea454', 'c4ae012a-c1b7-4082-918e-257731b145fa', '74546286-bd40-432f-9f9d-575961e4dd45', '21c2b2fb-994c-487a-a625-ea269d4579ee', 'a1cfd3c1-f997-4cfa-adb0-54292bbd6b85', 'b9cb0ad2-3d9e-418a-9f5d-6566b8ed93d0', '98290691-5388-4ecb-9337-47147293b5fd', 'f1652b5d-9b6a-4d3b-b1ec-6ff963b3b477', '92b46f74-a073-429c-98b8-9e584478916f', '2ad3c93b-5a22-4d08-9cc2-6bf708e6c44e', '1da769da-20a7-4d93-a7ef-eec854bf00a6', 'efb4108d-6703-410f-8319-b213b4b154ea', 'd15effe7-2994-4e62-8303-22322d1da30a', '85ad8b85-ab16-4604-89b0-d718a9e51f87', '1900ff4e-b699-4dfe-8c99-8b958b5b71a6', 'c15430de-4d31-41b5-bc2b-e633ce072178', '138d822e-c11e-4598-972b-fc4ff509a399', '615d3a8f-f61d-49e5-9167-70533c1da7f2', 'd918e7df-7faf-48fe-9566-443022b8728f', 'f883a04b-2c5d-4e44-9e44-8b18d9aa8447', '16a55782-1dee-40a9-a9d3-adfa7ad95736', '6de79c13-5fc7-4d73-b7bb-019300d6d9fd', 'd7787324-8d3f-435f-8492-5bfa8a0e706a', '2da1e433-48ba-4149-a3d9-db8c3cf24785', '47846124-cee4-46f5-b4e1-bbbc91e0f6a8', '84dda06b-7227-413b-b8b1-6001a7aa2f6f', '90c04f9d-13da-49c5-8f32-3cd84f76025a', '10b71b21-a450-4170-9381-5ea888c3292e', '3a4f3e85-40bc-4a0b-be1b-9d0ec303a670', '33756a5a-9c8c-44d7-aff2-87a320990d14', '1fad5ce9-0ccb-40f7-911c-c43b98e3d12d', '75188c8a-fe3d-41bb-a093-d4a6d8eb729d', 'ea554bf7-b674-4745-aaca-c1def3c4af2f', '476d461e-88ab-4bfb-96bd-48a9ef2f31be', '3c10dc35-240b-4441-aa3a-bdb3a94bbbe3', '4fd9ec0b-fcb7-47ab-bfad-a0cecfcc475b', '123840a7-15f9-4fb2-9d5a-832b2f505d00', '31dde70a-4087-4f17-b395-89d02471bb3e', 'f5f973eb-21b1-45d8-8414-5bc12e950d01', '11a30795-1b48-4632-bff1-3caa01df8ef2', 'ddca540e-8358-47b4-8743-c3a44899cf72', '5514d8da-782b-4703-a90c-660d9c6231a8', '42ffef23-edbe-4751-be47-6e0d0726a6ab', '9324d62a-6668-435e-b8a9-198c3df52ba7', 'd92143c3-fb51-4ba4-8638-487dab3d6247', 'f2ff94ef-de8e-4f73-a37e-0bcaa347d99d', 'f60255ab-a4a2-49c1-8540-f992cab2cf9a', '38d16303-6a5c-4ac1-a32f-5173f58aaf74', 'a71ea415-d11f-4d25-9e8c-4ff2cf61e748', 'ccc108a9-a4c0-4b3e-bc77-8742c677eb40', 'e8470c51-fb5a-4d80-b268-a76244afa1cd', '4859471e-9b38-4d03-8595-6b42694b2754', '8ba99e2d-48d8-4cc5-b4d2-d44b0e007f87', '5c52c71f-79bb-4118-af46-9b3e1d14126e', 'f738e93b-f70f-4cc8-99ce-c6d37707e6d0', '9d032bfb-af2c-4e91-8e6d-2c023959875c', 'debe73b1-f5fc-46ac-b402-eb425e9c5fd0', '6d29f750-02f6-4b0f-8a38-6204a6fc92b7', 'e4398ba5-a39d-489e-9bba-2b92f429d97a', 'ea2ff417-ccbd-47d6-bd1c-af1a551d8af2', 'aaf0380e-4ea0-42a0-b1b1-7793fef5e2fe', 'ac2cb032-eb2d-46ff-a3db-852a6c81cda4', '6bc11515-5d3a-4e3c-bcab-8384543daabb', 'ff2e3046-836f-4b2a-b207-2b6d3dfa42ce', '6c2d5d19-df8e-4eb6-b614-8707e8cdf9a0', '1d8da1c9-8e2f-4b73-9b88-8fac9df1d231', 'c800dbc6-4da4-43d2-912f-9ce9921fbbc4', '8bda2d72-5128-47a6-830f-836074ebb065', '9a280b19-547e-428b-8f65-3777d0fb091b', '2d44fc49-10b5-48b1-8cc5-0f6389c3c0ba', '3d6c6485-cf6e-4f37-8554-4e3ba314ead9', 'a42971ac-5460-4131-bb1f-252b3bdc6ef3', 
'065d9070-44be-40b5-bdba-d457298805ca', 'e682a038-2cef-4195-82bc-2db6a0bfc190', 'e2be2967-49f3-4b45-a9d6-697e5ba28c19', 'cc14bfda-e671-45cd-92f9-726e3af73a75', 'f6a2bbb2-41aa-4535-98ef-fa2bc6690553', '0af5f6ba-88a0-429e-ab34-eda507ab7b2b', 'b50a6077-62dd-4370-838d-4504e756ec76', '4e46ecc8-95f4-4e71-8546-7eff67aa9674', 'f9cc3e05-a4e0-47d8-8f85-c536944550dd', '76cf6789-64f3-440e-83d3-ea3a785a5f18', '1ff7f628-9d32-4da8-ad05-5157f21941b4', '819f5622-c337-499d-a5fd-09a6645a0bac', 'c20836de-ba7b-4848-b301-02f9ee73c2c4', '654a3768-44be-4085-97b3-b0c81c5b1e25', '6078246d-c76f-473b-aa53-aa9811d85202', 'e39365be-501f-4175-8b96-7bc21822038b', '92d48f92-0ab0-4295-b566-020041776d81', '3ef430ae-20ea-4b90-b4e4-7a147feb538e', 'f0eaf582-15e0-4f4c-ad11-a238903e3398', '72fac284-ba6a-49fd-9c64-360e7916882f', 'd27abf6f-ba69-499f-b16d-aaf4977976c9', 'b647b47c-7ce3-4d07-ac00-04f19aeb813f', 'c870e45e-e8cf-4428-94e8-a6e445da34c9', '711f7051-020c-480c-8eb1-3e134684a53a', '0eb793d1-1ba9-4bd7-8266-e25a5293b290', '370765bb-1578-4b2d-a525-9b68eacb5d3d', '2b7f2ba9-d96b-4929-afc3-aca3d6815ff4', 'a017a9e9-7486-466d-be7f-86204f271ac6', '049f12de-f88b-4053-8f33-b0d2de666717', '8f465387-28a7-484d-bf80-63678548df43', 'a253ed92-8cce-4781-a737-25713b56d0f6', '6dbf6270-19ad-44d8-adb5-2afacbef528c', 'b75162b1-6df6-4307-ae15-5cd999c3f293', '4317f253-649d-4453-a2e2-33cc114b8d20', '45570cc0-6b65-4920-9cc0-2c8cc404cfd8', '84d7db8a-7bf4-4c5a-97a1-92079a5d3827', '2060d9f8-f893-44bf-8b10-1147abdd770e', '20b67577-5a56-44a4-8cd7-ca222f3ffe1e', '98f5e175-ea55-4f9a-a8e3-57f5e72c2b99', 'bdfd2ff2-bdaf-4b2a-93f9-f6059c48543d', '0931c0f2-a490-44e7-94e9-e697a8f14a25', '0c415312-f2c7-4cb7-8b0c-610e6022cebe', 'e99e6197-c87d-4997-9c1a-a7a773cd16a6', '7cb530a2-6707-44d4-8230-f6350eec6633', '662fc59e-1650-4f10-af4e-f517069299af', '68301a90-9ca9-4e35-b650-89e40cd83381', 'bd31a7c5-dc58-491f-9d46-a7e33df609a0', '2af17dee-840b-48b7-97ce-58ee3c30c636', '6b750930-cb18-4f47-b973-b3ae83a5670d', 'ac5a81a2-b1e7-48f7-8660-11d2ee4a8155', 'b5f8fdeb-b540-4698-9e80-5465aaa457dd', '78722194-4664-4d77-8049-55ab29c52019', 'ea956d74-6d2d-4c13-92d8-bc6dbbfb99b6', '1ebb6bce-9e77-43dc-ac99-25f473684dea', '3164e884-dbd6-4997-bc1f-7d7021d18621', '05a75e45-2205-4c6b-8848-de79593dd773', 'ce158a73-2efe-4176-925b-7cb31ee29eb1', 'e8aac383-e775-4c8f-9e71-dc83f1fdba72', '25a69d4e-393b-434e-8c31-595d65fb5ffa', '7cbe17cd-26e6-4b22-824c-7f8847fb66be', '7237c5fd-7944-4cee-b348-f66a7e4dc37f', '1f865aa0-f115-4746-8930-a96f43a9c940', '8f52221b-96de-4b55-aa3b-c8d8be25f227', '03b8eda4-0eaa-4f28-bbde-b43937177567', '3c793dca-6bb1-4a7e-ae2e-0211e901783e', '1fa9bc1a-dce7-4053-89d5-b15d561a4684', '622e7fb9-11eb-4391-a4bf-de7ac21c1318', '3ed2d80f-9191-48e5-8cd6-762b474cbb46', '5c297f97-ab24-4b4d-9fc0-91731eb53c47']\n", + "Embedding status: 401/1691 documents embedded\n", + "Embedding status: 571/1691 documents embedded\n", + "Embedding status: 741/1691 documents embedded\n", + "Embedding status: 941/1691 documents embedded\n", + "Embedding status: 1211/1691 documents embedded\n", + "Embedding status: 1431/1691 documents embedded\n", + "Embedding status: 1691/1691 documents embedded\n" + ] + } + ], + "source": [ + "# Let's add more content to the existing Collection\n", + "article_url = \"https://www.gutenberg.org/files/48320/48320-0.txt\"\n", + "loader = WebBaseLoader(article_url)\n", + "documents = loader.load()\n", + "\n", + "# split it into chunks\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)\n", + "docs = text_splitter.split_documents(documents)\n", 
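+    "# add the chunks to the collection; wait_for_ready polls until embedding completes (the Embedding status output above)\n",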
+ "\n", + "await vs.aadd_documents(docs)\n", + "\n", + "await wait_for_ready(collection_name)" + ] + }, + { + "cell_type": "markdown", + "id": "5b225f3ae1e61de8", + "metadata": { + "collapsed": false + }, + "source": [ + "We see results from both books. Note the `source` metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "53700a9cd817cde4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T03:10:49.644196Z", + "start_time": "2024-05-10T03:10:49.243453Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "of astronomy, and its kindred sciences, with the various arts dependent\r\n", + "on them. In none are computations more operose than those which\r\n", + "astronomy in particular requires;--in none are preparatory facilities\r\n", + "more needful;--in none is error more detrimental. The practical\r\n", + "astronomer is interrupted in his pursuit, and diverted from his task of\r\n", + "observation by the irksome labours of computation, or his diligence in\r\n", + "observing becomes ineffectual for want of yet greater industry of -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", + "====\n", + "\n", + "possess all knowledge which is likely to be useful to him in his work,\r\n", + "and this I have endeavored in my case to do. If I remember rightly, you\r\n", + "on one occasion, in the early days of our friendship, defined my limits\r\n", + "in a very precise fashion.”\r\n", + "\r\n", + "“Yes,” I answered, laughing. “It was a singular document. Philosophy,\r\n", + "astronomy, and politics were marked at zero, I remember. Botany\r\n", + "variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", + "====\n", + "\n", + "easily admitted, that an assembly of eminent naturalists and physicians,\r\n", + "with a sprinkling of astronomers, and one or two abstract\r\n", + "mathematicians, were not precisely the persons best qualified to\r\n", + "appreciate such an instrument of mechanical investigation as we have\r\n", + "here described. We shall not therefore be understood as intending the\r\n", + "slightest disrespect for these distinguished persons, when we express\r\n", + "our regret, that a discovery of such paramount practical value, in a -> {'source': 'https://www.gutenberg.org/cache/epub/71292/pg71292.txt'} \n", + "====\n", + "\n" + ] + } + ], + "source": [ + "query = \"Was he interested in astronomy?\"\n", + "docs = await vs.asearch(query, search_type=\"similarity\", k=3)\n", + "\n", + "for d in docs:\n", + " print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")" + ] + }, + { + "cell_type": "markdown", + "id": "7b81d7cae351a1ec", + "metadata": { + "collapsed": false + }, + "source": [ + "Now, we set up a filter" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8f1bdcba03979d22", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-10T03:10:53.663003Z", + "start_time": "2024-05-10T03:10:53.441327Z" + }, + "collapsed": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "possess all knowledge which is likely to be useful to him in his work,\r\n", + "and this I have endeavored in my case to do. If I remember rightly, you\r\n", + "on one occasion, in the early days of our friendship, defined my limits\r\n", + "in a very precise fashion.”\r\n", + "\r\n", + "“Yes,” I answered, laughing. “It was a singular document. 
Philosophy,\r\n", + "astronomy, and politics were marked at zero, I remember. Botany\r\n", + "variable, geology profound as regards the mud-stains from any region -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", + "====\n", + "\n", + "the evening than in the daylight, for he said that he hated to be\r\n", + "conspicuous. Very retiring and gentlemanly he was. Even his voice was\r\n", + "gentle. He’d had the quinsy and swollen glands when he was young, he\r\n", + "told me, and it had left him with a weak throat, and a hesitating,\r\n", + "whispering fashion of speech. He was always well dressed, very neat and\r\n", + "plain, but his eyes were weak, just as mine are, and he wore tinted\r\n", + "glasses against the glare.” -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", + "====\n", + "\n", + "which was characteristic of him. “It is perhaps less suggestive than\r\n", + "it might have been,” he remarked, “and yet there are a few inferences\r\n", + "which are very distinct, and a few others which represent at least a\r\n", + "strong balance of probability. That the man was highly intellectual\r\n", + "is of course obvious upon the face of it, and also that he was fairly\r\n", + "well-to-do within the last three years, although he has now fallen upon\r\n", + "evil days. He had foresight, but has less now than formerly, pointing -> {'source': 'https://www.gutenberg.org/files/48320/48320-0.txt'} \n", + "====\n", + "\n" + ] + } + ], + "source": [ + "filter = {\n", + " \"where\": {\n", + " \"jsonpath\": (\n", + " \"$[*] ? (@.source == 'https://www.gutenberg.org/files/48320/48320-0.txt')\"\n", + " )\n", + " },\n", + "}\n", + "\n", + "docs = await vs.asearch(query, search_type=\"similarity\", metadata=filter, k=3)\n", + "\n", + "for d in docs:\n", + " print(d.page_content, \" -> \", d.metadata, \"\\n====\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96132aa6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/docs/tutorials/pdf_qa.ipynb b/docs/docs/tutorials/pdf_qa.ipynb new file mode 100644 index 0000000000000..e8931ff24c439 --- /dev/null +++ b/docs/docs/tutorials/pdf_qa.ipynb @@ -0,0 +1,351 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": { + "vscode": { + "languageId": "raw" + } + }, + "source": [ + "---\n", + "keywords: [pdf, document loader]\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Build a PDF ingestion and Question/Answering system\n", + "\n", + ":::info Prerequisites\n", + "\n", + "This guide assumes familiarity with the following concepts:\n", + "\n", + "- [Document loaders](/docs/concepts/#document-loaders)\n", + "- [Chat models](/docs/concepts/#chat-models)\n", + "- [Embeddings](/docs/concepts/#embedding-models)\n", + "- [Vector stores](/docs/concepts/#vector-stores)\n", + "- [Retrieval-augmented generation](/docs/tutorials/rag/)\n", + "\n", + ":::\n", + "\n", + "PDF files often hold crucial unstructured data unavailable from other sources. 
They can be quite lengthy, and unlike plain text files, cannot generally be fed directly into the prompt of a language model.\n", + "\n", + "In this tutorial, you'll create a system that can answer questions about PDF files. More specifically, you'll use a [Document Loader](/docs/concepts/#document-loaders) to load text in a format usable by an LLM, then build a retrieval-augmented generation (RAG) pipeline to answer questions, including citations from the source material.\n", + "\n", + "This tutorial will gloss over some concepts more deeply covered in our [RAG](/docs/tutorials/rag/) tutorial, so you may want to go through those first if you haven't already.\n", + "\n", + "Let's dive in!\n", + "\n", + "## Loading documents\n", + "\n", + "First, you'll need to choose a PDF to load. We'll use a document from [Nike's annual public SEC report](https://s1.q4cdn.com/806093406/files/doc_downloads/2023/414759-1-_5_Nike-NPS-Combo_Form-10-K_WR.pdf). It's over 100 pages long, and contains some crucial data mixed with longer explanatory text. However, you can feel free to use a PDF of your choosing.\n", + "\n", + "Once you've chosen your PDF, the next step is to load it into a format that an LLM can more easily handle, since LLMs generally require text inputs. LangChain has a few different [built-in document loaders](/docs/how_to/document_loader_pdf/) for this purpose which you can experiment with. Below, we'll use one powered by the [`pypdf`](https://pypi.org/project/pypdf/) package that reads from a filepath:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU pypdf langchain_community" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "107\n" + ] + } + ], + "source": [ + "from langchain_community.document_loaders import PyPDFLoader\n", + "\n", + "file_path = \"../example_data/nke-10k-2023.pdf\"\n", + "loader = PyPDFLoader(file_path)\n", + "\n", + "docs = loader.load()\n", + "\n", + "print(len(docs))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Table of Contents\n", + "UNITED STATES\n", + "SECURITIES AND EXCHANGE COMMISSION\n", + "Washington, D.C. 20549\n", + "FORM 10-K\n", + "\n", + "{'source': '../example_data/nke-10k-2023.pdf', 'page': 0}\n" + ] + } + ], + "source": [ + "print(docs[0].page_content[0:100])\n", + "print(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So what just happened?\n", + "\n", + "- The loader reads the PDF at the specified path into memory.\n", + "- It then extracts text data using the `pypdf` package.\n", + "- Finally, it creates a LangChain [Document](/docs/concepts/#documents) for each page of the PDF with the page's content and some metadata about where in the document the text came from.\n", + "\n", + "LangChain has [many other document loaders](/docs/integrations/document_loaders/) for other data sources, or you can create a [custom document loader](/docs/how_to/document_loader_custom/).\n", + "\n", + "## Question answering with RAG\n", + "\n", + "Next, you'll prepare the loaded documents for later retrieval. 
Using a [text splitter](/docs/concepts/#text-splitters), you'll split your loaded documents into smaller documents that can more easily fit into an LLM's context window, then load them into a [vector store](/docs/concepts/#vector-stores). You can then create a [retriever](/docs/concepts/#retrievers) from the vector store for use in our RAG chain:\n", + "\n", + "```{=mdx}\n", + "import ChatModelTabs from \"@theme/ChatModelTabs\";\n", + "\n", + "\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "%pip install langchain_anthropic\n", + "\n", + "import getpass\n", + "import os\n", + "\n", + "from langchain_anthropic import ChatAnthropic\n", + "\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = getpass.getpass(\"Anthropic API Key:\")\n", + "\n", + "llm = ChatAnthropic(model=\"claude-3-sonnet-20240229\", temperature=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install langchain_chroma langchain_openai" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# | output: false\n", + "# | echo: false\n", + "\n", + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"OpenAI API Key:\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_chroma import Chroma\n", + "from langchain_openai import OpenAIEmbeddings\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "splits = text_splitter.split_documents(docs)\n", + "vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())\n", + "\n", + "retriever = vectorstore.as_retriever()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, you'll use some built-in helpers to construct the final `rag_chain`:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': \"What was Nike's revenue in 2023?\",\n", + " 'context': [Document(page_content='Table of Contents\\nFISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS\\nThe following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively.\\nThe increase was due to higher revenues in North America, Europe, Middle East & Africa (\"EMEA\"), APLA and Greater China, which contributed approximately 7, 6,\\n2 and 1 percentage points to NIKE, Inc. Revenues, respectively.\\n•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. 
This\\nincrease was primarily due to higher revenues in Men\\'s, the Jordan Brand, Women\\'s and Kids\\' which grew 17%, 35%,11% and 10%, respectively, on a wholesale\\nequivalent basis.', metadata={'page': 35, 'source': '../example_data/nke-10k-2023.pdf'}),\n", + " Document(page_content='Enterprise Resource Planning Platform, data and analytics, demand sensing, insight gathering, and other areas to create an end-to-end technology foundation, which we\\nbelieve will further accelerate our digital transformation. We believe this unified approach will accelerate growth and unlock more efficiency for our business, while driving\\nspeed and responsiveness as we serve consumers globally.\\nFINANCIAL HIGHLIGHTS\\n•In fiscal 2023, NIKE, Inc. achieved record Revenues of $51.2 billion, which increased 10% and 16% on a reported and currency-neutral basis, respectively\\n•NIKE Direct revenues grew 14% from $18.7 billion in fiscal 2022 to $21.3 billion in fiscal 2023, and represented approximately 44% of total NIKE Brand revenues for\\nfiscal 2023\\n•Gross margin for the fiscal year decreased 250 basis points to 43.5% primarily driven by higher product costs, higher markdowns and unfavorable changes in foreign\\ncurrency exchange rates, partially offset by strategic pricing actions', metadata={'page': 30, 'source': '../example_data/nke-10k-2023.pdf'}),\n", + " Document(page_content=\"Table of Contents\\nNORTH AMERICA\\n(Dollars in millions) FISCAL 2023FISCAL 2022 % CHANGE% CHANGE\\nEXCLUDING\\nCURRENCY\\nCHANGESFISCAL 2021 % CHANGE% CHANGE\\nEXCLUDING\\nCURRENCY\\nCHANGES\\nRevenues by:\\nFootwear $ 14,897 $ 12,228 22 % 22 %$ 11,644 5 % 5 %\\nApparel 5,947 5,492 8 % 9 % 5,028 9 % 9 %\\nEquipment 764 633 21 % 21 % 507 25 % 25 %\\nTOTAL REVENUES $ 21,608 $ 18,353 18 % 18 %$ 17,179 7 % 7 %\\nRevenues by: \\nSales to Wholesale Customers $ 11,273 $ 9,621 17 % 18 %$ 10,186 -6 % -6 %\\nSales through NIKE Direct 10,335 8,732 18 % 18 % 6,993 25 % 25 %\\nTOTAL REVENUES $ 21,608 $ 18,353 18 % 18 %$ 17,179 7 % 7 %\\nEARNINGS BEFORE INTEREST AND TAXES $ 5,454 $ 5,114 7 % $ 5,089 0 %\\nFISCAL 2023 COMPARED TO FISCAL 2022\\n•North America revenues increased 18% on a currency-neutral basis, primarily due to higher revenues in Men's and the Jordan Brand. NIKE Direct revenues\\nincreased 18%, driven by strong digital sales growth of 23%, comparable store sales growth of 9% and the addition of new stores.\", metadata={'page': 39, 'source': '../example_data/nke-10k-2023.pdf'}),\n", + " Document(page_content=\"Table of Contents\\nEUROPE, MIDDLE EAST & AFRICA\\n(Dollars in millions) FISCAL 2023FISCAL 2022 % CHANGE% CHANGE\\nEXCLUDING\\nCURRENCY\\nCHANGESFISCAL 2021 % CHANGE% CHANGE\\nEXCLUDING\\nCURRENCY\\nCHANGES\\nRevenues by:\\nFootwear $ 8,260 $ 7,388 12 % 25 %$ 6,970 6 % 9 %\\nApparel 4,566 4,527 1 % 14 % 3,996 13 % 16 %\\nEquipment 592 564 5 % 18 % 490 15 % 17 %\\nTOTAL REVENUES $ 13,418 $ 12,479 8 % 21 %$ 11,456 9 % 12 %\\nRevenues by: \\nSales to Wholesale Customers $ 8,522 $ 8,377 2 % 15 %$ 7,812 7 % 10 %\\nSales through NIKE Direct 4,896 4,102 19 % 33 % 3,644 13 % 15 %\\nTOTAL REVENUES $ 13,418 $ 12,479 8 % 21 %$ 11,456 9 % 12 %\\nEARNINGS BEFORE INTEREST AND TAXES $ 3,531 $ 3,293 7 % $ 2,435 35 % \\nFISCAL 2023 COMPARED TO FISCAL 2022\\n•EMEA revenues increased 21% on a currency-neutral basis, due to higher revenues in Men's, the Jordan Brand, Women's and Kids'. 
NIKE Direct revenues\\nincreased 33%, driven primarily by strong digital sales growth of 43% and comparable store sales growth of 22%.\", metadata={'page': 40, 'source': '../example_data/nke-10k-2023.pdf'})],\n", + " 'answer': 'According to the financial highlights, Nike, Inc. achieved record revenues of $51.2 billion in fiscal 2023, which increased 10% on a reported basis and 16% on a currency-neutral basis compared to fiscal 2022.'}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from langchain.chains import create_retrieval_chain\n", + "from langchain.chains.combine_documents import create_stuff_documents_chain\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "\n", + "system_prompt = (\n", + " \"You are an assistant for question-answering tasks. \"\n", + " \"Use the following pieces of retrieved context to answer \"\n", + " \"the question. If you don't know the answer, say that you \"\n", + " \"don't know. Use three sentences maximum and keep the \"\n", + " \"answer concise.\"\n", + " \"\\n\\n\"\n", + " \"{context}\"\n", + ")\n", + "\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", system_prompt),\n", + " (\"human\", \"{input}\"),\n", + " ]\n", + ")\n", + "\n", + "\n", + "question_answer_chain = create_stuff_documents_chain(llm, prompt)\n", + "rag_chain = create_retrieval_chain(retriever, question_answer_chain)\n", + "\n", + "results = rag_chain.invoke({\"input\": \"What was Nike's revenue in 2023?\"})\n", + "\n", + "results" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can see that you get both a final answer in the `answer` key of the results dict, and the `context` the LLM used to generate an answer.\n", + "\n", + "Examining the values under the `context` further, you can see that they are documents that each contain a chunk of the ingested page content. Usefully, these documents also preserve the original metadata from way back when you first loaded them:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Table of Contents\n", + "FISCAL 2023 NIKE BRAND REVENUE HIGHLIGHTS\n", + "The following tables present NIKE Brand revenues disaggregated by reportable operating segment, distribution channel and major product line:\n", + "FISCAL 2023 COMPARED TO FISCAL 2022\n", + "•NIKE, Inc. Revenues were $51.2 billion in fiscal 2023, which increased 10% and 16% compared to fiscal 2022 on a reported and currency-neutral basis, respectively.\n", + "The increase was due to higher revenues in North America, Europe, Middle East & Africa (\"EMEA\"), APLA and Greater China, which contributed approximately 7, 6,\n", + "2 and 1 percentage points to NIKE, Inc. Revenues, respectively.\n", + "•NIKE Brand revenues, which represented over 90% of NIKE, Inc. Revenues, increased 10% and 16% on a reported and currency-neutral basis, respectively. 
This\n", + "increase was primarily due to higher revenues in Men's, the Jordan Brand, Women's and Kids' which grew 17%, 35%,11% and 10%, respectively, on a wholesale\n", + "equivalent basis.\n" + ] + } + ], + "source": [ + "print(results[\"context\"][0].page_content)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'page': 35, 'source': '../example_data/nke-10k-2023.pdf'}\n" + ] + } + ], + "source": [ + "print(results[\"context\"][0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This particular chunk came from page 35 in the original PDF. You can use this data to show which page in the PDF the answer came from, allowing users to quickly verify that answers are based on the source material.\n", + "\n", + ":::info\n", + "For a deeper dive into RAG, see [this more focused tutorial](/docs/tutorials/rag/) or [our how-to guides](/docs/how_to/#qa-with-rag).\n", + ":::\n", + "\n", + "## Next steps\n", + "\n", + "You've now seen how to load documents from a PDF file with a Document Loader and some techniques you can use to prepare that loaded data for RAG.\n", + "\n", + "For more on document loaders, you can check out:\n", + "\n", + "- [The entry in the conceptual guide](/docs/concepts/#document-loaders)\n", + "- [Related how-to guides](/docs/how_to/#document-loaders)\n", + "- [Available integrations](/docs/integrations/document_loaders/)\n", + "- [How to create a custom document loader](/docs/how_to/document_loader_custom/)\n", + "\n", + "For more on RAG, see:\n", + "\n", + "- [Build a Retrieval Augmented Generation (RAG) App](/docs/tutorials/rag/)\n", + "- [Related how-to guides](/docs/how_to/#qa-with-rag)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/scripts/check_templates.py b/docs/scripts/check_templates.py new file mode 100644 index 0000000000000..4344cba283594 --- /dev/null +++ b/docs/scripts/check_templates.py @@ -0,0 +1,72 @@ +import re +import sys +from pathlib import Path +from typing import Union + +CURR_DIR = Path(__file__).parent.absolute() + +CHAT_MODEL_HEADERS = ( + "## Overview", + "### Integration details", + "### Model features", + "## Setup", + "## Instantiation", + "## Invocation", + "## Chaining", + "## API reference", +) +CHAT_MODEL_REGEX = r".*".join(CHAT_MODEL_HEADERS) + +DOCUMENT_LOADER_HEADERS = ( + "## Overview", + "### Integration details", + "### Loader features", + "## Setup", + "## Instantiation", + "## Load", + "## Lazy Load", + "## API reference", +) +DOCUMENT_LOADER_REGEX = r".*".join(DOCUMENT_LOADER_HEADERS) + + +def check_chat_model(path: Path) -> None: + with open(path, "r") as f: + doc = f.read() + if not re.search(CHAT_MODEL_REGEX, doc, re.DOTALL): + raise ValueError( + f"Document {path} does not match the ChatModel Integration page template. " + f"Please see https://github.com/langchain-ai/langchain/issues/22296 for " + f"instructions on how to correctly format a ChatModel Integration page." 
+ ) + + +def check_document_loader(path: Path) -> None: + with open(path, "r") as f: + doc = f.read() + if not re.search(DOCUMENT_LOADER_REGEX, doc, re.DOTALL): + raise ValueError( + f"Document {path} does not match the DocumentLoader Integration page template. " + f"Please see https://github.com/langchain-ai/langchain/issues/22866 for " + f"instructions on how to correctly format a DocumentLoader Integration page." + ) + + +def main(*new_doc_paths: Union[str, Path]) -> None: + for path in new_doc_paths: + path = Path(path).resolve().absolute() + if CURR_DIR.parent / "docs" / "integrations" / "chat" in path.parents: + print(f"Checking chat model page {path}") + check_chat_model(path) + elif ( + CURR_DIR.parent / "docs" / "integrations" / "document_loaders" + in path.parents + ): + print(f"Checking document loader page {path}") + check_document_loader(path) + else: + continue + + +if __name__ == "__main__": + main(*sys.argv[1:]) diff --git a/docs/scripts/create_chat_model_docstring_tables.py b/docs/scripts/create_chat_model_docstring_tables.py new file mode 100644 index 0000000000000..bf911d0897b51 --- /dev/null +++ b/docs/scripts/create_chat_model_docstring_tables.py @@ -0,0 +1,120 @@ +imperative = [ + [ + "invoke", + "str | List[dict | tuple | BaseMessage] | PromptValue", + "BaseMessage", + "A single chat model call.", + ], + [ + "ainvoke", + "'''", + "BaseMessage", + "Defaults to running invoke in an async executor.", + ], + [ + "stream", + "'''", + "Iterator[BaseMessageChunk]", + "Defaults to yielding output of invoke.", + ], + [ + "astream", + "'''", + "AsyncIterator[BaseMessageChunk]", + "Defaults to yielding output of ainvoke.", + ], + [ + "astream_events", + "'''", + "AsyncIterator[StreamEvent]", + "Event types: 'on_chat_model_start', 'on_chat_model_stream', 'on_chat_model_end'.", + ], + [ + "batch", + "List[''']", + "List[BaseMessage]", + "Defaults to running invoke in concurrent threads.", + ], + [ + "abatch", + "List[''']", + "List[BaseMessage]", + "Defaults to running ainvoke in concurrent threads.", + ], + [ + "batch_as_completed", + "List[''']", + "Iterator[Tuple[int, Union[BaseMessage, Exception]]]", + "Defaults to running invoke in concurrent threads.", + ], + [ + "abatch_as_completed", + "List[''']", + "AsyncIterator[Tuple[int, Union[BaseMessage, Exception]]]", + "Defaults to running ainvoke in concurrent threads.", + ], +] +declarative = [ + [ + "bind_tools", + # "Tools, ...", + # "Runnable with same inputs/outputs as ChatModel", + "Create ChatModel that can call tools.", + ], + [ + "with_structured_output", + # "An output schema, ...", + # "Runnable that takes ChatModel inputs and returns a dict or Pydantic object", + "Create wrapper that structures model output using schema.", + ], + [ + "with_retry", + # "Max retries, exceptions to handle, ...", + # "Runnable with same inputs/outputs as ChatModel", + "Create wrapper that retries model calls on failure.", + ], + [ + "with_fallbacks", + # "List of models to fall back on", + # "Runnable with same inputs/outputs as ChatModel", + "Create wrapper that falls back to other models on failure.", + ], + [ + "configurable_fields", + # "*ConfigurableField", + # "Runnable with same inputs/outputs as ChatModel", + "Specify init args of the model that can be configured at runtime via the RunnableConfig.", + ], + [ + "configurable_alternatives", + # "ConfigurableField, ...", + # "Runnable with same inputs/outputs as ChatModel", + "Specify alternative models which can be swapped in at runtime via the RunnableConfig.", + ], +] + + +def 
create_table(to_build: list) -> str: + for x in to_build: + x[0] = "`" + x[0] + "`" + longest = [max(len(x[i]) for x in to_build) for i in range(len(to_build[0]))] + widths = [int(1.2 * col) for col in longest] + headers = ( + ["Method", "Input", "Output", "Description"] + if len(widths) == 4 + else ["Method", "Description"] + ) + rows = [[h + " " * (w - len(h)) for w, h in zip(widths, headers)]] + for x in to_build: + rows.append([y + " " * (w - len(y)) for w, y in zip(widths, x)]) + + table = [" | ".join(([""] + x + [""])).strip() for x in rows] + lines = [ + "+".join(([""] + ["-" * (len(y) + 2) for y in x] + [""])).strip() for x in rows + ] + lines[1] = lines[1].replace("-", "=") + lines.append(lines[-1]) + rst = lines[0] + for r, li in zip(table, lines[1:]): + rst += "\n" + r + "\n" + li + return rst diff --git a/docs/scripts/document_loader_feat_table.py b/docs/scripts/document_loader_feat_table.py new file mode 100644 index 0000000000000..facff1cab9c01 --- /dev/null +++ b/docs/scripts/document_loader_feat_table.py @@ -0,0 +1,77 @@ +import sys +from pathlib import Path + +from langchain_community import document_loaders +from langchain_core.document_loaders.base import BaseLoader + +DOCUMENT_LOADER_TEMPLATE = """\ +--- +sidebar_position: 0 +sidebar_class_name: hidden +keywords: [compatibility] +custom_edit_url: +hide_table_of_contents: true +--- + +# Document loaders + +## Features + +The following table shows the feature support for all document loaders. + +{table} + +""" + + +def get_document_loader_table() -> str: + """Get the table of document loaders.""" + + doc_loaders_feat_table = {} + for cm in document_loaders.__all__: + doc_loaders_feat_table[cm] = {} + cls = getattr(document_loaders, cm) + if issubclass(cls, BaseLoader): + for feat in ("aload", "alazy_load", ("lazy_load", "lazy_loading")): + if isinstance(feat, tuple): + feat, name = feat + else: + feat, name = feat, feat + doc_loaders_feat_table[cm][name] = getattr(cls, feat) != getattr( + BaseLoader, feat + ) + native_async = ( + doc_loaders_feat_table[cm]["aload"] + or doc_loaders_feat_table[cm]["alazy_load"] + ) + del doc_loaders_feat_table[cm]["aload"] + del doc_loaders_feat_table[cm]["alazy_load"] + doc_loaders_feat_table[cm]["native_async"] = native_async + doc_loaders_feat_table[cm]["description"] = (cls.__doc__ or "").split("\n")[ + 0 + ] + + header = ["loader", "description", "lazy_loading", "native_async"] + title = ["Document Loader", "Description", "Lazy loading", "Native async support"] + rows = [title, [":-"] * 2 + [":-:"] * (len(title) - 2)] + for loader, feats in sorted(doc_loaders_feat_table.items()): + if not feats: + continue + rows += [ + [loader, feats["description"]] + + ["✅" if feats.get(h) else "❌" for h in header[2:]] + ] + return "\n".join(["|".join(row) for row in rows]) + + +if __name__ == "__main__": + output_dir = Path(sys.argv[1]) + output_integrations_dir = output_dir / "integrations" + output_integrations_dir_doc_loaders = output_integrations_dir / "document_loaders" + output_integrations_dir_doc_loaders.mkdir(parents=True, exist_ok=True) + + document_loader_page = DOCUMENT_LOADER_TEMPLATE.format( + table=get_document_loader_table() + ) + with open(output_integrations_dir / "document_loaders" / "index.mdx", "w") as f: + f.write(document_loader_page) diff --git a/docs/src/theme/DocItem/Paginator/index.js b/docs/src/theme/DocItem/Paginator/index.js new file mode 100644 index 0000000000000..0d2093fecc15e --- /dev/null +++ b/docs/src/theme/DocItem/Paginator/index.js @@ -0,0 +1,12 @@ +import 
React from 'react'; +import Paginator from '@theme-original/DocItem/Paginator'; +import Feedback from "../../Feedback"; + +export default function PaginatorWrapper(props) { + return ( + <> + <Paginator {...props} /> + <Feedback /> + </> + ); +} diff --git a/docs/static/img/colbert.png b/docs/static/img/colbert.png new file mode 100644 index 0000000000000..17f902138eb6b Binary files /dev/null and b/docs/static/img/colbert.png differ diff --git a/docs/static/img/ecosystem_packages.png b/docs/static/img/ecosystem_packages.png new file mode 100644 index 0000000000000..e506d8b464318 Binary files /dev/null and b/docs/static/img/ecosystem_packages.png differ diff --git a/docs/static/img/embeddings.png b/docs/static/img/embeddings.png new file mode 100644 index 0000000000000..f56e19f229d6d Binary files /dev/null and b/docs/static/img/embeddings.png differ diff --git a/docs/static/img/langgraph_rag.png b/docs/static/img/langgraph_rag.png new file mode 100644 index 0000000000000..0dfcbb743a71f Binary files /dev/null and b/docs/static/img/langgraph_rag.png differ diff --git a/docs/static/img/langsmith_evaluate.png b/docs/static/img/langsmith_evaluate.png new file mode 100644 index 0000000000000..5d370fd30c573 Binary files /dev/null and b/docs/static/img/langsmith_evaluate.png differ diff --git a/docs/static/img/message_history.png b/docs/static/img/message_history.png new file mode 100644 index 0000000000000..31f7664d286bf Binary files /dev/null and b/docs/static/img/message_history.png differ diff --git a/docs/static/img/rag_landscape.png b/docs/static/img/rag_landscape.png new file mode 100644 index 0000000000000..417d2e6c7eb09 Binary files /dev/null and b/docs/static/img/rag_landscape.png differ diff --git a/docs/static/img/tokenization.png b/docs/static/img/tokenization.png new file mode 100644 index 0000000000000..3ca4bf7d20b14 Binary files /dev/null and b/docs/static/img/tokenization.png differ diff --git a/docs/static/robots.txt b/docs/static/robots.txt new file mode 100644 index 0000000000000..87fdd2eecd0d3 --- /dev/null +++ b/docs/static/robots.txt @@ -0,0 +1,3 @@ +User-agent: * + +Sitemap: https://python.langchain.com/sitemap.xml/ \ No newline at end of file diff --git a/libs/cli/langchain_cli/integration_template/docs/document_loaders.ipynb b/libs/cli/langchain_cli/integration_template/docs/document_loaders.ipynb new file mode 100644 index 0000000000000..1cc6d7d6a722b --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/docs/document_loaders.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "sidebar_label: __ModuleName__\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# __ModuleName__Loader\n", + "\n", + "- TODO: Make sure API reference link is correct.\n", + "\n", + "This notebook provides a quick overview for getting started with __ModuleName__ [document loader](/docs/integrations/document_loaders/). 
For detailed documentation of all __ModuleName__Loader features and configurations head to the [API reference](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html).\n", + "\n", + "- TODO: Add any other relevant links, like information about underlying API, etc.\n", + "\n", + "## Overview\n", + "### Integration details\n", + "\n", + "- TODO: Fill in table features.\n", + "- TODO: Remove JS support link if not relevant, otherwise ensure link is correct.\n", + "- TODO: Make sure API reference links are correct.\n", + "\n", + "| Class | Package | Local | Serializable | [JS support](https://js.langchain.com/v0.2/docs/integrations/document_loaders/web_loaders/__module_name___loader)|\n", + "| :--- | :--- | :---: | :---: | :---: |\n", + "| [__ModuleName__Loader](https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name__loader.__ModuleName__Loader.html) | [langchain_community](https://api.python.langchain.com/en/latest/community_api_reference.html) | ✅/❌ | beta/❌ | ✅/❌ | \n", + "### Loader features\n", + "| Source | Document Lazy Loading | Async Support\n", + "| :---: | :---: | :---: | \n", + "| __ModuleName__Loader | ✅/❌ | ✅/❌ | \n", + "\n", + "## Setup\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "To access __ModuleName__ document loader you'll need to install the `__package_name__` integration package, and create a **ModuleName** account and get an API key.\n", + "\n", + "### Credentials\n", + "\n", + "- TODO: Update with relevant info.\n", + "\n", + "Head to (TODO: link) to sign up to __ModuleName__ and generate an API key. Once you've done this set the __MODULE_NAME___API_KEY environment variable:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "os.environ[\"__MODULE_NAME___API_KEY\"] = getpass.getpass(\"Enter your __ModuleName__ API key: \")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you want to get automated tracing of your model calls you can also set your [LangSmith](https://docs.smith.langchain.com/) API key by uncommenting below:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# os.environ[\"LANGSMITH_API_KEY\"] = getpass.getpass(\"Enter your LangSmith API key: \")\n", + "# os.environ[\"LANGSMITH_TRACING\"] = \"true\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Installation\n", + "\n", + "Install **langchain_community**.\n", + "\n", + "- TODO: Add any other required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -qU langchain_community" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiation\n", + "\n", + "Now we can instantiate our model object and load documents:\n", + "\n", + "- TODO: Update model instantiation with relevant params." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import __ModuleName__Loader\n", + "\n", + "loader = __ModuleName__Loader(\n", + " # required params = ...\n", + " # optional params = ...\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load\n", + "\n", + "- TODO: Run cells to show loading capabilities" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "docs = loader.load()\n", + "docs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(docs[0].metadata)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lazy Load\n", + "\n", + "- TODO: Run cells to show lazy loading capabilities. Delete if lazy loading is not implemented." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "page = []\n", + "for doc in loader.lazy_load():\n", + " page.append(doc)\n", + " if len(page) >= 10:\n", + " # do some paged operation, e.g.\n", + " # index.upsert(page)\n", + "\n", + " page = []" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## TODO: Any functionality specific to this document loader\n", + "\n", + "E.g. using specific configs for different loading behavior. Delete if not relevant." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## API reference\n", + "\n", + "For detailed documentation of all __ModuleName__Loader features and configurations head to the API reference: https://api.python.langchain.com/en/latest/document_loaders/langchain_community.document_loaders.__module_name___loader.__ModuleName__Loader.html" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/cli/langchain_cli/integration_template/integration_template/document_loaders.py b/libs/cli/langchain_cli/integration_template/integration_template/document_loaders.py new file mode 100644 index 0000000000000..62269b8d5d67a --- /dev/null +++ b/libs/cli/langchain_cli/integration_template/integration_template/document_loaders.py @@ -0,0 +1,72 @@ +"""__ModuleName__ document loader.""" + +from typing import Iterator + +from langchain_core.document_loaders.base import BaseLoader +from langchain_core.documents import Document + + +class __ModuleName__Loader(BaseLoader): + # TODO: Replace all TODOs in docstring. See example docstring: + # https://github.com/langchain-ai/langchain/blob/869523ad728e6b76d77f170cce13925b4ebc3c1e/libs/community/langchain_community/document_loaders/recursive_url_loader.py#L54 + """ + __ModuleName__ document loader integration + + # TODO: Replace with relevant packages, env vars. + Setup: + Install ``__package_name__`` and set environment variable ``__MODULE_NAME___API_KEY``. + + .. code-block:: bash + + pip install -U __package_name__ + export __MODULE_NAME___API_KEY="your-api-key" + + # TODO: Replace with relevant init params. + Instantiate: + .. code-block:: python + + from langchain_community.document_loaders import __ModuleName__Loader + + loader = __ModuleName__Loader( + # required params = ... + # other params = ... + ) + + Lazy load: + .. 
code-block:: python + + docs = [] + docs_lazy = loader.lazy_load() + + # async variant: + # docs_lazy = await loader.alazy_load() + + for doc in docs_lazy: + docs.append(doc) + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + TODO: Example output + + # TODO: Delete if async load is not implemented + Async load: + .. code-block:: python + + docs = await loader.aload() + print(docs[0].page_content[:100]) + print(docs[0].metadata) + + .. code-block:: python + + TODO: Example output + """ + + # TODO: This method must be implemented to load documents. + # Do not implement load(), a default implementation is already available. + def lazy_load(self) -> Iterator[Document]: + raise NotImplementedError() + + # TODO: Implement if you would like to change default BaseLoader implementation + # async def alazy_load(self) -> AsyncIterator[Document]: \ No newline at end of file diff --git a/libs/cli/langchain_cli/integration_template/pyproject.toml b/libs/cli/langchain_cli/integration_template/pyproject.toml index dc538a0ab85be..5223a67704cc9 100644 --- a/libs/cli/langchain_cli/integration_template/pyproject.toml +++ b/libs/cli/langchain_cli/integration_template/pyproject.toml @@ -1,18 +1,18 @@ [tool.poetry] name = "__package_name__" -version = "0.0.1" +version = "0.1.0" description = "An integration package connecting __ModuleName__ and Gigachain" authors = [] readme = "README.md" -repository = "https://github.com/gigachain-ai/gigachain" +repository = "https://github.com/langchain-ai/langchain" license = "MIT" [tool.poetry.urls] -"Source Code" = "https://github.com/gigachain-ai/gigachain/tree/master/libs/partners/__package_name_short__" +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/__package_name_short__" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1,<0.3" +gigachain-core = "^0.2.0" [tool.poetry.group.test] optional = true @@ -21,7 +21,7 @@ optional = true pytest = "^7.4.3" pytest-asyncio = "^0.23.2" pytest-socket = "^0.7.0" -gigachain-core = {path = "../../core", develop = true} +gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.codespell] optional = true @@ -42,19 +42,19 @@ ruff = "^0.1.8" [tool.poetry.group.typing.dependencies] mypy = "^1.7.1" -gigachain-core = {path = "../../core", develop = true} +gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -gigachain-core = {path = "../../core", develop = true} +gigachain-core = { path = "../../core", develop = true } [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "I", # isort + "E", # pycodestyle + "F", # pyflakes + "I", # isort "T201", # print ] @@ -62,9 +62,7 @@ select = [ disallow_untyped_defs = "True" [tool.coverage.run] -omit = [ - "tests/*", -] +omit = ["tests/*"] [build-system] requires = ["poetry-core>=1.0.0"] @@ -86,4 +84,4 @@ addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5 markers = [ "compile: mark placeholder test used to compile integration tests without running them", ] -asyncio_mode = "auto" \ No newline at end of file +asyncio_mode = "auto" diff --git a/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/astradb.json b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/astradb.json new file mode 100644 index 0000000000000..eeb3bc196d39a --- /dev/null +++ b/libs/cli/langchain_cli/namespaces/migrate/codemods/migrations/astradb.json @@ -0,0 +1,30 @@ +[ 
+ [ + "langchain_community.vectorstores.astradb.AstraDB", + "langchain_astradb.AstraDBVectorStore" + ], + [ + "langchain_community.storage.astradb.AstraDBByteStore", + "langchain_astradb.AstraDBByteStore" + ], + [ + "langchain_community.storage.astradb.AstraDBStore", + "langchain_astradb.AstraDBStore" + ], + [ + "langchain_community.cache.AstraDBCache", + "langchain_astradb.AstraDBCache" + ], + [ + "langchain_community.cache.AstraDBSemanticCache", + "langchain_astradb.AstraDBSemanticCache" + ], + [ + "langchain_community.chat_message_histories.astradb.AstraDBChatMessageHistory", + "langchain_astradb.AstraDBChatMessageHistory" + ], + [ + "langchain_community.document_loaders.astradb.AstraDBLoader", + "langchain_astradb.AstraDBLoader" + ] +] diff --git a/libs/cli/pyproject.toml b/libs/cli/pyproject.toml index 5bfa8170f8149..ef8e4eb1d170d 100644 --- a/libs/cli/pyproject.toml +++ b/libs/cli/pyproject.toml @@ -1,15 +1,11 @@ [tool.poetry] name = "gigachain-cli" -version = "0.0.22" -description = "CLI for interacting with gigachain" +version = "0.0.25" +description = "CLI for interacting with Gigachain" authors = ["Erick Friis "] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" license = "MIT" -packages = [ - {include = "langchain_cli"} -] - [tool.poetry.urls] "Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/cli" @@ -42,7 +38,7 @@ ruff = "^0.1.5" [tool.poetry.group.test_integration.dependencies] [tool.poetry.extras] -# For langserve +# For gigaserve serve = [] [tool.ruff.lint] @@ -59,7 +55,7 @@ watch = "poetry run ptw" version = "poetry version --short" bump = ["_bump_1", "_bump_2"] lint = ["_lint", "_check_formatting"] -format = ["_lint_fix", "_format"] +format = ["_format", "_lint_fix"] _bump_2.shell = """sed -i "" "/^__version__ =/c\\ \n__version__ = \\"$version\\"\n" langchain_cli/cli.py""" _bump_2.uses = { version = "version" } @@ -72,4 +68,4 @@ _lint_fix = "poetry run ruff . 
--fix" [build-system] requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" \ No newline at end of file +build-backend = "poetry.core.masonry.api" diff --git a/libs/community/extended_testing_deps.txt b/libs/community/extended_testing_deps.txt new file mode 100644 index 0000000000000..db8d9cdfd0b8d --- /dev/null +++ b/libs/community/extended_testing_deps.txt @@ -0,0 +1,87 @@ +aiosqlite>=0.19.0,<0.20 +aleph-alpha-client>=2.15.0,<3 +anthropic>=0.3.11,<0.4 +arxiv>=1.4,<2 +assemblyai>=0.17.0,<0.18 +atlassian-python-api>=3.36.0,<4 +azure-ai-documentintelligence>=1.0.0b1,<2 +azure-identity>=1.15.0,<2 +azure-search-documents==11.4.0 +beautifulsoup4>=4,<5 +bibtexparser>=1.4.0,<2 +cassio>=0.1.6,<0.2 +chardet>=5.1.0,<6 +cloudpathlib>=0.18,<0.19 +cloudpickle>=2.0.0 +cohere>=4,<6 +databricks-vectorsearch>=0.21,<0.22 +datasets>=2.15.0,<3 +dgml-utils>=0.3.0,<0.4 +elasticsearch>=8.12.0,<9 +esprima>=4.0.1,<5 +faiss-cpu>=1,<2 +feedparser>=6.0.10,<7 +fireworks-ai>=0.9.0,<0.10 +friendli-client>=1.2.4,<2 +geopandas>=0.13.1,<0.14 +gitpython>=3.1.32,<4 +google-cloud-documentai>=2.20.1,<3 +gql>=3.4.1,<4 +gradientai>=1.4.0,<2 +hdbcli>=2.19.21,<3 +hologres-vector==0.0.6 +html2text>=2020.1.16 +httpx>=0.24.1,<0.25 +httpx-sse>=0.4.0,<0.5 +javelin-sdk>=0.1.8,<0.2 +jinja2>=3,<4 +jq>=1.4.1,<2 +jsonschema>1 +lxml>=4.9.3,<6.0 +markdownify>=0.11.6,<0.12 +motor>=3.3.1,<4 +msal>=1.25.0,<2 +mwparserfromhell>=0.6.4,<0.7 +mwxml>=0.3.3,<0.4 +newspaper3k>=0.2.8,<0.3 +numexpr>=2.8.6,<3 +nvidia-riva-client>=2.14.0,<3 +oci>=2.128.0,<3 +openai<2 +openapi-pydantic>=0.3.2,<0.4 +oracle-ads>=2.9.1,<3 +oracledb>=2.2.0,<3 +pandas>=2.0.1,<3 +pdfminer-six>=20221105 +pgvector>=0.1.6,<0.2 +praw>=7.7.1,<8 +premai>=0.3.25,<0.4 +psychicapi>=0.8.0,<0.9 +py-trello>=0.19.0,<0.20 +pyjwt>=2.8.0,<3 +pymupdf>=1.22.3,<2 +pypdf>=3.4.0,<4 +pypdfium2>=4.10.0,<5 +pyspark>=3.4.0,<4 +rank-bm25>=0.2.2,<0.3 +rapidfuzz>=3.1.1,<4 +rapidocr-onnxruntime>=1.3.2,<2 +rdflib==7.0.0 +requests-toolbelt>=1.0.0,<2 +rspace_client>=2.5.0,<3 +scikit-learn>=1.2.2,<2 +simsimd>=4.3.1,<5 +sqlite-vss>=0.1.2,<0.2 +streamlit>=1.18.0,<2 +sympy>=1.12,<2 +telethon>=1.28.5,<2 +tidb-vector>=0.0.3,<1.0.0 +timescale-vector==0.0.1 +tqdm>=4.48.0 +tree-sitter>=0.20.2,<0.21 +tree-sitter-languages>=1.8.0,<2 +upstash-redis>=1.1.0,<2 +upstash-ratelimit>=1.1.0,<2 +vdms==0.0.20 +xata>=1.0.0a7,<2 +xmltodict>=0.13.0,<0.14 diff --git a/libs/community/langchain_community/agents/__init__.py b/libs/community/langchain_community/agents/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/community/langchain_community/agents/openai_assistant/__init__.py b/libs/community/langchain_community/agents/openai_assistant/__init__.py new file mode 100644 index 0000000000000..f7fdcbdd86490 --- /dev/null +++ b/libs/community/langchain_community/agents/openai_assistant/__init__.py @@ -0,0 +1,3 @@ +from langchain_community.agents.openai_assistant.base import OpenAIAssistantV2Runnable + +__all__ = ["OpenAIAssistantV2Runnable"] diff --git a/libs/community/langchain_community/agents/openai_assistant/base.py b/libs/community/langchain_community/agents/openai_assistant/base.py new file mode 100644 index 0000000000000..806fef2bd8b44 --- /dev/null +++ b/libs/community/langchain_community/agents/openai_assistant/base.py @@ -0,0 +1,546 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Optional, + Sequence, + Type, + Union, +) + +from langchain.agents.openai_assistant.base import OpenAIAssistantRunnable, OutputType +from 
langchain_core._api import beta +from langchain_core.callbacks import CallbackManager +from langchain_core.load import dumpd +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator +from langchain_core.runnables import RunnableConfig, ensure_config +from langchain_core.tools import BaseTool +from langchain_core.utils.function_calling import convert_to_openai_tool + +if TYPE_CHECKING: + import openai + from openai._types import NotGiven + from openai.types.beta.assistant import ToolResources as AssistantToolResources + + +def _get_openai_client() -> openai.OpenAI: + try: + import openai + + return openai.OpenAI(default_headers={"OpenAI-Beta": "assistants=v2"}) + except ImportError as e: + raise ImportError( + "Unable to import openai, please install with `pip install openai`." + ) from e + except AttributeError as e: + raise AttributeError( + "Please make sure you are using a v1.23-compatible version of openai. You " + 'can install with `pip install "openai>=1.23"`.' + ) from e + + +def _get_openai_async_client() -> openai.AsyncOpenAI: + try: + import openai + + return openai.AsyncOpenAI(default_headers={"OpenAI-Beta": "assistants=v2"}) + except ImportError as e: + raise ImportError( + "Unable to import openai, please install with `pip install openai`." + ) from e + except AttributeError as e: + raise AttributeError( + "Please make sure you are using a v1.23-compatible version of openai. You " + 'can install with `pip install "openai>=1.23"`.' + ) from e + + +def _convert_file_ids_into_attachments(file_ids: list) -> list: + """ + Convert file_ids into attachments + File search and Code interpreter will be turned on by default. + + Args: + file_ids (list): List of file_ids that need to be converted into attachments. + Returns: + A list of attachments that are converted from file_ids. + """ + attachments = [] + for id in file_ids: + attachments.append( + { + "file_id": id, + "tools": [{"type": "file_search"}, {"type": "code_interpreter"}], + } + ) + return attachments + + +def _is_assistants_builtin_tool( + tool: Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool], +) -> bool: + """ + Determine if tool corresponds to OpenAI Assistants built-in. + + Args: + tool : Tool that needs to be determined + Returns: + A boolean response of true or false indicating if the tool corresponds to + OpenAI Assistants built-in. + """ + assistants_builtin_tools = ("code_interpreter", "retrieval") + return ( + isinstance(tool, dict) + and ("type" in tool) + and (tool["type"] in assistants_builtin_tools) + ) + + +def _get_assistants_tool( + tool: Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool], +) -> Dict[str, Any]: + """Convert a raw function/class to an OpenAI tool. + + Note that OpenAI assistants supports several built-in tools, + such as "code_interpreter" and "retrieval." + + Args: + tool: Tools or functions that need to be converted to OpenAI tools. + Returns: + A dictionary of tools that are converted into OpenAI tools. + + """ + if _is_assistants_builtin_tool(tool): + return tool # type: ignore + else: + return convert_to_openai_tool(tool) + + +@beta() +class OpenAIAssistantV2Runnable(OpenAIAssistantRunnable): + """Run an OpenAI Assistant. + + Example using OpenAI tools: + .. code-block:: python + + from langchain.agents.openai_assistant import OpenAIAssistantV2Runnable + + interpreter_assistant = OpenAIAssistantV2Runnable.create_assistant( + name="langchain assistant", + instructions="You are a personal math tutor. 
Write and run code to answer math questions.", + tools=[{"type": "code_interpreter"}], + model="gpt-4-1106-preview" + ) + output = interpreter_assistant.invoke({"content": "What's 10 - 4 raised to the 2.7"}) + + Example using custom tools and AgentExecutor: + .. code-block:: python + + from langchain.agents.openai_assistant import OpenAIAssistantV2Runnable + from langchain.agents import AgentExecutor + from langchain.tools import E2BDataAnalysisTool + + + tools = [E2BDataAnalysisTool(api_key="...")] + agent = OpenAIAssistantV2Runnable.create_assistant( + name="langchain assistant e2b tool", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=tools, + model="gpt-4-1106-preview", + as_agent=True + ) + + agent_executor = AgentExecutor(agent=agent, tools=tools) + agent_executor.invoke({"content": "What's 10 - 4 raised to the 2.7"}) + + + Example using custom tools and custom execution: + .. code-block:: python + + from langchain.agents.openai_assistant import OpenAIAssistantV2Runnable + from langchain.agents import AgentExecutor + from langchain_core.agents import AgentFinish + from langchain.tools import E2BDataAnalysisTool + + + tools = [E2BDataAnalysisTool(api_key="...")] + agent = OpenAIAssistantV2Runnable.create_assistant( + name="langchain assistant e2b tool", + instructions="You are a personal math tutor. Write and run code to answer math questions.", + tools=tools, + model="gpt-4-1106-preview", + as_agent=True + ) + + def execute_agent(agent, tools, input): + tool_map = {tool.name: tool for tool in tools} + response = agent.invoke(input) + while not isinstance(response, AgentFinish): + tool_outputs = [] + for action in response: + tool_output = tool_map[action.tool].invoke(action.tool_input) + tool_outputs.append({"output": tool_output, "tool_call_id": action.tool_call_id}) + response = agent.invoke( + { + "tool_outputs": tool_outputs, + "run_id": action.run_id, + "thread_id": action.thread_id + } + ) + + return response + + response = execute_agent(agent, tools, {"content": "What's 10 - 4 raised to the 2.7"}) + next_response = execute_agent(agent, tools, {"content": "now add 17.241", "thread_id": response.thread_id}) + + """ # noqa: E501 + + client: Any = Field(default_factory=_get_openai_client) + """OpenAI or AzureOpenAI client.""" + async_client: Any = None + """OpenAI or AzureOpenAI async client.""" + assistant_id: str + """OpenAI assistant id.""" + check_every_ms: float = 1_000.0 + """Frequency with which to check run progress in ms.""" + as_agent: bool = False + """Use as a LangChain agent, compatible with the AgentExecutor.""" + + @root_validator() + def validate_async_client(cls, values: dict) -> dict: + if values["async_client"] is None: + import openai + + api_key = values["client"].api_key + values["async_client"] = openai.AsyncOpenAI(api_key=api_key) + return values + + @classmethod + def create_assistant( + cls, + name: str, + instructions: str, + tools: Sequence[Union[BaseTool, dict]], + model: str, + *, + client: Optional[Union[openai.OpenAI, openai.AzureOpenAI]] = None, + tool_resources: Optional[Union[AssistantToolResources, dict, NotGiven]] = None, + **kwargs: Any, + ) -> OpenAIAssistantRunnable: + """Create an OpenAI Assistant and instantiate the Runnable. + + Args: + name: Assistant name. + instructions: Assistant instructions. + tools: Assistant tools. Can be passed in OpenAI format or as BaseTools. + tool_resources: Assistant tool resources. Can be passed in OpenAI format + model: Assistant model to use. 
+ client: OpenAI or AzureOpenAI client. + Will create default OpenAI client (Assistant v2) if not specified. + + Returns: + OpenAIAssistantRunnable configured to run using the created assistant. + """ + + client = client or _get_openai_client() + if tool_resources is None: + from openai._types import NOT_GIVEN + + tool_resources = NOT_GIVEN + assistant = client.beta.assistants.create( + name=name, + instructions=instructions, + tools=[_get_assistants_tool(tool) for tool in tools], # type: ignore + tool_resources=tool_resources, + model=model, + ) + return cls(assistant_id=assistant.id, client=client, **kwargs) + + def invoke( + self, input: dict, config: Optional[RunnableConfig] = None, **kwargs: Any + ) -> OutputType: + """Invoke assistant. + + Args: + input: Runnable input dict that can have: + content: User message when starting a new run. + thread_id: Existing thread to use. + run_id: Existing run to use. Should only be supplied when providing + the tool output for a required action after an initial invocation. + file_ids: (deprecated) File ids to include in new run. Use + 'attachments' instead + attachments: Assistant files to include in new run. (v2 API). + message_metadata: Metadata to associate with new message. + thread_metadata: Metadata to associate with new thread. Only relevant + when new thread being created. + instructions: Additional run instructions. + model: Override Assistant model for this run. + tools: Override Assistant tools for this run. + tool_resources: Override Assistant tool resources for this run (v2 API). + run_metadata: Metadata to associate with new run. + config: Runnable config: + + Return: + If self.as_agent, will return + Union[List[OpenAIAssistantAction], OpenAIAssistantFinish]. Otherwise, + will return OpenAI types + Union[List[ThreadMessage], List[RequiredActionFunctionToolCall]]. + """ + + config = ensure_config(config) + callback_manager = CallbackManager.configure( + inheritable_callbacks=config.get("callbacks"), + inheritable_tags=config.get("tags"), + inheritable_metadata=config.get("metadata"), + ) + run_manager = callback_manager.on_chain_start( + dumpd(self), input, name=config.get("run_name") + ) + + files = _convert_file_ids_into_attachments(kwargs.get("file_ids", [])) + attachments = kwargs.get("attachments", []) + files + + try: + # Being run within AgentExecutor and there are tool outputs to submit. + if self.as_agent and input.get("intermediate_steps"): + tool_outputs = self._parse_intermediate_steps( + input["intermediate_steps"] + ) + run = self.client.beta.threads.runs.submit_tool_outputs(**tool_outputs) + # Starting a new thread and a new run. + elif "thread_id" not in input: + thread = { + "messages": [ + { + "role": "user", + "content": input["content"], + "attachments": attachments, + "metadata": input.get("message_metadata"), + } + ], + "metadata": input.get("thread_metadata"), + } + run = self._create_thread_and_run(input, thread) + # Starting a new run in an existing thread. + elif "run_id" not in input: + _ = self.client.beta.threads.messages.create( + input["thread_id"], + content=input["content"], + role="user", + attachments=attachments, + metadata=input.get("message_metadata"), + ) + run = self._create_run(input) + # Submitting tool outputs to an existing run, outside the AgentExecutor + # framework. 
+ else: + run = self.client.beta.threads.runs.submit_tool_outputs(**input) + run = self._wait_for_run(run.id, run.thread_id) + except BaseException as e: + run_manager.on_chain_error(e) + raise e + try: + response = self._get_response(run) + except BaseException as e: + run_manager.on_chain_error(e, metadata=run.dict()) + raise e + else: + run_manager.on_chain_end(response) + return response + + @classmethod + async def acreate_assistant( + cls, + name: str, + instructions: str, + tools: Sequence[Union[BaseTool, dict]], + model: str, + *, + async_client: Optional[ + Union[openai.AsyncOpenAI, openai.AsyncAzureOpenAI] + ] = None, + tool_resources: Optional[Union[AssistantToolResources, dict, NotGiven]] = None, + **kwargs: Any, + ) -> OpenAIAssistantRunnable: + """Create an AsyncOpenAI Assistant and instantiate the Runnable. + + Args: + name: Assistant name. + instructions: Assistant instructions. + tools: Assistant tools. Can be passed in OpenAI format or as BaseTools. + tool_resources: Assistant tool resources. Can be passed in OpenAI format + model: Assistant model to use. + async_client: AsyncOpenAI client. + Will create default async_client if not specified. + + Returns: + AsyncOpenAIAssistantRunnable configured to run using the created assistant. + """ + async_client = async_client or _get_openai_async_client() + if tool_resources is None: + from openai._types import NOT_GIVEN + + tool_resources = NOT_GIVEN + openai_tools = [_get_assistants_tool(tool) for tool in tools] + + assistant = await async_client.beta.assistants.create( + name=name, + instructions=instructions, + tools=openai_tools, # type: ignore + tool_resources=tool_resources, + model=model, + ) + return cls(assistant_id=assistant.id, async_client=async_client, **kwargs) + + async def ainvoke( + self, input: dict, config: Optional[RunnableConfig] = None, **kwargs: Any + ) -> OutputType: + """Async invoke assistant. + + Args: + input: Runnable input dict that can have: + content: User message when starting a new run. + thread_id: Existing thread to use. + run_id: Existing run to use. Should only be supplied when providing + the tool output for a required action after an initial invocation. + file_ids: (deprecated) File ids to include in new run. Use + 'attachments' instead + attachments: Assistant files to include in new run. (v2 API). + message_metadata: Metadata to associate with new message. + thread_metadata: Metadata to associate with new thread. Only relevant + when new thread being created. + instructions: Additional run instructions. + model: Override Assistant model for this run. + tools: Override Assistant tools for this run. + tool_resources: Override Assistant tool resources for this run (v2 API). + run_metadata: Metadata to associate with new run. + config: Runnable config: + + Return: + If self.as_agent, will return + Union[List[OpenAIAssistantAction], OpenAIAssistantFinish]. Otherwise, + will return OpenAI types + Union[List[ThreadMessage], List[RequiredActionFunctionToolCall]]. + """ + + config = config or {} + callback_manager = CallbackManager.configure( + inheritable_callbacks=config.get("callbacks"), + inheritable_tags=config.get("tags"), + inheritable_metadata=config.get("metadata"), + ) + run_manager = callback_manager.on_chain_start( + dumpd(self), input, name=config.get("run_name") + ) + + files = _convert_file_ids_into_attachments(kwargs.get("file_ids", [])) + attachments = kwargs.get("attachments", []) + files + + try: + # Being run within AgentExecutor and there are tool outputs to submit. 
+ if self.as_agent and input.get("intermediate_steps"): + tool_outputs = self._parse_intermediate_steps( + input["intermediate_steps"] + ) + run = await self.async_client.beta.threads.runs.submit_tool_outputs( + **tool_outputs + ) + # Starting a new thread and a new run. + elif "thread_id" not in input: + thread = { + "messages": [ + { + "role": "user", + "content": input["content"], + "attachments": attachments, + "metadata": input.get("message_metadata"), + } + ], + "metadata": input.get("thread_metadata"), + } + run = await self._acreate_thread_and_run(input, thread) + # Starting a new run in an existing thread. + elif "run_id" not in input: + _ = await self.async_client.beta.threads.messages.create( + input["thread_id"], + content=input["content"], + role="user", + attachments=attachments, + metadata=input.get("message_metadata"), + ) + run = await self._acreate_run(input) + # Submitting tool outputs to an existing run, outside the AgentExecutor + # framework. + else: + run = await self.async_client.beta.threads.runs.submit_tool_outputs( + **input + ) + run = await self._await_for_run(run.id, run.thread_id) + except BaseException as e: + run_manager.on_chain_error(e) + raise e + try: + response = self._get_response(run) + except BaseException as e: + run_manager.on_chain_error(e, metadata=run.dict()) + raise e + else: + run_manager.on_chain_end(response) + return response + + def _create_run(self, input: dict) -> Any: + params = { + k: v + for k, v in input.items() + if k in ("instructions", "model", "tools", "tool_resources", "run_metadata") + } + return self.client.beta.threads.runs.create( + input["thread_id"], + assistant_id=self.assistant_id, + **params, + ) + + def _create_thread_and_run(self, input: dict, thread: dict) -> Any: + params = { + k: v + for k, v in input.items() + if k in ("instructions", "model", "tools", "run_metadata") + } + if tool_resources := input.get("tool_resources"): + thread["tool_resources"] = tool_resources + run = self.client.beta.threads.create_and_run( + assistant_id=self.assistant_id, + thread=thread, + **params, + ) + return run + + async def _acreate_run(self, input: dict) -> Any: + params = { + k: v + for k, v in input.items() + if k in ("instructions", "model", "tools", "tool_resources" "run_metadata") + } + return await self.async_client.beta.threads.runs.create( + input["thread_id"], + assistant_id=self.assistant_id, + **params, + ) + + async def _acreate_thread_and_run(self, input: dict, thread: dict) -> Any: + params = { + k: v + for k, v in input.items() + if k in ("instructions", "model", "tools", "run_metadata") + } + if tool_resources := input.get("tool_resources"): + thread["tool_resources"] = tool_resources + run = await self.async_client.beta.threads.create_and_run( + assistant_id=self.assistant_id, + thread=thread, + **params, + ) + return run diff --git a/libs/community/langchain_community/callbacks/upstash_ratelimit_callback.py b/libs/community/langchain_community/callbacks/upstash_ratelimit_callback.py new file mode 100644 index 0000000000000..068bf9dc05a90 --- /dev/null +++ b/libs/community/langchain_community/callbacks/upstash_ratelimit_callback.py @@ -0,0 +1,206 @@ +"""Ratelimiting Handler to limit requests or tokens""" + +import logging +from typing import Any, Dict, List, Literal, Optional + +from langchain_core.callbacks import BaseCallbackHandler +from langchain_core.outputs import LLMResult + +logger = logging.getLogger(__name__) +try: + from upstash_ratelimit import Ratelimit +except ImportError: + Ratelimit = None + + 
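For orientation, a minimal, hypothetical usage sketch of the `OpenAIAssistantV2Runnable` defined above, showing the v2-specific pieces: `tool_resources` at assistant creation and per-invocation `attachments` (which `invoke` reads from `**kwargs`). The file id, model name and instructions are placeholders, and the exact `tool_resources` shape follows the OpenAI Assistants v2 API rather than anything defined in this diff.

```python
from langchain_community.agents.openai_assistant import OpenAIAssistantV2Runnable

# Placeholder ids -- substitute a real uploaded file id from your OpenAI account.
assistant = OpenAIAssistantV2Runnable.create_assistant(
    name="data assistant",
    instructions="You analyse the files attached to each message.",
    tools=[{"type": "code_interpreter"}],  # built-in tool dicts are passed through unchanged
    tool_resources={"code_interpreter": {"file_ids": ["file-abc123"]}},
    model="gpt-4o",
)

# `attachments` is a keyword argument to invoke(), not a key of the input dict.
output = assistant.invoke(
    {"content": "Plot the first column of the attached CSV."},
    attachments=[{"file_id": "file-abc123", "tools": [{"type": "code_interpreter"}]}],
)
```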
+class UpstashRatelimitError(Exception): + """ + Upstash Ratelimit Error + + Raised when the rate limit is reached in `UpstashRatelimitHandler` + """ + + def __init__( + self, + message: str, + type: Literal["token", "request"], + limit: Optional[int] = None, + reset: Optional[float] = None, + ): + """ + Args: + message (str): error message + type (str): The kind of the limit which was reached. One of + "token" or "request" + limit (Optional[int]): The limit which was reached. Passed when type + is request + reset (Optional[int]): unix timestamp in milliseconds when the limits + are reset. Passed when type is request + """ + # Call the base class constructor with the parameters it needs + super().__init__(message) + self.type = type + self.limit = limit + self.reset = reset + + +class UpstashRatelimitHandler(BaseCallbackHandler): + """ + Callback to handle rate limiting based on the number of requests + or the number of tokens in the input. + + It uses Upstash Ratelimit to track the ratelimit which utilizes + Upstash Redis to track the state. + + Should not be passed to the chain when initialising the chain. + This is because the handler has a state which should be fresh + every time invoke is called. Instead, initialise and pass a handler + every time you invoke. + """ + + raise_error = True + _checked: bool = False + + def __init__( + self, + identifier: str, + *, + token_ratelimit: Optional[Ratelimit] = None, + request_ratelimit: Optional[Ratelimit] = None, + include_output_tokens: bool = False, + ): + """ + Creates UpstashRatelimitHandler. Must be passed an identifier to + ratelimit like a user id or an ip address. + + Additionally, it must be passed at least one of token_ratelimit + or request_ratelimit parameters. + + Args: + identifier Union[int, str]: the identifier + token_ratelimit Optional[Ratelimit]: Ratelimit to limit the + number of tokens. Only works with OpenAI models since only + these models provide the number of tokens as information + in their output. + request_ratelimit Optional[Ratelimit]: Ratelimit to limit the + number of requests + include_output_tokens bool: Whether to count output tokens when + rate limiting based on number of tokens. Only used when + `token_ratelimit` is passed. False by default. + + Example: + .. code-block:: python + + from upstash_redis import Redis + from upstash_ratelimit import Ratelimit, FixedWindow + + redis = Redis.from_env() + ratelimit = Ratelimit( + redis=redis, + # fixed window to allow 10 requests every 10 seconds: + limiter=FixedWindow(max_requests=10, window=10), + ) + + user_id = "foo" + handler = UpstashRatelimitHandler( + identifier=user_id, + request_ratelimit=ratelimit + ) + + # Initialize a simple runnable to test + chain = RunnableLambda(str) + + # pass handler as callback: + output = chain.invoke( + "input", + config={ + "callbacks": [handler] + } + ) + + """ + if not any([token_ratelimit, request_ratelimit]): + raise ValueError( + "You must pass at least one of input_token_ratelimit or" + " request_ratelimit parameters for handler to work." + ) + + self.identifier = identifier + self.token_ratelimit = token_ratelimit + self.request_ratelimit = request_ratelimit + self.include_output_tokens = include_output_tokens + + def on_chain_start( + self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any + ) -> Any: + """ + Run when chain starts running. + + on_chain_start runs multiple times during a chain execution. To make + sure that it's only called once, we keep a bool state `_checked`. 
If + not `self._checked`, we call limit with `request_ratelimit` and raise + `UpstashRatelimitError` if the identifier is rate limited. + """ + if self.request_ratelimit and not self._checked: + response = self.request_ratelimit.limit(self.identifier) + if not response.allowed: + raise UpstashRatelimitError( + "Request limit reached!", "request", response.limit, response.reset + ) + self._checked = True + + def on_llm_start( + self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any + ) -> None: + """ + Run when LLM starts running + """ + if self.token_ratelimit: + remaining = self.token_ratelimit.get_remaining(self.identifier) + if remaining <= 0: + raise UpstashRatelimitError("Token limit reached!", "token") + + def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None: + """ + Run when LLM ends running + + If the `include_output_tokens` is set to True, number of tokens + in LLM completion are counted for rate limiting + """ + if self.token_ratelimit: + try: + llm_output = response.llm_output or {} + token_usage = llm_output["token_usage"] + token_count = ( + token_usage["total_tokens"] + if self.include_output_tokens + else token_usage["prompt_tokens"] + ) + except KeyError: + raise ValueError( + "LLM response doesn't include" + " `token_usage: {total_tokens: int, prompt_tokens: int}`" + " field. To use UpstashRatelimitHandler with token_ratelimit," + " either use a model which returns token_usage (like " + " OpenAI models) or rate limit only with request_ratelimit." + ) + + # call limit to add the completion tokens to rate limit + # but don't raise exception since we already generated + # the tokens and would rather continue execution. + self.token_ratelimit.limit(self.identifier, rate=token_count) + + def reset(self, identifier: Optional[str] = None) -> "UpstashRatelimitHandler": + """ + Creates a new UpstashRatelimitHandler object with the same + ratelimit configurations but with a new identifier if it's + provided. + + Also resets the state of the handler. + """ + return UpstashRatelimitHandler( + identifier=identifier or self.identifier, + token_ratelimit=self.token_ratelimit, + request_ratelimit=self.request_ratelimit, + include_output_tokens=self.include_output_tokens, + ) diff --git a/libs/community/langchain_community/chains/pebblo_retrieval/utilities.py b/libs/community/langchain_community/chains/pebblo_retrieval/utilities.py new file mode 100644 index 0000000000000..3056c8fae7c2c --- /dev/null +++ b/libs/community/langchain_community/chains/pebblo_retrieval/utilities.py @@ -0,0 +1,65 @@ +import logging +import os +import platform +from typing import Tuple + +from langchain_core.env import get_runtime_environment + +from langchain_community.chains.pebblo_retrieval.models import Framework, Runtime + +logger = logging.getLogger(__name__) + +PLUGIN_VERSION = "0.1.1" + +CLASSIFIER_URL = os.getenv("PEBBLO_CLASSIFIER_URL", "http://localhost:8000") +PEBBLO_CLOUD_URL = os.getenv("PEBBLO_CLOUD_URL", "https://api.daxa.ai") + +PROMPT_URL = "/v1/prompt" +APP_DISCOVER_URL = "/v1/app/discover" + + +def get_runtime() -> Tuple[Framework, Runtime]: + """Fetch the current Framework and Runtime details. + + Returns: + Tuple[Framework, Runtime]: Framework and Runtime for the current app instance. 
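To complement the request-limit example in the handler's docstring above, here is a hedged sketch of token-based limiting with error inspection. The Upstash credentials, identifier, limits and chat model are placeholders; `ChatOpenAI` is chosen only because the handler needs a model that reports `token_usage`, and the imports use the module path added in this diff.

```python
from langchain_openai import ChatOpenAI  # assumption: any model reporting token_usage works
from upstash_ratelimit import FixedWindow, Ratelimit
from upstash_redis import Redis

from langchain_community.callbacks.upstash_ratelimit_callback import (
    UpstashRatelimitError,
    UpstashRatelimitHandler,
)

redis = Redis.from_env()  # expects UPSTASH_REDIS_REST_URL / _TOKEN to be set
# Illustrative budget: roughly 2000 prompt tokens per user per minute.
token_limit = Ratelimit(redis=redis, limiter=FixedWindow(max_requests=2000, window=60))

llm = ChatOpenAI(model="gpt-4o-mini")

# Create a fresh handler per invocation, as the class docstring recommends.
handler = UpstashRatelimitHandler(identifier="user-42", token_ratelimit=token_limit)
try:
    answer = llm.invoke(
        "Explain rate limiting in one sentence.",
        config={"callbacks": [handler]},
    )
except UpstashRatelimitError as e:
    if e.type == "token":
        print("Token budget exhausted for this window; try again later.")
    else:
        print(f"Request limit {e.limit} reached; resets at {e.reset} (unix ms).")
```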
+ """ + runtime_env = get_runtime_environment() + framework = Framework( + name="langchain", version=runtime_env.get("library_version", None) + ) + uname = platform.uname() + runtime = Runtime( + host=uname.node, + path=os.environ["PWD"], + platform=runtime_env.get("platform", "unknown"), + os=uname.system, + os_version=uname.version, + ip=get_ip(), + language=runtime_env.get("runtime", "unknown"), + language_version=runtime_env.get("runtime_version", "unknown"), + ) + + if "Darwin" in runtime.os: + runtime.type = "desktop" + runtime.runtime = "Mac OSX" + + logger.debug(f"framework {framework}") + logger.debug(f"runtime {runtime}") + return framework, runtime + + +def get_ip() -> str: + """Fetch local runtime ip address. + + Returns: + str: IP address + """ + import socket # lazy imports + + host = socket.gethostname() + try: + public_ip = socket.gethostbyname(host) + except Exception: + public_ip = socket.gethostbyname("localhost") + return public_ip diff --git a/libs/community/langchain_community/chat_message_histories/kafka.py b/libs/community/langchain_community/chat_message_histories/kafka.py new file mode 100644 index 0000000000000..9c171971cf9bd --- /dev/null +++ b/libs/community/langchain_community/chat_message_histories/kafka.py @@ -0,0 +1,362 @@ +""" Kafka-based chat message history by using confluent-kafka-python. + confluent-kafka-python is under Apache 2.0 license. + https://github.com/confluentinc/confluent-kafka-python +""" +from __future__ import annotations + +import json +import logging +import time +from enum import Enum +from typing import TYPE_CHECKING, List, Optional, Sequence + +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.messages import BaseMessage, message_to_dict, messages_from_dict + +if TYPE_CHECKING: + from confluent_kafka import TopicPartition + from confluent_kafka.admin import AdminClient + +logger = logging.getLogger(__name__) + +BOOTSTRAP_SERVERS_CONFIG = "bootstrap.servers" + +DEFAULT_TTL_MS = 604800000 # 7 days +DEFAULT_REPLICATION_FACTOR = 1 +DEFAULT_PARTITION = 3 + + +class ConsumeStartPosition(Enum): + """Consume start position for Kafka consumer to get chat history messages. + LAST_CONSUMED: Continue from the last consumed offset. + EARLIEST: Start consuming from the beginning. + LATEST: Start consuming from the latest offset. + """ + + LAST_CONSUMED = 1 + EARLIEST = 2 + LATEST = 3 + + +def ensure_topic_exists( + admin_client: AdminClient, + topic_name: str, + replication_factor: int, + partition: int, + ttl_ms: int, +) -> int: + """Create topic if it doesn't exist, and return the number of partitions. + If the topic already exists, we don't change the topic configuration. 
+ """ + from confluent_kafka.admin import NewTopic + + try: + topic_metadata = admin_client.list_topics().topics + if topic_name in topic_metadata: + num_partitions = len(topic_metadata[topic_name].partitions) + logger.info( + f"Topic {topic_name} already exists with {num_partitions} partitions" + ) + return num_partitions + except Exception as e: + logger.error(f"Failed to list topics: {e}") + raise e + + topics = [ + NewTopic( + topic_name, + num_partitions=partition, + replication_factor=replication_factor, + config={"retention.ms": str(ttl_ms)}, + ) + ] + try: + futures = admin_client.create_topics(topics) + for _, f in futures.items(): + f.result() # result is None + logger.info(f"Topic {topic_name} created") + except Exception as e: + logger.error(f"Failed to create topic {topic_name}: {e}") + raise e + + return partition + + +class KafkaChatMessageHistory(BaseChatMessageHistory): + """Chat message history stored in Kafka. + + Setup: + Install ``confluent-kafka-python``. + + .. code-block:: bash + + pip install confluent_kafka + + Instantiate: + .. code-block:: python + + from langchain_community.chat_message_histories import KafkaChatMessageHistory + + history = KafkaChatMessageHistory( + session_id="your_session_id", + bootstrap_servers="host:port", + ) + + Add and retrieve messages: + .. code-block:: python + + # Add messages + history.add_messages([message1, message2, message3, ...]) + + # Retrieve messages + message_batch_0 = history.messages + + # retrieve messages after message_batch_0 + message_batch_1 = history.messages + + # Reset to beginning and retrieve messages + messages_from_beginning = history.messages_from_beginning() + + Retrieving messages is stateful. Internally, it uses Kafka consumer to read. + The consumed offset is maintained persistently. + + To retrieve messages, you can use the following methods: + - `messages`: + continue consuming chat messages from last one. + - `messages_from_beginning`: + reset the consumer to the beginning of the chat history and return messages. + Optional parameters: + 1. `max_message_count`: maximum number of messages to return. + 2. `max_time_sec`: maximum time in seconds to wait for messages. + - `messages_from_latest`: + reset to end of the chat history and try consuming messages. + Optional parameters same as above. + - `messages_from_last_consumed`: + continuing from the last consumed message, similar to `messages`. + Optional parameters same as above. + + `max_message_count` and `max_time_sec` are used to avoid blocking indefinitely + when retrieving messages. As a result, the method to retrieve messages may not + return all messages. Change `max_message_count` and `max_time_sec` to retrieve + all history messages. + """ # noqa: E501 + + def __init__( + self, + session_id: str, + bootstrap_servers: str, + ttl_ms: int = DEFAULT_TTL_MS, + replication_factor: int = DEFAULT_REPLICATION_FACTOR, + partition: int = DEFAULT_PARTITION, + ): + """ + Args: + session_id: The ID for single chat session. It is used as Kafka topic name. + bootstrap_servers: + Comma-separated host/port pairs to establish connection to Kafka cluster + https://kafka.apache.org/documentation.html#adminclientconfigs_bootstrap.servers + ttl_ms: + Time-to-live (milliseconds) for automatic expiration of entries. + Default 7 days. -1 for no expiration. + It translates to https://kafka.apache.org/documentation.html#topicconfigs_retention.ms + replication_factor: The replication factor for the topic. Default 1. + partition: The number of partitions for the topic. 
Default 3. + """ + try: + from confluent_kafka import Producer + from confluent_kafka.admin import AdminClient + except (ImportError, ModuleNotFoundError): + raise ImportError( + "Could not import confluent_kafka package. " + "Please install it with `pip install confluent_kafka`." + ) + + self.session_id = session_id + self.bootstrap_servers = bootstrap_servers + self.admin_client = AdminClient({BOOTSTRAP_SERVERS_CONFIG: bootstrap_servers}) + self.num_partitions = ensure_topic_exists( + self.admin_client, session_id, replication_factor, partition, ttl_ms + ) + self.producer = Producer({BOOTSTRAP_SERVERS_CONFIG: bootstrap_servers}) + + def add_messages( + self, + messages: Sequence[BaseMessage], + flush_timeout_seconds: float = 5.0, + ) -> None: + """Add messages to the chat history by producing to the Kafka topic.""" + try: + for message in messages: + self.producer.produce( + topic=self.session_id, + value=json.dumps(message_to_dict(message)), + ) + message_remaining = self.producer.flush(flush_timeout_seconds) + if message_remaining > 0: + logger.warning(f"{message_remaining} messages are still in-flight.") + except Exception as e: + logger.error(f"Failed to add messages to Kafka: {e}") + raise e + + def __read_messages( + self, + consume_start_pos: ConsumeStartPosition, + max_message_count: Optional[int], + max_time_sec: Optional[float], + ) -> List[BaseMessage]: + """Retrieve messages from Kafka topic for the session. + Please note this method is stateful. Internally, it uses Kafka consumer + to consume messages, and maintains the consumed offset. + + Args: + consume_start_pos: Start position for Kafka consumer. + max_message_count: Maximum number of messages to consume. + max_time_sec: Time limit in seconds to consume messages. + Returns: + List of messages. 
+ """ + from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, Consumer + + consumer_config = { + BOOTSTRAP_SERVERS_CONFIG: self.bootstrap_servers, + "group.id": self.session_id, + "auto.offset.reset": "latest" + if consume_start_pos == ConsumeStartPosition.LATEST + else "earliest", + } + + def assign_beginning( + assigned_consumer: Consumer, assigned_partitions: list[TopicPartition] + ) -> None: + for p in assigned_partitions: + p.offset = OFFSET_BEGINNING + assigned_consumer.assign(assigned_partitions) + + def assign_latest( + assigned_consumer: Consumer, assigned_partitions: list[TopicPartition] + ) -> None: + for p in assigned_partitions: + p.offset = OFFSET_END + assigned_consumer.assign(assigned_partitions) + + messages: List[dict] = [] + consumer = Consumer(consumer_config) + try: + if consume_start_pos == ConsumeStartPosition.EARLIEST: + consumer.subscribe([self.session_id], on_assign=assign_beginning) + elif consume_start_pos == ConsumeStartPosition.LATEST: + consumer.subscribe([self.session_id], on_assign=assign_latest) + else: + consumer.subscribe([self.session_id]) + start_time_sec = time.time() + while True: + if ( + max_time_sec is not None + and time.time() - start_time_sec > max_time_sec + ): + break + if max_message_count is not None and len(messages) >= max_message_count: + break + + message = consumer.poll(timeout=1.0) + if message is None: # poll timeout + continue + if message.error() is not None: # error + logger.error(f"Consumer error: {message.error()}") + continue + if message.value() is None: # empty value + logger.warning("Empty message value") + continue + messages.append(json.loads(message.value())) + except Exception as e: + logger.error(f"Failed to consume messages from Kafka: {e}") + raise e + finally: + consumer.close() + + return messages_from_dict(messages) + + def messages_from_beginning( + self, max_message_count: Optional[int] = 5, max_time_sec: Optional[float] = 5.0 + ) -> List[BaseMessage]: + """Retrieve messages from Kafka topic from the beginning. + This method resets the consumer to the beginning and consumes messages. + + Args: + max_message_count: Maximum number of messages to consume. + max_time_sec: Time limit in seconds to consume messages. + Returns: + List of messages. + """ + return self.__read_messages( + consume_start_pos=ConsumeStartPosition.EARLIEST, + max_message_count=max_message_count, + max_time_sec=max_time_sec, + ) + + def messages_from_latest( + self, max_message_count: Optional[int] = 5, max_time_sec: Optional[float] = 5.0 + ) -> List[BaseMessage]: + """Reset to the end offset. Try to consume messages if available. + + Args: + max_message_count: Maximum number of messages to consume. + max_time_sec: Time limit in seconds to consume messages. + Returns: + List of messages. + """ + + return self.__read_messages( + consume_start_pos=ConsumeStartPosition.LATEST, + max_message_count=max_message_count, + max_time_sec=max_time_sec, + ) + + def messages_from_last_consumed( + self, max_message_count: Optional[int] = 5, max_time_sec: Optional[float] = 5.0 + ) -> List[BaseMessage]: + """Retrieve messages from Kafka topic from the last consumed message. + Please note this method is stateful. Internally, it uses Kafka consumer + to consume messages, and maintains the commit offset. + + Args: + max_message_count: Maximum number of messages to consume. + max_time_sec: Time limit in seconds to consume messages. + Returns: + List of messages. 
+ """ + + return self.__read_messages( + consume_start_pos=ConsumeStartPosition.LAST_CONSUMED, + max_message_count=max_message_count, + max_time_sec=max_time_sec, + ) + + @property + def messages(self) -> List[BaseMessage]: # type: ignore + """ + Retrieve the messages for the session, from Kafka topic continuously + from last consumed message. This method is stateful and maintains + consumed(committed) offset based on consumer group. + Alternatively, use messages_from_last_consumed() with specified parameters. + Use messages_from_beginning() to read from the earliest message. + Use messages_from_latest() to read from the latest message. + """ + return self.messages_from_last_consumed() + + def clear(self) -> None: + """Clear the chat history by deleting the Kafka topic.""" + try: + futures = self.admin_client.delete_topics([self.session_id]) + for _, f in futures.items(): + f.result() # result is None + logger.info(f"Topic {self.session_id} deleted") + except Exception as e: + logger.error(f"Failed to delete topic {self.session_id}: {e}") + raise e + + def close(self) -> None: + """Release the resources. + Nothing to be released at this moment. + """ + pass diff --git a/libs/community/langchain_community/chat_message_histories/zep_cloud.py b/libs/community/langchain_community/chat_message_histories/zep_cloud.py new file mode 100644 index 0000000000000..0fc36b737dd78 --- /dev/null +++ b/libs/community/langchain_community/chat_message_histories/zep_cloud.py @@ -0,0 +1,303 @@ +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence + +from langchain_core.chat_history import BaseChatMessageHistory +from langchain_core.messages import ( + AIMessage, + BaseMessage, + HumanMessage, +) + +if TYPE_CHECKING: + from zep_cloud import ( + Memory, + MemoryGetRequestMemoryType, + MemorySearchResult, + Message, + NotFoundError, + RoleType, + SearchScope, + SearchType, + ) + +logger = logging.getLogger(__name__) + + +def condense_zep_memory_into_human_message(zep_memory: Memory) -> BaseMessage: + """Condense Zep memory into a human message. + + Args: + zep_memory: The Zep memory object. + + Returns: + BaseMessage: The human message. + """ + prompt = "" + if zep_memory.facts: + prompt = "\n".join(zep_memory.facts) + if zep_memory.summary and zep_memory.summary.content: + prompt += "\n" + zep_memory.summary.content + for msg in zep_memory.messages or []: + prompt += f"\n{msg.role or msg.role_type}: {msg.content}" + return HumanMessage(content=prompt) + + +def get_zep_message_role_type(role: str) -> RoleType: + """Get the Zep role type from the role string. + + Args: + role: The role string. One of "human", "ai", "system", + "function", "tool". + + Returns: + RoleType: The Zep role type. One of "user", "assistant", + "system", "function", "tool". + """ + if role == "human": + return "user" + elif role == "ai": + return "assistant" + elif role == "system": + return "system" + elif role == "function": + return "function" + elif role == "tool": + return "tool" + else: + return "system" + + +class ZepCloudChatMessageHistory(BaseChatMessageHistory): + """Chat message history that uses Zep Cloud as a backend. 
+ + Recommended usage:: + + # Set up Zep Chat History + zep_chat_history = ZepChatMessageHistory( + session_id=session_id, + api_key=, + ) + + # Use a standard ConversationBufferMemory to encapsulate the Zep chat history + memory = ConversationBufferMemory( + memory_key="chat_history", chat_memory=zep_chat_history + ) + + Zep - Recall, understand, and extract data from chat histories. + Power personalized AI experiences. + + Zep is a long-term memory service for AI Assistant apps. + With Zep, you can provide AI assistants with the + ability to recall past conversations, + no matter how distant, + while also reducing hallucinations, latency, and cost. + + see Zep Cloud Docs: https://help.getzep.com + + This class is a thin wrapper around the zep-python package. Additional + Zep functionality is exposed via the `zep_summary`, `zep_messages` and `zep_facts` + properties. + + For more information on the zep-python package, see: + https://github.com/getzep/zep-python + """ + + def __init__( + self, + session_id: str, + api_key: str, + *, + memory_type: Optional[MemoryGetRequestMemoryType] = None, + lastn: Optional[int] = None, + ai_prefix: Optional[str] = None, + human_prefix: Optional[str] = None, + summary_instruction: Optional[str] = None, + ) -> None: + try: + from zep_cloud.client import AsyncZep, Zep + except ImportError: + raise ImportError( + "Could not import zep-cloud package. " + "Please install it with `pip install zep-cloud`." + ) + + self.zep_client = Zep(api_key=api_key) + self.zep_client_async = AsyncZep(api_key=api_key) + self.session_id = session_id + + self.memory_type = memory_type or "perpetual" + self.lastn = lastn + self.ai_prefix = ai_prefix or "ai" + self.human_prefix = human_prefix or "human" + self.summary_instruction = summary_instruction + + @property + def messages(self) -> List[BaseMessage]: # type: ignore + """Retrieve messages from Zep memory""" + zep_memory: Optional[Memory] = self._get_memory() + if not zep_memory: + return [] + + return [condense_zep_memory_into_human_message(zep_memory)] + + @property + def zep_messages(self) -> List[Message]: + """Retrieve summary from Zep memory""" + zep_memory: Optional[Memory] = self._get_memory() + if not zep_memory: + return [] + + return zep_memory.messages or [] + + @property + def zep_summary(self) -> Optional[str]: + """Retrieve summary from Zep memory""" + zep_memory: Optional[Memory] = self._get_memory() + if not zep_memory or not zep_memory.summary: + return None + + return zep_memory.summary.content + + @property + def zep_facts(self) -> Optional[List[str]]: + """Retrieve conversation facts from Zep memory""" + if self.memory_type != "perpetual": + return None + zep_memory: Optional[Memory] = self._get_memory() + if not zep_memory or not zep_memory.facts: + return None + + return zep_memory.facts + + def _get_memory(self) -> Optional[Memory]: + """Retrieve memory from Zep""" + from zep_cloud import NotFoundError + + try: + zep_memory: Memory = self.zep_client.memory.get( + self.session_id, memory_type=self.memory_type, lastn=self.lastn + ) + except NotFoundError: + logger.warning( + f"Session {self.session_id} not found in Zep. Returning None" + ) + return None + return zep_memory + + def add_user_message( # type: ignore[override] + self, message: str, metadata: Optional[Dict[str, Any]] = None + ) -> None: + """Convenience method for adding a human message string to the store. + + Args: + message: The string contents of a human message. + metadata: Optional metadata to attach to the message. 
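Besides the ConversationBufferMemory pattern shown in the docstring above, the Zep-specific properties can be read directly. A hedged sketch: the session id and API key are placeholders, and facts/summary are generated server-side, so they may be `None` until Zep has processed the session.

```python
from langchain_community.chat_message_histories.zep_cloud import ZepCloudChatMessageHistory

history = ZepCloudChatMessageHistory(
    session_id="session-123",   # placeholder session id
    api_key="z_...",            # placeholder Zep Cloud API key
    memory_type="perpetual",    # facts are only exposed for perpetual memory
)
history.add_user_message("I moved to Berlin last month.")
history.add_ai_message("Nice! How are you settling in?")

print(history.zep_summary)   # running summary, or None if not yet generated
print(history.zep_facts)     # extracted facts, or None outside perpetual memory
print(history.messages)      # a single HumanMessage condensing facts, summary and turns
```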
+ """ + self.add_message(HumanMessage(content=message), metadata=metadata) + + def add_ai_message( # type: ignore[override] + self, message: str, metadata: Optional[Dict[str, Any]] = None + ) -> None: + """Convenience method for adding an AI message string to the store. + + Args: + message: The string contents of an AI message. + metadata: Optional metadata to attach to the message. + """ + self.add_message(AIMessage(content=message), metadata=metadata) + + def add_message( + self, message: BaseMessage, metadata: Optional[Dict[str, Any]] = None + ) -> None: + """Append the message to the Zep memory history""" + from zep_cloud import Message + + self.zep_client.memory.add( + self.session_id, + messages=[ + Message( + content=str(message.content), + role=message.type, + role_type=get_zep_message_role_type(message.type), + metadata=metadata, + ) + ], + ) + + def add_messages(self, messages: Sequence[BaseMessage]) -> None: + """Append the messages to the Zep memory history""" + from zep_cloud import Message + + zep_messages = [ + Message( + content=str(message.content), + role=message.type, + role_type=get_zep_message_role_type(message.type), + metadata=message.additional_kwargs.get("metadata", None), + ) + for message in messages + ] + + self.zep_client.memory.add(self.session_id, messages=zep_messages) + + async def aadd_messages(self, messages: Sequence[BaseMessage]) -> None: + """Append the messages to the Zep memory history asynchronously""" + from zep_cloud import Message + + zep_messages = [ + Message( + content=str(message.content), + role=message.type, + role_type=get_zep_message_role_type(message.type), + metadata=message.additional_kwargs.get("metadata", None), + ) + for message in messages + ] + + await self.zep_client_async.memory.add(self.session_id, messages=zep_messages) + + def search( + self, + query: str, + metadata: Optional[Dict] = None, + search_scope: SearchScope = "messages", + search_type: SearchType = "similarity", + mmr_lambda: Optional[float] = None, + limit: Optional[int] = None, + ) -> List[MemorySearchResult]: + """Search Zep memory for messages matching the query""" + + return self.zep_client.memory.search( + self.session_id, + text=query, + metadata=metadata, + search_scope=search_scope, + search_type=search_type, + mmr_lambda=mmr_lambda, + limit=limit, + ) + + def clear(self) -> None: + """Clear session memory from Zep. Note that Zep is long-term storage for memory + and this is not advised unless you have specific data retention requirements. + """ + try: + self.zep_client.memory.delete(self.session_id) + except NotFoundError: + logger.warning( + f"Session {self.session_id} not found in Zep. Skipping delete." + ) + + async def aclear(self) -> None: + """Clear session memory from Zep asynchronously. + Note that Zep is long-term storage for memory and this is not advised + unless you have specific data retention requirements. + """ + try: + await self.zep_client_async.memory.delete(self.session_id) + except NotFoundError: + logger.warning( + f"Session {self.session_id} not found in Zep. Skipping delete." 
+ ) diff --git a/libs/community/langchain_community/chat_models/llamacpp.py b/libs/community/langchain_community/chat_models/llamacpp.py new file mode 100644 index 0000000000000..03eb0054baebd --- /dev/null +++ b/libs/community/langchain_community/chat_models/llamacpp.py @@ -0,0 +1,811 @@ +import json +from operator import itemgetter +from pathlib import Path +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Mapping, + Optional, + Sequence, + Type, + Union, + cast, +) + +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models import LanguageModelInput +from langchain_core.language_models.chat_models import ( + BaseChatModel, + generate_from_stream, +) +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + BaseMessageChunk, + ChatMessage, + ChatMessageChunk, + FunctionMessage, + FunctionMessageChunk, + HumanMessage, + HumanMessageChunk, + SystemMessage, + SystemMessageChunk, + ToolMessage, + ToolMessageChunk, +) +from langchain_core.messages.tool import InvalidToolCall, ToolCall, ToolCallChunk +from langchain_core.output_parsers.base import OutputParserLike +from langchain_core.output_parsers.openai_tools import ( + JsonOutputKeyToolsParser, + PydanticToolsParser, + make_invalid_tool_call, + parse_tool_call, +) +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator +from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough +from langchain_core.tools import BaseTool +from langchain_core.utils.function_calling import convert_to_openai_tool + + +class ChatLlamaCpp(BaseChatModel): + """llama.cpp model. + + To use, you should have the llama-cpp-python library installed, and provide the + path to the Llama model as a named parameter to the constructor. + Check out: https://github.com/abetlen/llama-cpp-python + + """ + + client: Any #: :meta private: + + model_path: str + """The path to the Llama model file.""" + + lora_base: Optional[str] = None + """The path to the Llama LoRA base model.""" + + lora_path: Optional[str] = None + """The path to the Llama LoRA. If None, no LoRa is loaded.""" + + n_ctx: int = 512 + """Token context window.""" + + n_parts: int = -1 + """Number of parts to split the model into. + If -1, the number of parts is automatically determined.""" + + seed: int = -1 + """Seed. If -1, a random seed is used.""" + + f16_kv: bool = True + """Use half-precision for key/value cache.""" + + logits_all: bool = False + """Return logits for all tokens, not just the last token.""" + + vocab_only: bool = False + """Only load the vocabulary, no weights.""" + + use_mlock: bool = False + """Force system to keep model in RAM.""" + + n_threads: Optional[int] = None + """Number of threads to use. + If None, the number of threads is automatically determined.""" + + n_batch: int = 8 + """Number of tokens to process in parallel. + Should be a number between 1 and n_ctx.""" + + n_gpu_layers: Optional[int] = None + """Number of layers to be loaded into gpu memory. Default None.""" + + suffix: Optional[str] = None + """A suffix to append to the generated text. If None, no suffix is appended.""" + + max_tokens: int = 256 + """The maximum number of tokens to generate.""" + + temperature: float = 0.8 + """The temperature to use for sampling.""" + + top_p: float = 0.95 + """The top-p value to use for sampling.""" + + logprobs: Optional[int] = None + """The number of logprobs to return. 
If None, no logprobs are returned.""" + + echo: bool = False + """Whether to echo the prompt.""" + + stop: Optional[List[str]] = None + """A list of strings to stop generation when encountered.""" + + repeat_penalty: float = 1.1 + """The penalty to apply to repeated tokens.""" + + top_k: int = 40 + """The top-k value to use for sampling.""" + + last_n_tokens_size: int = 64 + """The number of tokens to look back when applying the repeat_penalty.""" + + use_mmap: bool = True + """Whether to keep the model loaded in RAM""" + + rope_freq_scale: float = 1.0 + """Scale factor for rope sampling.""" + + rope_freq_base: float = 10000.0 + """Base frequency for rope sampling.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Any additional parameters to pass to llama_cpp.Llama.""" + + streaming: bool = True + """Whether to stream the results, token by token.""" + + grammar_path: Optional[Union[str, Path]] = None + """ + grammar_path: Path to the .gbnf file that defines formal grammars + for constraining model outputs. For instance, the grammar can be used + to force the model to generate valid JSON or to speak exclusively in emojis. At most + one of grammar_path and grammar should be passed in. + """ + grammar: Any = None + """ + grammar: formal grammar for constraining model outputs. For instance, the grammar + can be used to force the model to generate valid JSON or to speak exclusively in + emojis. At most one of grammar_path and grammar should be passed in. + """ + + verbose: bool = True + """Print verbose output to stderr.""" + + @root_validator(pre=False, skip_on_failure=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that llama-cpp-python library is installed.""" + try: + from llama_cpp import Llama, LlamaGrammar + except ImportError: + raise ImportError( + "Could not import llama-cpp-python library. " + "Please install the llama-cpp-python library to " + "use this embedding model: pip install llama-cpp-python" + ) + + model_path = values["model_path"] + model_param_names = [ + "rope_freq_scale", + "rope_freq_base", + "lora_path", + "lora_base", + "n_ctx", + "n_parts", + "seed", + "f16_kv", + "logits_all", + "vocab_only", + "use_mlock", + "n_threads", + "n_batch", + "use_mmap", + "last_n_tokens_size", + "verbose", + ] + model_params = {k: values[k] for k in model_param_names} + # For backwards compatibility, only include if non-null. + if values["n_gpu_layers"] is not None: + model_params["n_gpu_layers"] = values["n_gpu_layers"] + + model_params.update(values["model_kwargs"]) + + try: + values["client"] = Llama(model_path, **model_params) + except Exception as e: + raise ValueError( + f"Could not load Llama model from path: {model_path}. " + f"Received error {e}" + ) + + if values["grammar"] and values["grammar_path"]: + grammar = values["grammar"] + grammar_path = values["grammar_path"] + raise ValueError( + "Can only pass in one of grammar and grammar_path. Received " + f"{grammar=} and {grammar_path=}." + ) + elif isinstance(values["grammar"], str): + values["grammar"] = LlamaGrammar.from_string(values["grammar"]) + elif values["grammar_path"]: + values["grammar"] = LlamaGrammar.from_file(values["grammar_path"]) + else: + pass + return values + + def _get_parameters(self, stop: Optional[List[str]]) -> Dict[str, Any]: + """ + Performs sanity check, preparing parameters in format needed by llama_cpp. + + Returns: + Dictionary containing the combined parameters. 
+ """ + + params = self._default_params + + # llama_cpp expects the "stop" key not this, so we remove it: + stop_sequences = params.pop("stop_sequences") + + # then sets it as configured, or default to an empty list: + params["stop"] = stop or stop_sequences or self.stop or [] + + return params + + def _create_message_dicts( + self, messages: List[BaseMessage] + ) -> List[Dict[str, Any]]: + message_dicts = [_convert_message_to_dict(m) for m in messages] + + return message_dicts + + def _create_chat_result(self, response: dict) -> ChatResult: + generations = [] + for res in response["choices"]: + message = _convert_dict_to_message(res["message"]) + generation_info = dict(finish_reason=res.get("finish_reason")) + if "logprobs" in res: + generation_info["logprobs"] = res["logprobs"] + gen = ChatGeneration(message=message, generation_info=generation_info) + generations.append(gen) + token_usage = response.get("usage", {}) + llm_output = { + "token_usage": token_usage, + # "system_fingerprint": response.get("system_fingerprint", ""), + } + return ChatResult(generations=generations, llm_output=llm_output) + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + params = {**self._get_parameters(stop), **kwargs} + + # Check tool_choice is whether available, if yes then run no stream with tool + # calling + if self.streaming and not params.get("tool_choice"): + stream_iter = self._stream(messages, run_manager=run_manager, **kwargs) + return generate_from_stream(stream_iter) + + message_dicts = self._create_message_dicts(messages) + + response = self.client.create_chat_completion(messages=message_dicts, **params) + + return self._create_chat_result(response) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + params = {**self._get_parameters(stop), **kwargs} + message_dicts = self._create_message_dicts(messages) + + result = self.client.create_chat_completion( + messages=message_dicts, stream=True, **params + ) + + default_chunk_class = AIMessageChunk + count = 0 + for chunk in result: + count += 1 + if not isinstance(chunk, dict): + chunk = chunk.model_dump() + if len(chunk["choices"]) == 0: + continue + choice = chunk["choices"][0] + if choice["delta"] is None: + continue + chunk = _convert_delta_to_message_chunk( + choice["delta"], default_chunk_class + ) + generation_info = {} + if finish_reason := choice.get("finish_reason"): + generation_info["finish_reason"] = finish_reason + logprobs = choice.get("logprobs") + if logprobs: + generation_info["logprobs"] = logprobs + default_chunk_class = chunk.__class__ + chunk = ChatGenerationChunk( + message=chunk, generation_info=generation_info or None + ) + if run_manager: + run_manager.on_llm_new_token(chunk.text, chunk=chunk, logprobs=logprobs) + yield chunk + + def bind_tools( + self, + tools: Sequence[Union[Dict[str, Any], Type[BaseModel], Callable, BaseTool]], + *, + tool_choice: Optional[Union[Dict[str, Dict], bool, str]] = None, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, BaseMessage]: + """Bind tool-like objects to this chat model + + tool_choice: does not currently support "any", "auto" choices like OpenAI + tool-calling API. should be a dict of the form to force this tool + {"type": "function", "function": {"name": <>}}. 
+ """ + formatted_tools = [convert_to_openai_tool(tool) for tool in tools] + tool_names = [ft["function"]["name"] for ft in formatted_tools] + if tool_choice: + if isinstance(tool_choice, dict): + if not any( + tool_choice["function"]["name"] == name for name in tool_names + ): + raise ValueError( + f"Tool choice {tool_choice=} was specified, but the only " + f"provided tools were {tool_names}." + ) + elif isinstance(tool_choice, str): + chosen = [ + f for f in formatted_tools if f["function"]["name"] == tool_choice + ] + if not chosen: + raise ValueError( + f"Tool choice {tool_choice=} was specified, but the only " + f"provided tools were {tool_names}." + ) + elif isinstance(tool_choice, bool): + if len(formatted_tools) > 1: + raise ValueError( + "tool_choice=True can only be specified when a single tool is " + f"passed in. Received {len(tools)} tools." + ) + tool_choice = formatted_tools[0] + else: + raise ValueError( + """Unrecognized tool_choice type. Expected dict having format like + this {"type": "function", "function": {"name": <>}}""" + f"Received: {tool_choice}" + ) + + kwargs["tool_choice"] = tool_choice + formatted_tools = [convert_to_openai_tool(tool) for tool in tools] + return super().bind(tools=formatted_tools, **kwargs) + + def with_structured_output( + self, + schema: Optional[Union[Dict, Type[BaseModel]]] = None, + *, + include_raw: bool = False, + **kwargs: Any, + ) -> Runnable[LanguageModelInput, Union[Dict, BaseModel]]: + """Model wrapper that returns outputs formatted to match the given schema. + + Args: + schema: The output schema as a dict or a Pydantic class. If a Pydantic class + then the model output will be an object of that class. If a dict then + the model output will be a dict. With a Pydantic class the returned + attributes will be validated, whereas with a dict they will not be. If + `method` is "function_calling" and `schema` is a dict, then the dict + must match the OpenAI function-calling spec or be a valid JSON schema + with top level 'title' and 'description' keys specified. + include_raw: If False then only the parsed structured output is returned. If + an error occurs during model output parsing it will be raised. If True + then both the raw model response (a BaseMessage) and the parsed model + response will be returned. If an error occurs during output parsing it + will be caught and returned as well. The final output is always a dict + with keys "raw", "parsed", and "parsing_error". + kwargs: Any other args to bind to model, ``self.bind(..., **kwargs)``. + + Returns: + A Runnable that takes any ChatModel input and returns as output: + + If include_raw is True then a dict with keys: + raw: BaseMessage + parsed: Optional[_DictOrPydantic] + parsing_error: Optional[BaseException] + + If include_raw is False then just _DictOrPydantic is returned, + where _DictOrPydantic depends on the schema: + + If schema is a Pydantic class then _DictOrPydantic is the Pydantic + class. + + If schema is a dict then _DictOrPydantic is a dict. + + Example: Pydantic schema (include_raw=False): + .. 
code-block:: python + + from langchain_community.chat_models import ChatLlamaCpp + from langchain_core.pydantic_v1 import BaseModel + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + answer: str + justification: str + + llm = ChatLlamaCpp( + temperature=0., + model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf", + n_ctx=10000, + n_gpu_layers=4, + n_batch=200, + max_tokens=512, + n_threads=multiprocessing.cpu_count() - 1, + repeat_penalty=1.5, + top_p=0.5, + stop=["<|end_of_text|>", "<|eot_id|>"], + ) + structured_llm = llm.with_structured_output(AnswerWithJustification) + + structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + + # -> AnswerWithJustification( + # answer='They weigh the same', + # justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.' + # ) + + Example: Pydantic schema (include_raw=True): + .. code-block:: python + + from langchain_community.chat_models import ChatLlamaCpp + from langchain_core.pydantic_v1 import BaseModel + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + answer: str + justification: str + + llm = ChatLlamaCpp( + temperature=0., + model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf", + n_ctx=10000, + n_gpu_layers=4, + n_batch=200, + max_tokens=512, + n_threads=multiprocessing.cpu_count() - 1, + repeat_penalty=1.5, + top_p=0.5, + stop=["<|end_of_text|>", "<|eot_id|>"], + ) + structured_llm = llm.with_structured_output(AnswerWithJustification, include_raw=True) + + structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + # -> { + # 'raw': AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'call_Ao02pnFYXD6GN1yzc0uXPsvF', 'function': {'arguments': '{"answer":"They weigh the same.","justification":"Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ."}', 'name': 'AnswerWithJustification'}, 'type': 'function'}]}), + # 'parsed': AnswerWithJustification(answer='They weigh the same.', justification='Both a pound of bricks and a pound of feathers weigh one pound. The weight is the same, but the volume or density of the objects may differ.'), + # 'parsing_error': None + # } + + Example: dict schema (include_raw=False): + .. code-block:: python + + from langchain_community.chat_models import ChatLlamaCpp + from langchain_core.pydantic_v1 import BaseModel + from langchain_core.utils.function_calling import convert_to_openai_tool + + class AnswerWithJustification(BaseModel): + '''An answer to the user question along with justification for the answer.''' + answer: str + justification: str + + dict_schema = convert_to_openai_tool(AnswerWithJustification) + llm = ChatLlamaCpp( + temperature=0., + model_path="./SanctumAI-meta-llama-3-8b-instruct.Q8_0.gguf", + n_ctx=10000, + n_gpu_layers=4, + n_batch=200, + max_tokens=512, + n_threads=multiprocessing.cpu_count() - 1, + repeat_penalty=1.5, + top_p=0.5, + stop=["<|end_of_text|>", "<|eot_id|>"], + ) + structured_llm = llm.with_structured_output(dict_schema) + + structured_llm.invoke("What weighs more a pound of bricks or a pound of feathers") + # -> { + # 'answer': 'They weigh the same', + # 'justification': 'Both a pound of bricks and a pound of feathers weigh one pound. 
The weight is the same, but the volume and density of the two substances differ.' + # } + + """ # noqa: E501 + + if kwargs: + raise ValueError(f"Received unsupported arguments {kwargs}") + is_pydantic_schema = isinstance(schema, type) and issubclass(schema, BaseModel) + if schema is None: + raise ValueError( + "schema must be specified when method is 'function_calling'. " + "Received None." + ) + llm = self.bind_tools([schema], tool_choice=True) + if is_pydantic_schema: + output_parser: OutputParserLike = PydanticToolsParser( + tools=[cast(Type, schema)], first_tool_only=True + ) + else: + key_name = convert_to_openai_tool(schema)["function"]["name"] + output_parser = JsonOutputKeyToolsParser( + key_name=key_name, first_tool_only=True + ) + + if include_raw: + parser_assign = RunnablePassthrough.assign( + parsed=itemgetter("raw") | output_parser, parsing_error=lambda _: None + ) + parser_none = RunnablePassthrough.assign(parsed=lambda _: None) + parser_with_fallback = parser_assign.with_fallbacks( + [parser_none], exception_key="parsing_error" + ) + return RunnableMap(raw=llm) | parser_with_fallback + else: + return llm | output_parser + + @property + def _identifying_params(self) -> Dict[str, Any]: + """Return a dictionary of identifying parameters. + + This information is used by the LangChain callback system, which + is used for tracing purposes make it possible to monitor LLMs. + """ + return { + # The model name allows users to specify custom token counting + # rules in LLM monitoring applications (e.g., in LangSmith users + # can provide per token pricing for their model and monitor + # costs for the given LLM.) + **{"model_path": self.model_path}, + **self._default_params, + } + + @property + def _llm_type(self) -> str: + """Get the type of language model used by this chat model.""" + return "llama-cpp-python" + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling create_chat_completion.""" + params: Dict = { + "max_tokens": self.max_tokens, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "logprobs": self.logprobs, + "stop_sequences": self.stop, # key here is convention among LLM classes + "repeat_penalty": self.repeat_penalty, + } + if self.grammar: + params["grammar"] = self.grammar + return params + + +def _lc_tool_call_to_openai_tool_call(tool_call: ToolCall) -> dict: + return { + "type": "function", + "id": tool_call["id"], + "function": { + "name": tool_call["name"], + "arguments": json.dumps(tool_call["args"]), + }, + } + + +def _lc_invalid_tool_call_to_openai_tool_call( + invalid_tool_call: InvalidToolCall, +) -> dict: + return { + "type": "function", + "id": invalid_tool_call["id"], + "function": { + "name": invalid_tool_call["name"], + "arguments": invalid_tool_call["args"], + }, + } + + +def _convert_dict_to_message(_dict: Mapping[str, Any]) -> BaseMessage: + """Convert a dictionary to a LangChain message. + + Args: + _dict: The dictionary. + + Returns: + The LangChain message. 
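+ + Example (illustrative): ``{"role": "user", "content": "Hello"}`` is converted to ``HumanMessage(content="Hello")``.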
+ """ + role = _dict.get("role") + name = _dict.get("name") + id_ = _dict.get("id") + if role == "user": + return HumanMessage(content=_dict.get("content", ""), id=id_, name=name) + elif role == "assistant": + # Fix for azure + # Also OpenAI returns None for tool invocations + content = _dict.get("content", "") or "" + additional_kwargs: Dict = {} + if function_call := _dict.get("function_call"): + additional_kwargs["function_call"] = dict(function_call) + tool_calls = [] + invalid_tool_calls = [] + if raw_tool_calls := _dict.get("tool_calls"): + additional_kwargs["tool_calls"] = raw_tool_calls + for raw_tool_call in raw_tool_calls: + try: + tc = parse_tool_call(raw_tool_call, return_id=True) + except Exception as e: + invalid_tc = make_invalid_tool_call(raw_tool_call, str(e)) + invalid_tool_calls.append(invalid_tc) + else: + if not tc: + continue + else: + tool_calls.append(tc) + return AIMessage( + content=content, + additional_kwargs=additional_kwargs, + name=name, + id=id_, + tool_calls=tool_calls, # type: ignore[arg-type] + invalid_tool_calls=invalid_tool_calls, + ) + elif role == "system": + return SystemMessage(content=_dict.get("content", ""), name=name, id=id_) + elif role == "function": + return FunctionMessage( + content=_dict.get("content", ""), name=cast(str, _dict.get("name")), id=id_ + ) + elif role == "tool": + additional_kwargs = {} + if "name" in _dict: + additional_kwargs["name"] = _dict["name"] + return ToolMessage( + content=_dict.get("content", ""), + tool_call_id=cast(str, _dict.get("tool_call_id")), + additional_kwargs=additional_kwargs, + name=name, + id=id_, + ) + else: + return ChatMessage( + content=_dict.get("content", ""), role=cast(str, role), id=id_ + ) + + +def _format_message_content(content: Any) -> Any: + """Format message content.""" + if content and isinstance(content, list): + # Remove unexpected block types + formatted_content = [] + for block in content: + if ( + isinstance(block, dict) + and "type" in block + and block["type"] == "tool_use" + ): + continue + else: + formatted_content.append(block) + else: + formatted_content = content + + return formatted_content + + +def _convert_message_to_dict(message: BaseMessage) -> dict: + """Convert a LangChain message to a dictionary. + + Args: + message: The LangChain message. + + Returns: + The dictionary. 
+ """ + message_dict: Dict[str, Any] = { + "content": _format_message_content(message.content), + } + if (name := message.name or message.additional_kwargs.get("name")) is not None: + message_dict["name"] = name + + # populate role and additional message data + if isinstance(message, ChatMessage): + message_dict["role"] = message.role + elif isinstance(message, HumanMessage): + message_dict["role"] = "user" + elif isinstance(message, AIMessage): + message_dict["role"] = "assistant" + if "function_call" in message.additional_kwargs: + message_dict["function_call"] = message.additional_kwargs["function_call"] + if message.tool_calls or message.invalid_tool_calls: + message_dict["tool_calls"] = [ + _lc_tool_call_to_openai_tool_call(tc) for tc in message.tool_calls + ] + [ + _lc_invalid_tool_call_to_openai_tool_call(tc) + for tc in message.invalid_tool_calls + ] + elif "tool_calls" in message.additional_kwargs: + message_dict["tool_calls"] = message.additional_kwargs["tool_calls"] + tool_call_supported_props = {"id", "type", "function"} + message_dict["tool_calls"] = [ + {k: v for k, v in tool_call.items() if k in tool_call_supported_props} + for tool_call in message_dict["tool_calls"] + ] + else: + pass + # If tool calls present, content null value should be None not empty string. + if "function_call" in message_dict or "tool_calls" in message_dict: + message_dict["content"] = message_dict["content"] or None + elif isinstance(message, SystemMessage): + message_dict["role"] = "system" + elif isinstance(message, FunctionMessage): + message_dict["role"] = "function" + elif isinstance(message, ToolMessage): + message_dict["role"] = "tool" + message_dict["tool_call_id"] = message.tool_call_id + + supported_props = {"content", "role", "tool_call_id"} + message_dict = {k: v for k, v in message_dict.items() if k in supported_props} + else: + raise TypeError(f"Got unknown type {message}") + return message_dict + + +def _convert_delta_to_message_chunk( + _dict: Mapping[str, Any], default_class: Type[BaseMessageChunk] +) -> BaseMessageChunk: + id_ = _dict.get("id") + role = cast(str, _dict.get("role")) + content = cast(str, _dict.get("content") or "") + additional_kwargs: Dict = {} + if _dict.get("function_call"): + function_call = dict(_dict["function_call"]) + if "name" in function_call and function_call["name"] is None: + function_call["name"] = "" + additional_kwargs["function_call"] = function_call + tool_call_chunks = [] + if raw_tool_calls := _dict.get("tool_calls"): + additional_kwargs["tool_calls"] = raw_tool_calls + for rtc in raw_tool_calls: + try: + tool_call = ToolCallChunk( + name=rtc["function"].get("name"), + args=rtc["function"].get("arguments"), + id=rtc.get("id"), + index=rtc["index"], + ) + tool_call_chunks.append(tool_call) + except KeyError: + pass + + if role == "user" or default_class == HumanMessageChunk: + return HumanMessageChunk(content=content, id=id_) + elif role == "assistant" or default_class == AIMessageChunk: + return AIMessageChunk( + content=content, + additional_kwargs=additional_kwargs, + id=id_, + tool_call_chunks=tool_call_chunks, + ) + elif role == "system" or default_class == SystemMessageChunk: + return SystemMessageChunk(content=content, id=id_) + elif role == "function" or default_class == FunctionMessageChunk: + return FunctionMessageChunk(content=content, name=_dict["name"], id=id_) + elif role == "tool" or default_class == ToolMessageChunk: + return ToolMessageChunk( + content=content, tool_call_id=_dict["tool_call_id"], id=id_ + ) + elif role or 
default_class == ChatMessageChunk: + return ChatMessageChunk(content=content, role=role, id=id_) + else: + return default_class(content=content, id=id_) # type: ignore diff --git a/libs/community/langchain_community/chat_models/oci_generative_ai.py b/libs/community/langchain_community/chat_models/oci_generative_ai.py new file mode 100644 index 0000000000000..9409b1a2fb743 --- /dev/null +++ b/libs/community/langchain_community/chat_models/oci_generative_ai.py @@ -0,0 +1,363 @@ +import json +from abc import ABC, abstractmethod +from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence + +from langchain_core.callbacks import CallbackManagerForLLMRun +from langchain_core.language_models.chat_models import ( + BaseChatModel, + generate_from_stream, +) +from langchain_core.messages import ( + AIMessage, + AIMessageChunk, + BaseMessage, + ChatMessage, + HumanMessage, + SystemMessage, +) +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_core.pydantic_v1 import Extra + +from langchain_community.llms.oci_generative_ai import OCIGenAIBase +from langchain_community.llms.utils import enforce_stop_tokens + +CUSTOM_ENDPOINT_PREFIX = "ocid1.generativeaiendpoint" + + +class Provider(ABC): + @property + @abstractmethod + def stop_sequence_key(self) -> str: + ... + + @abstractmethod + def chat_response_to_text(self, response: Any) -> str: + ... + + @abstractmethod + def chat_stream_to_text(self, event_data: Dict) -> str: + ... + + @abstractmethod + def chat_generation_info(self, response: Any) -> Dict[str, Any]: + ... + + @abstractmethod + def get_role(self, message: BaseMessage) -> str: + ... + + @abstractmethod + def messages_to_oci_params(self, messages: Any) -> Dict[str, Any]: + ... + + +class CohereProvider(Provider): + stop_sequence_key = "stop_sequences" + + def __init__(self) -> None: + from oci.generative_ai_inference import models + + self.oci_chat_request = models.CohereChatRequest + self.oci_chat_message = { + "USER": models.CohereUserMessage, + "CHATBOT": models.CohereChatBotMessage, + "SYSTEM": models.CohereSystemMessage, + } + self.chat_api_format = models.BaseChatRequest.API_FORMAT_COHERE + + def chat_response_to_text(self, response: Any) -> str: + return response.data.chat_response.text + + def chat_stream_to_text(self, event_data: Dict) -> str: + if "text" in event_data and "finishReason" not in event_data: + return event_data["text"] + else: + return "" + + def chat_generation_info(self, response: Any) -> Dict[str, Any]: + return { + "finish_reason": response.data.chat_response.finish_reason, + } + + def get_role(self, message: BaseMessage) -> str: + if isinstance(message, HumanMessage): + return "USER" + elif isinstance(message, AIMessage): + return "CHATBOT" + elif isinstance(message, SystemMessage): + return "SYSTEM" + else: + raise ValueError(f"Got unknown type {message}") + + def messages_to_oci_params(self, messages: Sequence[ChatMessage]) -> Dict[str, Any]: + oci_chat_history = [ + self.oci_chat_message[self.get_role(msg)](message=msg.content) + for msg in messages[:-1] + ] + oci_params = { + "message": messages[-1].content, + "chat_history": oci_chat_history, + "api_format": self.chat_api_format, + } + + return oci_params + + +class MetaProvider(Provider): + stop_sequence_key = "stop" + + def __init__(self) -> None: + from oci.generative_ai_inference import models + + self.oci_chat_request = models.GenericChatRequest + self.oci_chat_message = { + "USER": models.UserMessage, + "SYSTEM": models.SystemMessage, + 
"ASSISTANT": models.AssistantMessage, + } + self.oci_chat_message_content = models.TextContent + self.chat_api_format = models.BaseChatRequest.API_FORMAT_GENERIC + + def chat_response_to_text(self, response: Any) -> str: + return response.data.chat_response.choices[0].message.content[0].text + + def chat_stream_to_text(self, event_data: Dict) -> str: + if "message" in event_data: + return event_data["message"]["content"][0]["text"] + else: + return "" + + def chat_generation_info(self, response: Any) -> Dict[str, Any]: + return { + "finish_reason": response.data.chat_response.choices[0].finish_reason, + "time_created": str(response.data.chat_response.time_created), + } + + def get_role(self, message: BaseMessage) -> str: + # meta only supports alternating user/assistant roles + if isinstance(message, HumanMessage): + return "USER" + elif isinstance(message, AIMessage): + return "ASSISTANT" + elif isinstance(message, SystemMessage): + return "SYSTEM" + else: + raise ValueError(f"Got unknown type {message}") + + def messages_to_oci_params(self, messages: List[BaseMessage]) -> Dict[str, Any]: + oci_messages = [ + self.oci_chat_message[self.get_role(msg)]( + content=[self.oci_chat_message_content(text=msg.content)] + ) + for msg in messages + ] + oci_params = { + "messages": oci_messages, + "api_format": self.chat_api_format, + "top_k": -1, + } + + return oci_params + + +class ChatOCIGenAI(BaseChatModel, OCIGenAIBase): + """ChatOCIGenAI chat model integration. + + Setup: + Install ``langchain-community`` and the ``oci`` sdk. + + .. code-block:: bash + + pip install -U langchain-community oci + + Key init args — completion params: + model_id: str + Id of the OCIGenAI chat model to use, e.g., cohere.command-r-16k. + is_stream: bool + Whether to stream back partial progress + model_kwargs: Optional[Dict] + Keyword arguments to pass to the specific model used, e.g., temperature, max_tokens. + + Key init args — client params: + service_endpoint: str + The endpoint URL for the OCIGenAI service, e.g., https://inference.generativeai.us-chicago-1.oci.oraclecloud.com. + compartment_id: str + The compartment OCID. + auth_type: str + The authentication type to use, e.g., API_KEY (default), SECURITY_TOKEN, INSTANCE_PRINCIPAL, RESOURCE_PRINCIPAL. + auth_profile: Optional[str] + The name of the profile in ~/.oci/config, if not specified , DEFAULT will be used. + provider: str + Provider name of the model. Default to None, will try to be derived from the model_id otherwise, requires user input. + See full list of supported init args and their descriptions in the params section. + + Instantiate: + .. code-block:: python + + from langchain_community.chat_models import ChatOCIGenAI + + chat = ChatOCIGenAI( + model_id="cohere.command-r-16k", + service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com", + compartment_id="MY_OCID", + model_kwargs={"temperature": 0.7, "max_tokens": 500}, + ) + + Invoke: + .. code-block:: python + messages = [ + SystemMessage(content="your are an AI assistant."), + AIMessage(content="Hi there human!"), + HumanMessage(content="tell me a joke."), + ] + response = chat.invoke(messages) + + Stream: + .. code-block:: python + + for r in chat.stream(messages): + print(r.content, end="", flush=True) + + Response metadata + .. 
code-block:: python + + response = chat.invoke(messages) + print(response.response_metadata) + + """ # noqa: E501 + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @property + def _llm_type(self) -> str: + """Return type of llm.""" + return "oci_generative_ai_chat" + + @property + def _provider_map(self) -> Mapping[str, Any]: + """Get the provider map""" + return { + "cohere": CohereProvider(), + "meta": MetaProvider(), + } + + @property + def _provider(self) -> Any: + """Get the internal provider object""" + return self._get_provider(provider_map=self._provider_map) + + def _prepare_request( + self, + messages: List[BaseMessage], + stop: Optional[List[str]], + kwargs: Dict[str, Any], + stream: bool, + ) -> Dict[str, Any]: + try: + from oci.generative_ai_inference import models + + except ImportError as ex: + raise ModuleNotFoundError( + "Could not import oci python package. " + "Please make sure you have the oci package installed." + ) from ex + oci_params = self._provider.messages_to_oci_params(messages) + oci_params["is_stream"] = stream # self.is_stream + _model_kwargs = self.model_kwargs or {} + + if stop is not None: + _model_kwargs[self._provider.stop_sequence_key] = stop + + chat_params = {**_model_kwargs, **kwargs, **oci_params} + + if self.model_id.startswith(CUSTOM_ENDPOINT_PREFIX): + serving_mode = models.DedicatedServingMode(endpoint_id=self.model_id) + else: + serving_mode = models.OnDemandServingMode(model_id=self.model_id) + + request = models.ChatDetails( + compartment_id=self.compartment_id, + serving_mode=serving_mode, + chat_request=self._provider.oci_chat_request(**chat_params), + ) + + return request + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + """Call out to a OCIGenAI chat model. + + Args: + messages: list of LangChain messages + stop: Optional list of stop words to use. + + Returns: + LangChain ChatResult + + Example: + .. 
code-block:: python + + messages = [ + HumanMessage(content="hello!"), + AIMessage(content="Hi there human!"), + HumanMessage(content="Meow!") + ] + + response = llm.invoke(messages) + """ + if self.is_stream: + stream_iter = self._stream( + messages, stop=stop, run_manager=run_manager, **kwargs + ) + return generate_from_stream(stream_iter) + + request = self._prepare_request(messages, stop, kwargs, stream=False) + response = self.client.chat(request) + + content = self._provider.chat_response_to_text(response) + + if stop is not None: + content = enforce_stop_tokens(content, stop) + + generation_info = self._provider.chat_generation_info(response) + + llm_output = { + "model_id": response.data.model_id, + "model_version": response.data.model_version, + "request_id": response.request_id, + "content-length": response.headers["content-length"], + } + + return ChatResult( + generations=[ + ChatGeneration( + message=AIMessage(content=content), generation_info=generation_info + ) + ], + llm_output=llm_output, + ) + + def _stream( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> Iterator[ChatGenerationChunk]: + request = self._prepare_request(messages, stop, kwargs, stream=True) + response = self.client.chat(request) + + for event in response.data.events(): + delta = self._provider.chat_stream_to_text(json.loads(event.data)) + chunk = ChatGenerationChunk(message=AIMessageChunk(content=delta)) + if run_manager: + run_manager.on_llm_new_token(delta, chunk=chunk) + yield chunk diff --git a/libs/community/langchain_community/chat_models/snowflake.py b/libs/community/langchain_community/chat_models/snowflake.py new file mode 100644 index 0000000000000..c25d2254f9704 --- /dev/null +++ b/libs/community/langchain_community/chat_models/snowflake.py @@ -0,0 +1,232 @@ +import json +from typing import Any, Dict, List, Optional + +from langchain_core.callbacks.manager import CallbackManagerForLLMRun +from langchain_core.language_models import BaseChatModel +from langchain_core.messages import ( + AIMessage, + BaseMessage, + ChatMessage, + HumanMessage, + SystemMessage, +) +from langchain_core.outputs import ChatGeneration, ChatResult +from langchain_core.pydantic_v1 import Field, SecretStr, root_validator +from langchain_core.utils import ( + convert_to_secret_str, + get_from_dict_or_env, + get_pydantic_field_names, +) +from langchain_core.utils.utils import build_extra_kwargs + +SUPPORTED_ROLES: List[str] = [ + "system", + "user", + "assistant", +] + + +class ChatSnowflakeCortexError(Exception): + """Error with Snowpark client.""" + + +def _convert_message_to_dict(message: BaseMessage) -> dict: + """Convert a LangChain message to a dictionary. + + Args: + message: The LangChain message. + + Returns: + The dictionary. 
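+ + Note: only the roles in ``SUPPORTED_ROLES`` ("system", "user", "assistant") are emitted; unsupported message types or roles raise ``TypeError``.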
+ """ + message_dict: Dict[str, Any] = { + "content": message.content, + } + + # populate role and additional message data + if isinstance(message, ChatMessage) and message.role in SUPPORTED_ROLES: + message_dict["role"] = message.role + elif isinstance(message, SystemMessage): + message_dict["role"] = "system" + elif isinstance(message, HumanMessage): + message_dict["role"] = "user" + elif isinstance(message, AIMessage): + message_dict["role"] = "assistant" + else: + raise TypeError(f"Got unknown type {message}") + return message_dict + + +def _truncate_at_stop_tokens( + text: str, + stop: Optional[List[str]], +) -> str: + """Truncates text at the earliest stop token found.""" + if stop is None: + return text + + for stop_token in stop: + stop_token_idx = text.find(stop_token) + if stop_token_idx != -1: + text = text[:stop_token_idx] + return text + + +class ChatSnowflakeCortex(BaseChatModel): + """Snowflake Cortex based Chat model + + To use you must have the ``snowflake-snowpark-python`` Python package installed and + either: + + 1. environment variables set with your snowflake credentials or + 2. directly passed in as kwargs to the ChatSnowflakeCortex constructor. + + Example: + .. code-block:: python + + from langchain_community.chat_models import ChatSnowflakeCortex + chat = ChatSnowflakeCortex() + """ + + _sp_session: Any = None + """Snowpark session object.""" + + model: str = "snowflake-arctic" + """Snowflake cortex hosted LLM model name, defaulted to `snowflake-arctic`. + Refer to docs for more options.""" + + cortex_function: str = "complete" + """Cortex function to use, defaulted to `complete`. + Refer to docs for more options.""" + + temperature: float = 0.7 + """Model temperature. Value should be >= 0 and <= 1.0""" + + max_tokens: Optional[int] = None + """The maximum number of output tokens in the response.""" + + top_p: Optional[float] = None + """top_p adjusts the number of choices for each predicted tokens based on + cumulative probabilities. Value should be ranging between 0.0 and 1.0. 
+ """ + + snowflake_username: Optional[str] = Field(default=None, alias="username") + """Automatically inferred from env var `SNOWFLAKE_USERNAME` if not provided.""" + snowflake_password: Optional[SecretStr] = Field(default=None, alias="password") + """Automatically inferred from env var `SNOWFLAKE_PASSWORD` if not provided.""" + snowflake_account: Optional[str] = Field(default=None, alias="account") + """Automatically inferred from env var `SNOWFLAKE_ACCOUNT` if not provided.""" + snowflake_database: Optional[str] = Field(default=None, alias="database") + """Automatically inferred from env var `SNOWFLAKE_DATABASE` if not provided.""" + snowflake_schema: Optional[str] = Field(default=None, alias="schema") + """Automatically inferred from env var `SNOWFLAKE_SCHEMA` if not provided.""" + snowflake_warehouse: Optional[str] = Field(default=None, alias="warehouse") + """Automatically inferred from env var `SNOWFLAKE_WAREHOUSE` if not provided.""" + snowflake_role: Optional[str] = Field(default=None, alias="role") + """Automatically inferred from env var `SNOWFLAKE_ROLE` if not provided.""" + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + values["model_kwargs"] = build_extra_kwargs( + extra, values, all_required_field_names + ) + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + try: + from snowflake.snowpark import Session + except ImportError: + raise ImportError( + "`snowflake-snowpark-python` package not found, please install it with " + "`pip install snowflake-snowpark-python`" + ) + + values["snowflake_username"] = get_from_dict_or_env( + values, "snowflake_username", "SNOWFLAKE_USERNAME" + ) + values["snowflake_password"] = convert_to_secret_str( + get_from_dict_or_env(values, "snowflake_password", "SNOWFLAKE_PASSWORD") + ) + values["snowflake_account"] = get_from_dict_or_env( + values, "snowflake_account", "SNOWFLAKE_ACCOUNT" + ) + values["snowflake_database"] = get_from_dict_or_env( + values, "snowflake_database", "SNOWFLAKE_DATABASE" + ) + values["snowflake_schema"] = get_from_dict_or_env( + values, "snowflake_schema", "SNOWFLAKE_SCHEMA" + ) + values["snowflake_warehouse"] = get_from_dict_or_env( + values, "snowflake_warehouse", "SNOWFLAKE_WAREHOUSE" + ) + values["snowflake_role"] = get_from_dict_or_env( + values, "snowflake_role", "SNOWFLAKE_ROLE" + ) + + connection_params = { + "account": values["snowflake_account"], + "user": values["snowflake_username"], + "password": values["snowflake_password"].get_secret_value(), + "database": values["snowflake_database"], + "schema": values["snowflake_schema"], + "warehouse": values["snowflake_warehouse"], + "role": values["snowflake_role"], + } + + try: + values["_sp_session"] = Session.builder.configs(connection_params).create() + except Exception as e: + raise ChatSnowflakeCortexError(f"Failed to create session: {e}") + + return values + + def __del__(self) -> None: + if getattr(self, "_sp_session", None) is not None: + self._sp_session.close() + + @property + def _llm_type(self) -> str: + """Get the type of language model used by this chat model.""" + return f"snowflake-cortex-{self.model}" + + def _generate( + self, + messages: List[BaseMessage], + stop: Optional[List[str]] = None, + run_manager: Optional[CallbackManagerForLLMRun] = None, + **kwargs: Any, + ) -> ChatResult: + 
message_dicts = [_convert_message_to_dict(m) for m in messages] + message_str = str(message_dicts) + options = {"temperature": self.temperature} + if self.top_p is not None: + options["top_p"] = self.top_p + if self.max_tokens is not None: + options["max_tokens"] = self.max_tokens + options_str = str(options) + sql_stmt = f""" + select snowflake.cortex.{self.cortex_function}( + '{self.model}' + ,{message_str},{options_str}) as llm_response;""" + + try: + l_rows = self._sp_session.sql(sql_stmt).collect() + except Exception as e: + raise ChatSnowflakeCortexError( + f"Error while making request to Snowflake Cortex via Snowpark: {e}" + ) + + response = json.loads(l_rows[0]["LLM_RESPONSE"]) + ai_message_content = response["choices"][0]["messages"] + + content = _truncate_at_stop_tokens(ai_message_content, stop) + message = AIMessage( + content=content, + response_metadata=response["usage"], + ) + generation = ChatGeneration(message=message) + return ChatResult(generations=[generation]) diff --git a/libs/community/langchain_community/document_compressors/dashscope_rerank.py b/libs/community/langchain_community/document_compressors/dashscope_rerank.py new file mode 100644 index 0000000000000..450108639908c --- /dev/null +++ b/libs/community/langchain_community/document_compressors/dashscope_rerank.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +from copy import deepcopy +from typing import Any, Dict, List, Optional, Sequence, Union + +from langchain_core.callbacks.base import Callbacks +from langchain_core.documents import BaseDocumentCompressor, Document +from langchain_core.pydantic_v1 import Extra, Field, root_validator +from langchain_core.utils import get_from_dict_or_env + + +class DashScopeRerank(BaseDocumentCompressor): + """Document compressor that uses `DashScope Rerank API`.""" + + client: Any = None + """DashScope client to use for compressing documents.""" + + model: Optional[str] = None + """Model to use for reranking.""" + + top_n: Optional[int] = 3 + """Number of documents to return.""" + + dashscope_api_key: Optional[str] = Field(None, alias="api_key") + """DashScope API key. Must be specified directly or via environment variable + DASHSCOPE_API_KEY.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + allow_population_by_field_name = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + + if not values.get("client"): + try: + import dashscope + except ImportError: + raise ImportError( + "Could not import dashscope python package. " + "Please install it with `pip install dashscope`." + ) + + values["client"] = dashscope.TextReRank + values["dashscope_api_key"] = get_from_dict_or_env( + values, "dashscope_api_key", "DASHSCOPE_API_KEY" + ) + values["model"] = dashscope.TextReRank.Models.gte_rerank + + return values + + def rerank( + self, + documents: Sequence[Union[str, Document, dict]], + query: str, + *, + top_n: Optional[int] = -1, + ) -> List[Dict[str, Any]]: + """Returns an ordered list of documents ordered by their relevance to the provided query. + + Args: + query: The query to use for reranking. + documents: A sequence of documents to rerank. + top_n : The number of results to return. If None returns all results. + Defaults to self.top_n. 
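+ + Returns: + A list of ``{"index": ..., "relevance_score": ...}`` dicts describing the reranked documents, as produced by the DashScope rerank call.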
+ """ # noqa: E501 + + if len(documents) == 0: # to avoid empty api call + return [] + docs = [ + doc.page_content if isinstance(doc, Document) else doc for doc in documents + ] + + top_n = top_n if (top_n is None or top_n > 0) else self.top_n + + results = self.client.call( + model=self.model, + query=query, + documents=docs, + top_n=top_n, + return_documents=False, + api_key=self.dashscope_api_key, + ) + + result_dicts = [] + for res in results.output.results: + result_dicts.append( + {"index": res.index, "relevance_score": res.relevance_score} + ) + return result_dicts + + def compress_documents( + self, + documents: Sequence[Document], + query: str, + callbacks: Optional[Callbacks] = None, + ) -> Sequence[Document]: + """ + Compress documents using DashScope's rerank API. + + Args: + documents: A sequence of documents to compress. + query: The query to use for compressing the documents. + callbacks: Callbacks to run during the compression process. + + Returns: + A sequence of compressed documents. + """ + compressed = [] + for res in self.rerank(documents, query): + doc = documents[res["index"]] + doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata)) + doc_copy.metadata["relevance_score"] = res["relevance_score"] + compressed.append(doc_copy) + return compressed diff --git a/libs/community/langchain_community/document_compressors/rankllm_rerank.py b/libs/community/langchain_community/document_compressors/rankllm_rerank.py new file mode 100644 index 0000000000000..2864df6fa334f --- /dev/null +++ b/libs/community/langchain_community/document_compressors/rankllm_rerank.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from copy import deepcopy +from enum import Enum +from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence + +from langchain.retrievers.document_compressors.base import BaseDocumentCompressor +from langchain_core.callbacks.manager import Callbacks +from langchain_core.documents import Document +from langchain_core.pydantic_v1 import Extra, Field, PrivateAttr, root_validator +from langchain_core.utils import get_from_dict_or_env + +if TYPE_CHECKING: + from rank_llm.data import Candidate, Query, Request +else: + # Avoid pydantic annotation issues when actually instantiating + # while keeping this import optional + try: + from rank_llm.data import Candidate, Query, Request + except ImportError: + pass + + +class RankLLMRerank(BaseDocumentCompressor): + """Document compressor using Flashrank interface.""" + + client: Any = None + """RankLLM client to use for compressing documents""" + top_n: int = Field(default=3) + """Top N documents to return.""" + model: str = Field(default="zephyr") + """Name of model to use for reranking.""" + step_size: int = Field(default=10) + """Step size for moving sliding window.""" + gpt_model: str = Field(default="gpt-3.5-turbo") + """OpenAI model name.""" + _retriever: Any = PrivateAttr() + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + + @root_validator(pre=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate python package exists in environment.""" + + if not values.get("client"): + client_name = values.get("model", "zephyr") + + try: + model_enum = ModelType(client_name.lower()) + except ValueError: + raise ValueError( + "Unsupported model type. Please use 'vicuna', 'zephyr', or 'gpt'." 
+ ) + + try: + if model_enum == ModelType.VICUNA: + from rank_llm.rerank.vicuna_reranker import VicunaReranker + + values["client"] = VicunaReranker() + elif model_enum == ModelType.ZEPHYR: + from rank_llm.rerank.zephyr_reranker import ZephyrReranker + + values["client"] = ZephyrReranker() + elif model_enum == ModelType.GPT: + from rank_llm.rerank.rank_gpt import SafeOpenai + from rank_llm.rerank.reranker import Reranker + + openai_api_key = get_from_dict_or_env( + values, "open_api_key", "OPENAI_API_KEY" + ) + + agent = SafeOpenai( + model=values["gpt_model"], + context_size=4096, + keys=openai_api_key, + ) + values["client"] = Reranker(agent) + + except ImportError: + raise ImportError( + "Could not import rank_llm python package. " + "Please install it with `pip install rank_llm`." + ) + + return values + + def compress_documents( + self, + documents: Sequence[Document], + query: str, + callbacks: Optional[Callbacks] = None, + ) -> Sequence[Document]: + request = Request( + query=Query(text=query, qid=1), + candidates=[ + Candidate(doc={"text": doc.page_content}, docid=index, score=1) + for index, doc in enumerate(documents) + ], + ) + + rerank_results = self.client.rerank( + request, + rank_end=len(documents), + window_size=min(20, len(documents)), + step=10, + ) + + final_results = [] + for res in rerank_results.candidates: + doc = documents[int(res.docid)] + doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata)) + final_results.append(doc_copy) + + return final_results[: self.top_n] + + +class ModelType(Enum): + VICUNA = "vicuna" + ZEPHYR = "zephyr" + GPT = "gpt" diff --git a/libs/community/langchain_community/document_compressors/volcengine_rerank.py b/libs/community/langchain_community/document_compressors/volcengine_rerank.py new file mode 100644 index 0000000000000..f62ed88514cde --- /dev/null +++ b/libs/community/langchain_community/document_compressors/volcengine_rerank.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from copy import deepcopy +from typing import Any, Dict, List, Optional, Sequence, Union + +from langchain_core.callbacks.base import Callbacks +from langchain_core.documents import BaseDocumentCompressor, Document +from langchain_core.pydantic_v1 import Extra, root_validator +from langchain_core.utils import get_from_dict_or_env + + +class VolcengineRerank(BaseDocumentCompressor): + """Document compressor that uses `Volcengine Rerank API`.""" + + client: Any = None + """Volcengine client to use for compressing documents.""" + + ak: Optional[str] = None + """Access Key ID. + https://www.volcengine.com/docs/84313/1254553""" + + sk: Optional[str] = None + """Secret Access Key. + https://www.volcengine.com/docs/84313/1254553""" + + region: str = "api-vikingdb.volces.com" + """https://www.volcengine.com/docs/84313/1254488. """ + + host: str = "cn-beijing" + """https://www.volcengine.com/docs/84313/1254488. """ + + top_n: Optional[int] = 3 + """Number of documents to return.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + arbitrary_types_allowed = True + allow_population_by_field_name = True + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api key and python package exists in environment.""" + + if not values.get("client"): + try: + from volcengine.viking_db import VikingDBService + except ImportError: + raise ImportError( + "Could not import volcengine python package. 
" + "Please install it with `pip install volcengine` " + "or `pip install --user volcengine`." + ) + + values["ak"] = get_from_dict_or_env(values, "ak", "VOLC_API_AK") + values["sk"] = get_from_dict_or_env(values, "sk", "VOLC_API_SK") + + values["client"] = VikingDBService( + host="api-vikingdb.volces.com", + region="cn-beijing", + scheme="https", + connection_timeout=30, + socket_timeout=30, + ak=values["ak"], + sk=values["sk"], + ) + + return values + + def rerank( + self, + documents: Sequence[Union[str, Document, dict]], + query: str, + *, + top_n: Optional[int] = -1, + ) -> List[Dict[str, Any]]: + """Returns an ordered list of documents ordered by their relevance to the provided query. + + Args: + query: The query to use for reranking. + documents: A sequence of documents to rerank. + top_n : The number of results to return. If None returns all results. + Defaults to self.top_n. + """ # noqa: E501 + + if len(documents) == 0: # to avoid empty api call + return [] + docs = [ + { + "query": query, + "content": doc.page_content if isinstance(doc, Document) else doc, + } + for doc in documents + ] + + from volcengine.viking_db import VikingDBService + + client: VikingDBService = self.client + results = client.batch_rerank(docs) + + result_dicts = [] + for index, score in enumerate(results): + result_dicts.append({"index": index, "relevance_score": score}) + + result_dicts.sort(key=lambda x: x["relevance_score"], reverse=True) + top_n = top_n if (top_n is None or top_n > 0) else self.top_n + + return result_dicts[:top_n] + + def compress_documents( + self, + documents: Sequence[Document], + query: str, + callbacks: Optional[Callbacks] = None, + ) -> Sequence[Document]: + """ + Compress documents using Volcengine's rerank API. + + Args: + documents: A sequence of documents to compress. + query: The query to use for compressing the documents. + callbacks: Callbacks to run during the compression process. + + Returns: + A sequence of compressed documents. 
+ """ + compressed = [] + for res in self.rerank(documents, query): + doc = documents[res["index"]] + doc_copy = Document(doc.page_content, metadata=deepcopy(doc.metadata)) + doc_copy.metadata["relevance_score"] = res["relevance_score"] + compressed.append(doc_copy) + return compressed diff --git a/libs/community/langchain_community/document_loaders/blob_loaders/cloud_blob_loader.py b/libs/community/langchain_community/document_loaders/blob_loaders/cloud_blob_loader.py new file mode 100644 index 0000000000000..2fa866e86c692 --- /dev/null +++ b/libs/community/langchain_community/document_loaders/blob_loaders/cloud_blob_loader.py @@ -0,0 +1,295 @@ +"""Use to load blobs from the local file system.""" +import contextlib +import mimetypes +import tempfile +from io import BufferedReader, BytesIO +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Callable, + Generator, + Iterable, + Iterator, + Optional, + Sequence, + TypeVar, + Union, +) +from urllib.parse import urlparse + +if TYPE_CHECKING: + from cloudpathlib import AnyPath + +from langchain_community.document_loaders.blob_loaders.schema import ( + Blob, + BlobLoader, +) + +T = TypeVar("T") + + +class _CloudBlob(Blob): + def as_string(self) -> str: + """Read data as a string.""" + from cloudpathlib import AnyPath + + if self.data is None and self.path: + return AnyPath(self.path).read_text(encoding=self.encoding) # type: ignore + elif isinstance(self.data, bytes): + return self.data.decode(self.encoding) + elif isinstance(self.data, str): + return self.data + else: + raise ValueError(f"Unable to get string for blob {self}") + + def as_bytes(self) -> bytes: + """Read data as bytes.""" + from cloudpathlib import AnyPath + + if isinstance(self.data, bytes): + return self.data + elif isinstance(self.data, str): + return self.data.encode(self.encoding) + elif self.data is None and self.path: + return AnyPath(self.path).read_bytes() # type: ignore + else: + raise ValueError(f"Unable to get bytes for blob {self}") + + @contextlib.contextmanager + def as_bytes_io(self) -> Generator[Union[BytesIO, BufferedReader], None, None]: + """Read data as a byte stream.""" + from cloudpathlib import AnyPath + + if isinstance(self.data, bytes): + yield BytesIO(self.data) + elif self.data is None and self.path: + return AnyPath(self.path).read_bytes() # type: ignore + else: + raise NotImplementedError(f"Unable to convert blob {self}") + + +def _url_to_filename(url: str) -> str: + """ + Convert file:, s3:, az: or gs: url to localfile. + If the file is not here, download it in a temporary file. + """ + from cloudpathlib import AnyPath + + url_parsed = urlparse(url) + suffix = Path(url_parsed.path).suffix + if url_parsed.scheme in ["s3", "az", "gs"]: + with AnyPath(url).open("rb") as f: # type: ignore + temp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False) + while True: + buf = f.read() + if not buf: + break + temp_file.write(buf) + temp_file.close() + file_path = temp_file.name + elif url_parsed.scheme in ["file", ""]: + file_path = url_parsed.path + else: + raise ValueError(f"Scheme {url_parsed.scheme} not supported") + return file_path + + +def _make_iterator( + length_func: Callable[[], int], show_progress: bool = False +) -> Callable[[Iterable[T]], Iterator[T]]: + """Create a function that optionally wraps an iterable in tqdm.""" + if show_progress: + try: + from tqdm.auto import tqdm + except ImportError: + raise ImportError( + "You must install tqdm to use show_progress=True." + "You can install tqdm with `pip install tqdm`." 
+ ) + + # Make sure to provide `total` here so that tqdm can show + # a progress bar that takes into account the total number of files. + def _with_tqdm(iterable: Iterable[T]) -> Iterator[T]: + """Wrap an iterable in a tqdm progress bar.""" + return tqdm(iterable, total=length_func()) + + iterator = _with_tqdm + else: + iterator = iter # type: ignore + + return iterator + + +# PUBLIC API + + +class CloudBlobLoader(BlobLoader): + """Load blobs from cloud URL or file:. + + Example: + + .. code-block:: python + + loader = CloudBlobLoader("s3://mybucket/id") + + for blob in loader.yield_blobs(): + print(blob) + """ # noqa: E501 + + def __init__( + self, + url: Union[str, "AnyPath"], + *, + glob: str = "**/[!.]*", + exclude: Sequence[str] = (), + suffixes: Optional[Sequence[str]] = None, + show_progress: bool = False, + ) -> None: + """Initialize with a url and how to glob over it. + + Use [CloudPathLib](https://cloudpathlib.drivendata.org/). + + Args: + url: Cloud URL to load from. + Supports s3://, az://, gs://, file:// schemes. + If no scheme is provided, it is assumed to be a local file. + If a path to a file is provided, glob/exclude/suffixes are ignored. + glob: Glob pattern relative to the specified path + by default set to pick up all non-hidden files + exclude: patterns to exclude from results, use glob syntax + suffixes: Provide to keep only files with these suffixes + Useful when wanting to keep files with different suffixes + Suffixes must include the dot, e.g. ".txt" + show_progress: If true, will show a progress bar as the files are loaded. + This forces an iteration through all matching files + to count them prior to loading them. + + Examples: + + .. code-block:: python + from langchain_community.document_loaders.blob_loaders import CloudBlobLoader + + # Load a single file. + loader = CloudBlobLoader("s3://mybucket/id") # az:// + + # Recursively load all text files in a directory. + loader = CloudBlobLoader("az://mybucket/id", glob="**/*.txt") + + # Recursively load all non-hidden files in a directory. + loader = CloudBlobLoader("gs://mybucket/id", glob="**/[!.]*") + + # Load all files in a directory without recursion. + loader = CloudBlobLoader("s3://mybucket/id", glob="*") + + # Recursively load all files in a directory, except for py or pyc files. 
+ loader = CloudBlobLoader( + "s3://mybucket/id", + glob="**/*.txt", + exclude=["**/*.py", "**/*.pyc"] + ) + """ # noqa: E501 + from cloudpathlib import AnyPath + + url_parsed = urlparse(str(url)) + + if url_parsed.scheme == "file": + url = url_parsed.path + + if isinstance(url, str): + self.path = AnyPath(url) + else: + self.path = url + + self.glob = glob + self.suffixes = set(suffixes or []) + self.show_progress = show_progress + self.exclude = exclude + + def yield_blobs( + self, + ) -> Iterable[Blob]: + """Yield blobs that match the requested pattern.""" + iterator = _make_iterator( + length_func=self.count_matching_files, show_progress=self.show_progress + ) + + for path in iterator(self._yield_paths()): + # yield Blob.from_path(path) + yield self.from_path(path) + + def _yield_paths(self) -> Iterable["AnyPath"]: + """Yield paths that match the requested pattern.""" + if self.path.is_file(): # type: ignore + yield self.path + return + + paths = self.path.glob(self.glob) + for path in paths: + if self.exclude: + if any(path.match(glob) for glob in self.exclude): + continue + if path.is_file(): + if self.suffixes and path.suffix not in self.suffixes: + continue # FIXME + yield path + + def count_matching_files(self) -> int: + """Count files that match the pattern without loading them.""" + # Carry out a full iteration to count the files without + # materializing anything expensive in memory. + num = 0 + for _ in self._yield_paths(): + num += 1 + return num + + @classmethod + def from_path( + cls, + path: "AnyPath", + *, + encoding: str = "utf-8", + mime_type: Optional[str] = None, + guess_type: bool = True, + metadata: Optional[dict] = None, + ) -> Blob: + """Load the blob from a path like object. + + Args: + path: path like object to file to be read + Supports s3://, az://, gs://, file:// schemes. + If no scheme is provided, it is assumed to be a local file. + encoding: Encoding to use if decoding the bytes into a string + mime_type: if provided, will be set as the mime-type of the data + guess_type: If True, the mimetype will be guessed from the file extension, + if a mime-type was not provided + metadata: Metadata to associate with the blob + + Returns: + Blob instance + """ + if mime_type is None and guess_type: + _mimetype = mimetypes.guess_type(path)[0] if guess_type else None # type: ignore + else: + _mimetype = mime_type + + url_parsed = urlparse(str(path)) + if url_parsed.scheme in ["file", ""]: + if url_parsed.scheme == "file": + local_path = url_parsed.path + else: + local_path = str(path) + return Blob( + data=None, + mimetype=_mimetype, + encoding=encoding, + path=local_path, + metadata=metadata if metadata is not None else {}, + ) + + return _CloudBlob( + data=None, + mimetype=_mimetype, + encoding=encoding, + path=str(path), + metadata=metadata if metadata is not None else {}, + ) diff --git a/libs/community/langchain_community/document_loaders/parsers/language/elixir.py b/libs/community/langchain_community/document_loaders/parsers/language/elixir.py new file mode 100644 index 0000000000000..780209767d89f --- /dev/null +++ b/libs/community/langchain_community/document_loaders/parsers/language/elixir.py @@ -0,0 +1,35 @@ +from typing import TYPE_CHECKING + +from langchain_community.document_loaders.parsers.language.tree_sitter_segmenter import ( # noqa: E501 + TreeSitterSegmenter, +) + +if TYPE_CHECKING: + from tree_sitter import Language + + +CHUNK_QUERY = """ + [ + (call target: ((identifier) @_identifier + (#any-of? 
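As a quick illustration of the `CloudBlobLoader` defined above, here is a minimal usage sketch. It assumes `cloudpathlib` is installed; the bucket name, local directory, and glob patterns are placeholders, and cloud schemes additionally need the matching `cloudpathlib` extras plus credentials.

```python
# Hedged sketch for CloudBlobLoader (paths and bucket names are placeholders).
from langchain_community.document_loaders.blob_loaders import CloudBlobLoader

# Local directory: recursively load Markdown files, skipping a drafts folder.
local_loader = CloudBlobLoader(
    "file:///tmp/docs",            # or simply "/tmp/docs"
    glob="**/*.md",
    exclude=["**/drafts/**"],
    show_progress=True,            # requires `pip install tqdm`
)

for blob in local_loader.yield_blobs():
    print(blob.path, blob.mimetype)
    text = blob.as_string()        # decodes using the blob's encoding (utf-8 by default)

# Cloud bucket: needs the relevant extras (e.g. `pip install "cloudpathlib[s3]"`)
# and valid credentials for the scheme you use (s3://, az://, gs://).
s3_loader = CloudBlobLoader("s3://my-bucket/reports", suffixes=[".pdf"])
print(s3_loader.count_matching_files())
```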
@_identifier "defmodule" "defprotocol" "defimpl"))) @module + (call target: ((identifier) @_identifier + (#any-of? @_identifier "def" "defmacro" "defmacrop" "defp"))) @function + (unary_operator operator: "@" operand: (call target: ((identifier) @_identifier + (#any-of? @_identifier "moduledoc" "typedoc""doc")))) @comment + ] +""".strip() + + +class ElixirSegmenter(TreeSitterSegmenter): + """Code segmenter for Elixir.""" + + def get_language(self) -> "Language": + from tree_sitter_languages import get_language + + return get_language("elixir") + + def get_chunk_query(self) -> str: + return CHUNK_QUERY + + def make_line_comment(self, text: str) -> str: + return f"# {text}" diff --git a/libs/community/langchain_community/document_loaders/scrapfly.py b/libs/community/langchain_community/document_loaders/scrapfly.py new file mode 100644 index 0000000000000..b774d46aded84 --- /dev/null +++ b/libs/community/langchain_community/document_loaders/scrapfly.py @@ -0,0 +1,69 @@ +"""Scrapfly Web Reader.""" +import logging +from typing import Iterator, List, Literal, Optional + +from langchain_core.document_loaders import BaseLoader +from langchain_core.documents import Document +from langchain_core.utils import get_from_env + +logger = logging.getLogger(__file__) + + +class ScrapflyLoader(BaseLoader): + """Turn a url to llm accessible markdown with `Scrapfly.io`. + + For further details, visit: https://scrapfly.io/docs/sdk/python + """ + + def __init__( + self, + urls: List[str], + *, + api_key: Optional[str] = None, + scrape_format: Literal["markdown", "text"] = "markdown", + scrape_config: Optional[dict] = None, + continue_on_failure: bool = True, + ) -> None: + """Initialize client. + + Args: + urls: List of urls to scrape. + api_key: The Scrapfly API key. If not specified must have env var + SCRAPFLY_API_KEY set. + scrape_format: Scrape result format, one or "markdown" or "text". + scrape_config: Dictionary of ScrapFly scrape config object. + continue_on_failure: Whether to continue if scraping a url fails. 
+ """ + try: + from scrapfly import ScrapflyClient + except ImportError: + raise ImportError( + "`scrapfly` package not found, please run `pip install scrapfly-sdk`" + ) + if not urls: + raise ValueError("URLs must be provided.") + api_key = api_key or get_from_env("api_key", "SCRAPFLY_API_KEY") + self.scrapfly = ScrapflyClient(key=api_key) + self.urls = urls + self.scrape_format = scrape_format + self.scrape_config = scrape_config + self.continue_on_failure = continue_on_failure + + def lazy_load(self) -> Iterator[Document]: + from scrapfly import ScrapeConfig + + scrape_config = self.scrape_config if self.scrape_config is not None else {} + for url in self.urls: + try: + response = self.scrapfly.scrape( + ScrapeConfig(url, format=self.scrape_format, **scrape_config) + ) + yield Document( + page_content=response.scrape_result["content"], + metadata={"url": url}, + ) + except Exception as e: + if self.continue_on_failure: + logger.error(f"Error fetching data from {url}, exception: {e}") + else: + raise e diff --git a/libs/community/langchain_community/embeddings/ascend.py b/libs/community/langchain_community/embeddings/ascend.py new file mode 100644 index 0000000000000..4e71635663fa5 --- /dev/null +++ b/libs/community/langchain_community/embeddings/ascend.py @@ -0,0 +1,120 @@ +import os +from typing import Any, Dict, List, Optional + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, root_validator + + +class AscendEmbeddings(Embeddings, BaseModel): + """ + Ascend NPU accelerate Embedding model + + Please ensure that you have installed CANN and torch_npu. + + Example: + + from langchain_community.embeddings import AscendEmbeddings + model = AscendEmbeddings(model_path=, + device_id=0, + query_instruction="Represent this sentence for searching relevant passages: " + ) + """ + + """model path""" + model_path: str + """Ascend NPU device id.""" + device_id: int = 0 + """Unstruntion to used for embedding query.""" + query_instruction: str = "" + """Unstruntion to used for embedding document.""" + document_instruction: str = "" + use_fp16: bool = True + pooling_method: Optional[str] = "cls" + model: Any + tokenizer: Any + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + try: + from transformers import AutoModel, AutoTokenizer + except ImportError as e: + raise ImportError( + "Unable to import transformers, please install with " + "`pip install -U transformers`." 
+ ) from e + try: + self.model = AutoModel.from_pretrained(self.model_path).npu().eval() + self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) + except Exception as e: + raise Exception( + f"Failed to load model [self.model_path], due to following error:{e}" + ) + + if self.use_fp16: + self.model.half() + self.encode([f"warmup {i} times" for i in range(10)]) + + @root_validator + def validate_environment(cls, values: Dict) -> Dict: + if not os.access(values["model_path"], os.F_OK): + raise FileNotFoundError( + f"Unabled to find valid model path in [{values['model_path']}]" + ) + try: + import torch_npu + except ImportError: + raise ModuleNotFoundError("torch_npu not found, please install torch_npu") + except Exception as e: + raise e + try: + torch_npu.npu.set_device(values["device_id"]) + except Exception as e: + raise Exception(f"set device failed due to {e}") + return values + + def encode(self, sentences: Any) -> Any: + inputs = self.tokenizer( + sentences, + padding=True, + truncation=True, + return_tensors="pt", + max_length=512, + ) + try: + import torch + except ImportError as e: + raise ImportError( + "Unable to import torch, please install with " "`pip install -U torch`." + ) from e + last_hidden_state = self.model( + inputs.input_ids.npu(), inputs.attention_mask.npu(), return_dict=True + ).last_hidden_state + tmp = self.pooling(last_hidden_state, inputs["attention_mask"].npu()) + embeddings = torch.nn.functional.normalize(tmp, dim=-1) + return embeddings.cpu().detach().numpy() + + def pooling(self, last_hidden_state: Any, attention_mask: Any = None) -> Any: + try: + import torch + except ImportError as e: + raise ImportError( + "Unable to import torch, please install with " "`pip install -U torch`." + ) from e + if self.pooling_method == "cls": + return last_hidden_state[:, 0] + elif self.pooling_method == "mean": + s = torch.sum( + last_hidden_state * attention_mask.unsqueeze(-1).float(), dim=-1 + ) + d = attention_mask.sum(dim=1, keepdim=True).float() + return s / d + else: + raise NotImplementedError( + f"Pooling method [{self.pooling_method}] not implemented" + ) + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + return self.encode([self.document_instruction + text for text in texts]) + + def embed_query(self, text: str) -> List[float]: + return self.encode([self.query_instruction + text])[0] diff --git a/libs/community/langchain_community/embeddings/clova.py b/libs/community/langchain_community/embeddings/clova.py new file mode 100644 index 0000000000000..59b28782e33a8 --- /dev/null +++ b/libs/community/langchain_community/embeddings/clova.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +from typing import Dict, List, Optional, cast + +import requests +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra, SecretStr, root_validator +from langchain_core.utils import convert_to_secret_str, get_from_dict_or_env + + +class ClovaEmbeddings(BaseModel, Embeddings): + """ + Clova's embedding service. + + To use this service, + + you should have the following environment variables + set with your API tokens and application ID, + or pass them as named parameters to the constructor: + + - ``CLOVA_EMB_API_KEY``: API key for accessing Clova's embedding service. + - ``CLOVA_EMB_APIGW_API_KEY``: API gateway key for enhanced security. + - ``CLOVA_EMB_APP_ID``: Application ID for identifying your application. + + Example: + .. 
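A minimal sketch for the `AscendEmbeddings` class completed above, following its own docstring example. It only runs on a machine with an Ascend NPU plus CANN, `torch`, `torch_npu`, and `transformers` installed; the model path is a placeholder for a local BGE-style checkpoint.

```python
# Hedged sketch for AscendEmbeddings (model path is a placeholder).
from langchain_community.embeddings import AscendEmbeddings

embeddings = AscendEmbeddings(
    model_path="/path/to/bge-large-zh-v1.5",   # local HuggingFace-format model directory
    device_id=0,
    query_instruction="Represent this sentence for searching relevant passages: ",
)

doc_vectors = embeddings.embed_documents(["LangChain supports many embedding backends."])
query_vector = embeddings.embed_query("Which embedding backends exist?")
print(len(doc_vectors), len(query_vector))
```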
code-block:: python + + from langchain_community.embeddings import ClovaEmbeddings + embeddings = ClovaEmbeddings( + clova_emb_api_key='your_clova_emb_api_key', + clova_emb_apigw_api_key='your_clova_emb_apigw_api_key', + app_id='your_app_id' + ) + + query_text = "This is a test query." + query_result = embeddings.embed_query(query_text) + + document_text = "This is a test document." + document_result = embeddings.embed_documents([document_text]) + + """ + + endpoint_url: str = ( + "https://clovastudio.apigw.ntruss.com/testapp/v1/api-tools/embedding" + ) + """Endpoint URL to use.""" + model: str = "clir-emb-dolphin" + """Embedding model name to use.""" + clova_emb_api_key: Optional[SecretStr] = None + """API key for accessing Clova's embedding service.""" + clova_emb_apigw_api_key: Optional[SecretStr] = None + """API gateway key for enhanced security.""" + app_id: Optional[SecretStr] = None + """Application ID for identifying your application.""" + + class Config: + extra = Extra.forbid + + @root_validator(pre=True, allow_reuse=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate api key exists in environment.""" + values["clova_emb_api_key"] = convert_to_secret_str( + get_from_dict_or_env(values, "clova_emb_api_key", "CLOVA_EMB_API_KEY") + ) + values["clova_emb_apigw_api_key"] = convert_to_secret_str( + get_from_dict_or_env( + values, "clova_emb_apigw_api_key", "CLOVA_EMB_APIGW_API_KEY" + ) + ) + values["app_id"] = convert_to_secret_str( + get_from_dict_or_env(values, "app_id", "CLOVA_EMB_APP_ID") + ) + return values + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """ + Embed a list of texts and return their embeddings. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + embeddings = [] + for text in texts: + embeddings.append(self._embed_text(text)) + return embeddings + + def embed_query(self, text: str) -> List[float]: + """ + Embed a single query text and return its embedding. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + return self._embed_text(text) + + def _embed_text(self, text: str) -> List[float]: + """ + Internal method to call the embedding API and handle the response. 
+ """ + payload = {"text": text} + + # HTTP headers for authorization + headers = { + "X-NCP-CLOVASTUDIO-API-KEY": cast( + SecretStr, self.clova_emb_api_key + ).get_secret_value(), + "X-NCP-APIGW-API-KEY": cast( + SecretStr, self.clova_emb_apigw_api_key + ).get_secret_value(), + "Content-Type": "application/json", + } + + # send request + app_id = cast(SecretStr, self.app_id).get_secret_value() + response = requests.post( + f"{self.endpoint_url}/{self.model}/{app_id}", + headers=headers, + json=payload, + ) + + # check for errors + if response.status_code == 200: + response_data = response.json() + if "result" in response_data and "embedding" in response_data["result"]: + return response_data["result"]["embedding"] + raise ValueError( + f"API request failed with status {response.status_code}: {response.text}" + ) diff --git a/libs/community/langchain_community/embeddings/ipex_llm.py b/libs/community/langchain_community/embeddings/ipex_llm.py new file mode 100644 index 0000000000000..8935d80a1e777 --- /dev/null +++ b/libs/community/langchain_community/embeddings/ipex_llm.py @@ -0,0 +1,140 @@ +# This file is adapted from +# https://github.com/langchain-ai/langchain/blob/master/libs/community/langchain_community/embeddings/huggingface.py + +from typing import Any, Dict, List, Optional + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra, Field + +DEFAULT_BGE_MODEL = "BAAI/bge-small-en-v1.5" +DEFAULT_QUERY_BGE_INSTRUCTION_EN = ( + "Represent this question for searching relevant passages: " +) +DEFAULT_QUERY_BGE_INSTRUCTION_ZH = "为这个句子生成表示以用于检索相关文章:" + + +class IpexLLMBgeEmbeddings(BaseModel, Embeddings): + """Wrapper around the BGE embedding model + with IPEX-LLM optimizations on Intel CPUs and GPUs. + + To use, you should have the ``ipex-llm`` + and ``sentence_transformers`` package installed. Refer to + `here `_ + for installation on Intel CPU. + + Example on Intel CPU: + .. code-block:: python + + from langchain_community.embeddings import IpexLLMBgeEmbeddings + + embedding_model = IpexLLMBgeEmbeddings( + model_name="BAAI/bge-large-en-v1.5", + model_kwargs={}, + encode_kwargs={"normalize_embeddings": True}, + ) + + Refer to + `here `_ + for installation on Intel GPU. + + Example on Intel GPU: + .. code-block:: python + + from langchain_community.embeddings import IpexLLMBgeEmbeddings + + embedding_model = IpexLLMBgeEmbeddings( + model_name="BAAI/bge-large-en-v1.5", + model_kwargs={"device": "xpu"}, + encode_kwargs={"normalize_embeddings": True}, + ) + """ + + client: Any #: :meta private: + model_name: str = DEFAULT_BGE_MODEL + """Model name to use.""" + cache_folder: Optional[str] = None + """Path to store models. 
+ Can be also set by SENTENCE_TRANSFORMERS_HOME environment variable.""" + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass to the model.""" + encode_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Keyword arguments to pass when calling the `encode` method of the model.""" + query_instruction: str = DEFAULT_QUERY_BGE_INSTRUCTION_EN + """Instruction to use for embedding query.""" + embed_instruction: str = "" + """Instruction to use for embedding document.""" + + def __init__(self, **kwargs: Any): + """Initialize the sentence_transformer.""" + super().__init__(**kwargs) + try: + import sentence_transformers + from ipex_llm.transformers.convert import _optimize_post, _optimize_pre + + except ImportError as exc: + base_url = ( + "https://python.langchain.com/v0.1/docs/integrations/text_embedding/" + ) + raise ImportError( + "Could not import ipex_llm or sentence_transformers. " + f"Please refer to {base_url}/ipex_llm/ " + "for install required packages on Intel CPU. " + f"And refer to {base_url}/ipex_llm_gpu/ " + "for install required packages on Intel GPU. " + ) from exc + + # Set "cpu" as default device + if "device" not in self.model_kwargs: + self.model_kwargs["device"] = "cpu" + + if self.model_kwargs["device"] not in ["cpu", "xpu"]: + raise ValueError( + "IpexLLMBgeEmbeddings currently only supports device to be " + f"'cpu' or 'xpu', but you have: {self.model_kwargs['device']}." + ) + + self.client = sentence_transformers.SentenceTransformer( + self.model_name, cache_folder=self.cache_folder, **self.model_kwargs + ) + + # Add ipex-llm optimizations + self.client = _optimize_pre(self.client) + self.client = _optimize_post(self.client) + if self.model_kwargs["device"] == "xpu": + self.client = self.client.half().to("xpu") + + if "-zh" in self.model_name: + self.query_instruction = DEFAULT_QUERY_BGE_INSTRUCTION_ZH + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Compute doc embeddings using a HuggingFace transformer model. + + Args: + texts: The list of texts to embed. + + Returns: + List of embeddings, one for each text. + """ + texts = [self.embed_instruction + t.replace("\n", " ") for t in texts] + embeddings = self.client.encode(texts, **self.encode_kwargs) + return embeddings.tolist() + + def embed_query(self, text: str) -> List[float]: + """Compute query embeddings using a HuggingFace transformer model. + + Args: + text: The text to embed. + + Returns: + Embeddings for the text. + """ + text = text.replace("\n", " ") + embedding = self.client.encode( + self.query_instruction + text, **self.encode_kwargs + ) + return embedding.tolist() diff --git a/libs/community/langchain_community/embeddings/ovhcloud.py b/libs/community/langchain_community/embeddings/ovhcloud.py new file mode 100644 index 0000000000000..7dde3810a2c13 --- /dev/null +++ b/libs/community/langchain_community/embeddings/ovhcloud.py @@ -0,0 +1,98 @@ +import logging +import time +from typing import Any, List + +import requests +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Extra + +logger = logging.getLogger(__name__) + + +class OVHCloudEmbeddings(BaseModel, Embeddings): + """ + OVHcloud AI Endpoints Embeddings. 
+ """ + + """ OVHcloud AI Endpoints Access Token""" + access_token: str = "" + + """ OVHcloud AI Endpoints model name for embeddings generation""" + model_name: str = "" + + """ OVHcloud AI Endpoints region""" + region: str = "kepler" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + if self.access_token == "": + raise ValueError("Access token is required for OVHCloud embeddings.") + if self.model_name == "": + raise ValueError("Model name is required for OVHCloud embeddings.") + if self.region == "": + raise ValueError("Region is required for OVHCloud embeddings.") + + def _generate_embedding(self, text: str) -> List[float]: + """Generate embeddings from OVHCLOUD AIE. + Args: + text (str): The text to embed. + Returns: + List[float]: Embeddings for the text. + """ + headers = { + "content-type": "text/plain", + "Authorization": f"Bearer {self.access_token}", + } + + session = requests.session() + while True: + response = session.post( + f"https://{self.model_name}.endpoints.{self.region}.ai.cloud.ovh.net/api/text2vec", + headers=headers, + data=text, + ) + if response.status_code != 200: + if response.status_code == 429: + """Rate limit exceeded, wait for reset""" + reset_time = int(response.headers.get("RateLimit-Reset", 0)) + logger.info("Rate limit exceeded. Waiting %d seconds.", reset_time) + if reset_time > 0: + time.sleep(reset_time) + continue + else: + """Rate limit reset time has passed, retry immediately""" + continue + if response.status_code == 401: + """ Unauthorized, retry with new token """ + raise ValueError("Unauthorized, retry with new token") + """ Handle other non-200 status codes """ + raise ValueError( + "Request failed with status code: {status_code}, {text}".format( + status_code=response.status_code, text=response.text + ) + ) + return response.json() + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Create a retry decorator for PremAIEmbeddings. + Args: + texts (List[str]): The list of texts to embed. + + Returns: + List[List[float]]: List of embeddings, one for each input text. + """ + return [self._generate_embedding(text) for text in texts] + + def embed_query(self, text: str) -> List[float]: + """Embed a single query text. + Args: + text (str): The text to embed. + Returns: + List[float]: Embeddings for the text. + """ + return self._generate_embedding(text) diff --git a/libs/community/langchain_community/embeddings/zhipuai.py b/libs/community/langchain_community/embeddings/zhipuai.py new file mode 100644 index 0000000000000..dd062162002d1 --- /dev/null +++ b/libs/community/langchain_community/embeddings/zhipuai.py @@ -0,0 +1,76 @@ +from typing import Any, Dict, List + +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseModel, Field, root_validator +from langchain_core.utils import get_from_dict_or_env + + +class ZhipuAIEmbeddings(BaseModel, Embeddings): + """ZhipuAI embedding models. + + To use, you should have the ``zhipuai`` python package installed, and the + environment variable ``ZHIPU_API_KEY`` set with your API key or pass it + as a named parameter to the constructor. + + More instructions about ZhipuAi Embeddings, you can get it + from https://open.bigmodel.cn/dev/api#vector + + Example: + .. code-block:: python + + from langchain_community.embeddings import ZhipuAIEmbeddings + embeddings = ZhipuAIEmbeddings(api_key="your-api-key") + text = "This is a test query." 
+ query_result = embeddings.embed_query(text) + # texts = ["This is a test query1.", "This is a test query2."] + # query_result = embeddings.embed_query(texts) + """ + + client: Any = Field(default=None, exclude=True) #: :meta private: + model: str = Field(default="embedding-2") + """Model name""" + api_key: str + """Automatically inferred from env var `ZHIPU_API_KEY` if not provided.""" + + @root_validator(pre=True) + def validate_environment(cls, values: Dict) -> Dict: + """Validate that auth token exists in environment.""" + values["api_key"] = get_from_dict_or_env(values, "api_key", "ZHIPUAI_API_KEY") + try: + from zhipuai import ZhipuAI + + values["client"] = ZhipuAI(api_key=values["api_key"]) + except ImportError: + raise ImportError( + "Could not import zhipuai python package." + "Please install it with `pip install zhipuai`." + ) + return values + + def embed_query(self, text: str) -> List[float]: + """ + Embeds a text using the AutoVOT algorithm. + + Args: + text: A text to embed. + + Returns: + Input document's embedded list. + """ + resp = self.embed_documents([text]) + return resp[0] + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """ + Embeds a list of text documents using the AutoVOT algorithm. + + Args: + texts: A list of text documents to embed. + + Returns: + A list of embeddings for each document in the input list. + Each embedding is represented as a list of float values. + """ + resp = self.client.embeddings.create(model=self.model, input=texts) + embeddings = [r.embedding for r in resp.data] + return embeddings diff --git a/libs/community/langchain_community/memory/zep_cloud_memory.py b/libs/community/langchain_community/memory/zep_cloud_memory.py new file mode 100644 index 0000000000000..24ddb04677c94 --- /dev/null +++ b/libs/community/langchain_community/memory/zep_cloud_memory.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from typing import Any, Dict, Optional + +from langchain_community.chat_message_histories import ZepCloudChatMessageHistory + +try: + from langchain.memory import ConversationBufferMemory + from zep_cloud import MemoryGetRequestMemoryType + + class ZepCloudMemory(ConversationBufferMemory): + """Persist your chain history to the Zep MemoryStore. + + Documentation: https://help.getzep.com + + Example: + .. code-block:: python + + memory = ZepCloudMemory( + session_id=session_id, # Identifies your user or a user's session + api_key=, # Your Zep Project API key + memory_key="history", # Ensure this matches the key used in + # chain's prompt template + return_messages=True, # Does your prompt template expect a string + # or a list of Messages? + ) + chain = LLMChain(memory=memory,...) # Configure your chain to use the ZepMemory + instance + + + Note: + To persist metadata alongside your chat history, your will need to create a + custom Chain class that overrides the `prep_outputs` method to include the metadata + in the call to `self.memory.save_context`. + + + Zep - Recall, understand, and extract data from chat histories. Power personalized AI experiences. + ========= + Zep is a long-term memory service for AI Assistant apps. With Zep, you can provide AI assistants with the ability to recall past conversations, + no matter how distant, while also reducing hallucinations, latency, and cost. 
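A short sketch for the `ZhipuAIEmbeddings` class above. Note that the validator reads the `ZHIPUAI_API_KEY` environment variable, so the key is passed explicitly here to avoid ambiguity with the docstring's `ZHIPU_API_KEY` wording; the key value is a placeholder and `pip install zhipuai` is required.

```python
# Hedged sketch for ZhipuAIEmbeddings; the API key is a placeholder.
from langchain_community.embeddings import ZhipuAIEmbeddings

embeddings = ZhipuAIEmbeddings(api_key="your-zhipuai-api-key")  # model defaults to "embedding-2"

doc_vectors = embeddings.embed_documents(["This is a test query1.", "This is a test query2."])
query_vector = embeddings.embed_query("This is a test query.")
print(len(doc_vectors), len(query_vector))
```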
+ + For more information on the zep-python package, see: + https://github.com/getzep/zep-python + + """ # noqa: E501 + + chat_memory: ZepCloudChatMessageHistory + + def __init__( + self, + session_id: str, + api_key: str, + memory_type: Optional[MemoryGetRequestMemoryType] = None, + lastn: Optional[int] = None, + output_key: Optional[str] = None, + input_key: Optional[str] = None, + return_messages: bool = False, + human_prefix: str = "Human", + ai_prefix: str = "AI", + memory_key: str = "history", + ): + """Initialize ZepMemory. + + Args: + session_id (str): Identifies your user or a user's session + api_key (str): Your Zep Project key. + memory_type (Optional[MemoryGetRequestMemoryType], optional): Zep Memory Type, defaults to perpetual + lastn (Optional[int], optional): Number of messages to retrieve. Will add the last summary generated prior to the nth oldest message. Defaults to 6 + output_key (Optional[str], optional): The key to use for the output message. + Defaults to None. + input_key (Optional[str], optional): The key to use for the input message. + Defaults to None. + return_messages (bool, optional): Does your prompt template expect a string + or a list of Messages? Defaults to False + i.e. return a string. + human_prefix (str, optional): The prefix to use for human messages. + Defaults to "Human". + ai_prefix (str, optional): The prefix to use for AI messages. + Defaults to "AI". + memory_key (str, optional): The key to use for the memory. + Defaults to "history". + Ensure that this matches the key used in + chain's prompt template. + """ # noqa: E501 + chat_message_history = ZepCloudChatMessageHistory( + session_id=session_id, + memory_type=memory_type, + lastn=lastn, + api_key=api_key, + ) + super().__init__( + chat_memory=chat_message_history, + output_key=output_key, + input_key=input_key, + return_messages=return_messages, + human_prefix=human_prefix, + ai_prefix=ai_prefix, + memory_key=memory_key, + ) + + def save_context( + self, + inputs: Dict[str, Any], + outputs: Dict[str, str], + metadata: Optional[Dict[str, Any]] = None, + ) -> None: + """Save context from this conversation to buffer. + + Args: + inputs (Dict[str, Any]): The inputs to the chain. + outputs (Dict[str, str]): The outputs from the chain. + metadata (Optional[Dict[str, Any]], optional): Any metadata to save with + the context. 
Defaults to None + + Returns: + None + """ + input_str, output_str = self._get_input_output(inputs, outputs) + self.chat_memory.add_user_message(input_str, metadata=metadata) + self.chat_memory.add_ai_message(output_str, metadata=metadata) +except ImportError: + # Placeholder object + class ZepCloudMemory: # type: ignore[no-redef] + pass diff --git a/libs/community/langchain_community/retrievers/asknews.py b/libs/community/langchain_community/retrievers/asknews.py new file mode 100644 index 0000000000000..18a44161d2868 --- /dev/null +++ b/libs/community/langchain_community/retrievers/asknews.py @@ -0,0 +1,146 @@ +import os +import re +from typing import Any, Dict, List, Literal, Optional + +from langchain_core.callbacks import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever + + +class AskNewsRetriever(BaseRetriever): + """AskNews retriever.""" + + k: int = 10 + offset: int = 0 + start_timestamp: Optional[int] = None + end_timestamp: Optional[int] = None + method: Literal["nl", "kw"] = "nl" + categories: List[ + Literal[ + "All", + "Business", + "Crime", + "Politics", + "Science", + "Sports", + "Technology", + "Military", + "Health", + "Entertainment", + "Finance", + "Culture", + "Climate", + "Environment", + "World", + ] + ] = ["All"] + historical: bool = False + similarity_score_threshold: float = 0.5 + kwargs: Optional[Dict[str, Any]] = {} + client_id: Optional[str] = None + client_secret: Optional[str] = None + + def _get_relevant_documents( + self, query: str, *, run_manager: CallbackManagerForRetrieverRun + ) -> List[Document]: + """Get documents relevant to a query. + Args: + query: String to find relevant documents for + run_manager: The callbacks handler to use + Returns: + List of relevant documents + """ + try: + from asknews_sdk import AskNewsSDK + except ImportError: + raise ImportError( + "AskNews python package not found. " + "Please install it with `pip install asknews`." + ) + an_client = AskNewsSDK( + client_id=self.client_id or os.environ["ASKNEWS_CLIENT_ID"], + client_secret=self.client_secret or os.environ["ASKNEWS_CLIENT_SECRET"], + scopes=["news"], + ) + response = an_client.news.search_news( + query=query, + n_articles=self.k, + start_timestamp=self.start_timestamp, + end_timestamp=self.end_timestamp, + method=self.method, + categories=self.categories, + historical=self.historical, + similarity_score_threshold=self.similarity_score_threshold, + offset=self.offset, + doc_start_delimiter="", + doc_end_delimiter="", + return_type="both", + **self.kwargs, + ) + + return self._extract_documents(response) + + async def _aget_relevant_documents( + self, query: str, *, run_manager: AsyncCallbackManagerForRetrieverRun + ) -> List[Document]: + """Asynchronously get documents relevant to a query. + Args: + query: String to find relevant documents for + run_manager: The callbacks handler to use + Returns: + List of relevant documents + """ + try: + from asknews_sdk import AsyncAskNewsSDK + except ImportError: + raise ImportError( + "AskNews python package not found. " + "Please install it with `pip install asknews`." 
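Here is a hedged, standalone sketch for the `ZepCloudMemory` class defined above, without wiring it into a chain. It assumes `pip install zep-cloud` plus the `langchain` package, and a real Zep Cloud project API key and session id (the values shown are placeholders).

```python
# Hedged sketch for ZepCloudMemory; session id and API key are placeholders.
from langchain_community.memory.zep_cloud_memory import ZepCloudMemory

memory = ZepCloudMemory(
    session_id="user-123-session-1",      # identifies the user / conversation
    api_key="your-zep-project-api-key",
    memory_key="history",                 # must match the prompt template variable
    return_messages=True,                 # return Message objects rather than a string
)

# Persist one exchange and read the stored history back.
memory.save_context({"input": "Hi, I'm Alice."}, {"output": "Hello Alice!"})
print(memory.load_memory_variables({})["history"])
```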
+ ) + an_client = AsyncAskNewsSDK( + client_id=self.client_id or os.environ["ASKNEWS_CLIENT_ID"], + client_secret=self.client_secret or os.environ["ASKNEWS_CLIENT_SECRET"], + scopes=["news"], + ) + response = await an_client.news.search_news( + query=query, + n_articles=self.k, + start_timestamp=self.start_timestamp, + end_timestamp=self.end_timestamp, + method=self.method, + categories=self.categories, + historical=self.historical, + similarity_score_threshold=self.similarity_score_threshold, + offset=self.offset, + return_type="both", + doc_start_delimiter="", + doc_end_delimiter="", + **self.kwargs, + ) + + return self._extract_documents(response) + + def _extract_documents(self, response: Any) -> List[Document]: + """Extract documents from an api response.""" + + from asknews_sdk.dto.news import SearchResponse + + sr: SearchResponse = response + matches = re.findall(r"(.*?)", sr.as_string, re.DOTALL) + docs = [ + Document( + page_content=matches[i].strip(), + metadata={ + "title": sr.as_dicts[i].title, + "source": str(sr.as_dicts[i].article_url) + if sr.as_dicts[i].article_url + else None, + "images": sr.as_dicts[i].image_url, + }, + ) + for i in range(len(matches)) + ] + return docs diff --git a/libs/community/langchain_community/retrievers/zep_cloud.py b/libs/community/langchain_community/retrievers/zep_cloud.py new file mode 100644 index 0000000000000..96758c71d9861 --- /dev/null +++ b/libs/community/langchain_community/retrievers/zep_cloud.py @@ -0,0 +1,162 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, List, Optional + +from langchain_core.callbacks import ( + AsyncCallbackManagerForRetrieverRun, + CallbackManagerForRetrieverRun, +) +from langchain_core.documents import Document +from langchain_core.pydantic_v1 import root_validator +from langchain_core.retrievers import BaseRetriever + +if TYPE_CHECKING: + from zep_cloud import MemorySearchResult, SearchScope, SearchType + from zep_cloud.client import AsyncZep, Zep + + +class ZepCloudRetriever(BaseRetriever): + """`Zep Cloud` MemoryStore Retriever. + + Search your user's long-term chat history with Zep. + + Zep offers both simple semantic search and Maximal Marginal Relevance (MMR) + reranking of search results. + + Note: You will need to provide the user's `session_id` to use this retriever. + + Args: + api_key: Your Zep API key + session_id: Identifies your user or a user's session (required) + top_k: Number of documents to return (default: 3, optional) + search_type: Type of search to perform (similarity / mmr) + (default: similarity, optional) + mmr_lambda: Lambda value for MMR search. Defaults to 0.5 (optional) + + Zep - Recall, understand, and extract data from chat histories. + Power personalized AI experiences. + ========= + Zep is a long-term memory service for AI Assistant apps. + With Zep, you can provide AI assistants with the ability + to recall past conversations, + no matter how distant, while also reducing hallucinations, latency, and cost. + + see Zep Cloud Docs: https://help.getzep.com + """ + + api_key: str + """Your Zep API key.""" + zep_client: Zep + """Zep client used for making API requests.""" + zep_client_async: AsyncZep + """Async Zep client used for making API requests.""" + session_id: str + """Zep session ID.""" + top_k: Optional[int] + """Number of items to return.""" + search_scope: SearchScope = "messages" + """Which documents to search. 
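A minimal sketch for the `AskNewsRetriever` defined above. It assumes `pip install asknews` and real AskNews credentials, either via the `ASKNEWS_CLIENT_ID` / `ASKNEWS_CLIENT_SECRET` environment variables or passed explicitly as below (placeholder values shown).

```python
# Hedged sketch for AskNewsRetriever; credentials are placeholders.
from langchain_community.retrievers.asknews import AskNewsRetriever

retriever = AskNewsRetriever(
    k=5,                           # number of articles to return
    categories=["Technology"],     # restrict to one news category
    method="nl",                   # natural-language search ("kw" for keyword search)
    client_id="your-client-id",
    client_secret="your-client-secret",
)

docs = retriever.invoke("latest developments in open-source LLMs")
for doc in docs:
    print(doc.metadata.get("title"), "->", doc.metadata.get("source"))
```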
Messages or Summaries?""" + search_type: SearchType = "similarity" + """Type of search to perform (similarity / mmr)""" + mmr_lambda: Optional[float] = None + """Lambda value for MMR search.""" + + @root_validator(pre=True) + def create_client(cls, values: dict) -> dict: + try: + from zep_cloud.client import AsyncZep, Zep + except ImportError: + raise ImportError( + "Could not import zep-cloud package. " + "Please install it with `pip install zep-cloud`." + ) + if values.get("api_key") is None: + raise ValueError("Zep API key is required.") + values["zep_client"] = Zep(api_key=values.get("api_key")) + values["zep_client_async"] = AsyncZep(api_key=values.get("api_key")) + return values + + def _messages_search_result_to_doc( + self, results: List[MemorySearchResult] + ) -> List[Document]: + return [ + Document( + page_content=str(r.message.content), + metadata={ + "score": r.score, + "uuid": r.message.uuid_, + "created_at": r.message.created_at, + "token_count": r.message.token_count, + "role": r.message.role or r.message.role_type, + }, + ) + for r in results or [] + if r.message + ] + + def _summary_search_result_to_doc( + self, results: List[MemorySearchResult] + ) -> List[Document]: + return [ + Document( + page_content=str(r.summary.content), + metadata={ + "score": r.score, + "uuid": r.summary.uuid_, + "created_at": r.summary.created_at, + "token_count": r.summary.token_count, + }, + ) + for r in results + if r.summary + ] + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + metadata: Optional[Dict[str, Any]] = None, + ) -> List[Document]: + if not self.zep_client: + raise RuntimeError("Zep client not initialized.") + + results = self.zep_client.memory.search( + self.session_id, + text=query, + metadata=metadata, + search_scope=self.search_scope, + search_type=self.search_type, + mmr_lambda=self.mmr_lambda, + limit=self.top_k, + ) + + if self.search_scope == "summary": + return self._summary_search_result_to_doc(results) + + return self._messages_search_result_to_doc(results) + + async def _aget_relevant_documents( + self, + query: str, + *, + run_manager: AsyncCallbackManagerForRetrieverRun, + metadata: Optional[Dict[str, Any]] = None, + ) -> List[Document]: + if not self.zep_client_async: + raise RuntimeError("Zep client not initialized.") + + results = await self.zep_client_async.memory.search( + self.session_id, + text=query, + metadata=metadata, + search_scope=self.search_scope, + search_type=self.search_type, + mmr_lambda=self.mmr_lambda, + limit=self.top_k, + ) + + if self.search_scope == "summary": + return self._summary_search_result_to_doc(results) + + return self._messages_search_result_to_doc(results) diff --git a/libs/community/langchain_community/storage/cassandra.py b/libs/community/langchain_community/storage/cassandra.py new file mode 100644 index 0000000000000..d2d97a3557e71 --- /dev/null +++ b/libs/community/langchain_community/storage/cassandra.py @@ -0,0 +1,220 @@ +from __future__ import annotations + +import asyncio +from asyncio import InvalidStateError, Task +from typing import ( + TYPE_CHECKING, + AsyncIterator, + Iterator, + List, + Optional, + Sequence, + Tuple, +) + +from langchain_core.stores import ByteStore + +from langchain_community.utilities.cassandra import SetupMode, aexecute_cql + +if TYPE_CHECKING: + from cassandra.cluster import Session + from cassandra.query import PreparedStatement + +CREATE_TABLE_CQL_TEMPLATE = """ + CREATE TABLE IF NOT EXISTS {keyspace}.{table} + (row_id TEXT, body_blob 
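A hedged sketch for the `ZepCloudRetriever` above. It assumes `pip install zep-cloud`, a real Zep Cloud API key, and an existing session whose chat history can be searched; all identifiers below are placeholders.

```python
# Hedged sketch for ZepCloudRetriever; API key and session id are placeholders.
from langchain_community.retrievers.zep_cloud import ZepCloudRetriever

retriever = ZepCloudRetriever(
    api_key="your-zep-project-api-key",
    session_id="user-123-session-1",
    top_k=3,
    search_scope="messages",       # or "summary" to search session summaries
    search_type="mmr",             # MMR reranking; "similarity" is the default
    mmr_lambda=0.5,
)

for doc in retriever.invoke("What did the user say about pricing?"):
    print(doc.metadata["score"], doc.page_content)
```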
BLOB, PRIMARY KEY (row_id)); +""" +SELECT_TABLE_CQL_TEMPLATE = ( + """SELECT row_id, body_blob FROM {keyspace}.{table} WHERE row_id IN ?;""" +) +SELECT_ALL_TABLE_CQL_TEMPLATE = """SELECT row_id, body_blob FROM {keyspace}.{table};""" +INSERT_TABLE_CQL_TEMPLATE = ( + """INSERT INTO {keyspace}.{table} (row_id, body_blob) VALUES (?, ?);""" +) +DELETE_TABLE_CQL_TEMPLATE = """DELETE FROM {keyspace}.{table} WHERE row_id IN ?;""" + + +class CassandraByteStore(ByteStore): + """A ByteStore implementation using Cassandra as the backend. + + Parameters: + table: The name of the table to use. + session: A Cassandra session object. If not provided, it will be resolved + from the cassio config. + keyspace: The keyspace to use. If not provided, it will be resolved + from the cassio config. + setup_mode: The setup mode to use. Default is SYNC (SetupMode.SYNC). + """ + + def __init__( + self, + table: str, + *, + session: Optional[Session] = None, + keyspace: Optional[str] = None, + setup_mode: SetupMode = SetupMode.SYNC, + ) -> None: + if not session or not keyspace: + try: + from cassio.config import check_resolve_keyspace, check_resolve_session + + self.keyspace = keyspace or check_resolve_keyspace(keyspace) + self.session = session or check_resolve_session() + except (ImportError, ModuleNotFoundError): + raise ImportError( + "Could not import a recent cassio package." + "Please install it with `pip install --upgrade cassio`." + ) + else: + self.keyspace = keyspace + self.session = session + self.table = table + self.select_statement = None + self.insert_statement = None + self.delete_statement = None + + create_cql = CREATE_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, + table=self.table, + ) + self.db_setup_task: Optional[Task[None]] = None + if setup_mode == SetupMode.ASYNC: + self.db_setup_task = asyncio.create_task( + aexecute_cql(self.session, create_cql) + ) + else: + self.session.execute(create_cql) + + def ensure_db_setup(self) -> None: + """Ensure that the DB setup is finished. If not, raise a ValueError.""" + if self.db_setup_task: + try: + self.db_setup_task.result() + except InvalidStateError: + raise ValueError( + "Asynchronous setup of the DB not finished. " + "NB: AstraDB components sync methods shouldn't be called from the " + "event loop. Consider using their async equivalents." + ) + + async def aensure_db_setup(self) -> None: + """Ensure that the DB setup is finished. If not, wait for it.""" + if self.db_setup_task: + await self.db_setup_task + + def get_select_statement(self) -> PreparedStatement: + """Get the prepared select statement for the table. + If not available, prepare it. + + Returns: + PreparedStatement: The prepared statement. + """ + if not self.select_statement: + self.select_statement = self.session.prepare( + SELECT_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, table=self.table + ) + ) + return self.select_statement + + def get_insert_statement(self) -> PreparedStatement: + """Get the prepared insert statement for the table. + If not available, prepare it. + + Returns: + PreparedStatement: The prepared statement. + """ + if not self.insert_statement: + self.insert_statement = self.session.prepare( + INSERT_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, table=self.table + ) + ) + return self.insert_statement + + def get_delete_statement(self) -> PreparedStatement: + """Get the prepared delete statement for the table. + If not available, prepare it. + + Returns: + PreparedStatement: The prepared statement. 
+ """ + + if not self.delete_statement: + self.delete_statement = self.session.prepare( + DELETE_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, table=self.table + ) + ) + return self.delete_statement + + def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + from cassandra.query import ValueSequence + + self.ensure_db_setup() + docs_dict = {} + for row in self.session.execute( + self.get_select_statement(), [ValueSequence(keys)] + ): + docs_dict[row.row_id] = row.body_blob + return [docs_dict.get(key) for key in keys] + + async def amget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + from cassandra.query import ValueSequence + + await self.aensure_db_setup() + docs_dict = {} + for row in await aexecute_cql( + self.session, self.get_select_statement(), parameters=[ValueSequence(keys)] + ): + docs_dict[row.row_id] = row.body_blob + return [docs_dict.get(key) for key in keys] + + def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + self.ensure_db_setup() + insert_statement = self.get_insert_statement() + for k, v in key_value_pairs: + self.session.execute(insert_statement, (k, v)) + + async def amset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + await self.aensure_db_setup() + insert_statement = self.get_insert_statement() + for k, v in key_value_pairs: + await aexecute_cql(self.session, insert_statement, parameters=(k, v)) + + def mdelete(self, keys: Sequence[str]) -> None: + from cassandra.query import ValueSequence + + self.ensure_db_setup() + self.session.execute(self.get_delete_statement(), [ValueSequence(keys)]) + + async def amdelete(self, keys: Sequence[str]) -> None: + from cassandra.query import ValueSequence + + await self.aensure_db_setup() + await aexecute_cql( + self.session, self.get_delete_statement(), parameters=[ValueSequence(keys)] + ) + + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: + self.ensure_db_setup() + for row in self.session.execute( + SELECT_ALL_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, table=self.table + ) + ): + key = row.row_id + if not prefix or key.startswith(prefix): + yield key + + async def ayield_keys(self, *, prefix: Optional[str] = None) -> AsyncIterator[str]: + await self.aensure_db_setup() + for row in await aexecute_cql( + self.session, + SELECT_ALL_TABLE_CQL_TEMPLATE.format( + keyspace=self.keyspace, table=self.table + ), + ): + key = row.row_id + if not prefix or key.startswith(prefix): + yield key diff --git a/libs/community/langchain_community/storage/sql.py b/libs/community/langchain_community/storage/sql.py new file mode 100644 index 0000000000000..a92daae1d8c67 --- /dev/null +++ b/libs/community/langchain_community/storage/sql.py @@ -0,0 +1,266 @@ +import contextlib +from pathlib import Path +from typing import ( + Any, + AsyncGenerator, + AsyncIterator, + Dict, + Generator, + Iterator, + List, + Optional, + Sequence, + Tuple, + Union, + cast, +) + +from langchain_core.stores import BaseStore +from sqlalchemy import ( + Engine, + LargeBinary, + and_, + create_engine, + delete, + select, +) +from sqlalchemy.ext.asyncio import ( + AsyncEngine, + AsyncSession, + async_sessionmaker, + create_async_engine, +) +from sqlalchemy.orm import ( + Mapped, + Session, + declarative_base, + mapped_column, + sessionmaker, +) + +Base = declarative_base() + + +def items_equal(x: Any, y: Any) -> bool: + return x == y + + +class LangchainKeyValueStores(Base): # type: ignore[valid-type,misc] + """Table used to save values.""" + + # ATTENTION: + # Prior to modifying 
this table, please determine whether + # we should create migrations for this table to make sure + # users do not experience data loss. + __tablename__ = "langchain_key_value_stores" + + namespace: Mapped[str] = mapped_column(primary_key=True, index=True, nullable=False) + key: Mapped[str] = mapped_column(primary_key=True, index=True, nullable=False) + value = mapped_column(LargeBinary, index=False, nullable=False) + + +# This is a fix of original SQLStore. +# This can will be removed when a PR will be merged. +class SQLStore(BaseStore[str, bytes]): + """BaseStore interface that works on an SQL database. + + Examples: + Create a SQLStore instance and perform operations on it: + + .. code-block:: python + + from langchain_rag.storage import SQLStore + + # Instantiate the SQLStore with the root path + sql_store = SQLStore(namespace="test", db_url="sqllite://:memory:") + + # Set values for keys + sql_store.mset([("key1", b"value1"), ("key2", b"value2")]) + + # Get values for keys + values = sql_store.mget(["key1", "key2"]) # Returns [b"value1", b"value2"] + + # Delete keys + sql_store.mdelete(["key1"]) + + # Iterate over keys + for key in sql_store.yield_keys(): + print(key) + + """ + + def __init__( + self, + *, + namespace: str, + db_url: Optional[Union[str, Path]] = None, + engine: Optional[Union[Engine, AsyncEngine]] = None, + engine_kwargs: Optional[Dict[str, Any]] = None, + async_mode: Optional[bool] = None, + ): + if db_url is None and engine is None: + raise ValueError("Must specify either db_url or engine") + + if db_url is not None and engine is not None: + raise ValueError("Must specify either db_url or engine, not both") + + _engine: Union[Engine, AsyncEngine] + if db_url: + if async_mode is None: + async_mode = False + if async_mode: + _engine = create_async_engine( + url=str(db_url), + **(engine_kwargs or {}), + ) + else: + _engine = create_engine(url=str(db_url), **(engine_kwargs or {})) + elif engine: + _engine = engine + + else: + raise AssertionError("Something went wrong with configuration of engine.") + + _session_maker: Union[sessionmaker[Session], async_sessionmaker[AsyncSession]] + if isinstance(_engine, AsyncEngine): + self.async_mode = True + _session_maker = async_sessionmaker(bind=_engine) + else: + self.async_mode = False + _session_maker = sessionmaker(bind=_engine) + + self.engine = _engine + self.dialect = _engine.dialect.name + self.session_maker = _session_maker + self.namespace = namespace + + def create_schema(self) -> None: + Base.metadata.create_all(self.engine) + + async def acreate_schema(self) -> None: + assert isinstance(self.engine, AsyncEngine) + async with self.engine.begin() as session: + await session.run_sync(Base.metadata.create_all) + + def drop(self) -> None: + Base.metadata.drop_all(bind=self.engine.connect()) + + async def amget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + assert isinstance(self.engine, AsyncEngine) + result: Dict[str, bytes] = {} + async with self._make_async_session() as session: + stmt = select(LangchainKeyValueStores).filter( + and_( + LangchainKeyValueStores.key.in_(keys), + LangchainKeyValueStores.namespace == self.namespace, + ) + ) + for v in await session.scalars(stmt): + result[v.key] = v.value + return [result.get(key) for key in keys] + + def mget(self, keys: Sequence[str]) -> List[Optional[bytes]]: + result = {} + + with self._make_sync_session() as session: + stmt = select(LangchainKeyValueStores).filter( + and_( + LangchainKeyValueStores.key.in_(keys), + LangchainKeyValueStores.namespace == 
self.namespace, + ) + ) + for v in session.scalars(stmt): + result[v.key] = v.value + return [result.get(key) for key in keys] + + async def amset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + async with self._make_async_session() as session: + await self._amdelete([key for key, _ in key_value_pairs], session) + session.add_all( + [ + LangchainKeyValueStores(namespace=self.namespace, key=k, value=v) + for k, v in key_value_pairs + ] + ) + await session.commit() + + def mset(self, key_value_pairs: Sequence[Tuple[str, bytes]]) -> None: + values: Dict[str, bytes] = dict(key_value_pairs) + with self._make_sync_session() as session: + self._mdelete(list(values.keys()), session) + session.add_all( + [ + LangchainKeyValueStores(namespace=self.namespace, key=k, value=v) + for k, v in values.items() + ] + ) + session.commit() + + def _mdelete(self, keys: Sequence[str], session: Session) -> None: + stmt = delete(LangchainKeyValueStores).filter( + and_( + LangchainKeyValueStores.key.in_(keys), + LangchainKeyValueStores.namespace == self.namespace, + ) + ) + session.execute(stmt) + + async def _amdelete(self, keys: Sequence[str], session: AsyncSession) -> None: + stmt = delete(LangchainKeyValueStores).filter( + and_( + LangchainKeyValueStores.key.in_(keys), + LangchainKeyValueStores.namespace == self.namespace, + ) + ) + await session.execute(stmt) + + def mdelete(self, keys: Sequence[str]) -> None: + with self._make_sync_session() as session: + self._mdelete(keys, session) + session.commit() + + async def amdelete(self, keys: Sequence[str]) -> None: + async with self._make_async_session() as session: + await self._amdelete(keys, session) + await session.commit() + + def yield_keys(self, *, prefix: Optional[str] = None) -> Iterator[str]: + with self._make_sync_session() as session: + for v in session.query(LangchainKeyValueStores).filter( # type: ignore + LangchainKeyValueStores.namespace == self.namespace + ): + if str(v.key).startswith(prefix or ""): + yield str(v.key) + session.close() + + async def ayield_keys(self, *, prefix: Optional[str] = None) -> AsyncIterator[str]: + async with self._make_async_session() as session: + stmt = select(LangchainKeyValueStores).filter( + LangchainKeyValueStores.namespace == self.namespace + ) + for v in await session.scalars(stmt): + if str(v.key).startswith(prefix or ""): + yield str(v.key) + await session.close() + + @contextlib.contextmanager + def _make_sync_session(self) -> Generator[Session, None, None]: + """Make an async session.""" + if self.async_mode: + raise ValueError( + "Attempting to use a sync method in when async mode is turned on. " + "Please use the corresponding async method instead." + ) + with cast(Session, self.session_maker()) as session: + yield cast(Session, session) + + @contextlib.asynccontextmanager + async def _make_async_session(self) -> AsyncGenerator[AsyncSession, None]: + """Make an async session.""" + if not self.async_mode: + raise ValueError( + "Attempting to use an async method in when sync mode is turned on. " + "Please use the corresponding async method instead." 
+ ) + async with cast(AsyncSession, self.session_maker()) as session: + yield cast(AsyncSession, session) diff --git a/libs/community/langchain_community/tools/asknews/__init__.py b/libs/community/langchain_community/tools/asknews/__init__.py new file mode 100644 index 0000000000000..635745a7d7d44 --- /dev/null +++ b/libs/community/langchain_community/tools/asknews/__init__.py @@ -0,0 +1,7 @@ +"""AskNews API toolkit.""" + +from langchain_community.tools.asknews.tool import ( + AskNewsSearch, +) + +__all__ = ["AskNewsSearch"] diff --git a/libs/community/langchain_community/tools/asknews/tool.py b/libs/community/langchain_community/tools/asknews/tool.py new file mode 100644 index 0000000000000..e3d39027aa701 --- /dev/null +++ b/libs/community/langchain_community/tools/asknews/tool.py @@ -0,0 +1,80 @@ +""" +Tool for the AskNews API. + +To use this tool, you must first set your credentials as environment variables: + ASKNEWS_CLIENT_ID + ASKNEWS_CLIENT_SECRET +""" + +from typing import Optional, Type + +from langchain_core.callbacks import ( + AsyncCallbackManagerForToolRun, + CallbackManagerForToolRun, +) +from langchain_core.pydantic_v1 import BaseModel, Field +from langchain_core.tools import BaseTool + +from langchain_community.utilities.asknews import AskNewsAPIWrapper + + +class SearchInput(BaseModel): + """Input for the AskNews Search tool.""" + + query: str = Field( + description="Search query to be used for finding real-time or historical news " + "information." + ) + hours_back: Optional[int] = Field( + 0, + description="If the Assistant deems that the event may have occurred more " + "than 48 hours ago, it estimates the number of hours back to search. For " + "example, if the event was one month ago, the Assistant may set this to 720. " + "One week would be 168. The Assistant can estimate up to on year back (8760).", + ) + + +class AskNewsSearch(BaseTool): + """Tool that searches the AskNews API.""" + + name: str = "asknews_search" + description: str = ( + "This tool allows you to perform a search on up-to-date news and historical " + "news. If you needs news from more than 48 hours ago, you can estimate the " + "number of hours back to search." 
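A minimal sketch for the `SQLStore` defined above, using an in-memory SQLite database; note the SQLAlchemy URL form `sqlite:///:memory:` (any SQLAlchemy-supported `db_url` should work the same way), and that `create_schema()` must run before the first write.

```python
# Hedged sketch for SQLStore backed by in-memory SQLite.
from langchain_community.storage.sql import SQLStore

store = SQLStore(namespace="test", db_url="sqlite:///:memory:")
store.create_schema()                          # creates the langchain_key_value_stores table

store.mset([("key1", b"value1"), ("key2", b"value2")])
print(store.mget(["key1", "key2", "key3"]))    # -> [b"value1", b"value2", None]
print(list(store.yield_keys(prefix="key")))    # -> ["key1", "key2"]
store.mdelete(["key1"])
```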
+ ) + api_wrapper: AskNewsAPIWrapper = Field(default_factory=AskNewsAPIWrapper) # type: ignore[arg-type] + max_results: int = 10 + args_schema: Type[BaseModel] = SearchInput + + def _run( + self, + query: str, + hours_back: int = 0, + run_manager: Optional[CallbackManagerForToolRun] = None, + ) -> str: + """Use the tool.""" + try: + return self.api_wrapper.search_news( + query, + hours_back=hours_back, + max_results=self.max_results, + ) + except Exception as e: + return repr(e) + + async def _arun( + self, + query: str, + hours_back: int = 0, + run_manager: Optional[AsyncCallbackManagerForToolRun] = None, + ) -> str: + """Use the tool asynchronously.""" + try: + return await self.api_wrapper.asearch_news( + query, + hours_back=hours_back, + max_results=self.max_results, + ) + except Exception as e: + return repr(e) diff --git a/libs/community/langchain_community/tools/databricks/__init__.py b/libs/community/langchain_community/tools/databricks/__init__.py new file mode 100644 index 0000000000000..9a1d5ffe53677 --- /dev/null +++ b/libs/community/langchain_community/tools/databricks/__init__.py @@ -0,0 +1,3 @@ +from langchain_community.tools.databricks.tool import UCFunctionToolkit + +__all__ = ["UCFunctionToolkit"] diff --git a/libs/community/langchain_community/tools/databricks/_execution.py b/libs/community/langchain_community/tools/databricks/_execution.py new file mode 100644 index 0000000000000..6cc0c661562d1 --- /dev/null +++ b/libs/community/langchain_community/tools/databricks/_execution.py @@ -0,0 +1,172 @@ +import json +from dataclasses import dataclass +from io import StringIO +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional + +if TYPE_CHECKING: + from databricks.sdk import WorkspaceClient + from databricks.sdk.service.catalog import FunctionInfo + from databricks.sdk.service.sql import StatementParameterListItem + + +def is_scalar(function: "FunctionInfo") -> bool: + from databricks.sdk.service.catalog import ColumnTypeName + + return function.data_type != ColumnTypeName.TABLE_TYPE + + +@dataclass +class ParameterizedStatement: + statement: str + parameters: List["StatementParameterListItem"] + + +@dataclass +class FunctionExecutionResult: + """ + Result of executing a function. + We always use a string to present the result value for AI model to consume. + """ + + error: Optional[str] = None + format: Optional[Literal["SCALAR", "CSV"]] = None + value: Optional[str] = None + truncated: Optional[bool] = None + + def to_json(self) -> str: + data = {k: v for (k, v) in self.__dict__.items() if v is not None} + return json.dumps(data) + + +def get_execute_function_sql_stmt( + function: "FunctionInfo", json_params: Dict[str, Any] +) -> ParameterizedStatement: + from databricks.sdk.service.catalog import ColumnTypeName + from databricks.sdk.service.sql import StatementParameterListItem + + parts = [] + output_params = [] + if is_scalar(function): + # TODO: IDENTIFIER(:function) did not work + parts.append(f"SELECT {function.full_name}(") + else: + parts.append(f"SELECT * FROM {function.full_name}(") + if function.input_params is None or function.input_params.parameters is None: + assert ( + not json_params + ), "Function has no parameters but parameters were provided." + else: + args = [] + use_named_args = False + for p in function.input_params.parameters: + if p.name not in json_params: + if p.parameter_default is not None: + use_named_args = True + else: + raise ValueError( + f"Parameter {p.name} is required but not provided." 
+ ) + else: + arg_clause = "" + if use_named_args: + arg_clause += f"{p.name} => " + json_value = json_params[p.name] + if p.type_name in ( + ColumnTypeName.ARRAY, + ColumnTypeName.MAP, + ColumnTypeName.STRUCT, + ): + # Use from_json to restore values of complex types. + json_value_str = json.dumps(json_value) + # TODO: parametrize type + arg_clause += f"from_json(:{p.name}, '{p.type_text}')" + output_params.append( + StatementParameterListItem(name=p.name, value=json_value_str) + ) + elif p.type_name == ColumnTypeName.BINARY: + # Use ubbase64 to restore binary values. + arg_clause += f"unbase64(:{p.name})" + output_params.append( + StatementParameterListItem(name=p.name, value=json_value) + ) + else: + arg_clause += f":{p.name}" + output_params.append( + StatementParameterListItem( + name=p.name, value=json_value, type=p.type_text + ) + ) + args.append(arg_clause) + parts.append(",".join(args)) + parts.append(")") + # TODO: check extra params in kwargs + statement = "".join(parts) + return ParameterizedStatement(statement=statement, parameters=output_params) + + +def execute_function( + ws: "WorkspaceClient", + warehouse_id: str, + function: "FunctionInfo", + parameters: Dict[str, Any], +) -> FunctionExecutionResult: + """ + Execute a function with the given arguments and return the result. + """ + try: + import pandas as pd + except ImportError as e: + raise ImportError( + "Could not import pandas python package. " + "Please install it with `pip install pandas`." + ) from e + from databricks.sdk.service.sql import StatementState + + # TODO: async so we can run functions in parallel + parametrized_statement = get_execute_function_sql_stmt(function, parameters) + # TODO: configurable limits + response = ws.statement_execution.execute_statement( + statement=parametrized_statement.statement, + warehouse_id=warehouse_id, + parameters=parametrized_statement.parameters, + wait_timeout="30s", + row_limit=100, + byte_limit=4096, + ) + status = response.status + assert status is not None, f"Statement execution failed: {response}" + if status.state != StatementState.SUCCEEDED: + error = status.error + assert ( + error is not None + ), "Statement execution failed but no error message was provided." + return FunctionExecutionResult(error=f"{error.error_code}: {error.message}") + manifest = response.manifest + assert manifest is not None + truncated = manifest.truncated + result = response.result + assert ( + result is not None + ), "Statement execution succeeded but no result was provided." + data_array = result.data_array + if is_scalar(function): + value = None + if data_array and len(data_array) > 0 and len(data_array[0]) > 0: + value = str(data_array[0][0]) # type: ignore + return FunctionExecutionResult( + format="SCALAR", value=value, truncated=truncated + ) + else: + schema = manifest.schema + assert ( + schema is not None and schema.columns is not None + ), "Statement execution succeeded but no schema was provided." 
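+        # Table-valued results are flattened into CSV text below so the LLM that
+        # consumes FunctionExecutionResult receives a compact, readable table.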
+ columns = [c.name for c in schema.columns] + if data_array is None: + data_array = [] + pdf = pd.DataFrame.from_records(data_array, columns=columns) + csv_buffer = StringIO() + pdf.to_csv(csv_buffer, index=False) + return FunctionExecutionResult( + format="CSV", value=csv_buffer.getvalue(), truncated=truncated + ) diff --git a/libs/community/langchain_community/tools/databricks/tool.py b/libs/community/langchain_community/tools/databricks/tool.py new file mode 100644 index 0000000000000..33f1d9313ee5e --- /dev/null +++ b/libs/community/langchain_community/tools/databricks/tool.py @@ -0,0 +1,201 @@ +import json +from datetime import date, datetime +from decimal import Decimal +from hashlib import md5 +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union + +from langchain_core.pydantic_v1 import BaseModel, Field, create_model +from langchain_core.tools import BaseTool, BaseToolkit, StructuredTool +from typing_extensions import Self + +if TYPE_CHECKING: + from databricks.sdk import WorkspaceClient + from databricks.sdk.service.catalog import FunctionInfo + +from langchain_community.tools.databricks._execution import execute_function + + +def _uc_type_to_pydantic_type(uc_type_json: Union[str, Dict[str, Any]]) -> Type: + mapping = { + "long": int, + "binary": bytes, + "boolean": bool, + "date": date, + "double": float, + "float": float, + "integer": int, + "short": int, + "string": str, + "timestamp": datetime, + "timestamp_ntz": datetime, + "byte": int, + } + if isinstance(uc_type_json, str): + if uc_type_json in mapping: + return mapping[uc_type_json] + else: + if uc_type_json.startswith("decimal"): + return Decimal + elif uc_type_json == "void" or uc_type_json.startswith("interval"): + raise TypeError(f"Type {uc_type_json} is not supported.") + else: + raise TypeError( + f"Unknown type {uc_type_json}. Try upgrading this package." + ) + else: + assert isinstance(uc_type_json, dict) + tpe = uc_type_json["type"] + if tpe == "array": + element_type = _uc_type_to_pydantic_type(uc_type_json["elementType"]) + if uc_type_json["containsNull"]: + element_type = Optional[element_type] # type: ignore + return List[element_type] # type: ignore + elif tpe == "map": + key_type = uc_type_json["keyType"] + assert key_type == "string", TypeError( + f"Only support STRING key type for MAP but got {key_type}." + ) + value_type = _uc_type_to_pydantic_type(uc_type_json["valueType"]) + if uc_type_json["valueContainsNull"]: + value_type: Type = Optional[value_type] # type: ignore + return Dict[str, value_type] # type: ignore + elif tpe == "struct": + fields = {} + for field in uc_type_json["fields"]: + field_type = _uc_type_to_pydantic_type(field["type"]) + if field.get("nullable"): + field_type = Optional[field_type] # type: ignore + comment = ( + uc_type_json["metadata"].get("comment") + if "metadata" in uc_type_json + else None + ) + fields[field["name"]] = (field_type, Field(..., description=comment)) + uc_type_json_str = json.dumps(uc_type_json, sort_keys=True) + type_hash = md5(uc_type_json_str.encode()).hexdigest()[:8] + return create_model(f"Struct_{type_hash}", **fields) # type: ignore + else: + raise TypeError(f"Unknown type {uc_type_json}. 
Try upgrading this package.") + + +def _generate_args_schema(function: "FunctionInfo") -> Type[BaseModel]: + if function.input_params is None: + return BaseModel + params = function.input_params.parameters + assert params is not None + fields = {} + for p in params: + assert p.type_json is not None + type_json = json.loads(p.type_json)["type"] + pydantic_type = _uc_type_to_pydantic_type(type_json) + description = p.comment + default: Any = ... + if p.parameter_default: + pydantic_type = Optional[pydantic_type] # type: ignore + default = None + # TODO: Convert default value string to the correct type. + # We might need to use statement execution API + # to get the JSON representation of the value. + default_description = f"(Default: {p.parameter_default})" + if description: + description += f" {default_description}" + else: + description = default_description + fields[p.name] = ( + pydantic_type, + Field(default=default, description=description), + ) + return create_model( + f"{function.catalog_name}__{function.schema_name}__{function.name}__params", + **fields, # type: ignore + ) + + +def _get_tool_name(function: "FunctionInfo") -> str: + tool_name = f"{function.catalog_name}__{function.schema_name}__{function.name}"[ + -64: + ] + return tool_name + + +def _get_default_workspace_client() -> "WorkspaceClient": + try: + from databricks.sdk import WorkspaceClient + except ImportError as e: + raise ImportError( + "Could not import databricks-sdk python package. " + "Please install it with `pip install databricks-sdk`." + ) from e + return WorkspaceClient() + + +class UCFunctionToolkit(BaseToolkit): + warehouse_id: str = Field( + description="The ID of a Databricks SQL Warehouse to execute functions." + ) + + workspace_client: "WorkspaceClient" = Field( + default_factory=_get_default_workspace_client, + description="Databricks workspace client.", + ) + + tools: Dict[str, BaseTool] = Field(default_factory=dict) + + class Config: + arbitrary_types_allowed = True + + def include(self, *function_names: str, **kwargs: Any) -> Self: + """ + Includes UC functions to the toolkit. + + Args: + functions: A list of UC function names in the format + "catalog_name.schema_name.function_name" or + "catalog_name.schema_name.*". + If the function name ends with ".*", + all functions in the schema will be added. + kwargs: Extra arguments to pass to StructuredTool, e.g., `return_direct`. + """ + for name in function_names: + if name.endswith(".*"): + catalog_name, schema_name = name[:-2].split(".") + # TODO: handle pagination, warn and truncate if too many + functions = self.workspace_client.functions.list( + catalog_name=catalog_name, schema_name=schema_name + ) + for f in functions: + assert f.full_name is not None + self.include(f.full_name, **kwargs) + else: + if name not in self.tools: + self.tools[name] = self._make_tool(name, **kwargs) + return self + + def _make_tool(self, function_name: str, **kwargs: Any) -> BaseTool: + function = self.workspace_client.functions.get(function_name) + name = _get_tool_name(function) + description = function.comment or "" + args_schema = _generate_args_schema(function) + + def func(*args: Any, **kwargs: Any) -> str: + # TODO: We expect all named args and ignore args. + # Non-empty args show up when the function has no parameters. 
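+            # Round-tripping through json with default=str coerces values that are
+            # not natively JSON-serializable (e.g. datetime, Decimal) to strings.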
+ args_json = json.loads(json.dumps(kwargs, default=str)) + result = execute_function( + ws=self.workspace_client, + warehouse_id=self.warehouse_id, + function=function, + parameters=args_json, + ) + return result.to_json() + + return StructuredTool( + name=name, + description=description, + args_schema=args_schema, + func=func, + **kwargs, + ) + + def get_tools(self) -> List[BaseTool]: + return list(self.tools.values()) diff --git a/libs/community/langchain_community/tools/zenguard/__init__.py b/libs/community/langchain_community/tools/zenguard/__init__.py new file mode 100644 index 0000000000000..ac9ddbb11b12b --- /dev/null +++ b/libs/community/langchain_community/tools/zenguard/__init__.py @@ -0,0 +1,11 @@ +from langchain_community.tools.zenguard.tool import ( + Detector, + ZenGuardInput, + ZenGuardTool, +) + +__all__ = [ + "ZenGuardTool", + "Detector", + "ZenGuardInput", +] diff --git a/libs/community/langchain_community/tools/zenguard/tool.py b/libs/community/langchain_community/tools/zenguard/tool.py new file mode 100644 index 0000000000000..1bb2a8fe05b0d --- /dev/null +++ b/libs/community/langchain_community/tools/zenguard/tool.py @@ -0,0 +1,104 @@ +import os +from enum import Enum +from typing import Any, Dict, List, Optional + +import requests +from langchain_core.pydantic_v1 import BaseModel, Field, ValidationError, validator +from langchain_core.tools import BaseTool + + +class Detector(str, Enum): + ALLOWED_TOPICS = "allowed_subjects" + BANNED_TOPICS = "banned_subjects" + PROMPT_INJECTION = "prompt_injection" + KEYWORDS = "keywords" + PII = "pii" + SECRETS = "secrets" + TOXICITY = "toxicity" + + +class DetectorAPI(str, Enum): + ALLOWED_TOPICS = "v1/detect/topics/allowed" + BANNED_TOPICS = "v1/detect/topics/banned" + PROMPT_INJECTION = "v1/detect/prompt_injection" + KEYWORDS = "v1/detect/keywords" + PII = "v1/detect/pii" + SECRETS = "v1/detect/secrets" + TOXICITY = "v1/detect/toxicity" + + +class ZenGuardInput(BaseModel): + prompts: List[str] = Field( + ..., + min_items=1, + min_length=1, + description="Prompt to check", + ) + detectors: List[Detector] = Field( + ..., + min_items=1, + description="List of detectors by which you want to check the prompt", + ) + in_parallel: bool = Field( + default=True, + description="Run prompt detection by the detector in parallel or sequentially", + ) + + +class ZenGuardTool(BaseTool): + name: str = "ZenGuard" + description: str = ( + "ZenGuard AI integration package. ZenGuard AI - the fastest GenAI guardrails." 
+ ) + args_schema = ZenGuardInput + return_direct = True + + zenguard_api_key: Optional[str] = Field(default=None) + + _ZENGUARD_API_URL_ROOT = "https://api.zenguard.ai/" + _ZENGUARD_API_KEY_ENV_NAME = "ZENGUARD_API_KEY" + + @validator("zenguard_api_key", pre=True, always=True, check_fields=False) + def set_api_key(cls, v: str) -> str: + if v is None: + v = os.getenv(cls._ZENGUARD_API_KEY_ENV_NAME) + if v is None: + raise ValidationError( + "The zenguard_api_key tool option must be set either " + "by passing zenguard_api_key to the tool or by setting " + f"the f{cls._ZENGUARD_API_KEY_ENV_NAME} environment variable" + ) + return v + + def _run( + self, + prompts: List[str], + detectors: List[Detector], + in_parallel: bool = True, + ) -> Dict[str, Any]: + try: + postfix = None + json: Optional[Dict[str, Any]] = None + if len(detectors) == 1: + postfix = self._convert_detector_to_api(detectors[0]) + json = {"messages": prompts} + else: + postfix = "v1/detect" + json = { + "messages": prompts, + "in_parallel": in_parallel, + "detectors": detectors, + } + response = requests.post( + self._ZENGUARD_API_URL_ROOT + postfix, + json=json, + headers={"x-api-key": self.zenguard_api_key}, + timeout=5, + ) + response.raise_for_status() + return response.json() + except (requests.HTTPError, requests.Timeout) as e: + return {"error": str(e)} + + def _convert_detector_to_api(self, detector: Detector) -> str: + return DetectorAPI[detector.name].value diff --git a/libs/community/langchain_community/utilities/asknews.py b/libs/community/langchain_community/utilities/asknews.py new file mode 100644 index 0000000000000..4ac5445568ce8 --- /dev/null +++ b/libs/community/langchain_community/utilities/asknews.py @@ -0,0 +1,115 @@ +"""Util that calls AskNews api.""" + +from __future__ import annotations + +from datetime import datetime, timedelta +from typing import Any, Dict, Optional + +from langchain_core.pydantic_v1 import BaseModel, Extra, root_validator +from langchain_core.utils import get_from_dict_or_env + + +class AskNewsAPIWrapper(BaseModel): + """Wrapper for AskNews API.""" + + asknews_sync: Any #: :meta private: + asknews_async: Any #: :meta private: + asknews_client_id: Optional[str] = None + """Client ID for the AskNews API.""" + asknews_client_secret: Optional[str] = None + """Client Secret for the AskNews API.""" + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + """Validate that api credentials and python package exists in environment.""" + + asknews_client_id = get_from_dict_or_env( + values, "asknews_client_id", "ASKNEWS_CLIENT_ID" + ) + asknews_client_secret = get_from_dict_or_env( + values, "asknews_client_secret", "ASKNEWS_CLIENT_SECRET" + ) + + try: + import asknews_sdk + + except ImportError: + raise ImportError( + "AskNews python package not found. " + "Please install it with `pip install asknews`." 
+ ) + + an_sync = asknews_sdk.AskNewsSDK( + client_id=asknews_client_id, + client_secret=asknews_client_secret, + scopes=["news"], + ) + an_async = asknews_sdk.AsyncAskNewsSDK( + client_id=asknews_client_id, + client_secret=asknews_client_secret, + scopes=["news"], + ) + + values["asknews_sync"] = an_sync + values["asknews_async"] = an_async + values["asknews_client_id"] = asknews_client_id + values["asknews_client_secret"] = asknews_client_secret + + return values + + def search_news( + self, query: str, max_results: int = 10, hours_back: int = 0 + ) -> str: + """Search news in AskNews API synchronously.""" + if hours_back > 48: + method = "kw" + historical = True + start = int((datetime.now() - timedelta(hours=hours_back)).timestamp()) + stop = int(datetime.now().timestamp()) + else: + historical = False + method = "nl" + start = None + stop = None + + response = self.asknews_sync.news.search_news( + query=query, + n_articles=max_results, + method=method, + historical=historical, + start_timestamp=start, + end_timestamp=stop, + return_type="string", + ) + return response.as_string + + async def asearch_news( + self, query: str, max_results: int = 10, hours_back: int = 0 + ) -> str: + """Search news in AskNews API asynchronously.""" + if hours_back > 48: + method = "kw" + historical = True + start = int((datetime.now() - timedelta(hours=hours_back)).timestamp()) + stop = int(datetime.now().timestamp()) + else: + historical = False + method = "nl" + start = None + stop = None + + response = await self.asknews_async.news.search_news( + query=query, + n_articles=max_results, + method=method, + historical=historical, + start_timestamp=start, + end_timestamp=stop, + return_type="string", + ) + return response.as_string diff --git a/libs/community/langchain_community/utils/user_agent.py b/libs/community/langchain_community/utils/user_agent.py new file mode 100644 index 0000000000000..befb8cf9a0f8a --- /dev/null +++ b/libs/community/langchain_community/utils/user_agent.py @@ -0,0 +1,16 @@ +import logging +import os + +log = logging.getLogger(__name__) + + +def get_user_agent() -> str: + """Get user agent from environment variable.""" + env_user_agent = os.environ.get("USER_AGENT") + if not env_user_agent: + log.warning( + "USER_AGENT environment variable not set, " + "consider setting it to identify your requests." 
+ ) + return "DefaultLangchainUserAgent" + return env_user_agent diff --git a/libs/community/langchain_community/vectorstores/aerospike.py b/libs/community/langchain_community/vectorstores/aerospike.py new file mode 100644 index 0000000000000..e7759923b6126 --- /dev/null +++ b/libs/community/langchain_community/vectorstores/aerospike.py @@ -0,0 +1,598 @@ +from __future__ import annotations + +import logging +import uuid +import warnings +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, +) + +import numpy as np +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +from langchain_community.vectorstores.utils import ( + DistanceStrategy, + maximal_marginal_relevance, +) + +if TYPE_CHECKING: + from aerospike_vector_search import Client + from aerospike_vector_search.types import Neighbor, VectorDistanceMetric + +logger = logging.getLogger(__name__) + + +def _import_aerospike() -> Any: + try: + from aerospike_vector_search import Client + except ImportError as e: + raise ImportError( + "Could not import aerospike_vector_search python package. " + "Please install it with `pip install aerospike_vector`." + ) from e + return Client + + +AVST = TypeVar("AVST", bound="Aerospike") + + +class Aerospike(VectorStore): + """`Aerospike` vector store. + + To use, you should have the ``aerospike_vector_search`` python package installed. + """ + + def __init__( + self, + client: Client, + embedding: Union[Embeddings, Callable], + namespace: str, + index_name: Optional[str] = None, + vector_key: str = "_vector", + text_key: str = "_text", + id_key: str = "_id", + set_name: Optional[str] = None, + distance_strategy: Optional[ + Union[DistanceStrategy, VectorDistanceMetric] + ] = DistanceStrategy.EUCLIDEAN_DISTANCE, + ): + """Initialize with Aerospike client. + + Args: + client: Aerospike client. + embedding: Embeddings object or Callable (deprecated) to embed text. + namespace: Namespace to use for storing vectors. This should match + index_name: Name of the index previously created in Aerospike. This + vector_key: Key to use for vector in metadata. This should match the + key used during index creation. + text_key: Key to use for text in metadata. + id_key: Key to use for id in metadata. + set_name: Default set name to use for storing vectors. + distance_strategy: Distance strategy to use for similarity search + This should match the distance strategy used during index creation. + """ + + aerospike = _import_aerospike() + + if not isinstance(embedding, Embeddings): + warnings.warn( + "Passing in `embedding` as a Callable is deprecated. Please pass in an" + " Embeddings object instead." 
+ ) + + if not isinstance(client, aerospike): + raise ValueError( + f"client should be an instance of aerospike_vector_search.Client, " + f"got {type(client)}" + ) + + self._client = client + self._embedding = embedding + self._text_key = text_key + self._vector_key = vector_key + self._id_key = id_key + self._index_name = index_name + self._namespace = namespace + self._set_name = set_name + self._distance_strategy = self.convert_distance_strategy(distance_strategy) + + @property + def embeddings(self) -> Optional[Embeddings]: + """Access the query embedding object if available.""" + if isinstance(self._embedding, Embeddings): + return self._embedding + return None + + def _embed_documents(self, texts: Iterable[str]) -> List[List[float]]: + """Embed search docs.""" + if isinstance(self._embedding, Embeddings): + return self._embedding.embed_documents(list(texts)) + return [self._embedding(t) for t in texts] + + def _embed_query(self, text: str) -> List[float]: + """Embed query text.""" + if isinstance(self._embedding, Embeddings): + return self._embedding.embed_query(text) + return self._embedding(text) + + @staticmethod + def convert_distance_strategy( + distance_strategy: Union[VectorDistanceMetric, DistanceStrategy], + ) -> DistanceStrategy: + """ + Convert Aerospikes distance strategy to langchains DistanceStrategy + enum. This is a convenience method to allow users to pass in the same + distance metric used to create the index. + """ + from aerospike_vector_search.types import VectorDistanceMetric + + if isinstance(distance_strategy, DistanceStrategy): + return distance_strategy + + if distance_strategy == VectorDistanceMetric.COSINE: + return DistanceStrategy.COSINE + + if distance_strategy == VectorDistanceMetric.DOT_PRODUCT: + return DistanceStrategy.DOT_PRODUCT + + if distance_strategy == VectorDistanceMetric.SQUARED_EUCLIDEAN: + return DistanceStrategy.EUCLIDEAN_DISTANCE + + raise ValueError( + "Unknown distance strategy, must be cosine, dot_product" ", or euclidean" + ) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + set_name: Optional[str] = None, + embedding_chunk_size: int = 1000, + index_name: Optional[str] = None, + wait_for_index: bool = True, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + ids: Optional list of ids to associate with the texts. + set_name: Optional aerospike set name to add the texts to. + batch_size: Batch size to use when adding the texts to the vectorstore. + embedding_chunk_size: Chunk size to use when embedding the texts. + index_name: Optional aerospike index name used for waiting for index + completion. If not provided, the default index_name will be used. + wait_for_index: If True, wait for the all the texts to be indexed + before returning. Requires index_name to be provided. Defaults + to True. + **kwargs: Additional keyword arguments to pass to the client upsert call. + + Returns: + List of ids from adding the texts into the vectorstore. 
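+
+        Example:
+            Illustrative sketch only; it assumes an existing Aerospike vector
+            search client, OpenAI embeddings, and a pre-created ``my-index``:
+
+            .. code-block:: python
+
+                from langchain_openai import OpenAIEmbeddings
+
+                store = Aerospike(
+                    client,
+                    OpenAIEmbeddings(),
+                    namespace="test",
+                    index_name="my-index",
+                )
+                ids = store.add_texts(
+                    ["hello", "world"],
+                    metadatas=[{"topic": "greeting"}, {"topic": "place"}],
+                )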
+ + """ + if set_name is None: + set_name = self._set_name + + if index_name is None: + index_name = self._index_name + + if wait_for_index and index_name is None: + raise ValueError("if wait_for_index is True, index_name must be provided") + + texts = list(texts) + ids = ids or [str(uuid.uuid4()) for _ in texts] + + # We need to shallow copy so that we can add the vector and text keys + if metadatas: + metadatas = [m.copy() for m in metadatas] + else: + metadatas = metadatas or [{} for _ in texts] + + for i in range(0, len(texts), embedding_chunk_size): + chunk_texts = texts[i : i + embedding_chunk_size] + chunk_ids = ids[i : i + embedding_chunk_size] + chunk_metadatas = metadatas[i : i + embedding_chunk_size] + embeddings = self._embed_documents(chunk_texts) + + for metadata, embedding, text in zip( + chunk_metadatas, embeddings, chunk_texts + ): + metadata[self._vector_key] = embedding + metadata[self._text_key] = text + + for id, metadata in zip(chunk_ids, chunk_metadatas): + metadata[self._id_key] = id + self._client.upsert( + namespace=self._namespace, + key=id, + set_name=set_name, + record_data=metadata, + **kwargs, + ) + + if wait_for_index: + self._client.wait_for_index_completion( + namespace=self._namespace, + name=index_name, + ) + + return ids + + def delete( + self, + ids: Optional[List[str]] = None, + set_name: Optional[str] = None, + **kwargs: Any, + ) -> Optional[bool]: + """Delete by vector ID or other criteria. + + Args: + ids: List of ids to delete. + **kwargs: Other keyword arguments to pass to client delete call. + + Returns: + Optional[bool]: True if deletion is successful, + False otherwise, None if not implemented. + """ + from aerospike_vector_search import AVSServerError + + if ids: + for id in ids: + try: + self._client.delete( + namespace=self._namespace, + key=id, + set_name=set_name, + **kwargs, + ) + except AVSServerError: + return False + + return True + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return aerospike documents most similar to query, along with scores. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + metadata_keys: List of metadata keys to return with the documents. + If None, all metadata keys will be returned. Defaults to None. + index_name: Name of the index to search. Overrides the default + index_name. + kwargs: Additional keyword arguments to pass to the search method. + + Returns: + List of Documents most similar to the query and associated scores. + """ + + return self.similarity_search_by_vector_with_score( + self._embed_query(query), + k=k, + metadata_keys=metadata_keys, + index_name=index_name, + **kwargs, + ) + + def similarity_search_by_vector_with_score( + self, + embedding: List[float], + k: int = 4, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return aerospike documents most similar to embedding, along with scores. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + metadata_keys: List of metadata keys to return with the documents. + If None, all metadata keys will be returned. Defaults to None. + index_name: Name of the index to search. Overrides the default + index_name. 
+ kwargs: Additional keyword arguments to pass to the client + vector_search method. + + Returns: + List of Documents most similar to the query and associated scores. + + """ + + docs = [] + + if metadata_keys and self._text_key not in metadata_keys: + metadata_keys = [self._text_key] + metadata_keys + + if index_name is None: + index_name = self._index_name + + if index_name is None: + raise ValueError("index_name must be provided") + + results: list[Neighbor] = self._client.vector_search( + index_name=index_name, + namespace=self._namespace, + query=embedding, + limit=k, + field_names=metadata_keys, + **kwargs, + ) + + for result in results: + metadata = result.fields + + if self._text_key in metadata: + text = metadata.pop(self._text_key) + score = result.distance + docs.append((Document(page_content=text, metadata=metadata), score)) + else: + logger.warning( + f"Found document with no `{self._text_key}` key. Skipping." + ) + continue + + return docs + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to embedding vector. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + metadata_keys: List of metadata keys to return with the documents. + If None, all metadata keys will be returned. Defaults to None. + index_name: Name of the index to search. Overrides the default + index_name. + kwargs: Additional keyword arguments to pass to the search method. + + + Returns: + List of Documents most similar to the query vector. + """ + return [ + doc + for doc, _ in self.similarity_search_by_vector_with_score( + embedding, + k=k, + metadata_keys=metadata_keys, + index_name=index_name, + **kwargs, + ) + ] + + def similarity_search( + self, + query: str, + k: int = 4, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Return aerospike documents most similar to query. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + metadata_keys: List of metadata keys to return with the documents. + If None, all metadata keys will be returned. Defaults to None. + index_name: Optional name of the index to search. Overrides the + default index_name. + + Returns: + List of Documents most similar to the query and score for each + """ + docs_and_scores = self.similarity_search_with_score( + query, k=k, metadata_keys=metadata_keys, index_name=index_name, **kwargs + ) + return [doc for doc, _ in docs_and_scores] + + def _select_relevance_score_fn(self) -> Callable[[float], float]: + """ + The 'correct' relevance function + may differ depending on a few things, including: + - the distance / similarity metric used by the VectorStore + - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) + - embedding dimensionality + - etc. + + 0 is dissimilar, 1 is similar. + + Aerospike's relevance_fn assume euclidean and dot product embeddings are + normalized to unit norm. 
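+
+        For cosine, distances in [0, 2] are mapped onto [0, 1] via
+        ``1 - distance / 2`` (see ``_cosine_relevance_score_fn`` below).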
+ """ + if self._distance_strategy == DistanceStrategy.COSINE: + return self._cosine_relevance_score_fn + elif self._distance_strategy == DistanceStrategy.DOT_PRODUCT: + return self._max_inner_product_relevance_score_fn + elif self._distance_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE: + return self._euclidean_relevance_score_fn + else: + raise ValueError( + "Unknown distance strategy, must be cosine, dot_product" + ", or euclidean" + ) + + @staticmethod + def _cosine_relevance_score_fn(score: float) -> float: + """Aerospike returns cosine distance scores between [0,2] + + 0 is dissimilar, 1 is similar. + """ + return 1 - (score / 2) + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + embedding: Embedding to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree of + diversity among the results with 0 corresponding to maximum + diversity and 1 to minimum diversity. Defaults to 0.5. + metadata_keys: List of metadata keys to return with the documents. + If None, all metadata keys will be returned. Defaults to None. + index_name: Optional name of the index to search. Overrides the + default index_name. + Returns: + List of Documents selected by maximal marginal relevance. + """ + + if metadata_keys and self._vector_key not in metadata_keys: + metadata_keys = [self._vector_key] + metadata_keys + + docs = self.similarity_search_by_vector( + embedding, + k=fetch_k, + metadata_keys=metadata_keys, + index_name=index_name, + **kwargs, + ) + mmr_selected = maximal_marginal_relevance( + np.array([embedding], dtype=np.float32), + [doc.metadata[self._vector_key] for doc in docs], + k=k, + lambda_mult=lambda_mult, + ) + + if metadata_keys and self._vector_key in metadata_keys: + for i in mmr_selected: + docs[i].metadata.pop(self._vector_key) + + return [docs[i] for i in mmr_selected] + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata_keys: Optional[List[str]] = None, + index_name: Optional[str] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. + + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5. + index_name: Name of the index to search. + Returns: + List of Documents selected by maximal marginal relevance. 
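+
+        Example:
+            Illustrative sketch (assumes ``store`` was constructed as in the
+            ``add_texts`` example above):
+
+            .. code-block:: python
+
+                docs = store.max_marginal_relevance_search(
+                    "renaissance art",
+                    k=4,
+                    fetch_k=20,
+                    lambda_mult=0.5,
+                )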
+ """ + embedding = self._embed_query(query) + return self.max_marginal_relevance_search_by_vector( + embedding, + k, + fetch_k, + lambda_mult, + metadata_keys=metadata_keys, + index_name=index_name, + **kwargs, + ) + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + client: Client = None, + namespace: str = "test", + index_name: Optional[str] = None, + ids: Optional[List[str]] = None, + embeddings_chunk_size: int = 1000, + client_kwargs: Optional[dict] = None, + **kwargs: Any, + ) -> Aerospike: + """ + This is a user friendly interface that: + 1. Embeds text. + 2. Converts the texts into documents. + 3. Adds the documents to a provided Aerospike index + + This is intended to be a quick way to get started. + + Example: + .. code-block:: python + + from langchain_community.vectorstores import Aerospike + from langchain_openai import OpenAIEmbeddings + from aerospike_vector_search import Client, HostPort + + client = Client(seeds=HostPort(host="localhost", port=5000)) + aerospike = Aerospike.from_texts( + ["foo", "bar", "baz"], + embedder, + client, + "namespace", + index_name="index", + vector_key="vector", + distance_strategy=MODEL_DISTANCE_CALC, + ) + """ + aerospike = cls( + client, + embedding, + namespace, + **kwargs, + ) + + aerospike.add_texts( + texts, + metadatas=metadatas, + ids=ids, + index_name=index_name, + embedding_chunk_size=embeddings_chunk_size, + **(client_kwargs or {}), + ) + return aerospike diff --git a/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py new file mode 100644 index 0000000000000..5be52fb02c766 --- /dev/null +++ b/libs/community/langchain_community/vectorstores/azure_cosmos_db_no_sql.py @@ -0,0 +1,337 @@ +from __future__ import annotations + +import uuid +import warnings +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple + +import numpy as np +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +from langchain_community.vectorstores.utils import maximal_marginal_relevance + +if TYPE_CHECKING: + from azure.cosmos.cosmos_client import CosmosClient + + +class AzureCosmosDBNoSqlVectorSearch(VectorStore): + """`Azure Cosmos DB for NoSQL` vector store. + + To use, you should have both: + - the ``azure-cosmos`` python package installed + + You can read more about vector search using AzureCosmosDBNoSQL here: + https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/vector-search + """ + + def __init__( + self, + *, + cosmos_client: CosmosClient, + embedding: Embeddings, + vector_embedding_policy: Dict[str, Any], + indexing_policy: Dict[str, Any], + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any], + database_name: str = "vectorSearchDB", + container_name: str = "vectorSearchContainer", + create_container: bool = True, + ): + """ + Constructor for AzureCosmosDBNoSqlVectorSearch + + Args: + cosmos_client: Client used to connect to azure cosmosdb no sql account. + database_name: Name of the database to be created. + container_name: Name of the container to be created. + embedding: Text embedding model to use. + vector_embedding_policy: Vector Embedding Policy for the container. + indexing_policy: Indexing Policy for the container. + cosmos_container_properties: Container Properties for the container. 
+ cosmos_database_properties: Database Properties for the container. + """ + self._cosmos_client = cosmos_client + self._database_name = database_name + self._container_name = container_name + self._embedding = embedding + self._vector_embedding_policy = vector_embedding_policy + self._indexing_policy = indexing_policy + self._cosmos_container_properties = cosmos_container_properties + self._cosmos_database_properties = cosmos_database_properties + self._create_container = create_container + + if self._create_container: + if ( + indexing_policy["vectorIndexes"] is None + or len(indexing_policy["vectorIndexes"]) == 0 + ): + raise ValueError( + "vectorIndexes cannot be null or empty in the indexing_policy." + ) + if ( + vector_embedding_policy is None + or len(vector_embedding_policy["vectorEmbeddings"]) == 0 + ): + raise ValueError( + "vectorEmbeddings cannot be null " + "or empty in the vector_embedding_policy." + ) + if self._cosmos_container_properties["partition_key"] is None: + raise ValueError( + "partition_key cannot be null or empty for a container." + ) + + # Create the database if it already doesn't exist + self._database = self._cosmos_client.create_database_if_not_exists( + id=self._database_name, + offer_throughput=self._cosmos_database_properties.get("offer_throughput"), + session_token=self._cosmos_database_properties.get("session_token"), + initial_headers=self._cosmos_database_properties.get("initial_headers"), + etag=self._cosmos_database_properties.get("etag"), + match_condition=self._cosmos_database_properties.get("match_condition"), + ) + + # Create the collection if it already doesn't exist + self._container = self._database.create_container_if_not_exists( + id=self._container_name, + partition_key=self._cosmos_container_properties["partition_key"], + indexing_policy=self._indexing_policy, + default_ttl=self._cosmos_container_properties.get("default_ttl"), + offer_throughput=self._cosmos_container_properties.get("offer_throughput"), + unique_key_policy=self._cosmos_container_properties.get( + "unique_key_policy" + ), + conflict_resolution_policy=self._cosmos_container_properties.get( + "conflict_resolution_policy" + ), + analytical_storage_ttl=self._cosmos_container_properties.get( + "analytical_storage_ttl" + ), + computed_properties=self._cosmos_container_properties.get( + "computed_properties" + ), + etag=self._cosmos_container_properties.get("etag"), + match_condition=self._cosmos_container_properties.get("match_condition"), + session_token=self._cosmos_container_properties.get("session_token"), + initial_headers=self._cosmos_container_properties.get("initial_headers"), + vector_embedding_policy=self._vector_embedding_policy, + ) + + self._embedding_key = self._vector_embedding_policy["vectorEmbeddings"][0][ + "path" + ][1:] + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + + Returns: + List of ids from adding the texts into the vectorstore. 
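+
+        Example:
+            A minimal sketch; it assumes ``vectorstore`` was already created
+            with a valid vector embedding policy and indexing policy:
+
+            .. code-block:: python
+
+                ids = vectorstore.add_texts(
+                    ["foo", "bar"],
+                    metadatas=[{"title": "a"}, {"title": "b"}],
+                )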
+ """ + _metadatas = list(metadatas if metadatas is not None else ({} for _ in texts)) + + return self._insert_texts(list(texts), _metadatas) + + def _insert_texts( + self, texts: List[str], metadatas: List[Dict[str, Any]] + ) -> List[str]: + """Used to Load Documents into the collection + + Args: + texts: The list of documents strings to load + metadatas: The list of metadata objects associated with each document + + Returns: + List of ids from adding the texts into the vectorstore. + """ + # If the texts is empty, throw an error + if not texts: + raise Exception("Texts can not be null or empty") + + # Embed and create the documents + embeddings = self._embedding.embed_documents(texts) + text_key = "text" + + to_insert = [ + {"id": str(uuid.uuid4()), text_key: t, self._embedding_key: embedding, **m} + for t, m, embedding in zip(texts, metadatas, embeddings) + ] + # insert the documents in CosmosDB No Sql + doc_ids: List[str] = [] + for item in to_insert: + created_doc = self._container.create_item(item) + doc_ids.append(created_doc["id"]) + return doc_ids + + @classmethod + def _from_kwargs( + cls, + embedding: Embeddings, + *, + cosmos_client: CosmosClient, + vector_embedding_policy: Dict[str, Any], + indexing_policy: Dict[str, Any], + cosmos_container_properties: Dict[str, Any], + cosmos_database_properties: Dict[str, Any], + database_name: str = "vectorSearchDB", + container_name: str = "vectorSearchContainer", + **kwargs: Any, + ) -> AzureCosmosDBNoSqlVectorSearch: + if kwargs: + warnings.warn( + "Method 'from_texts' of AzureCosmosDBNoSql vector " + "store invoked with " + f"unsupported arguments " + f"({', '.join(sorted(kwargs))}), " + "which will be ignored." + ) + + return cls( + embedding=embedding, + cosmos_client=cosmos_client, + vector_embedding_policy=vector_embedding_policy, + indexing_policy=indexing_policy, + cosmos_container_properties=cosmos_container_properties, + cosmos_database_properties=cosmos_database_properties, + database_name=database_name, + container_name=container_name, + ) + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> AzureCosmosDBNoSqlVectorSearch: + """Create an AzureCosmosDBNoSqlVectorSearch vectorstore from raw texts. + + Args: + texts: the texts to insert. + embedding: the embedding function to use in the store. + metadatas: metadata dicts for the texts. + **kwargs: you can pass any argument that you would + to :meth:`~add_texts` and/or to the 'AstraDB' constructor + (see these methods for details). These arguments will be + routed to the respective methods as they are. + + Returns: + an `AzureCosmosDBNoSqlVectorSearch` vectorstore. 
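+
+        Example:
+            Illustrative sketch only. The endpoint, key, and policy contents
+            below are placeholders and must match your Cosmos DB account and
+            your embedding model's dimensions:
+
+            .. code-block:: python
+
+                from azure.cosmos import CosmosClient, PartitionKey
+                from langchain_openai import OpenAIEmbeddings
+
+                client = CosmosClient(
+                    "https://<account>.documents.azure.com:443/",
+                    credential="<key>",
+                )
+                store = AzureCosmosDBNoSqlVectorSearch.from_texts(
+                    ["foo", "bar"],
+                    OpenAIEmbeddings(),
+                    cosmos_client=client,
+                    vector_embedding_policy={
+                        "vectorEmbeddings": [
+                            {
+                                "path": "/embedding",
+                                "dataType": "float32",
+                                "distanceFunction": "cosine",
+                                "dimensions": 1536,
+                            }
+                        ]
+                    },
+                    indexing_policy={
+                        "vectorIndexes": [
+                            {"path": "/embedding", "type": "quantizedFlat"}
+                        ]
+                    },
+                    cosmos_container_properties={
+                        "partition_key": PartitionKey(path="/id")
+                    },
+                    cosmos_database_properties={},
+                )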
+ """ + vectorstore = AzureCosmosDBNoSqlVectorSearch._from_kwargs(embedding, **kwargs) + vectorstore.add_texts( + texts=texts, + metadatas=metadatas, + ) + return vectorstore + + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + if ids is None: + raise ValueError("No document ids provided to delete.") + + for document_id in ids: + self._container.delete_item(document_id) + return True + + def delete_document_by_id(self, document_id: Optional[str] = None) -> None: + """Removes a Specific Document by id + + Args: + document_id: The document identifier + """ + if document_id is None: + raise ValueError("No document ids provided to delete.") + self._container.delete_item(document_id, partition_key=document_id) + + def _similarity_search_with_score( + self, + embeddings: List[float], + k: int = 4, + ) -> List[Tuple[Document, float]]: + query = ( + "SELECT TOP {} c.id, c.{}, c.text, VectorDistance(c.{}, {}) AS " + "SimilarityScore FROM c ORDER BY VectorDistance(c.{}, {})".format( + k, + self._embedding_key, + self._embedding_key, + embeddings, + self._embedding_key, + embeddings, + ) + ) + docs_and_scores = [] + items = list( + self._container.query_items(query=query, enable_cross_partition_query=True) + ) + for item in items: + text = item["text"] + score = item["SimilarityScore"] + docs_and_scores.append((Document(page_content=text, metadata=item), score)) + return docs_and_scores + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + ) -> List[Tuple[Document, float]]: + embeddings = self._embedding.embed_query(query) + docs_and_scores = self._similarity_search_with_score(embeddings=embeddings, k=k) + return docs_and_scores + + def similarity_search( + self, query: str, k: int = 4, **kwargs: Any + ) -> List[Document]: + docs_and_scores = self.similarity_search_with_score(query, k=k) + + return [doc for doc, _ in docs_and_scores] + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + # Retrieves the docs with similarity scores + docs = self._similarity_search_with_score(embeddings=embedding, k=fetch_k) + + # Re-ranks the docs using MMR + mmr_doc_indexes = maximal_marginal_relevance( + np.array(embedding), + [doc.metadata[self._embedding_key] for doc, _ in docs], + k=k, + lambda_mult=lambda_mult, + ) + + mmr_docs = [docs[i][0] for i in mmr_doc_indexes] + return mmr_docs + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + **kwargs: Any, + ) -> List[Document]: + # compute the embeddings vector from the query string + embeddings = self._embedding.embed_query(query) + + docs = self.max_marginal_relevance_search_by_vector( + embeddings, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + ) + return docs diff --git a/libs/community/langchain_community/vectorstores/manticore_search.py b/libs/community/langchain_community/vectorstores/manticore_search.py new file mode 100644 index 0000000000000..edafb8bebdbd9 --- /dev/null +++ b/libs/community/langchain_community/vectorstores/manticore_search.py @@ -0,0 +1,372 @@ +from __future__ import annotations + +import json +import logging +import uuid +from hashlib import sha1 +from typing import Any, Dict, Iterable, List, Optional, Type + +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.pydantic_v1 import BaseSettings +from 
langchain_core.vectorstores import VectorStore + +logger = logging.getLogger() +DEFAULT_K = 4 # Number of Documents to return. + + +class ManticoreSearchSettings(BaseSettings): + proto: str = "http" + host: str = "localhost" + port: int = 9308 + + username: Optional[str] = None + password: Optional[str] = None + + # database: str = "Manticore" + table: str = "langchain" + + column_map: Dict[str, str] = { + "id": "id", + "uuid": "uuid", + "document": "document", + "embedding": "embedding", + "metadata": "metadata", + } + + # A mandatory setting; currently, only hnsw is supported. + knn_type: str = "hnsw" + + # A mandatory setting that specifies the dimensions of the vectors being indexed. + knn_dims: Optional[int] = None # Defaults autodetect + + # A mandatory setting that specifies the distance function used by the HNSW index. + hnsw_similarity: str = "L2" # Acceptable values are: L2, IP, COSINE + + # An optional setting that defines the maximum amount of outgoing connections + # in the graph. + hnsw_m: int = 16 # The default is 16. + + # An optional setting that defines a construction time/accuracy trade-off. + hnsw_ef_construction = 100 + + def get_connection_string(self) -> str: + return self.proto + "://" + self.host + ":" + str(self.port) + + def __getitem__(self, item: str) -> Any: + return getattr(self, item) + + class Config: + env_file = ".env" + env_prefix = "manticore_" + env_file_encoding = "utf-8" + + +class ManticoreSearch(VectorStore): + """ + `ManticoreSearch Engine` vector store. + + To use, you should have the ``manticoresearch`` python package installed. + + Example: + .. code-block:: python + + from langchain_community.vectorstores import Manticore + from langchain_community.embeddings.openai import OpenAIEmbeddings + + embeddings = OpenAIEmbeddings() + vectorstore = ManticoreSearch(embeddings) + """ + + def __init__( + self, + embedding: Embeddings, + *, + config: Optional[ManticoreSearchSettings] = None, + **kwargs: Any, + ) -> None: + """ + ManticoreSearch Wrapper to LangChain + + Args: + embedding (Embeddings): Text embedding model. + config (ManticoreSearchSettings): Configuration of ManticoreSearch Client + **kwargs: Other keyword arguments will pass into Configuration of API client + manticoresearch-python. See + https://github.com/manticoresoftware/manticoresearch-python for more. + """ + try: + import manticoresearch.api as ENDPOINTS + import manticoresearch.api_client as API + except ImportError: + raise ImportError( + "Could not import manticoresearch python package. " + "Please install it with `pip install manticoresearch-dev`." 
+ ) + + try: + from tqdm import tqdm + + self.pgbar = tqdm + except ImportError: + # Just in case if tqdm is not installed + self.pgbar = lambda x, **kwargs: x + + super().__init__() + + self.embedding = embedding + if config is not None: + self.config = config + else: + self.config = ManticoreSearchSettings() + + assert self.config + assert self.config.host and self.config.port + assert ( + self.config.column_map + # and self.config.database + and self.config.table + ) + + assert ( + self.config.knn_type + # and self.config.knn_dims + # and self.config.hnsw_m + # and self.config.hnsw_ef_construction + and self.config.hnsw_similarity + ) + + for k in ["id", "embedding", "document", "metadata", "uuid"]: + assert k in self.config.column_map + + # Detect embeddings dimension + if self.config.knn_dims is None: + self.dim: int = len(self.embedding.embed_query("test")) + else: + self.dim = self.config.knn_dims + + # Initialize the schema + self.schema = f"""\ +CREATE TABLE IF NOT EXISTS {self.config.table}( + {self.config.column_map['id']} bigint, + {self.config.column_map['document']} text indexed stored, + {self.config.column_map['embedding']} \ + float_vector knn_type='{self.config.knn_type}' \ + knn_dims='{self.dim}' \ + hnsw_similarity='{self.config.hnsw_similarity}' \ + hnsw_m='{self.config.hnsw_m}' \ + hnsw_ef_construction='{self.config.hnsw_ef_construction}', + {self.config.column_map['metadata']} json, + {self.config.column_map['uuid']} text indexed stored +)\ +""" + + # Create a connection to ManticoreSearch + self.configuration = API.Configuration( + host=self.config.get_connection_string(), + username=self.config.username, + password=self.config.password, + # disabled_client_side_validations=",", + **kwargs, + ) + self.connection = API.ApiClient(self.configuration) + self.client = { + "index": ENDPOINTS.IndexApi(self.connection), + "utils": ENDPOINTS.UtilsApi(self.connection), + "search": ENDPOINTS.SearchApi(self.connection), + } + + # Create default schema if not exists + self.client["utils"].sql(self.schema) + + @property + def embeddings(self) -> Embeddings: + return self.embedding + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + *, + batch_size: int = 32, + text_ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """ + Insert more texts through the embeddings and add to the VectorStore. + + Args: + texts: Iterable of strings to add to the VectorStore + metadata: Optional column data to be inserted + batch_size: Batch size of insertion + ids: Optional list of ids to associate with the texts + + Returns: + List of ids from adding the texts into the VectorStore. 
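+
+        Example:
+            A minimal sketch; it assumes a running Manticore Search instance
+            reachable with the default settings and an embedding model:
+
+            .. code-block:: python
+
+                from langchain_openai import OpenAIEmbeddings
+
+                store = ManticoreSearch(OpenAIEmbeddings())
+                ids = store.add_texts(
+                    ["foo", "bar"],
+                    metadatas=[{"lang": "en"}, {"lang": "en"}],
+                )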
+ """ + # Embed and create the documents + ids = text_ids or [ + # See https://stackoverflow.com/questions/67219691/python-hash-function-that-returns-32-or-64-bits + str(int(sha1(t.encode("utf-8")).hexdigest()[:15], 16)) + for t in texts + ] + transac = [] + for i, text in enumerate(texts): + embed = self.embeddings.embed_query(text) + doc_uuid = str(uuid.uuid1()) + doc = { + self.config.column_map["document"]: text, + self.config.column_map["embedding"]: embed, + self.config.column_map["metadata"]: metadatas[i] if metadatas else {}, + self.config.column_map["uuid"]: doc_uuid, + } + transac.append( + {"replace": {"index": self.config.table, "id": ids[i], "doc": doc}} + ) + + if len(transac) == batch_size: + body = "\n".join(map(json.dumps, transac)) + try: + self.client["index"].bulk(body) + transac = [] + except Exception as e: + logger.info(f"Error indexing documents: {e}") + + if len(transac) > 0: + body = "\n".join(map(json.dumps, transac)) + try: + self.client["index"].bulk(body) + except Exception as e: + logger.info(f"Error indexing documents: {e}") + + return ids + + @classmethod + def from_texts( + cls: Type[ManticoreSearch], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[Dict[Any, Any]]] = None, + *, + config: Optional[ManticoreSearchSettings] = None, + text_ids: Optional[List[str]] = None, + batch_size: int = 32, + **kwargs: Any, + ) -> ManticoreSearch: + ctx = cls(embedding, config=config, **kwargs) + ctx.add_texts( + texts=texts, + embedding=embedding, + text_ids=text_ids, + batch_size=batch_size, + metadatas=metadatas, + **kwargs, + ) + return ctx + + @classmethod + def from_documents( + cls: Type[ManticoreSearch], + documents: List[Document], + embedding: Embeddings, + *, + config: Optional[ManticoreSearchSettings] = None, + text_ids: Optional[List[str]] = None, + batch_size: int = 32, + **kwargs: Any, + ) -> ManticoreSearch: + texts = [doc.page_content for doc in documents] + metadatas = [doc.metadata for doc in documents] + return cls.from_texts( + texts=texts, + embedding=embedding, + text_ids=text_ids, + batch_size=batch_size, + metadatas=metadatas, + **kwargs, + ) + + def __repr__(self) -> str: + """ + Text representation for ManticoreSearch Vector Store, prints backends, username + and schemas. Easy to use with `str(ManticoreSearch())` + + Returns: + repr: string to show connection info and data schema + """ + _repr = f"\033[92m\033[1m{self.config.table} @ " + _repr += f"http://{self.config.host}:{self.config.port}\033[0m\n\n" + _repr += f"\033[1musername: {self.config.username}\033[0m\n\nTable Schema:\n" + _repr += "-" * 51 + "\n" + for r in self.client["utils"].sql(f"DESCRIBE {self.config.table}")[0]["data"]: + _repr += ( + f"|\033[94m{r['Field']:24s}\033[0m|\033[" + f"96m{r['Type'] + ' ' + r['Properties']:24s}\033[0m|\n" + ) + _repr += "-" * 51 + "\n" + return _repr + + def similarity_search( + self, query: str, k: int = DEFAULT_K, **kwargs: Any + ) -> List[Document]: + """Perform a similarity search with ManticoreSearch + + Args: + query (str): query string + k (int, optional): Top K neighbors to retrieve. Defaults to 4. 
+
+        Returns:
+            List[Document]: List of Documents
+        """
+        return self.similarity_search_by_vector(
+            self.embedding.embed_query(query), k, **kwargs
+        )
+
+    def similarity_search_by_vector(
+        self,
+        embedding: List[float],
+        k: int = DEFAULT_K,
+        **kwargs: Any,
+    ) -> List[Document]:
+        """Perform a similarity search with ManticoreSearch by embedding vector
+
+        Args:
+            embedding (List[float]): Embedding vector
+            k (int, optional): Top K neighbors to retrieve. Defaults to 4.
+
+        Returns:
+            List[Document]: List of documents
+        """
+
+        # Build search request
+        request = {
+            "index": self.config.table,
+            "knn": {
+                "field": self.config.column_map["embedding"],
+                "k": k,
+                "query_vector": embedding,
+            },
+        }
+
+        # Execute request and convert response to langchain.Document format
+        try:
+            return [
+                Document(
+                    page_content=r["_source"][self.config.column_map["document"]],
+                    metadata=r["_source"][self.config.column_map["metadata"]],
+                )
+                for r in self.client["search"].search(request, **kwargs).hits.hits[:k]
+            ]
+        except Exception as e:
+            logger.error(f"\033[91m\033[1m{type(e)}\033[0m \033[95m{str(e)}\033[0m")
+            return []
+
+    def drop(self) -> None:
+        """
+        Helper function: drop the table and all of its data.
+        """
+        self.client["utils"].sql(f"DROP TABLE IF EXISTS {self.config.table}")
+
+    @property
+    def metadata_column(self) -> str:
+        return self.config.column_map["metadata"]
diff --git a/libs/community/langchain_community/vectorstores/zep_cloud.py b/libs/community/langchain_community/vectorstores/zep_cloud.py
new file mode 100644
index 0000000000000..052340e4fcd3d
--- /dev/null
+++ b/libs/community/langchain_community/vectorstores/zep_cloud.py
@@ -0,0 +1,477 @@
+from __future__ import annotations
+
+import logging
+import warnings
+from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple
+
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.vectorstores import VectorStore
+
+if TYPE_CHECKING:
+    from zep_cloud import CreateDocumentRequest, DocumentCollectionResponse, SearchType
+
+logger = logging.getLogger()
+
+
+class ZepCloudVectorStore(VectorStore):
+    """`Zep` vector store.
+
+    It provides methods for adding texts or documents to the store,
+    searching for similar documents, and deleting documents.
+
+    Search scores are calculated using cosine similarity normalized to [0, 1].
+
+    Args:
+        collection_name (str): The name of the collection in the Zep store.
+        api_key (str): The API key for the Zep API.
+    """
+
+    def __init__(
+        self,
+        collection_name: str,
+        api_key: str,
+    ) -> None:
+        super().__init__()
+        if not collection_name:
+            raise ValueError(
+                "collection_name must be specified when using ZepCloudVectorStore."
+            )
+        try:
+            from zep_cloud.client import AsyncZep, Zep
+        except ImportError:
+            raise ImportError(
+                "Could not import zep-cloud python package. "
+                "Please install it with `pip install zep-cloud`."
+            )
+        self._client = Zep(api_key=api_key)
+        self._client_async = AsyncZep(api_key=api_key)
+
+        self.collection_name = collection_name
+
+        self._load_collection()
+
+    @property
+    def embeddings(self) -> Optional[Embeddings]:
+        """Unavailable for ZepCloud"""
+        return None
+
+    def _load_collection(self) -> DocumentCollectionResponse:
+        """
+        Load the collection from the Zep backend.
+        """
+        from zep_cloud import NotFoundError
+
+        try:
+            collection = self._client.document.get_collection(self.collection_name)
+        except NotFoundError:
+            logger.info(
+                f"Collection {self.collection_name} not found. Creating new collection."
+ ) + collection = self._create_collection() + + return collection + + def _create_collection(self) -> DocumentCollectionResponse: + """ + Create a new collection in the Zep backend. + """ + self._client.document.add_collection(self.collection_name) + collection = self._client.document.get_collection(self.collection_name) + return collection + + def _generate_documents_to_add( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[Any, Any]]] = None, + document_ids: Optional[List[str]] = None, + ) -> List[CreateDocumentRequest]: + from zep_cloud import CreateDocumentRequest as ZepDocument + + documents: List[ZepDocument] = [] + for i, d in enumerate(texts): + documents.append( + ZepDocument( + content=d, + metadata=metadatas[i] if metadatas else None, + document_id=document_ids[i] if document_ids else None, + ) + ) + return documents + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + document_ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore. + + Args: + texts: Iterable of strings to add to the vectorstore. + metadatas: Optional list of metadatas associated with the texts. + document_ids: Optional list of document ids associated with the texts. + kwargs: vectorstore specific parameters + + Returns: + List of ids from adding the texts into the vectorstore. + """ + + documents = self._generate_documents_to_add(texts, metadatas, document_ids) + uuids = self._client.document.add_documents( + self.collection_name, request=documents + ) + + return uuids + + async def aadd_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + document_ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Run more texts through the embeddings and add to the vectorstore.""" + documents = self._generate_documents_to_add(texts, metadatas, document_ids) + uuids = await self._client_async.document.add_documents( + self.collection_name, request=documents + ) + + return uuids + + def search( + self, + query: str, + search_type: SearchType, + metadata: Optional[Dict[str, Any]] = None, + k: int = 3, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to query using specified search type.""" + if search_type == "similarity": + return self.similarity_search(query, k=k, metadata=metadata, **kwargs) + elif search_type == "mmr": + return self.max_marginal_relevance_search( + query, k=k, metadata=metadata, **kwargs + ) + else: + raise ValueError( + f"search_type of {search_type} not allowed. Expected " + "search_type to be 'similarity' or 'mmr'." + ) + + async def asearch( + self, + query: str, + search_type: str, + metadata: Optional[Dict[str, Any]] = None, + k: int = 3, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to query using specified search type.""" + if search_type == "similarity": + return await self.asimilarity_search( + query, k=k, metadata=metadata, **kwargs + ) + elif search_type == "mmr": + return await self.amax_marginal_relevance_search( + query, k=k, metadata=metadata, **kwargs + ) + else: + raise ValueError( + f"search_type of {search_type} not allowed. Expected " + "search_type to be 'similarity' or 'mmr'." 
+ ) + + def similarity_search( + self, + query: str, + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to query.""" + + results = self._similarity_search_with_relevance_scores( + query, k=k, metadata=metadata, **kwargs + ) + return [doc for doc, _ in results] + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Run similarity search with distance.""" + + return self._similarity_search_with_relevance_scores( + query, k=k, metadata=metadata, **kwargs + ) + + def _similarity_search_with_relevance_scores( + self, + query: str, + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """ + Default similarity search with relevance scores. Modify if necessary + in subclass. + Return docs and relevance scores in the range [0, 1]. + + 0 is dissimilar, 1 is most similar. + + Args: + query: input text + k: Number of Documents to return. Defaults to 4. + metadata: Optional, metadata filter + **kwargs: kwargs to be passed to similarity search. Should include: + score_threshold: Optional, a floating point value between 0 to 1 and + filter the resulting set of retrieved docs + + Returns: + List of Tuples of (doc, similarity_score) + """ + + results = self._client.document.search( + collection_name=self.collection_name, + text=query, + limit=k, + metadata=metadata, + **kwargs, + ) + + return [ + ( + Document( + page_content=str(doc.content), + metadata=doc.metadata, + ), + doc.score or 0.0, + ) + for doc in results.results or [] + ] + + async def asimilarity_search_with_relevance_scores( + self, + query: str, + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return docs most similar to query.""" + + results = await self._client_async.document.search( + collection_name=self.collection_name, + text=query, + limit=k, + metadata=metadata, + **kwargs, + ) + + return [ + ( + Document( + page_content=str(doc.content), + metadata=doc.metadata, + ), + doc.score or 0.0, + ) + for doc in results.results or [] + ] + + async def asimilarity_search( + self, + query: str, + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs most similar to query.""" + + results = await self.asimilarity_search_with_relevance_scores( + query, k, metadata=metadata, **kwargs + ) + + return [doc for doc, _ in results] + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Unsupported in Zep Cloud""" + warnings.warn("similarity_search_by_vector is not supported in Zep Cloud") + return [] + + async def asimilarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Unsupported in Zep Cloud""" + warnings.warn("asimilarity_search_by_vector is not supported in Zep Cloud") + return [] + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance. 
+ + Maximal marginal relevance optimizes for similarity to query AND diversity + among selected documents. + + Args: + query: Text to look up documents similar to. + k: Number of Documents to return. Defaults to 4. + fetch_k: Number of Documents to fetch to pass to MMR algorithm. + Zep determines this automatically and this parameter is + ignored. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5. + metadata: Optional, metadata to filter the resulting set of retrieved docs + Returns: + List of Documents selected by maximal marginal relevance. + """ + + results = self._client.document.search( + collection_name=self.collection_name, + text=query, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, + ) + + return [ + Document(page_content=str(d.content), metadata=d.metadata) + for d in results.results or [] + ] + + async def amax_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Return docs selected using the maximal marginal relevance.""" + + results = await self._client_async.document.search( + collection_name=self.collection_name, + text=query, + limit=k, + metadata=metadata, + search_type="mmr", + mmr_lambda=lambda_mult, + **kwargs, + ) + + return [ + Document(page_content=str(d.content), metadata=d.metadata) + for d in results.results or [] + ] + + def max_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Unsupported in Zep Cloud""" + warnings.warn( + "max_marginal_relevance_search_by_vector is not supported in Zep Cloud" + ) + return [] + + async def amax_marginal_relevance_search_by_vector( + self, + embedding: List[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + metadata: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[Document]: + """Unsupported in Zep Cloud""" + warnings.warn( + "amax_marginal_relevance_search_by_vector is not supported in Zep Cloud" + ) + return [] + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + collection_name: str = "", + api_key: Optional[str] = None, + **kwargs: Any, + ) -> ZepCloudVectorStore: + """ + Class method that returns a ZepVectorStore instance initialized from texts. + + If the collection does not exist, it will be created. + + Args: + texts (List[str]): The list of texts to add to the vectorstore. + metadatas (Optional[List[Dict[str, Any]]]): Optional list of metadata + associated with the texts. + collection_name (str): The name of the collection in the Zep store. + api_key (str): The API key for the Zep API. + **kwargs: Additional parameters specific to the vectorstore. + + Returns: + ZepVectorStore: An instance of ZepVectorStore. + """ + if not api_key: + raise ValueError("api_key must be specified when using ZepVectorStore.") + vecstore = cls( + collection_name=collection_name, + api_key=api_key, + ) + vecstore.add_texts(texts, metadatas) + return vecstore + + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: + """Delete by Zep vector UUIDs. 
+ + Parameters + ---------- + ids : Optional[List[str]] + The UUIDs of the vectors to delete. + + Raises + ------ + ValueError + If no UUIDs are provided. + """ + + if ids is None or len(ids) == 0: + raise ValueError("No uuids provided to delete.") + + for u in ids: + self._client.document.delete_document(self.collection_name, u) diff --git a/libs/community/pyproject.toml b/libs/community/pyproject.toml index 29476ab303c2f..cc0a456b55d8b 100644 --- a/libs/community/pyproject.toml +++ b/libs/community/pyproject.toml @@ -1,116 +1,29 @@ [tool.poetry] name = "gigachain-community" -version = "0.2.0" -description = "Community contributed gigachain integrations." +version = "0.2.6" +description = "Community contributed Gigachain integrations." authors = [] license = "MIT" readme = "README.md" repository = "https://github.com/langchain-ai/langchain" -packages = [ - {include = "langchain_community"} -] [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = "^0.2.0" -gigachain = "^0.2.0" +gigachain-core = "^0.2.10" +gigachain = "^0.2.6" SQLAlchemy = ">=1.4,<3" requests = "^2" PyYAML = ">=5.3" -numpy = "^1" aiohttp = "^3.8.3" -tenacity = "^8.1.0" +tenacity = "^8.1.0,!=8.4.0" dataclasses-json = ">= 0.5.7, < 0.7" langsmith = "^0.1.0" -gigachat = "^0.1.29" -tqdm = {version = ">=4.48.0", optional = true} -openapi-pydantic = {version = "^0.3.2", optional = true} -faiss-cpu = {version = "^1", optional = true} -beautifulsoup4 = {version = "^4", optional = true} -jinja2 = {version = "^3", optional = true} -cohere = {version = "^4", optional = true} -openai = {version = "<2", optional = true} -arxiv = {version = "^1.4", optional = true} -pypdf = {version = "^3.4.0", optional = true} -aleph-alpha-client = {version="^2.15.0", optional = true} -gradientai = {version="^1.4.0", optional = true} -pgvector = {version = "^0.1.6", optional = true} -atlassian-python-api = {version = "^3.36.0", optional=true} -html2text = {version="^2020.1.16", optional=true} -numexpr = {version="^2.8.6", optional=true} -jq = {version = "^1.4.1", optional = true} -pdfminer-six = {version = "^20221105", optional = true} -lxml = {version = ">=4.9.3,<6.0", optional = true} -pymupdf = {version = "^1.22.3", optional = true} -rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"} -pypdfium2 = {version = "^4.10.0", optional = true} -gql = {version = "^3.4.1", optional = true} -pandas = {version = "^2.0.1", optional = true} -telethon = {version = "^1.28.5", optional = true} -chardet = {version="^5.1.0", optional=true} -requests-toolbelt = {version = "^1.0.0", optional = true} -scikit-learn = {version = "^1.2.2", optional = true} -py-trello = {version = "^0.19.0", optional = true} -bibtexparser = {version = "^1.4.0", optional = true} -pyspark = {version = "^3.4.0", optional = true} -mwparserfromhell = {version = "^0.6.4", optional = true} -mwxml = {version = "^0.3.3", optional = true} -esprima = {version = "^4.0.1", optional = true} -streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"} -psychicapi = {version = "^0.8.0", optional = true} -cassio = {version = "^0.1.6", optional = true} -sympy = {version = "^1.12", optional = true} -rapidfuzz = {version = "^3.1.1", optional = true} -jsonschema = {version = ">1", optional = true} -rank-bm25 = {version = "^0.2.2", optional = true} -geopandas = {version = "^0.13.1", optional = true} -gitpython = {version = "^3.1.32", optional = true} -feedparser = {version = "^6.0.10", optional = true} -newspaper3k = {version = 
"^0.2.8", optional = true} -xata = {version = "^1.0.0a7", optional = true} -xmltodict = {version = "^0.13.0", optional = true} -markdownify = {version = "^0.11.6", optional = true} -assemblyai = {version = "^0.17.0", optional = true} -sqlite-vss = {version = "^0.1.2", optional = true} -motor = {version = "^3.3.1", optional = true} -timescale-vector = {version = "^0.0.1", optional = true} -typer = {version= "^0.9.0", optional = true} -anthropic = {version = "^0.3.11", optional = true} -aiosqlite = {version = "^0.19.0", optional = true} -rspace_client = {version = "^2.5.0", optional = true} -upstash-redis = {version = "^0.15.0", optional = true} -google-cloud-documentai = {version = "^2.20.1", optional = true} -fireworks-ai = {version = "^0.9.0", optional = true} -javelin-sdk = {version = "^0.1.8", optional = true} -hologres-vector = {version = "^0.0.6", optional = true} -praw = {version = "^7.7.1", optional = true} -msal = {version = "^1.25.0", optional = true} -databricks-vectorsearch = {version = "^0.21", optional = true} -cloudpickle = {version = ">=2.0.0", optional = true} -dgml-utils = {version = "^0.3.0", optional = true} -datasets = {version = "^2.15.0", optional = true} -tree-sitter = {version = "^0.20.2", optional = true} -tree-sitter-languages = {version = "^1.8.0", optional = true} -azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true} -oracle-ads = {version = "^2.9.1", optional = true} -httpx = {version = "^0.24.1", optional = true} -elasticsearch = {version = "^8.12.0", optional = true} -hdbcli = {version = "^2.19.21", optional = true} -oci = {version = "^2.119.1", optional = true} -rdflib = {version = "7.0.0", optional = true} -nvidia-riva-client = {version = "^2.14.0", optional = true} -azure-search-documents = {version = "11.4.0", optional = true} -azure-identity = {version = "^1.15.0", optional = true} -tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true} -friendli-client = {version = "^1.2.4", optional = true} -premai = {version = "^0.3.25", optional = true} -vdms = {version = "^0.0.20", optional = true} -httpx-sse = {version = "^0.4.0", optional = true} -pyjwt = {version = "^2.8.0", optional = true} -oracledb = {version = "^2.2.0", optional = true} -httplib2 = {version = "^0.22.0"} -google-auth-httplib2 = {version = "^0.2.0"} -parsel = {version = "^1.9.1", optional = true} + +# Support Python 3.8 and 3.12+. +numpy = [ + { version = "^1", python = "<3.12" }, + { version = "^1.26.0", python = ">=3.12" }, +] [tool.poetry.group.test] optional = true @@ -129,12 +42,12 @@ responses = "^0.22.0" pytest-asyncio = "^0.20.3" lark = "^1.1.5" pandas = "^2.0.0" -pytest-mock = "^3.10.0" +pytest-mock = "^3.10.0" pytest-socket = "^0.6.0" syrupy = "^4.0.2" requests-mock = "^1.11.0" -gigachain-core = {path = "../core", develop = true} -gigachain = {path = "../langchain", develop = true} +gigachain-core = { path = "../core", develop = true } +gigachain = { path = "../langchain", develop = true } [tool.poetry.group.codespell] optional = true @@ -147,21 +60,10 @@ optional = true [tool.poetry.group.test_integration.dependencies] # Do not add dependencies in the test_integration group -# Instead: -# 1. Add an optional dependency to the main group -# poetry add --optional [package name] -# 2. Add the package name to the extended_testing extra (find it below) -# 3. Relock the poetry file -# poetry lock --no-update -# 4. Favor unit tests not integration tests. -# Use the @pytest.mark.requires(pkg_name) decorator in unit_tests. 
-# Your tests should not rely on network access, as it prevents other -# developers from being able to easily run them. -# Instead write unit tests that use the `responses` library or mock.patch with -# fixtures. Keep the fixtures minimal. -# See Contributing Guide for more instructions on working with optional dependencies. +# Instead read the following link: # https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies pytest-vcr = "^1.0.2" +vcrpy = "^6" wrapt = "^1.15.0" openai = "^1" python-dotenv = "^1.0.0" @@ -169,7 +71,7 @@ cassio = "^0.1.6" tiktoken = ">=0.3.2,<0.6.0" anthropic = "^0.3.11" gigachain-core = { path = "../core", develop = true } -gigachain = {path = "../langchain", develop = true} +gigachain = { path = "../langchain", develop = true } fireworks-ai = "^0.9.0" vdms = "^0.0.20" exllamav2 = "^0.0.18" @@ -189,9 +91,9 @@ types-pytz = "^2023.3.0.0" types-chardet = "^5.0.4.6" types-redis = "^4.3.21.6" mypy-protobuf = "^3.0.0" -gigachain-core = {path = "../core", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} -gigachain = {path = "../langchain", develop = true} +gigachain-core = { path = "../core", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } +gigachain = { path = "../langchain", develop = true } [tool.poetry.group.dev] optional = true @@ -199,102 +101,7 @@ optional = true [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" setuptools = "^67.6.1" -gigachain-core = {path = "../core", develop = true} - -[tool.poetry.extras] -cli = ["typer"] - -# An extra used to be able to add extended testing. -# Please use new-line on formatting to make it easier to add new packages without -# merge-conflicts -extended_testing = [ - "aleph-alpha-client", - "aiosqlite", - "assemblyai", - "beautifulsoup4", - "bibtexparser", - "cassio", - "chardet", - "datasets", - "google-cloud-documentai", - "esprima", - "jq", - "pdfminer-six", - "pgvector", - "pypdf", - "pymupdf", - "pypdfium2", - "tqdm", - "lxml", - "atlassian-python-api", - "mwparserfromhell", - "mwxml", - "msal", - "pandas", - "telethon", - "psychicapi", - "gql", - "gradientai", - "requests-toolbelt", - "html2text", - "numexpr", - "py-trello", - "scikit-learn", - "streamlit", - "pyspark", - "openai", - "sympy", - "rapidfuzz", - "jsonschema", - "rank-bm25", - "geopandas", - "jinja2", - "gitpython", - "newspaper3k", - "nvidia-riva-client", - "feedparser", - "xata", - "xmltodict", - "faiss-cpu", - "openapi-pydantic", - "markdownify", - "arxiv", - "sqlite-vss", - "rapidocr-onnxruntime", - "motor", - "timescale-vector", - "anthropic", - "upstash-redis", - "rspace_client", - "fireworks-ai", - "javelin-sdk", - "hologres-vector", - "praw", - "databricks-vectorsearch", - "cloudpickle", - "dgml-utils", - "cohere", - "tree-sitter", - "tree-sitter-languages", - "azure-ai-documentintelligence", - "oracle-ads", - "httpx", - "elasticsearch", - "hdbcli", - "oci", - "rdflib", - "azure-search-documents", - "azure-identity", - "tidb-vector", - "cloudpickle", - "friendli-client", - "premai", - "vdms", - "httpx-sse", - "pyjwt", - "oracledb", - "parsel", -] +gigachain-core = { path = "../core", develop = true } [tool.ruff] exclude = [ @@ -304,9 +111,9 @@ exclude = [ [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "I", # isort + "E", # pycodestyle + "F", # pyflakes + "I", # isort "T201", # print ] @@ -316,9 +123,7 @@ disallow_untyped_defs = "True" exclude = ["notebooks", "examples", "example_data"] [tool.coverage.run] -omit 
= [ - "tests/*", -] +omit = ["tests/*"] [build-system] requires = ["poetry-core>=1.0.0"] @@ -340,7 +145,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused markers = [ "requires: mark tests as requiring a specific library", "scheduled: mark tests to run in scheduled testing", - "compile: mark placeholder test used to compile integration tests without running them" + "compile: mark placeholder test used to compile integration tests without running them", ] asyncio_mode = "auto" @@ -351,4 +156,4 @@ ignore-regex = '.*(Stati Uniti|Tense=Pres).*' # whats is a typo but used frequently in queries so kept as is # aapply - async apply # unsecure - typo but part of API, decided to not bother for now -ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin' +ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin,cann' diff --git a/libs/community/scripts/check_pickle.sh b/libs/community/scripts/check_pickle.sh new file mode 100755 index 0000000000000..036ff406173d3 --- /dev/null +++ b/libs/community/scripts/check_pickle.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This checks for usage of pickle in the package. +# +# Usage: ./scripts/check_pickle.sh /path/to/repository +# +# Check if a path argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 /path/to/repository" + exit 1 +fi + +repository_path="$1" + +# Search for lines matching the pattern within the specified repository +result=$(git -C "$repository_path" grep -E 'pickle.load\(|pickle.loads\(' | grep -v '# ignore\[pickle\]: explicit-opt-in') + +# Check if any matching lines were found +if [ -n "$result" ]; then + echo "ERROR: The following lines need to be updated:" + echo "$result" + echo "Please avoid using pickle or cloudpickle." + echo "If you must, then add:" + echo "1. A security notice (scan the code for examples)" + echo "2. Code path should be opt-in." 
+ exit 1 +fi diff --git a/libs/community/tests/integration_tests/chat_models/test_snowflake.py b/libs/community/tests/integration_tests/chat_models/test_snowflake.py new file mode 100644 index 0000000000000..f3ba87fb3537c --- /dev/null +++ b/libs/community/tests/integration_tests/chat_models/test_snowflake.py @@ -0,0 +1,59 @@ +"""Test ChatSnowflakeCortex +Note: This test must be run with the following environment variables set: + SNOWFLAKE_ACCOUNT="YOUR_SNOWFLAKE_ACCOUNT", + SNOWFLAKE_USERNAME="YOUR_SNOWFLAKE_USERNAME", + SNOWFLAKE_PASSWORD="YOUR_SNOWFLAKE_PASSWORD", + SNOWFLAKE_DATABASE="YOUR_SNOWFLAKE_DATABASE", + SNOWFLAKE_SCHEMA="YOUR_SNOWFLAKE_SCHEMA", + SNOWFLAKE_WAREHOUSE="YOUR_SNOWFLAKE_WAREHOUSE" + SNOWFLAKE_ROLE="YOUR_SNOWFLAKE_ROLE", +""" + +import pytest +from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage +from langchain_core.outputs import ChatGeneration, LLMResult + +from langchain_community.chat_models import ChatSnowflakeCortex + + +@pytest.fixture +def chat() -> ChatSnowflakeCortex: + return ChatSnowflakeCortex() + + +def test_chat_snowflake_cortex(chat: ChatSnowflakeCortex) -> None: + """Test ChatSnowflakeCortex.""" + message = HumanMessage(content="Hello") + response = chat([message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_chat_snowflake_cortex_system_message(chat: ChatSnowflakeCortex) -> None: + """Test ChatSnowflakeCortex for system message""" + system_message = SystemMessage(content="You are to chat with the user.") + human_message = HumanMessage(content="Hello") + response = chat([system_message, human_message]) + assert isinstance(response, BaseMessage) + assert isinstance(response.content, str) + + +def test_chat_snowflake_cortex_model() -> None: + """Test ChatSnowflakeCortex handles model_name.""" + chat = ChatSnowflakeCortex( + model="foo", + ) + assert chat.model == "foo" + + +def test_chat_snowflake_cortex_generate(chat: ChatSnowflakeCortex) -> None: + """Test ChatSnowflakeCortex with generate.""" + message = HumanMessage(content="Hello") + response = chat.generate([[message], [message]]) + assert isinstance(response, LLMResult) + assert len(response.generations) == 2 + for generations in response.generations: + for generation in generations: + assert isinstance(generation, ChatGeneration) + assert isinstance(generation.text, str) + assert generation.text == generation.message.content diff --git a/libs/community/tests/integration_tests/document_compressors/__init__.py b/libs/community/tests/integration_tests/document_compressors/__init__.py new file mode 100644 index 0000000000000..7b0197f593959 --- /dev/null +++ b/libs/community/tests/integration_tests/document_compressors/__init__.py @@ -0,0 +1 @@ +"""Test document compressor integrations.""" diff --git a/libs/community/tests/integration_tests/document_compressors/test_dashscope_rerank.py b/libs/community/tests/integration_tests/document_compressors/test_dashscope_rerank.py new file mode 100644 index 0000000000000..8d54cae5f4980 --- /dev/null +++ b/libs/community/tests/integration_tests/document_compressors/test_dashscope_rerank.py @@ -0,0 +1,24 @@ +from langchain_core.documents import Document + +from langchain_community.document_compressors.dashscope_rerank import ( + DashScopeRerank, +) + + +def test_rerank() -> None: + reranker = DashScopeRerank(api_key=None) + docs = [ + Document(page_content="量子计算是计算科学的一个前沿领域"), + Document(page_content="预训练语言模型的发展给文本排序模型带来了新的进展"), + Document( + 
page_content="文本排序模型广泛用于搜索引擎和推荐系统中,它们根据文本相关性对候选文本进行排序" + ), + Document(page_content="random text for nothing"), + ] + compressed = reranker.compress_documents( + query="什么是文本排序模型", + documents=docs, + ) + + assert len(compressed) == 3, "default top_n is 3" + assert compressed[0].page_content == docs[2].page_content, "rerank works" diff --git a/libs/community/tests/integration_tests/document_compressors/test_rankllm_rerank.py b/libs/community/tests/integration_tests/document_compressors/test_rankllm_rerank.py new file mode 100644 index 0000000000000..46cb8b81be64c --- /dev/null +++ b/libs/community/tests/integration_tests/document_compressors/test_rankllm_rerank.py @@ -0,0 +1,8 @@ +"""Test rankllm reranker.""" + +from langchain_community.document_compressors.rankllm_rerank import RankLLMRerank + + +def test_rankllm_reranker_init() -> None: + """Test the RankLLM reranker initializes correctly.""" + RankLLMRerank() diff --git a/libs/community/tests/integration_tests/document_compressors/test_volcengine_rerank.py b/libs/community/tests/integration_tests/document_compressors/test_volcengine_rerank.py new file mode 100644 index 0000000000000..0f830e83f337a --- /dev/null +++ b/libs/community/tests/integration_tests/document_compressors/test_volcengine_rerank.py @@ -0,0 +1,24 @@ +from langchain_core.documents import Document + +from langchain_community.document_compressors.volcengine_rerank import ( + VolcengineRerank, +) + + +def test_rerank() -> None: + reranker = VolcengineRerank() + docs = [ + Document(page_content="量子计算是计算科学的一个前沿领域"), + Document(page_content="预训练语言模型的发展给文本排序模型带来了新的进展"), + Document( + page_content="文本排序模型广泛用于搜索引擎和推荐系统中,它们根据文本相关性对候选文本进行排序" + ), + Document(page_content="random text for nothing"), + ] + compressed = reranker.compress_documents( + query="什么是文本排序模型", + documents=docs, + ) + + assert len(compressed) == 3, "default top_n is 3" + assert compressed[0].page_content == docs[2].page_content, "rerank works" diff --git a/libs/community/tests/integration_tests/embeddings/test_ipex_llm.py b/libs/community/tests/integration_tests/embeddings/test_ipex_llm.py new file mode 100644 index 0000000000000..30a7c96d70047 --- /dev/null +++ b/libs/community/tests/integration_tests/embeddings/test_ipex_llm.py @@ -0,0 +1,52 @@ +"""Test IPEX LLM""" + +import os + +import pytest + +from langchain_community.embeddings import IpexLLMBgeEmbeddings + +model_ids_to_test = os.getenv("TEST_IPEXLLM_BGE_EMBEDDING_MODEL_IDS") or "" +skip_if_no_model_ids = pytest.mark.skipif( + not model_ids_to_test, + reason="TEST_IPEXLLM_BGE_EMBEDDING_MODEL_IDS environment variable not set.", +) +model_ids_to_test = [model_id.strip() for model_id in model_ids_to_test.split(",")] # type: ignore + +device = os.getenv("TEST_IPEXLLM_BGE_EMBEDDING_MODEL_DEVICE") or "cpu" + +sentence = "IPEX-LLM is a PyTorch library for running LLM on Intel CPU and GPU (e.g., \ +local PC with iGPU, discrete GPU such as Arc, Flex and Max) with very low latency." +query = "What is IPEX-LLM?" 
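+
+# Usage sketch (comments only, not executed by these tests): with a locally available
+# BGE model id (for example "BAAI/bge-small-en-v1.5", a hypothetical id here; real runs
+# take the ids from TEST_IPEXLLM_BGE_EMBEDDING_MODEL_IDS above), the embeddings are
+# constructed the same way the tests below do:
+#
+#     embedder = IpexLLMBgeEmbeddings(
+#         model_name="BAAI/bge-small-en-v1.5",
+#         model_kwargs={"device": device},
+#         encode_kwargs={"normalize_embeddings": True},
+#     )
+#     doc_vectors = embedder.embed_documents([sentence, query])
+#     query_vector = embedder.embed_query(query)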
+ + +@skip_if_no_model_ids +@pytest.mark.parametrize( + "model_id", + model_ids_to_test, +) +def test_embed_documents(model_id: str) -> None: + """Test IpexLLMBgeEmbeddings embed_documents""" + embedding_model = IpexLLMBgeEmbeddings( + model_name=model_id, + model_kwargs={"device": device}, + encode_kwargs={"normalize_embeddings": True}, + ) + output = embedding_model.embed_documents([sentence, query]) + assert len(output) == 2 + + +@skip_if_no_model_ids +@pytest.mark.parametrize( + "model_id", + model_ids_to_test, +) +def test_embed_query(model_id: str) -> None: + """Test IpexLLMBgeEmbeddings embed_documents""" + embedding_model = IpexLLMBgeEmbeddings( + model_name=model_id, + model_kwargs={"device": device}, + encode_kwargs={"normalize_embeddings": True}, + ) + output = embedding_model.embed_query(query) + assert isinstance(output, list) diff --git a/libs/community/tests/integration_tests/embeddings/test_zhipuai.py b/libs/community/tests/integration_tests/embeddings/test_zhipuai.py new file mode 100644 index 0000000000000..57ce6c19c9cd4 --- /dev/null +++ b/libs/community/tests/integration_tests/embeddings/test_zhipuai.py @@ -0,0 +1,19 @@ +"""Test ZhipuAI Text Embedding.""" +from langchain_community.embeddings.zhipuai import ZhipuAIEmbeddings + + +def test_zhipuai_embedding_documents() -> None: + """Test ZhipuAI Text Embedding for documents.""" + documents = ["This is a test query1.", "This is a test query2."] + embedding = ZhipuAIEmbeddings() # type: ignore[call-arg] + res = embedding.embed_documents(documents) + assert len(res) == 2 # type: ignore[arg-type] + assert len(res[0]) == 1024 # type: ignore[index] + + +def test_zhipuai_embedding_query() -> None: + """Test ZhipuAI Text Embedding for query.""" + document = "This is a test query." + embedding = ZhipuAIEmbeddings() # type: ignore[call-arg] + res = embedding.embed_query(document) + assert len(res) == 1024 # type: ignore[arg-type] diff --git a/libs/community/tests/integration_tests/storage/test_cassandra.py b/libs/community/tests/integration_tests/storage/test_cassandra.py new file mode 100644 index 0000000000000..88f240ed79171 --- /dev/null +++ b/libs/community/tests/integration_tests/storage/test_cassandra.py @@ -0,0 +1,155 @@ +"""Implement integration tests for Cassandra storage.""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +import pytest + +from langchain_community.storage.cassandra import CassandraByteStore +from langchain_community.utilities.cassandra import SetupMode + +if TYPE_CHECKING: + from cassandra.cluster import Session + +KEYSPACE = "storage_test_keyspace" + + +@pytest.fixture(scope="session") +def session() -> Session: + from cassandra.cluster import Cluster + + cluster = Cluster() + session = cluster.connect() + session.execute( + ( + f"CREATE KEYSPACE IF NOT EXISTS {KEYSPACE} " + f"WITH replication = {{'class': 'SimpleStrategy', 'replication_factor': 1}}" + ) + ) + return session + + +def init_store(table_name: str, session: Session) -> CassandraByteStore: + store = CassandraByteStore(table=table_name, keyspace=KEYSPACE, session=session) + store.mset([("key1", b"value1"), ("key2", b"value2")]) + return store + + +async def init_async_store(table_name: str, session: Session) -> CassandraByteStore: + store = CassandraByteStore( + table=table_name, keyspace=KEYSPACE, session=session, setup_mode=SetupMode.ASYNC + ) + await store.amset([("key1", b"value1"), ("key2", b"value2")]) + return store + + +def drop_table(table_name: str, session: Session) -> None: + session.execute(f"DROP TABLE 
{KEYSPACE}.{table_name}") + + +async def test_mget(session: Session) -> None: + """Test CassandraByteStore mget method.""" + table_name = "lc_test_store_mget" + try: + store = init_store(table_name, session) + assert store.mget(["key1", "key2"]) == [b"value1", b"value2"] + assert await store.amget(["key1", "key2"]) == [b"value1", b"value2"] + finally: + drop_table(table_name, session) + + +async def test_amget(session: Session) -> None: + """Test CassandraByteStore amget method.""" + table_name = "lc_test_store_amget" + try: + store = await init_async_store(table_name, session) + assert await store.amget(["key1", "key2"]) == [b"value1", b"value2"] + finally: + drop_table(table_name, session) + + +def test_mset(session: Session) -> None: + """Test that multiple keys can be set with CassandraByteStore.""" + table_name = "lc_test_store_mset" + try: + init_store(table_name, session) + result = session.execute( + "SELECT row_id, body_blob FROM storage_test_keyspace.lc_test_store_mset " + "WHERE row_id = 'key1';" + ).one() + assert result.body_blob == b"value1" + result = session.execute( + "SELECT row_id, body_blob FROM storage_test_keyspace.lc_test_store_mset " + "WHERE row_id = 'key2';" + ).one() + assert result.body_blob == b"value2" + finally: + drop_table(table_name, session) + + +async def test_amset(session: Session) -> None: + """Test that multiple keys can be set with CassandraByteStore.""" + table_name = "lc_test_store_amset" + try: + await init_async_store(table_name, session) + result = session.execute( + "SELECT row_id, body_blob FROM storage_test_keyspace.lc_test_store_amset " + "WHERE row_id = 'key1';" + ).one() + assert result.body_blob == b"value1" + result = session.execute( + "SELECT row_id, body_blob FROM storage_test_keyspace.lc_test_store_amset " + "WHERE row_id = 'key2';" + ).one() + assert result.body_blob == b"value2" + finally: + drop_table(table_name, session) + + +def test_mdelete(session: Session) -> None: + """Test that deletion works as expected.""" + table_name = "lc_test_store_mdelete" + try: + store = init_store(table_name, session) + store.mdelete(["key1", "key2"]) + result = store.mget(["key1", "key2"]) + assert result == [None, None] + finally: + drop_table(table_name, session) + + +async def test_amdelete(session: Session) -> None: + """Test that deletion works as expected.""" + table_name = "lc_test_store_amdelete" + try: + store = await init_async_store(table_name, session) + await store.amdelete(["key1", "key2"]) + result = await store.amget(["key1", "key2"]) + assert result == [None, None] + finally: + drop_table(table_name, session) + + +def test_yield_keys(session: Session) -> None: + table_name = "lc_test_store_yield_keys" + try: + store = init_store(table_name, session) + assert set(store.yield_keys()) == {"key1", "key2"} + assert set(store.yield_keys(prefix="key")) == {"key1", "key2"} + assert set(store.yield_keys(prefix="lang")) == set() + finally: + drop_table(table_name, session) + + +async def test_ayield_keys(session: Session) -> None: + table_name = "lc_test_store_ayield_keys" + try: + store = await init_async_store(table_name, session) + assert {key async for key in store.ayield_keys()} == {"key1", "key2"} + assert {key async for key in store.ayield_keys(prefix="key")} == { + "key1", + "key2", + } + assert {key async for key in store.ayield_keys(prefix="lang")} == set() + finally: + drop_table(table_name, session) diff --git a/libs/community/tests/integration_tests/storage/test_sql.py 
b/libs/community/tests/integration_tests/storage/test_sql.py
new file mode 100644
index 0000000000000..a454029b86cdf
--- /dev/null
+++ b/libs/community/tests/integration_tests/storage/test_sql.py
@@ -0,0 +1,186 @@
+"""Implement integration tests for SQL storage."""
+
+import pytest
+from sqlalchemy import Engine, create_engine, text
+from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
+
+from langchain_community.storage import SQLStore
+
+pytest.importorskip("sqlalchemy")
+
+
+@pytest.fixture
+def sql_engine() -> Engine:
+    """Yield a synchronous SQLAlchemy engine backed by in-memory SQLite."""
+    return create_engine(url="sqlite://", echo=True)
+
+
+@pytest.fixture
+def sql_aengine() -> AsyncEngine:
+    """Yield an asynchronous SQLAlchemy engine backed by in-memory SQLite."""
+    return create_async_engine(url="sqlite+aiosqlite:///:memory:", echo=True)
+
+
+def test_mget(sql_engine: Engine) -> None:
+    """Test mget method."""
+    store = SQLStore(engine=sql_engine, namespace="test")
+    store.create_schema()
+    keys = ["key1", "key2"]
+    with sql_engine.connect() as session:
+        session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key1',:value)"
+            ).bindparams(value=b"value1"),
+        )
+        session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key2',:value)"
+            ).bindparams(value=b"value2"),
+        )
+        session.commit()
+
+    result = store.mget(keys)
+    assert result == [b"value1", b"value2"]
+
+
+@pytest.mark.asyncio
+async def test_amget(sql_aengine: AsyncEngine) -> None:
+    """Test amget method."""
+    store = SQLStore(engine=sql_aengine, namespace="test")
+    await store.acreate_schema()
+    keys = ["key1", "key2"]
+    async with sql_aengine.connect() as session:
+        await session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key1',:value)"
+            ).bindparams(value=b"value1"),
+        )
+        await session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key2',:value)"
+            ).bindparams(value=b"value2"),
+        )
+        await session.commit()
+
+    result = await store.amget(keys)
+    assert result == [b"value1", b"value2"]
+
+
+def test_mset(sql_engine: Engine) -> None:
+    """Test that multiple keys can be set."""
+    store = SQLStore(engine=sql_engine, namespace="test")
+    store.create_schema()
+    key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
+    store.mset(key_value_pairs)
+
+    with sql_engine.connect() as session:
+        result = session.exec_driver_sql("select * from langchain_key_value_stores")
+        assert result.keys() == ["namespace", "key", "value"]
+        data = [(row[0], row[1]) for row in result]
+        assert data == [("test", "key1"), ("test", "key2")]
+        session.commit()
+
+
+@pytest.mark.asyncio
+async def test_amset(sql_aengine: AsyncEngine) -> None:
+    """Test that multiple keys can be set."""
+    store = SQLStore(engine=sql_aengine, namespace="test")
+    await store.acreate_schema()
+    key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
+    await store.amset(key_value_pairs)
+
+    async with sql_aengine.connect() as session:
+        result = await session.exec_driver_sql(
+            "select * from langchain_key_value_stores"
+        )
+        assert result.keys() == ["namespace", "key", "value"]
+        data = [(row[0], row[1]) for row in result]
+        assert data == [("test", "key1"), ("test", "key2")]
+        await session.commit()
+
+
+def test_mdelete(sql_engine: Engine) -> None:
+    """Test that deletion works as expected."""
+    store = SQLStore(engine=sql_engine, namespace="test")
+    store.create_schema()
+    keys = ["key1", "key2"]
+    with sql_engine.connect() as session:
+        session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key1',:value)"
+            ).bindparams(value=b"value1"),
+        )
+        session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key2',:value)"
+            ).bindparams(value=b"value2"),
+        )
+        session.commit()
+    store.mdelete(keys)
+    with sql_engine.connect() as session:
+        result = session.exec_driver_sql("select * from langchain_key_value_stores")
+        assert result.keys() == ["namespace", "key", "value"]
+        data = [row for row in result]
+        assert data == []
+        session.commit()
+
+
+@pytest.mark.asyncio
+async def test_amdelete(sql_aengine: AsyncEngine) -> None:
+    """Test that deletion works as expected."""
+    store = SQLStore(engine=sql_aengine, namespace="test")
+    await store.acreate_schema()
+    keys = ["key1", "key2"]
+    async with sql_aengine.connect() as session:
+        await session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key1',:value)"
+            ).bindparams(value=b"value1"),
+        )
+        await session.execute(
+            text(
+                "insert into langchain_key_value_stores ('namespace', 'key', 'value') "
+                "values('test','key2',:value)"
+            ).bindparams(value=b"value2"),
+        )
+        await session.commit()
+    await store.amdelete(keys)
+    async with sql_aengine.connect() as session:
+        result = await session.exec_driver_sql(
+            "select * from langchain_key_value_stores"
+        )
+        assert result.keys() == ["namespace", "key", "value"]
+        data = [row for row in result]
+        assert data == []
+        await session.commit()
+
+
+def test_yield_keys(sql_engine: Engine) -> None:
+    store = SQLStore(engine=sql_engine, namespace="test")
+    store.create_schema()
+    key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
+    store.mset(key_value_pairs)
+    assert sorted(store.yield_keys()) == ["key1", "key2"]
+    assert sorted(store.yield_keys(prefix="key")) == ["key1", "key2"]
+    assert sorted(store.yield_keys(prefix="lang")) == []
+
+
+@pytest.mark.asyncio
+async def test_ayield_keys(sql_aengine: AsyncEngine) -> None:
+    store = SQLStore(engine=sql_aengine, namespace="test")
+    await store.acreate_schema()
+    key_value_pairs = [("key1", b"value1"), ("key2", b"value2")]
+    await store.amset(key_value_pairs)
+    assert sorted([k async for k in store.ayield_keys()]) == ["key1", "key2"]
+    assert sorted([k async for k in store.ayield_keys(prefix="key")]) == [
+        "key1",
+        "key2",
+    ]
+    assert sorted([k async for k in store.ayield_keys(prefix="lang")]) == []
diff --git a/libs/community/tests/integration_tests/tools/zenguard/test_zenguard.py b/libs/community/tests/integration_tests/tools/zenguard/test_zenguard.py
new file mode 100644
index 0000000000000..7d7ef81a455d3
--- /dev/null
+++ b/libs/community/tests/integration_tests/tools/zenguard/test_zenguard.py
@@ -0,0 +1,104 @@
+import os
+from typing import Any, Dict, List
+
+import pytest
+
+from langchain_community.tools.zenguard.tool import Detector, ZenGuardTool
+
+
+@pytest.fixture()
+def zenguard_tool() -> ZenGuardTool:
+    if os.getenv("ZENGUARD_API_KEY") is None:
+        raise ValueError("ZENGUARD_API_KEY environment variable is not set")
+    return ZenGuardTool()
+
+
+def assert_successful_response_not_detected(response: Dict[str, Any]) -> None:
+    assert response is not None
+    assert "error" not in response, f"API returned an error: {response.get('error')}"
+    assert response.get("is_detected") is False, f"Prompt was detected: {response}"
+
+
+def 
assert_detectors_response( + response: Dict[str, Any], + detectors: List[Detector], +) -> None: + assert response is not None + for detector in detectors: + common_response = next( + ( + resp["common_response"] + for resp in response["responses"] + if resp["detector"] == detector.value + ) + ) + assert ( + "err" not in common_response + ), f"API returned an error: {common_response.get('err')}" # noqa: E501 + assert ( + common_response.get("is_detected") is False + ), f"Prompt was detected: {common_response}" # noqa: E501 + + +def test_prompt_injection(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple prompt injection test" + detectors = [Detector.PROMPT_INJECTION] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_pii(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple PII test" + detectors = [Detector.PII] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_allowed_topics(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple allowed topics test" + detectors = [Detector.ALLOWED_TOPICS] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_banned_topics(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple banned topics test" + detectors = [Detector.BANNED_TOPICS] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_keywords(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple keywords test" + detectors = [Detector.KEYWORDS] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_secrets(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple secrets test" + detectors = [Detector.SECRETS] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_toxicity(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple toxicity test" + detectors = [Detector.TOXICITY] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_successful_response_not_detected(response) + + +def test_all_detectors(zenguard_tool: ZenGuardTool) -> None: + prompt = "Simple all detectors test" + detectors = [ + Detector.ALLOWED_TOPICS, + Detector.BANNED_TOPICS, + Detector.KEYWORDS, + Detector.PII, + Detector.PROMPT_INJECTION, + Detector.SECRETS, + Detector.TOXICITY, + ] + response = zenguard_tool.run({"detectors": detectors, "prompts": [prompt]}) + assert_detectors_response(response, detectors) diff --git a/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike-proximus.yml b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike-proximus.yml new file mode 100644 index 0000000000000..248706780657a --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike-proximus.yml @@ -0,0 +1,36 @@ +cluster: + + # Unique identifier for this cluster. + cluster-name: aerospike-vector + +# The Proximus service listening ports, TLS and network interface. +service: + ports: + 5002: {} + # Uncomment for local debugging + advertised-listeners: + default: + address: 127.0.0.1 + port: 5002 + +# Management API listening ports, TLS and network interface. 
+manage: + ports: + 5040: {} + +# Intra cluster interconnect listening ports, TLS and network interface. +interconnect: + ports: + 5001: {} + +# Target Aerospike cluster +aerospike: + seeds: + - aerospike: + port: 3000 + +# The logging properties. +logging: + enable-console-logging: true + levels: + metrics-ticker: off diff --git a/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike.conf b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike.conf new file mode 100644 index 0000000000000..fba3a7a33e961 --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/aerospike.conf @@ -0,0 +1,62 @@ +# Aerospike database configuration file for use with systemd. + +service { + cluster-name quote-demo + proto-fd-max 15000 +} + + +logging { + file /var/log/aerospike/aerospike.log { + context any info + } + + # Send log messages to stdout + console { + context any info + context query critical + } +} + +network { + service { + address any + port 3000 + } + + heartbeat { + mode multicast + multicast-group 239.1.99.222 + port 9918 + interval 150 + timeout 10 + } + + fabric { + port 3001 + } + + info { + port 3003 + } +} + +namespace test { + replication-factor 1 + nsup-period 60 + + storage-engine device { + file /opt/aerospike/data/test.dat + filesize 1G + } +} + +namespace proximus-meta { + replication-factor 1 + nsup-period 100 + + storage-engine memory { + data-size 1G + } +} + diff --git a/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/docker-compose.yml b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/docker-compose.yml new file mode 100644 index 0000000000000..ea6642dfc971f --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/docker-compose/aerospike/docker-compose.yml @@ -0,0 +1,23 @@ +services: + aerospike: + image: aerospike/aerospike-server-enterprise:7.0.0.2 + ports: + - "3000:3000" + networks: + - aerospike-test + volumes: + - .:/opt/aerospike/etc/aerospike + command: + - "--config-file" + - "/opt/aerospike/etc/aerospike/aerospike.conf" + proximus: + image: aerospike/aerospike-proximus:0.4.0 + ports: + - "5002:5002" + networks: + - aerospike-test + volumes: + - .:/etc/aerospike-proximus + +networks: + aerospike-test: {} diff --git a/libs/community/tests/integration_tests/vectorstores/test_aerospike.py b/libs/community/tests/integration_tests/vectorstores/test_aerospike.py new file mode 100644 index 0000000000000..4bcbce11fea77 --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/test_aerospike.py @@ -0,0 +1,838 @@ +"""Test Aerospike functionality.""" + +import inspect +import os +import subprocess +import time +from typing import Any, Generator + +import pytest +from langchain_core.documents import Document + +from langchain_community.vectorstores.aerospike import ( + Aerospike, +) +from langchain_community.vectorstores.utils import DistanceStrategy +from tests.integration_tests.vectorstores.fake_embeddings import ( + ConsistentFakeEmbeddings, +) + +pytestmark = pytest.mark.requires("aerospike_vector_search") + +TEST_INDEX_NAME = "test-index" +TEST_NAMESPACE = "test" +TEST_AEROSPIKE_HOST_PORT = ("localhost", 5002) +TEXT_KEY = "_text" +VECTOR_KEY = "_vector" +ID_KEY = "_id" +EUCLIDEAN_SCORE = 1.0 +DIR_PATH = os.path.dirname(os.path.realpath(__file__)) + "/docker-compose/aerospike" +FEAT_KEY_PATH = DIR_PATH + "/features.conf" + + +def compose_up() -> None: + subprocess.run(["docker", 
"compose", "up", "-d"], cwd=DIR_PATH) + time.sleep(10) + + +def compose_down() -> None: + subprocess.run(["docker", "compose", "down"], cwd=DIR_PATH) + + +@pytest.fixture(scope="class", autouse=True) +def docker_compose() -> Generator[None, None, None]: + try: + import aerospike_vector_search # noqa + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + if not os.path.exists(FEAT_KEY_PATH): + pytest.skip( + "Aerospike feature key file not found at path {}".format(FEAT_KEY_PATH) + ) + + compose_up() + yield + compose_down() + + +@pytest.fixture(scope="class") +def seeds() -> Generator[Any, None, None]: + try: + from aerospike_vector_search.types import HostPort + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + yield HostPort( + host=TEST_AEROSPIKE_HOST_PORT[0], + port=TEST_AEROSPIKE_HOST_PORT[1], + ) + + +@pytest.fixture(scope="class") +@pytest.mark.requires("aerospike_vector_search") +def admin_client(seeds: Any) -> Generator[Any, None, None]: + try: + from aerospike_vector_search.admin import Client as AdminClient + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + with AdminClient(seeds=seeds) as admin_client: + yield admin_client + + +@pytest.fixture(scope="class") +@pytest.mark.requires("aerospike_vector_search") +def client(seeds: Any) -> Generator[Any, None, None]: + try: + from aerospike_vector_search import Client + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + with Client(seeds=seeds) as client: + yield client + + +@pytest.fixture +def embedder() -> Any: + return ConsistentFakeEmbeddings() + + +@pytest.fixture +def aerospike( + client: Any, embedder: ConsistentFakeEmbeddings +) -> Generator[Aerospike, None, None]: + yield Aerospike( + client, + embedder, + TEST_NAMESPACE, + vector_key=VECTOR_KEY, + text_key=TEXT_KEY, + id_key=ID_KEY, + ) + + +def get_func_name() -> str: + """ + Used to get the name of the calling function. The name is used for the index + and set name in Aerospike tests for debugging purposes. 
+ """ + return inspect.stack()[1].function + + +""" +TODO: Add tests for delete() +""" + + +class TestAerospike: + def test_from_text( + self, + client: Any, + admin_client: Any, + embedder: ConsistentFakeEmbeddings, + ) -> None: + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike = Aerospike.from_texts( + ["foo", "bar", "baz", "bay", "bax", "baw", "bav"], + embedder, + client=client, + namespace=TEST_NAMESPACE, + index_name=index_name, + ids=["1", "2", "3", "4", "5", "6", "7"], + set_name=set_name, + ) + + expected = [ + Document( + page_content="foo", + metadata={ + ID_KEY: "1", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + }, + ), + Document( + page_content="bar", + metadata={ + ID_KEY: "2", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + ), + Document( + page_content="baz", + metadata={ + ID_KEY: "3", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], + }, + ), + ] + actual = aerospike.search( + "foo", k=3, index_name=index_name, search_type="similarity" + ) + + assert actual == expected + + def test_from_documents( + self, + client: Any, + admin_client: Any, + embedder: ConsistentFakeEmbeddings, + ) -> None: + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + documents = [ + Document( + page_content="foo", + metadata={ + ID_KEY: "1", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + }, + ), + Document( + page_content="bar", + metadata={ + ID_KEY: "2", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + }, + ), + Document( + page_content="baz", + metadata={ + ID_KEY: "3", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0], + }, + ), + Document( + page_content="bay", + metadata={ + ID_KEY: "4", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0], + }, + ), + Document( + page_content="bax", + metadata={ + ID_KEY: "5", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 4.0], + }, + ), + Document( + page_content="baw", + metadata={ + ID_KEY: "6", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 5.0], + }, + ), + Document( + page_content="bav", + metadata={ + ID_KEY: "7", + "_vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 6.0], + }, + ), + ] + aerospike = Aerospike.from_documents( + documents, + embedder, + client=client, + namespace=TEST_NAMESPACE, + index_name=index_name, + ids=["1", "2", "3", "4", "5", "6", "7"], + set_name=set_name, + ) + + actual = aerospike.search( + "foo", k=3, index_name=index_name, search_type="similarity" + ) + + expected = documents[:3] + + assert actual == expected + + def test_delete(self, aerospike: Aerospike, admin_client: Any, client: Any) -> None: + """Test end to end construction and search.""" + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) + + assert client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="1") + assert client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="2") + assert client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="3") + + 
aerospike.delete(["1", "2", "3"], set_name=set_name) + + assert not client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="1") + assert not client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="2") + assert not client.exists(namespace=TEST_NAMESPACE, set_name=set_name, key="3") + + def test_search_blocking(self, aerospike: Aerospike, admin_client: Any) -> None: + """Test end to end construction and search.""" + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) # Blocks until all vectors are indexed + expected = [Document(page_content="foo", metadata={ID_KEY: "1"})] + actual = aerospike.search( + "foo", + k=1, + index_name=index_name, + search_type="similarity", + metadata_keys=[ID_KEY], + ) + + assert actual == expected + + def test_search_nonblocking(self, aerospike: Aerospike, admin_client: Any) -> None: + """Test end to end construction and search.""" + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + wait_for_index=True, + ) # blocking + aerospike.add_texts( + ["bay"], index_name=index_name, set_name=set_name, wait_for_index=False + ) + expected = [ + Document(page_content="foo", metadata={ID_KEY: "1"}), + Document(page_content="bar", metadata={ID_KEY: "2"}), + Document(page_content="baz", metadata={ID_KEY: "3"}), + ] + actual = aerospike.search( + "foo", + k=4, + index_name=index_name, + search_type="similarity", + metadata_keys=[ID_KEY], + ) + + # "bay" + assert actual == expected + + def test_similarity_search_with_score( + self, aerospike: Aerospike, admin_client: Any + ) -> None: + """Test end to end construction and search.""" + + expected = [(Document(page_content="foo", metadata={ID_KEY: "1"}), 0.0)] + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) + actual = aerospike.similarity_search_with_score( + "foo", k=1, index_name=index_name, metadata_keys=[ID_KEY] + ) + + assert actual == expected + + def test_similarity_search_by_vector_with_score( + self, + aerospike: Aerospike, + admin_client: Any, + embedder: ConsistentFakeEmbeddings, + ) -> None: + """Test end to end construction and search.""" + + expected = [ + (Document(page_content="foo", metadata={"a": "b", ID_KEY: "1"}), 0.0) + ] + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + metadatas=[{"a": "b", "1": "2"}, {"a": "c"}, {"a": "d"}], + ) + actual = aerospike.similarity_search_by_vector_with_score( + embedder.embed_query("foo"), + k=1, + index_name=index_name, + metadata_keys=["a", ID_KEY], + ) + + assert actual == expected + + def test_similarity_search_by_vector( + self, + aerospike: Aerospike, + 
admin_client: Any, + embedder: ConsistentFakeEmbeddings, + ) -> None: + """Test end to end construction and search.""" + + expected = [ + Document(page_content="foo", metadata={"a": "b", ID_KEY: "1"}), + Document(page_content="bar", metadata={"a": "c", ID_KEY: "2"}), + ] + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + metadatas=[{"a": "b", "1": "2"}, {"a": "c"}, {"a": "d"}], + ) + actual = aerospike.similarity_search_by_vector( + embedder.embed_query("foo"), + k=2, + index_name=index_name, + metadata_keys=["a", ID_KEY], + ) + + assert actual == expected + + def test_similarity_search(self, aerospike: Aerospike, admin_client: Any) -> None: + """Test end to end construction and search.""" + + expected = [ + Document(page_content="foo", metadata={ID_KEY: "1"}), + Document(page_content="bar", metadata={ID_KEY: "2"}), + Document(page_content="baz", metadata={ID_KEY: "3"}), + ] + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) # blocking + actual = aerospike.similarity_search( + "foo", k=3, index_name=index_name, metadata_keys=[ID_KEY] + ) + + assert actual == expected + + def test_max_marginal_relevance_search_by_vector( + self, + client: Any, + admin_client: Any, + embedder: ConsistentFakeEmbeddings, + ) -> None: + """Test max marginal relevance search.""" + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike = Aerospike.from_texts( + ["foo", "bar", "baz", "bay", "bax", "baw", "bav"], + embedder, + client=client, + namespace=TEST_NAMESPACE, + index_name=index_name, + ids=["1", "2", "3", "4", "5", "6", "7"], + set_name=set_name, + ) + + mmr_output = aerospike.max_marginal_relevance_search_by_vector( + embedder.embed_query("foo"), index_name=index_name, k=3, fetch_k=3 + ) + sim_output = aerospike.similarity_search("foo", index_name=index_name, k=3) + + assert len(mmr_output) == 3 + assert mmr_output == sim_output + + mmr_output = aerospike.max_marginal_relevance_search_by_vector( + embedder.embed_query("foo"), index_name=index_name, k=2, fetch_k=3 + ) + + assert len(mmr_output) == 2 + assert mmr_output[0].page_content == "foo" + assert mmr_output[1].page_content == "bar" + + mmr_output = aerospike.max_marginal_relevance_search_by_vector( + embedder.embed_query("foo"), + index_name=index_name, + k=2, + fetch_k=3, + lambda_mult=0.1, # more diversity + ) + + assert len(mmr_output) == 2 + assert mmr_output[0].page_content == "foo" + assert mmr_output[1].page_content == "baz" + + # if fetch_k < k, then the output will be less than k + mmr_output = aerospike.max_marginal_relevance_search_by_vector( + embedder.embed_query("foo"), index_name=index_name, k=3, fetch_k=2 + ) + assert len(mmr_output) == 2 + + def test_max_marginal_relevance_search( + self, aerospike: Aerospike, admin_client: Any + ) -> None: + """Test max marginal relevance search.""" + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + 
name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "bar", "baz", "bay", "bax", "baw", "bav"], + ids=["1", "2", "3", "4", "5", "6", "7"], + index_name=index_name, + set_name=set_name, + ) + + mmr_output = aerospike.max_marginal_relevance_search( + "foo", index_name=index_name, k=3, fetch_k=3 + ) + sim_output = aerospike.similarity_search("foo", index_name=index_name, k=3) + + assert len(mmr_output) == 3 + assert mmr_output == sim_output + + mmr_output = aerospike.max_marginal_relevance_search( + "foo", index_name=index_name, k=2, fetch_k=3 + ) + + assert len(mmr_output) == 2 + assert mmr_output[0].page_content == "foo" + assert mmr_output[1].page_content == "bar" + + mmr_output = aerospike.max_marginal_relevance_search( + "foo", + index_name=index_name, + k=2, + fetch_k=3, + lambda_mult=0.1, # more diversity + ) + + assert len(mmr_output) == 2 + assert mmr_output[0].page_content == "foo" + assert mmr_output[1].page_content == "baz" + + # if fetch_k < k, then the output will be less than k + mmr_output = aerospike.max_marginal_relevance_search( + "foo", index_name=index_name, k=3, fetch_k=2 + ) + assert len(mmr_output) == 2 + + def test_cosine_distance(self, aerospike: Aerospike, admin_client: Any) -> None: + """Test cosine distance.""" + from aerospike_vector_search import types + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + vector_distance_metric=types.VectorDistanceMetric.COSINE, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) # blocking + + """ + foo vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0] + far vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0] + cosine similarity ~= 0.71 + cosine distance ~= 1 - cosine similarity = 0.29 + """ + expected = pytest.approx(0.292, abs=0.002) + output = aerospike.similarity_search_with_score( + "far", index_name=index_name, k=3 + ) + + _, actual_score = output[2] + + assert actual_score == expected + + def test_dot_product_distance( + self, aerospike: Aerospike, admin_client: Any + ) -> None: + """Test dot product distance.""" + from aerospike_vector_search import types + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + vector_distance_metric=types.VectorDistanceMetric.DOT_PRODUCT, + ) + aerospike.add_texts( + ["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) # blocking + + """ + foo vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0] + far vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0] + dot product = 9.0 + dot product distance = dot product * -1 = -9.0 + """ + expected = -9.0 + output = aerospike.similarity_search_with_score( + "far", index_name=index_name, k=3 + ) + + _, actual_score = output[2] + + assert actual_score == expected + + def test_euclidean_distance(self, aerospike: Aerospike, admin_client: Any) -> None: + """Test dot product distance.""" + from aerospike_vector_search import types + + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + vector_distance_metric=types.VectorDistanceMetric.SQUARED_EUCLIDEAN, + ) + aerospike.add_texts( + 
["foo", "bar", "baz"], + ids=["1", "2", "3"], + index_name=index_name, + set_name=set_name, + ) # blocking + + """ + foo vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0] + far vector = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0] + euclidean distance = 9.0 + """ + expected = 9.0 + output = aerospike.similarity_search_with_score( + "far", index_name=index_name, k=3 + ) + + _, actual_score = output[2] + + assert actual_score == expected + + def test_as_retriever(self, aerospike: Aerospike, admin_client: Any) -> None: + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + ) + aerospike.add_texts( + ["foo", "foo", "foo", "foo", "bar"], + ids=["1", "2", "3", "4", "5"], + index_name=index_name, + set_name=set_name, + ) # blocking + + aerospike._index_name = index_name + retriever = aerospike.as_retriever( + search_type="similarity", search_kwargs={"k": 3} + ) + results = retriever.invoke("foo") + assert len(results) == 3 + assert all([d.page_content == "foo" for d in results]) + + def test_as_retriever_distance_threshold( + self, aerospike: Aerospike, admin_client: Any + ) -> None: + from aerospike_vector_search import types + + aerospike._distance_strategy = DistanceStrategy.COSINE + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + vector_distance_metric=types.VectorDistanceMetric.COSINE, + ) + aerospike.add_texts( + ["foo1", "foo2", "foo3", "bar4", "bar5", "bar6", "bar7", "bar8"], + ids=["1", "2", "3", "4", "5", "6", "7", "8"], + index_name=index_name, + set_name=set_name, + ) # blocking + + aerospike._index_name = index_name + retriever = aerospike.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"k": 9, "score_threshold": 0.90}, + ) + results = retriever.invoke("foo1") + + assert all([d.page_content.startswith("foo") for d in results]) + assert len(results) == 3 + + def test_as_retriever_add_documents( + self, aerospike: Aerospike, admin_client: Any + ) -> None: + from aerospike_vector_search import types + + aerospike._distance_strategy = DistanceStrategy.COSINE + index_name = set_name = get_func_name() + admin_client.index_create( + namespace=TEST_NAMESPACE, + sets=set_name, + name=index_name, + vector_field=VECTOR_KEY, + dimensions=10, + vector_distance_metric=types.VectorDistanceMetric.COSINE, + ) + retriever = aerospike.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={"k": 9, "score_threshold": 0.90}, + ) + + documents = [ + Document( + page_content="foo1", + metadata={ + "a": 1, + }, + ), + Document( + page_content="foo2", + metadata={ + "a": 2, + }, + ), + Document( + page_content="foo3", + metadata={ + "a": 3, + }, + ), + Document( + page_content="bar4", + metadata={ + "a": 4, + }, + ), + Document( + page_content="bar5", + metadata={ + "a": 5, + }, + ), + Document( + page_content="bar6", + metadata={ + "a": 6, + }, + ), + Document( + page_content="bar7", + metadata={ + "a": 7, + }, + ), + ] + retriever.add_documents( + documents, + ids=["1", "2", "3", "4", "5", "6", "7", "8"], + index_name=index_name, + set_name=set_name, + wait_for_index=True, + ) + + aerospike._index_name = index_name + results = retriever.invoke("foo1") + + assert all([d.page_content.startswith("foo") for d in results]) + assert len(results) == 3 diff --git 
a/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py new file mode 100644 index 0000000000000..9f7f9120a101e --- /dev/null +++ b/libs/community/tests/integration_tests/vectorstores/test_azure_cosmos_db_no_sql.py @@ -0,0 +1,155 @@ +"""Test AzureCosmosDBNoSqlVectorSearch functionality.""" +import logging +import os +from time import sleep +from typing import Any + +import pytest +from langchain_core.documents import Document + +from langchain_community.embeddings import OpenAIEmbeddings +from langchain_community.vectorstores.azure_cosmos_db_no_sql import ( + AzureCosmosDBNoSqlVectorSearch, +) + +logging.basicConfig(level=logging.DEBUG) + +model_deployment = os.getenv( + "OPENAI_EMBEDDINGS_DEPLOYMENT", "smart-agent-embedding-ada" +) +model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002") + +# Host and Key for CosmosDB No SQl +HOST = os.environ.get("HOST") +KEY = os.environ.get("KEY") + +database_name = "langchain_python_db" +container_name = "langchain_python_container" + + +@pytest.fixture() +def cosmos_client() -> Any: + from azure.cosmos import CosmosClient + + return CosmosClient(HOST, KEY) + + +@pytest.fixture() +def partition_key() -> Any: + from azure.cosmos import PartitionKey + + return PartitionKey(path="/id") + + +@pytest.fixture() +def azure_openai_embeddings() -> Any: + openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings( + deployment=model_deployment, model=model_name, chunk_size=1 + ) + return openai_embeddings + + +def safe_delete_database(cosmos_client: Any) -> None: + cosmos_client.delete_database(database_name) + + +def get_vector_indexing_policy(embedding_type: str) -> dict: + return { + "indexingMode": "consistent", + "includedPaths": [{"path": "/*"}], + "excludedPaths": [{"path": '/"_etag"/?'}], + "vectorIndexes": [{"path": "/embedding", "type": embedding_type}], + } + + +def get_vector_embedding_policy( + distance_function: str, data_type: str, dimensions: int +) -> dict: + return { + "vectorEmbeddings": [ + { + "path": "/embedding", + "dataType": data_type, + "dimensions": dimensions, + "distanceFunction": distance_function, + } + ] + } + + +class TestAzureCosmosDBNoSqlVectorSearch: + def test_from_documents_cosine_distance( + self, + cosmos_client: Any, + partition_key: Any, + azure_openai_embeddings: OpenAIEmbeddings, + ) -> None: + """Test end to end construction and search.""" + documents = [ + Document(page_content="Dogs are tough.", metadata={"a": 1}), + Document(page_content="Cats have fluff.", metadata={"b": 1}), + Document(page_content="What is a sandwich?", metadata={"c": 1}), + Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + ] + + store = AzureCosmosDBNoSqlVectorSearch.from_documents( + documents, + azure_openai_embeddings, + cosmos_client=cosmos_client, + database_name=database_name, + container_name=container_name, + vector_embedding_policy=get_vector_embedding_policy( + "cosine", "float32", 400 + ), + indexing_policy=get_vector_indexing_policy("flat"), + cosmos_container_properties={"partition_key": partition_key}, + ) + sleep(1) # waits for Cosmos DB to save contents to the collection + + output = store.similarity_search("Dogs", k=2) + + assert output + assert output[0].page_content == "Dogs are tough." 
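+ # Clean up: drop the test database so repeated runs start from a clean slate.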
+ safe_delete_database(cosmos_client) + + def test_from_texts_cosine_distance_delete_one( + self, + cosmos_client: Any, + partition_key: Any, + azure_openai_embeddings: OpenAIEmbeddings, + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "That fence is purple.", + ] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + + store = AzureCosmosDBNoSqlVectorSearch.from_texts( + texts, + azure_openai_embeddings, + metadatas, + cosmos_client=cosmos_client, + database_name=database_name, + container_name=container_name, + vector_embedding_policy=get_vector_embedding_policy( + "cosine", "float32", 400 + ), + indexing_policy=get_vector_indexing_policy("flat"), + cosmos_container_properties={"partition_key": partition_key}, + ) + sleep(1) # waits for Cosmos DB to save contents to the collection + + output = store.similarity_search("Dogs", k=1) + assert output + assert output[0].page_content == "Dogs are tough." + + # delete one document + store.delete_document_by_id(str(output[0].metadata["id"])) + sleep(2) + + output2 = store.similarity_search("Dogs", k=1) + assert output2 + assert output2[0].page_content != "Dogs are tough." + safe_delete_database(cosmos_client) diff --git a/libs/community/tests/unit_tests/callbacks/test_upstash_ratelimit_callback.py b/libs/community/tests/unit_tests/callbacks/test_upstash_ratelimit_callback.py new file mode 100644 index 0000000000000..cf728c4c1184b --- /dev/null +++ b/libs/community/tests/unit_tests/callbacks/test_upstash_ratelimit_callback.py @@ -0,0 +1,234 @@ +import logging +from typing import Any +from unittest.mock import create_autospec + +import pytest +from langchain_core.outputs import LLMResult + +from langchain_community.callbacks import UpstashRatelimitError, UpstashRatelimitHandler + +logger = logging.getLogger(__name__) + +try: + from upstash_ratelimit import Ratelimit, Response +except ImportError: + Ratelimit, Response = None, None + + +# Fixtures +@pytest.fixture +def request_ratelimit() -> Ratelimit: + ratelimit = create_autospec(Ratelimit) + response = Response(allowed=True, limit=10, remaining=10, reset=10000) + ratelimit.limit.return_value = response + return ratelimit + + +@pytest.fixture +def token_ratelimit() -> Ratelimit: + ratelimit = create_autospec(Ratelimit) + response = Response(allowed=True, limit=1000, remaining=1000, reset=10000) + ratelimit.limit.return_value = response + ratelimit.get_remaining.return_value = 1000 + return ratelimit + + +@pytest.fixture +def handler_with_both_limits( + request_ratelimit: Ratelimit, token_ratelimit: Ratelimit +) -> UpstashRatelimitHandler: + return UpstashRatelimitHandler( + identifier="user123", + token_ratelimit=token_ratelimit, + request_ratelimit=request_ratelimit, + include_output_tokens=False, + ) + + +# Tests +@pytest.mark.requires("upstash_ratelimit") +def test_init_no_limits() -> None: + with pytest.raises(ValueError): + UpstashRatelimitHandler(identifier="user123") + + +@pytest.mark.requires("upstash_ratelimit") +def test_init_request_limit_only(request_ratelimit: Ratelimit) -> None: + handler = UpstashRatelimitHandler( + identifier="user123", request_ratelimit=request_ratelimit + ) + assert handler.request_ratelimit is not None + assert handler.token_ratelimit is None + + +@pytest.mark.requires("upstash_ratelimit") +def test_init_token_limit_only(token_ratelimit: Ratelimit) -> None: + handler = UpstashRatelimitHandler( + identifier="user123", token_ratelimit=token_ratelimit + ) + assert handler.token_ratelimit is not None + assert 
handler.request_ratelimit is None + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_chain_start_request_limit(handler_with_both_limits: Any) -> None: + handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + handler_with_both_limits.request_ratelimit.limit.assert_called_once_with("user123") + handler_with_both_limits.token_ratelimit.limit.assert_not_called() + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_chain_start_request_limit_reached(request_ratelimit: Any) -> None: + request_ratelimit.limit.return_value = Response( + allowed=False, limit=10, remaining=0, reset=10000 + ) + handler = UpstashRatelimitHandler( + identifier="user123", token_ratelimit=None, request_ratelimit=request_ratelimit + ) + with pytest.raises(UpstashRatelimitError): + handler.on_chain_start(serialized={}, inputs={}) + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_start_token_limit_reached(token_ratelimit: Any) -> None: + token_ratelimit.get_remaining.return_value = 0 + handler = UpstashRatelimitHandler( + identifier="user123", token_ratelimit=token_ratelimit, request_ratelimit=None + ) + with pytest.raises(UpstashRatelimitError): + handler.on_llm_start(serialized={}, prompts=["test"]) + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_start_token_limit_reached_negative(token_ratelimit: Any) -> None: + token_ratelimit.get_remaining.return_value = -10 + handler = UpstashRatelimitHandler( + identifier="user123", token_ratelimit=token_ratelimit, request_ratelimit=None + ) + with pytest.raises(UpstashRatelimitError): + handler.on_llm_start(serialized={}, prompts=["test"]) + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_end_with_token_limit(handler_with_both_limits: Any) -> None: + response = LLMResult( + generations=[], + llm_output={ + "token_usage": { + "prompt_tokens": 2, + "completion_tokens": 3, + "total_tokens": 5, + } + }, + ) + handler_with_both_limits.on_llm_end(response) + handler_with_both_limits.token_ratelimit.limit.assert_called_once_with("user123", 2) + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_end_with_token_limit_include_output_tokens( + token_ratelimit: Any, +) -> None: + handler = UpstashRatelimitHandler( + identifier="user123", + token_ratelimit=token_ratelimit, + request_ratelimit=None, + include_output_tokens=True, + ) + response = LLMResult( + generations=[], + llm_output={ + "token_usage": { + "prompt_tokens": 2, + "completion_tokens": 3, + "total_tokens": 5, + } + }, + ) + handler.on_llm_end(response) + token_ratelimit.limit.assert_called_once_with("user123", 5) + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_end_without_token_usage(handler_with_both_limits: Any) -> None: + response = LLMResult(generations=[], llm_output={}) + with pytest.raises(ValueError): + handler_with_both_limits.on_llm_end(response) + + +@pytest.mark.requires("upstash_ratelimit") +def test_reset_handler(handler_with_both_limits: Any) -> None: + new_handler = handler_with_both_limits.reset(identifier="user456") + assert new_handler.identifier == "user456" + assert not new_handler._checked + + +@pytest.mark.requires("upstash_ratelimit") +def test_reset_handler_no_new_identifier(handler_with_both_limits: Any) -> None: + new_handler = handler_with_both_limits.reset() + assert new_handler.identifier == "user123" + assert not new_handler._checked + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_chain_start_called_once(handler_with_both_limits: Any) -> None: + 
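# A second on_chain_start in the same run should not consume another request; + # the handler remembers that the limit was already checked (its _checked flag). +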
handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + assert handler_with_both_limits.request_ratelimit.limit.call_count == 1 + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_chain_start_reset_checked(handler_with_both_limits: Any) -> None: + handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + new_handler = handler_with_both_limits.reset(identifier="user456") + new_handler.on_chain_start(serialized={}, inputs={}) + + # becomes two because the mock object is kept in reset + assert new_handler.request_ratelimit.limit.call_count == 2 + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_start_no_token_limit(request_ratelimit: Any) -> None: + handler = UpstashRatelimitHandler( + identifier="user123", token_ratelimit=None, request_ratelimit=request_ratelimit + ) + handler.on_llm_start(serialized={}, prompts=["test"]) + assert request_ratelimit.limit.call_count == 0 + + +@pytest.mark.requires("upstash_ratelimit") +def test_on_llm_start_token_limit(handler_with_both_limits: Any) -> None: + handler_with_both_limits.on_llm_start(serialized={}, prompts=["test"]) + assert handler_with_both_limits.token_ratelimit.get_remaining.call_count == 1 + + +@pytest.mark.requires("upstash_ratelimit") +def test_full_chain_with_both_limits(handler_with_both_limits: Any) -> None: + handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + handler_with_both_limits.on_chain_start(serialized={}, inputs={}) + + assert handler_with_both_limits.request_ratelimit.limit.call_count == 1 + assert handler_with_both_limits.token_ratelimit.limit.call_count == 0 + assert handler_with_both_limits.token_ratelimit.get_remaining.call_count == 0 + + handler_with_both_limits.on_llm_start(serialized={}, prompts=["test"]) + + assert handler_with_both_limits.request_ratelimit.limit.call_count == 1 + assert handler_with_both_limits.token_ratelimit.limit.call_count == 0 + assert handler_with_both_limits.token_ratelimit.get_remaining.call_count == 1 + + response = LLMResult( + generations=[], + llm_output={ + "token_usage": { + "prompt_tokens": 2, + "completion_tokens": 3, + "total_tokens": 5, + } + }, + ) + handler_with_both_limits.on_llm_end(response) + + assert handler_with_both_limits.request_ratelimit.limit.call_count == 1 + assert handler_with_both_limits.token_ratelimit.limit.call_count == 1 + assert handler_with_both_limits.token_ratelimit.get_remaining.call_count == 1 diff --git a/libs/community/tests/unit_tests/chat_models/test_oci_generative_ai.py b/libs/community/tests/unit_tests/chat_models/test_oci_generative_ai.py new file mode 100644 index 0000000000000..b7d80d19c4e76 --- /dev/null +++ b/libs/community/tests/unit_tests/chat_models/test_oci_generative_ai.py @@ -0,0 +1,105 @@ +"""Test OCI Generative AI LLM service""" +from unittest.mock import MagicMock + +import pytest +from langchain_core.messages import HumanMessage +from pytest import MonkeyPatch + +from langchain_community.chat_models.oci_generative_ai import ChatOCIGenAI + + +class MockResponseDict(dict): + def __getattr__(self, val): # type: ignore[no-untyped-def] + return self[val] + + +@pytest.mark.requires("oci") +@pytest.mark.parametrize( + "test_model_id", ["cohere.command-r-16k", "meta.llama-3-70b-instruct"] +) +def test_llm_chat(monkeypatch: MonkeyPatch, test_model_id: str) -> None: + """Test valid chat call to OCI Generative AI LLM service.""" + oci_gen_ai_client = MagicMock() + llm = ChatOCIGenAI(model_id=test_model_id, 
client=oci_gen_ai_client) + + provider = llm.model_id.split(".")[0].lower() + + def mocked_response(*args): # type: ignore[no-untyped-def] + response_text = "Assistant chat reply." + response = None + if provider == "cohere": + response = MockResponseDict( + { + "status": 200, + "data": MockResponseDict( + { + "chat_response": MockResponseDict( + { + "text": response_text, + "finish_reason": "completed", + } + ), + "model_id": "cohere.command-r-16k", + "model_version": "1.0.0", + } + ), + "request_id": "1234567890", + "headers": MockResponseDict( + { + "content-length": "123", + } + ), + } + ) + elif provider == "meta": + response = MockResponseDict( + { + "status": 200, + "data": MockResponseDict( + { + "chat_response": MockResponseDict( + { + "choices": [ + MockResponseDict( + { + "message": MockResponseDict( + { + "content": [ + MockResponseDict( + { + "text": response_text, # noqa: E501 + } + ) + ] + } + ), + "finish_reason": "completed", + } + ) + ], + "time_created": "2024-09-01T00:00:00Z", + } + ), + "model_id": "cohere.command-r-16k", + "model_version": "1.0.0", + } + ), + "request_id": "1234567890", + "headers": MockResponseDict( + { + "content-length": "123", + } + ), + } + ) + return response + + monkeypatch.setattr(llm.client, "chat", mocked_response) + + messages = [ + HumanMessage(content="User message"), + ] + + expected = "Assistant chat reply." + actual = llm.invoke(messages, temperature=0.2) + assert actual.content == expected diff --git a/libs/community/tests/unit_tests/chat_models/test_ollama.py b/libs/community/tests/unit_tests/chat_models/test_ollama.py new file mode 100644 index 0000000000000..a99049345acdb --- /dev/null +++ b/libs/community/tests/unit_tests/chat_models/test_ollama.py @@ -0,0 +1,35 @@ +from typing import List, Literal, Optional + +import pytest +from langchain_core.pydantic_v1 import BaseModel, ValidationError + +from langchain_community.chat_models import ChatOllama + + +def test_standard_params() -> None: + class ExpectedParams(BaseModel): + ls_provider: str + ls_model_name: str + ls_model_type: Literal["chat"] + ls_temperature: Optional[float] + ls_max_tokens: Optional[int] + ls_stop: Optional[List[str]] + + model = ChatOllama(model="llama3") + ls_params = model._get_ls_params() + try: + ExpectedParams(**ls_params) + except ValidationError as e: + pytest.fail(f"Validation error: {e}") + assert ls_params["ls_model_name"] == "llama3" + + # Test optional params + model = ChatOllama(num_predict=10, stop=["test"], temperature=0.33) + ls_params = model._get_ls_params() + try: + ExpectedParams(**ls_params) + except ValidationError as e: + pytest.fail(f"Validation error: {e}") + assert ls_params["ls_max_tokens"] == 10 + assert ls_params["ls_stop"] == ["test"] + assert ls_params["ls_temperature"] == 0.33 diff --git a/libs/community/tests/unit_tests/chat_models/test_snowflake.py b/libs/community/tests/unit_tests/chat_models/test_snowflake.py new file mode 100644 index 0000000000000..9e80179a89390 --- /dev/null +++ b/libs/community/tests/unit_tests/chat_models/test_snowflake.py @@ -0,0 +1,24 @@ +"""Test ChatSnowflakeCortex.""" + +from langchain_core.messages import AIMessage, HumanMessage, SystemMessage + +from langchain_community.chat_models.snowflake import _convert_message_to_dict + + +def test_messages_to_prompt_dict_with_valid_messages() -> None: + messages = [ + SystemMessage(content="System Prompt"), + HumanMessage(content="User message #1"), + AIMessage(content="AI message #1"), + HumanMessage(content="User message #2"), + AIMessage(content="AI 
message #2"), + ] + result = [_convert_message_to_dict(m) for m in messages] + expected = [ + {"role": "system", "content": "System Prompt"}, + {"role": "user", "content": "User message #1"}, + {"role": "assistant", "content": "AI message #1"}, + {"role": "user", "content": "User message #2"}, + {"role": "assistant", "content": "AI message #2"}, + ] + assert result == expected diff --git a/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_header_param.json b/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_header_param.json new file mode 100644 index 0000000000000..ff38939c0a86e --- /dev/null +++ b/libs/community/tests/unit_tests/data/openapi_specs/openapi_spec_header_param.json @@ -0,0 +1,34 @@ +{ + "openapi": "3.0.0", + "info": { + "version": "1.0.0", + "title": "Swagger Petstore", + "license": { + "name": "MIT" + } + }, + "servers": [ + { + "url": "http://petstore.swagger.io/v1" + } + ], + "paths": { + "/pets": { + "get": { + "summary": "Info for a specific pet", + "operationId": "showPetById", + "parameters": [ + { + "name": "header_param", + "in": "header", + "required": true, + "description": "A header param", + "schema": { + "type": "string" + } + } + ] + } + } + } + } \ No newline at end of file diff --git a/libs/community/tests/unit_tests/document_loaders/blob_loaders/test_cloud_blob_loader.py b/libs/community/tests/unit_tests/document_loaders/blob_loaders/test_cloud_blob_loader.py new file mode 100644 index 0000000000000..53ad0da98b74a --- /dev/null +++ b/libs/community/tests/unit_tests/document_loaders/blob_loaders/test_cloud_blob_loader.py @@ -0,0 +1,166 @@ +"""Verify that file system blob loader works as expected.""" +import os +import tempfile +from typing import Generator +from urllib.parse import urlparse + +import pytest + +from langchain_community.document_loaders.blob_loaders import CloudBlobLoader + + +@pytest.fixture +def toy_dir() -> Generator[str, None, None]: + """Yield a pre-populated directory to test the blob loader.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create test.txt + with open(os.path.join(temp_dir, "test.txt"), "w") as test_txt: + test_txt.write("This is a test.txt file.") + + # Create test.html + with open(os.path.join(temp_dir, "test.html"), "w") as test_html: + test_html.write( + "
<html><body><h1>This is a test.html file.</h1></body></html>
" + ) + + # Create .hidden_file + with open(os.path.join(temp_dir, ".hidden_file"), "w") as hidden_file: + hidden_file.write("This is a hidden file.") + + # Create some_dir/nested_file.txt + some_dir = os.path.join(temp_dir, "some_dir") + os.makedirs(some_dir) + with open(os.path.join(some_dir, "nested_file.txt"), "w") as nested_file: + nested_file.write("This is a nested_file.txt file.") + + # Create some_dir/other_dir/more_nested.txt + other_dir = os.path.join(some_dir, "other_dir") + os.makedirs(other_dir) + with open(os.path.join(other_dir, "more_nested.txt"), "w") as nested_file: + nested_file.write("This is a more_nested.txt file.") + + yield f"file://{temp_dir}" + + +# @pytest.fixture +# @pytest.mark.requires("boto3") +# def toy_dir() -> str: +# return "s3://ppr-langchain-test" + + +_TEST_CASES = [ + { + "glob": "**/[!.]*", + "suffixes": None, + "exclude": (), + "relative_filenames": [ + "test.html", + "test.txt", + "some_dir/nested_file.txt", + "some_dir/other_dir/more_nested.txt", + ], + }, + { + "glob": "*", + "suffixes": None, + "exclude": (), + "relative_filenames": ["test.html", "test.txt", ".hidden_file"], + }, + { + "glob": "**/*.html", + "suffixes": None, + "exclude": (), + "relative_filenames": ["test.html"], + }, + { + "glob": "*/*.txt", + "suffixes": None, + "exclude": (), + "relative_filenames": ["some_dir/nested_file.txt"], + }, + { + "glob": "**/*.txt", + "suffixes": None, + "exclude": (), + "relative_filenames": [ + "test.txt", + "some_dir/nested_file.txt", + "some_dir/other_dir/more_nested.txt", + ], + }, + { + "glob": "**/*", + "suffixes": [".txt"], + "exclude": (), + "relative_filenames": [ + "test.txt", + "some_dir/nested_file.txt", + "some_dir/other_dir/more_nested.txt", + ], + }, + { + "glob": "meeeeeeow", + "suffixes": None, + "exclude": (), + "relative_filenames": [], + }, + { + "glob": "*", + "suffixes": [".html", ".txt"], + "exclude": (), + "relative_filenames": ["test.html", "test.txt"], + }, + # Using exclude patterns + { + "glob": "**/*", + "suffixes": [".txt"], + "exclude": ("some_dir/*",), + "relative_filenames": ["test.txt", "some_dir/other_dir/more_nested.txt"], + }, + # Using 2 exclude patterns, one of which is recursive + { + "glob": "**/*", + "suffixes": None, + "exclude": ("**/*.txt", ".hidden*"), + "relative_filenames": ["test.html"], + }, +] + + +@pytest.mark.requires("cloudpathlib") +@pytest.mark.parametrize("params", _TEST_CASES) +def test_file_names_exist(toy_dir: str, params: dict) -> None: + """Verify that the file names exist.""" + + glob_pattern = params["glob"] + suffixes = params["suffixes"] + exclude = params["exclude"] + relative_filenames = params["relative_filenames"] + + loader = CloudBlobLoader( + toy_dir, glob=glob_pattern, suffixes=suffixes, exclude=exclude + ) + blobs = list(loader.yield_blobs()) + + url_parsed = urlparse(toy_dir) + scheme = "" + if url_parsed.scheme == "file": + scheme = "file://" + + file_names = sorted(f"{scheme}{blob.path}" for blob in blobs) + + expected_filenames = sorted( + str(toy_dir + "/" + relative_filename) + for relative_filename in relative_filenames + ) + + assert file_names == expected_filenames + assert loader.count_matching_files() == len(relative_filenames) + + +@pytest.mark.requires("cloudpathlib") +def test_show_progress(toy_dir: str) -> None: + """Verify that file system loader works with a progress bar.""" + loader = CloudBlobLoader(toy_dir) + blobs = list(loader.yield_blobs()) + assert len(blobs) == loader.count_matching_files() diff --git 
a/libs/community/tests/unit_tests/document_loaders/parsers/language/test_elixir.py b/libs/community/tests/unit_tests/document_loaders/parsers/language/test_elixir.py new file mode 100644 index 0000000000000..02d6af926563e --- /dev/null +++ b/libs/community/tests/unit_tests/document_loaders/parsers/language/test_elixir.py @@ -0,0 +1,57 @@ +import unittest + +import pytest + +from langchain_community.document_loaders.parsers.language.elixir import ElixirSegmenter + + +@pytest.mark.requires("tree_sitter", "tree_sitter_languages") +class TestElixirSegmenter(unittest.TestCase): + def setUp(self) -> None: + self.example_code = """@doc "some comment" +def foo do + i = 0 +end + +defmodule M do + def hi do + i = 2 + end + + defp wave do + :ok + end +end""" + + self.expected_simplified_code = """# Code for: @doc "some comment" +# Code for: def foo do + +# Code for: defmodule M do""" + + self.expected_extracted_code = [ + '@doc "some comment"', + "def foo do\n i = 0\nend", + "defmodule M do\n" + " def hi do\n" + " i = 2\n" + " end\n\n" + " defp wave do\n" + " :ok\n" + " end\n" + "end", + ] + + def test_is_valid(self) -> None: + self.assertTrue(ElixirSegmenter("def a do; end").is_valid()) + self.assertFalse(ElixirSegmenter("a b c 1 2 3").is_valid()) + + def test_extract_functions_classes(self) -> None: + segmenter = ElixirSegmenter(self.example_code) + extracted_code = segmenter.extract_functions_classes() + self.assertEqual(len(extracted_code), 3) + self.assertEqual(extracted_code, self.expected_extracted_code) + + def test_simplify_code(self) -> None: + segmenter = ElixirSegmenter(self.example_code) + simplified_code = segmenter.simplify_code() + self.assertEqual(simplified_code, self.expected_simplified_code) diff --git a/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv b/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv new file mode 100644 index 0000000000000..a6a3d77e05060 --- /dev/null +++ b/libs/community/tests/unit_tests/document_loaders/test_docs/csv/test_none_col.csv @@ -0,0 +1,3 @@ +column1,column2,column3 +value1,value2,value3,value4,value5 +value6,value7,value8,value9 diff --git a/libs/community/tests/unit_tests/document_loaders/test_recursive_url_loader.py b/libs/community/tests/unit_tests/document_loaders/test_recursive_url_loader.py new file mode 100644 index 0000000000000..55e00d997653e --- /dev/null +++ b/libs/community/tests/unit_tests/document_loaders/test_recursive_url_loader.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import inspect +import uuid +from types import TracebackType +from typing import Any, Type + +import aiohttp +import pytest +import requests_mock + +from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader + +link_to_one_two = """ + + +""" +link_to_three = '' +no_links = "
<p>no links</p>
" + +fake_url = f"https://{uuid.uuid4()}.com" +URL_TO_HTML = { + fake_url: link_to_one_two, + f"{fake_url}/one": link_to_three, + f"{fake_url}/two": link_to_three, + f"{fake_url}/three": no_links, +} + + +class MockGet: + def __init__(self, url: str) -> None: + self._text = URL_TO_HTML[url] + self.headers: dict = {} + + async def text(self) -> str: + return self._text + + async def __aexit__( + self, exc_type: Type[BaseException], exc: BaseException, tb: TracebackType + ) -> None: + pass + + async def __aenter__(self) -> MockGet: + return self + + +@pytest.mark.parametrize(("max_depth", "expected_docs"), [(1, 1), (2, 3), (3, 4)]) +@pytest.mark.parametrize("use_async", [False, True]) +def test_lazy_load( + mocker: Any, max_depth: int, expected_docs: int, use_async: bool +) -> None: + loader = RecursiveUrlLoader(fake_url, max_depth=max_depth, use_async=use_async) + if use_async: + mocker.patch.object(aiohttp.ClientSession, "get", new=MockGet) + docs = list(loader.lazy_load()) + else: + with requests_mock.Mocker() as m: + for url, html in URL_TO_HTML.items(): + m.get(url, text=html) + docs = list(loader.lazy_load()) + assert len(docs) == expected_docs + + +@pytest.mark.parametrize(("max_depth", "expected_docs"), [(1, 1), (2, 3), (3, 4)]) +@pytest.mark.parametrize("use_async", [False, True]) +async def test_alazy_load( + mocker: Any, max_depth: int, expected_docs: int, use_async: bool +) -> None: + loader = RecursiveUrlLoader(fake_url, max_depth=max_depth, use_async=use_async) + if use_async: + mocker.patch.object(aiohttp.ClientSession, "get", new=MockGet) + docs = [] + async for doc in loader.alazy_load(): + docs.append(doc) + else: + with requests_mock.Mocker() as m: + for url, html in URL_TO_HTML.items(): + m.get(url, text=html) + docs = [] + async for doc in loader.alazy_load(): + docs.append(doc) + + assert len(docs) == expected_docs + + +def test_init_args_documented() -> None: + cls_docstring = RecursiveUrlLoader.__doc__ or "" + init_docstring = RecursiveUrlLoader.__init__.__doc__ or "" + all_docstring = cls_docstring + init_docstring + init_args = list(inspect.signature(RecursiveUrlLoader.__init__).parameters) + undocumented = [arg for arg in init_args[1:] if f"{arg}:" not in all_docstring] + assert not undocumented + + +@pytest.mark.parametrize("method", ["load", "aload", "lazy_load", "alazy_load"]) +def test_no_runtime_args(method: str) -> None: + method_attr = getattr(RecursiveUrlLoader, method) + args = list(inspect.signature(method_attr).parameters) + assert args == ["self"] diff --git a/libs/community/tests/unit_tests/embeddings/test_baichuan.py b/libs/community/tests/unit_tests/embeddings/test_baichuan.py new file mode 100644 index 0000000000000..10513948f9427 --- /dev/null +++ b/libs/community/tests/unit_tests/embeddings/test_baichuan.py @@ -0,0 +1,18 @@ +from typing import cast + +from langchain_core.pydantic_v1 import SecretStr + +from langchain_community.embeddings import BaichuanTextEmbeddings + + +def test_sparkllm_initialization_by_alias() -> None: + # Effective initialization + embeddings = BaichuanTextEmbeddings( # type: ignore[call-arg] + model="embedding_model", # type: ignore[arg-type] + api_key="your-api-key", # type: ignore[arg-type] + ) + assert embeddings.model_name == "embedding_model" + assert ( + cast(SecretStr, embeddings.baichuan_api_key).get_secret_value() + == "your-api-key" + ) diff --git a/libs/community/tests/unit_tests/embeddings/test_ovhcloud.py b/libs/community/tests/unit_tests/embeddings/test_ovhcloud.py new file mode 100644 index 
0000000000000..c06c1550e4b7d --- /dev/null +++ b/libs/community/tests/unit_tests/embeddings/test_ovhcloud.py @@ -0,0 +1,31 @@ +import pytest + +from langchain_community.embeddings.ovhcloud import OVHCloudEmbeddings + + +def test_ovhcloud_correct_instantiation() -> None: + llm = OVHCloudEmbeddings(model_name="multilingual-e5-base", access_token="token") + assert isinstance(llm, OVHCloudEmbeddings) + llm = OVHCloudEmbeddings( + model_name="multilingual-e5-base", region="kepler", access_token="token" + ) + assert isinstance(llm, OVHCloudEmbeddings) + + +def test_ovhcloud_empty_model_name_should_raise_error() -> None: + with pytest.raises(ValueError): + OVHCloudEmbeddings(model_name="", region="kepler", access_token="token") + + +def test_ovhcloud_empty_region_should_raise_error() -> None: + with pytest.raises(ValueError): + OVHCloudEmbeddings( + model_name="multilingual-e5-base", region="", access_token="token" + ) + + +def test_ovhcloud_empty_access_token_should_raise_error() -> None: + with pytest.raises(ValueError): + OVHCloudEmbeddings( + model_name="multilingual-e5-base", region="kepler", access_token="" + ) diff --git a/libs/community/tests/unit_tests/embeddings/test_sparkllm.py b/libs/community/tests/unit_tests/embeddings/test_sparkllm.py new file mode 100644 index 0000000000000..d318035106e23 --- /dev/null +++ b/libs/community/tests/unit_tests/embeddings/test_sparkllm.py @@ -0,0 +1,47 @@ +import os +from typing import cast + +import pytest +from langchain_core.pydantic_v1 import SecretStr, ValidationError + +from langchain_community.embeddings import SparkLLMTextEmbeddings + + +def test_sparkllm_initialization_by_alias() -> None: + # Effective initialization + embeddings = SparkLLMTextEmbeddings( + app_id="your-app-id", # type: ignore[arg-type] + api_key="your-api-key", # type: ignore[arg-type] + api_secret="your-api-secret", # type: ignore[arg-type] + ) + assert cast(SecretStr, embeddings.spark_app_id).get_secret_value() == "your-app-id" + assert ( + cast(SecretStr, embeddings.spark_api_key).get_secret_value() == "your-api-key" + ) + assert ( + cast(SecretStr, embeddings.spark_api_secret).get_secret_value() + == "your-api-secret" + ) + + +def test_initialization_parameters_from_env() -> None: + # Setting environment variable + os.environ["SPARK_APP_ID"] = "your-app-id" + os.environ["SPARK_API_KEY"] = "your-api-key" + os.environ["SPARK_API_SECRET"] = "your-api-secret" + + # Effective initialization + embeddings = SparkLLMTextEmbeddings() + assert cast(SecretStr, embeddings.spark_app_id).get_secret_value() == "your-app-id" + assert ( + cast(SecretStr, embeddings.spark_api_key).get_secret_value() == "your-api-key" + ) + assert ( + cast(SecretStr, embeddings.spark_api_secret).get_secret_value() + == "your-api-secret" + ) + + # Environment variable missing + del os.environ["SPARK_APP_ID"] + with pytest.raises(ValidationError): + SparkLLMTextEmbeddings() diff --git a/libs/community/tests/unit_tests/storage/test_sql.py b/libs/community/tests/unit_tests/storage/test_sql.py new file mode 100644 index 0000000000000..084f0e2d19089 --- /dev/null +++ b/libs/community/tests/unit_tests/storage/test_sql.py @@ -0,0 +1,89 @@ +from typing import AsyncGenerator, Generator, cast + +import pytest +from langchain.storage._lc_store import create_kv_docstore, create_lc_store +from langchain_core.documents import Document +from langchain_core.stores import BaseStore + +from langchain_community.storage.sql import SQLStore + + +@pytest.fixture +def sql_store() -> Generator[SQLStore, None, None]: + store = 
SQLStore(namespace="test", db_url="sqlite://") + store.create_schema() + yield store + + +@pytest.fixture +async def async_sql_store() -> AsyncGenerator[SQLStore, None]: + store = SQLStore(namespace="test", db_url="sqlite+aiosqlite://", async_mode=True) + await store.acreate_schema() + yield store + + +def test_create_lc_store(sql_store: SQLStore) -> None: + """Test that a docstore is created from a base store.""" + docstore: BaseStore[str, Document] = cast( + BaseStore[str, Document], create_lc_store(sql_store) + ) + docstore.mset([("key1", Document(page_content="hello", metadata={"key": "value"}))]) + fetched_doc = docstore.mget(["key1"])[0] + assert fetched_doc is not None + assert fetched_doc.page_content == "hello" + assert fetched_doc.metadata == {"key": "value"} + + +def test_create_kv_store(sql_store: SQLStore) -> None: + """Test that a docstore is created from a base store.""" + docstore = create_kv_docstore(sql_store) + docstore.mset([("key1", Document(page_content="hello", metadata={"key": "value"}))]) + fetched_doc = docstore.mget(["key1"])[0] + assert isinstance(fetched_doc, Document) + assert fetched_doc.page_content == "hello" + assert fetched_doc.metadata == {"key": "value"} + + +@pytest.mark.requires("aiosqlite") +async def test_async_create_kv_store(async_sql_store: SQLStore) -> None: + """Test that a docstore is created from a base store.""" + docstore = create_kv_docstore(async_sql_store) + await docstore.amset( + [("key1", Document(page_content="hello", metadata={"key": "value"}))] + ) + fetched_doc = (await docstore.amget(["key1"]))[0] + assert isinstance(fetched_doc, Document) + assert fetched_doc.page_content == "hello" + assert fetched_doc.metadata == {"key": "value"} + + +def test_sample_sql_docstore(sql_store: SQLStore) -> None: + # Set values for keys + sql_store.mset([("key1", b"value1"), ("key2", b"value2")]) + + # Get values for keys + values = sql_store.mget(["key1", "key2"]) # Returns [b"value1", b"value2"] + assert values == [b"value1", b"value2"] + # Delete keys + sql_store.mdelete(["key1"]) + + # Iterate over keys + assert [key for key in sql_store.yield_keys()] == ["key2"] + + +@pytest.mark.requires("aiosqlite") +async def test_async_sample_sql_docstore(async_sql_store: SQLStore) -> None: + # Set values for keys + await async_sql_store.amset([("key1", b"value1"), ("key2", b"value2")]) + # sql_store.mset([("key1", "value1"), ("key2", "value2")]) + + # Get values for keys + values = await async_sql_store.amget( + ["key1", "key2"] + ) # Returns [b"value1", b"value2"] + assert values == [b"value1", b"value2"] + # Delete keys + await async_sql_store.amdelete(["key1"]) + + # Iterate over keys + assert [key async for key in async_sql_store.ayield_keys()] == ["key2"] diff --git a/libs/community/tests/unit_tests/utilities/test_openapi.py b/libs/community/tests/unit_tests/utilities/test_openapi.py new file mode 100644 index 0000000000000..e7e8b74557396 --- /dev/null +++ b/libs/community/tests/unit_tests/utilities/test_openapi.py @@ -0,0 +1,44 @@ +from pathlib import Path + +import pytest +from langchain.chains.openai_functions.openapi import openapi_spec_to_openai_fn + +from langchain_community.utilities.openapi import ( # noqa: E402 # ignore: community-import + OpenAPISpec, +) + +EXPECTED_OPENAI_FUNCTIONS_HEADER_PARAM = [ + { + "name": "showPetById", + "description": "Info for a specific pet", + "parameters": { + "type": "object", + "properties": { + "headers": { + "type": "object", + "properties": { + "header_param": { + "type": "string", + "description": "A 
header param", + } + }, + "required": ["header_param"], + } + }, + }, + } +] + + +@pytest.mark.requires("openapi_pydantic") +def test_header_param() -> None: + spec = OpenAPISpec.from_file( + Path(__file__).parent.parent + / "data" + / "openapi_specs" + / "openapi_spec_header_param.json", + ) + + openai_functions, _ = openapi_spec_to_openai_fn(spec) + + assert openai_functions == EXPECTED_OPENAI_FUNCTIONS_HEADER_PARAM diff --git a/libs/community/tests/unit_tests/vectorstores/test_aerospike.py b/libs/community/tests/unit_tests/vectorstores/test_aerospike.py new file mode 100644 index 0000000000000..6ff4bca995844 --- /dev/null +++ b/libs/community/tests/unit_tests/vectorstores/test_aerospike.py @@ -0,0 +1,378 @@ +import sys +from typing import Any, Callable, Generator +from unittest.mock import MagicMock, Mock, call + +import pytest +from langchain_core.documents import Document + +from langchain_community.vectorstores.aerospike import Aerospike +from langchain_community.vectorstores.utils import DistanceStrategy +from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings + +pytestmark = pytest.mark.requires("aerospike_vector_search") and pytest.mark.skipif( + sys.version_info < (3, 9), reason="requires python3.9 or higher" +) + + +@pytest.fixture(scope="module") +def client() -> Generator[Any, None, None]: + try: + from aerospike_vector_search import Client + from aerospike_vector_search.types import HostPort + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + client = Client( + seeds=[ + HostPort(host="dummy-host", port=3000), + ], + ) + + yield client + + client.close() + + +@pytest.fixture +def mock_client(mocker: Any) -> None: + try: + from aerospike_vector_search import Client + except ImportError: + pytest.skip("aerospike_vector_search not installed") + + return mocker.MagicMock(Client) + + +def test_aerospike(client: Any) -> None: + """Ensure an error is raised when search with score in hybrid mode + because in this case Elasticsearch does not return any score. + """ + from aerospike_vector_search import AVSError + + query_string = "foo" + embedding = FakeEmbeddings() + + store = Aerospike( + client=client, + embedding=embedding, + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + # TODO: Remove grpc import when aerospike_vector_search wraps grpc errors + with pytest.raises(AVSError): + store.similarity_search_by_vector(embedding.embed_query(query_string)) + + +def test_init_aerospike_distance(client: Any) -> None: + from aerospike_vector_search.types import VectorDistanceMetric + + embedding = FakeEmbeddings() + aerospike = Aerospike( + client=client, + embedding=embedding, + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=VectorDistanceMetric.COSINE, + ) + + assert aerospike._distance_strategy == DistanceStrategy.COSINE + + +def test_init_bad_embedding(client: Any) -> None: + def bad_embedding() -> None: + return None + + with pytest.warns( + UserWarning, + match=( + "Passing in `embedding` as a Callable is deprecated. Please pass" + + " in an Embeddings object instead." 
+ ), + ): + Aerospike( + client=client, + embedding=bad_embedding, + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + +def test_init_bad_client(client: Any) -> None: + class BadClient: + pass + + with pytest.raises( + ValueError, + match=( + "client should be an instance of aerospike_vector_search.Client," + + " got .BadClient'>" + ), + ): + Aerospike( + client=BadClient(), + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + +def test_convert_distance_strategy(client: Any) -> None: + from aerospike_vector_search.types import VectorDistanceMetric + + aerospike = Aerospike( + client=client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + converted_strategy = aerospike.convert_distance_strategy( + VectorDistanceMetric.COSINE + ) + assert converted_strategy == DistanceStrategy.COSINE + + converted_strategy = aerospike.convert_distance_strategy( + VectorDistanceMetric.DOT_PRODUCT + ) + assert converted_strategy == DistanceStrategy.DOT_PRODUCT + + converted_strategy = aerospike.convert_distance_strategy( + VectorDistanceMetric.SQUARED_EUCLIDEAN + ) + assert converted_strategy == DistanceStrategy.EUCLIDEAN_DISTANCE + + with pytest.raises(ValueError): + aerospike.convert_distance_strategy(VectorDistanceMetric.HAMMING) + + +def test_add_texts_wait_for_index_error(client: Any) -> None: + aerospike = Aerospike( + client=client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + # index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + with pytest.raises( + ValueError, match="if wait_for_index is True, index_name must be provided" + ): + aerospike.add_texts(["foo", "bar"], wait_for_index=True) + + +def test_add_texts_returns_ids(mock_client: MagicMock) -> None: + aerospike = Aerospike( + client=mock_client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + excepted = ["0", "1"] + actual = aerospike.add_texts( + ["foo", "bar"], + metadatas=[{"foo": 0}, {"bar": 1}], + ids=["0", "1"], + set_name="otherset", + index_name="dummy_index", + wait_for_index=True, + ) + + assert excepted == actual + mock_client.upsert.assert_has_calls( + calls=[ + call( + namespace="test", + key="0", + set_name="otherset", + record_data={ + "_id": "0", + "text": "foo", + "vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0], + "foo": 0, + }, + ), + call( + namespace="test", + key="1", + set_name="otherset", + record_data={ + "_id": "1", + "text": "bar", + "vector": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "bar": 1, + }, + ), + ] + ) + mock_client.wait_for_index_completion.assert_called_once_with( + namespace="test", + name="dummy_index", + ) + + +def test_delete_returns_false(mock_client: MagicMock) -> None: + from aerospike_vector_search import AVSServerError + + mock_client.delete.side_effect = Mock(side_effect=AVSServerError(rpc_error="")) + aerospike = Aerospike( + client=mock_client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + namespace="test", + set_name="testset", + 
distance_strategy=DistanceStrategy.COSINE, + ) + + assert not aerospike.delete(["foo", "bar"], set_name="testset") + mock_client.delete.assert_called_once_with( + namespace="test", key="foo", set_name="testset" + ) + + +def test_similarity_search_by_vector_with_score_missing_index_name( + client: Any, +) -> None: + aerospike = Aerospike( + client=client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + # index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + with pytest.raises(ValueError, match="index_name must be provided"): + aerospike.similarity_search_by_vector_with_score([1.0, 2.0, 3.0]) + + +def test_similarity_search_by_vector_with_score_filters_missing_text_key( + mock_client: MagicMock, +) -> None: + from aerospike_vector_search.types import Neighbor + + text_key = "text" + mock_client.vector_search.return_value = [ + Neighbor(key="key1", fields={text_key: 1}, distance=1.0), + Neighbor(key="key2", fields={}, distance=0.0), + Neighbor(key="key3", fields={text_key: 3}, distance=3.0), + ] + aerospike = Aerospike( + client=mock_client, + embedding=FakeEmbeddings(), + text_key=text_key, + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + actual = aerospike.similarity_search_by_vector_with_score( + [1.0, 2.0, 3.0], k=10, metadata_keys=["foo"] + ) + + expected = [ + (Document(page_content="1"), 1.0), + (Document(page_content="3"), 3.0), + ] + mock_client.vector_search.assert_called_once_with( + index_name="dummy_index", + namespace="test", + query=[1.0, 2.0, 3.0], + limit=10, + field_names=[text_key, "foo"], + ) + + assert expected == actual + + +def test_similarity_search_by_vector_with_score_overwrite_index_name( + mock_client: MagicMock, +) -> None: + mock_client.vector_search.return_value = [] + aerospike = Aerospike( + client=mock_client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=DistanceStrategy.COSINE, + ) + + aerospike.similarity_search_by_vector_with_score( + [1.0, 2.0, 3.0], index_name="other_index" + ) + + mock_client.vector_search.assert_called_once_with( + index_name="other_index", + namespace="test", + query=[1.0, 2.0, 3.0], + limit=4, + field_names=None, + ) + + +@pytest.mark.parametrize( + "distance_strategy,expected_fn", + [ + (DistanceStrategy.COSINE, Aerospike._cosine_relevance_score_fn), + (DistanceStrategy.EUCLIDEAN_DISTANCE, Aerospike._euclidean_relevance_score_fn), + (DistanceStrategy.DOT_PRODUCT, Aerospike._max_inner_product_relevance_score_fn), + (DistanceStrategy.JACCARD, ValueError), + ], +) +def test_select_relevance_score_fn( + client: Any, distance_strategy: DistanceStrategy, expected_fn: Callable +) -> None: + aerospike = Aerospike( + client=client, + embedding=FakeEmbeddings(), + text_key="text", + vector_key="vector", + index_name="dummy_index", + namespace="test", + set_name="testset", + distance_strategy=distance_strategy, + ) + + if expected_fn == ValueError: + with pytest.raises(ValueError): + aerospike._select_relevance_score_fn() + + else: + fn = aerospike._select_relevance_score_fn() + + assert fn == expected_fn diff --git a/libs/core/extended_testing_deps.txt b/libs/core/extended_testing_deps.txt new file mode 100644 index 0000000000000..5ad9c8930daf9 --- /dev/null +++ b/libs/core/extended_testing_deps.txt @@ -0,0 +1 @@ +jinja2>=3,<4 diff --git 
a/libs/core/langchain_core/tracers/core.py b/libs/core/langchain_core/tracers/core.py new file mode 100644 index 0000000000000..bee9f855b3ecf --- /dev/null +++ b/libs/core/langchain_core/tracers/core.py @@ -0,0 +1,569 @@ +"""Utilities for the root listener.""" + +from __future__ import annotations + +import logging +import sys +import traceback +from abc import ABC, abstractmethod +from datetime import datetime, timezone +from typing import ( + TYPE_CHECKING, + Any, + Coroutine, + Dict, + List, + Literal, + Optional, + Sequence, + Set, + Tuple, + Union, + cast, +) +from uuid import UUID + +from tenacity import RetryCallState + +from langchain_core.exceptions import TracerException +from langchain_core.load import dumpd +from langchain_core.messages import BaseMessage +from langchain_core.outputs import ( + ChatGeneration, + ChatGenerationChunk, + GenerationChunk, + LLMResult, +) +from langchain_core.tracers.schemas import Run + +if TYPE_CHECKING: + from langchain_core.documents import Document + +logger = logging.getLogger(__name__) + +SCHEMA_FORMAT_TYPE = Literal["original", "streaming_events"] + + +class _TracerCore(ABC): + """ + Abstract base class for tracers + This class provides common methods, and reusable methods for tracers. + """ + + log_missing_parent: bool = True + + def __init__( + self, + *, + _schema_format: Literal[ + "original", "streaming_events", "original+chat" + ] = "original", + **kwargs: Any, + ) -> None: + """Initialize the tracer. + + Args: + _schema_format: Primarily changes how the inputs and outputs are + handled. For internal use only. This API will change. + - 'original' is the format used by all current tracers. + This format is slightly inconsistent with respect to inputs + and outputs. + - 'streaming_events' is used for supporting streaming events, + for internal usage. It will likely change in the future, or + be deprecated entirely in favor of a dedicated async tracer + for streaming events. + - 'original+chat' is a format that is the same as 'original' + except it does NOT raise an attribute error on_chat_model_start + kwargs: Additional keyword arguments that will be passed to + the super class. + """ + super().__init__(**kwargs) + self._schema_format = _schema_format # For internal use only API will change. + self.run_map: Dict[str, Run] = {} + """Map of run ID to run. Cleared on run end.""" + self.order_map: Dict[UUID, Tuple[UUID, str]] = {} + """Map of run ID to (trace_id, dotted_order). Cleared when tracer GCed.""" + + @abstractmethod + def _persist_run(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Persist a run.""" + + @staticmethod + def _add_child_run( + parent_run: Run, + child_run: Run, + ) -> None: + """Add child run to a chain run or tool run.""" + parent_run.child_runs.append(child_run) + + @staticmethod + def _get_stacktrace(error: BaseException) -> str: + """Get the stacktrace of the parent error.""" + msg = repr(error) + try: + if sys.version_info < (3, 10): + tb = traceback.format_exception( + error.__class__, error, error.__traceback__ + ) + else: + tb = traceback.format_exception(error) + return (msg + "\n\n".join(tb)).strip() + except: # noqa: E722 + return msg + + def _start_trace(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: # type: ignore[return] + current_dotted_order = run.start_time.strftime("%Y%m%dT%H%M%S%fZ") + str(run.id) + if run.parent_run_id: + if parent := self.order_map.get(run.parent_run_id): + run.trace_id, run.dotted_order = parent + run.dotted_order += "." 
+ current_dotted_order + if parent_run := self.run_map.get(str(run.parent_run_id)): + self._add_child_run(parent_run, run) + else: + if self.log_missing_parent: + logger.warning( + f"Parent run {run.parent_run_id} not found for run {run.id}." + " Treating as a root run." + ) + run.parent_run_id = None + run.trace_id = run.id + run.dotted_order = current_dotted_order + else: + run.trace_id = run.id + run.dotted_order = current_dotted_order + self.order_map[run.id] = (run.trace_id, run.dotted_order) + self.run_map[str(run.id)] = run + + def _get_run( + self, run_id: UUID, run_type: Union[str, Set[str], None] = None + ) -> Run: + try: + run = self.run_map[str(run_id)] + except KeyError as exc: + raise TracerException(f"No indexed run ID {run_id}.") from exc + + if isinstance(run_type, str): + run_types: Union[Set[str], None] = {run_type} + else: + run_types = run_type + if run_types is not None and run.run_type not in run_types: + raise TracerException( + f"Found {run.run_type} run at ID {run_id}, " + f"but expected {run_types} run." + ) + return run + + def _create_chat_model_run( + self, + serialized: Dict[str, Any], + messages: List[List[BaseMessage]], + run_id: UUID, + tags: Optional[List[str]] = None, + parent_run_id: Optional[UUID] = None, + metadata: Optional[Dict[str, Any]] = None, + name: Optional[str] = None, + **kwargs: Any, + ) -> Run: + """Create a chat model run.""" + if self._schema_format not in ("streaming_events", "original+chat"): + # Please keep this un-implemented for backwards compatibility. + # When it's unimplemented old tracers that use the "original" format + # fallback on the on_llm_start method implementation if they + # find that the on_chat_model_start method is not implemented. + # This can eventually be cleaned up by writing a "modern" tracer + # that has all the updated schema changes corresponding to + # the "streaming_events" format. + raise NotImplementedError( + f"Chat model tracing is not supported in " + f"for {self._schema_format} format." + ) + start_time = datetime.now(timezone.utc) + if metadata: + kwargs.update({"metadata": metadata}) + return Run( + id=run_id, + parent_run_id=parent_run_id, + serialized=serialized, + inputs={"messages": [[dumpd(msg) for msg in batch] for batch in messages]}, + extra=kwargs, + events=[{"name": "start", "time": start_time}], + start_time=start_time, + # WARNING: This is valid ONLY for streaming_events. + # run_type="llm" is what's used by virtually all tracers. 
+ # Changing this to "chat_model" may break triggering on_llm_start + run_type="chat_model", + tags=tags, + name=name, # type: ignore[arg-type] + ) + + def _create_llm_run( + self, + serialized: Dict[str, Any], + prompts: List[str], + run_id: UUID, + tags: Optional[List[str]] = None, + parent_run_id: Optional[UUID] = None, + metadata: Optional[Dict[str, Any]] = None, + name: Optional[str] = None, + **kwargs: Any, + ) -> Run: + """Create a llm run""" + start_time = datetime.now(timezone.utc) + if metadata: + kwargs.update({"metadata": metadata}) + return Run( + id=run_id, + parent_run_id=parent_run_id, + serialized=serialized, + # TODO: Figure out how to expose kwargs here + inputs={"prompts": prompts}, + extra=kwargs, + events=[{"name": "start", "time": start_time}], + start_time=start_time, + run_type="llm", + tags=tags or [], + name=name, # type: ignore[arg-type] + ) + + def _llm_run_with_token_event( + self, + token: str, + run_id: UUID, + chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]] = None, + parent_run_id: Optional[UUID] = None, + **kwargs: Any, + ) -> Run: + """ + Append token event to LLM run and return the run + """ + llm_run = self._get_run(run_id, run_type={"llm", "chat_model"}) + event_kwargs: Dict[str, Any] = {"token": token} + if chunk: + event_kwargs["chunk"] = chunk + llm_run.events.append( + { + "name": "new_token", + "time": datetime.now(timezone.utc), + "kwargs": event_kwargs, + }, + ) + return llm_run + + def _llm_run_with_retry_event( + self, + retry_state: RetryCallState, + run_id: UUID, + **kwargs: Any, + ) -> Run: + llm_run = self._get_run(run_id) + retry_d: Dict[str, Any] = { + "slept": retry_state.idle_for, + "attempt": retry_state.attempt_number, + } + if retry_state.outcome is None: + retry_d["outcome"] = "N/A" + elif retry_state.outcome.failed: + retry_d["outcome"] = "failed" + exception = retry_state.outcome.exception() + retry_d["exception"] = str(exception) + retry_d["exception_type"] = exception.__class__.__name__ + else: + retry_d["outcome"] = "success" + retry_d["result"] = str(retry_state.outcome.result()) + llm_run.events.append( + { + "name": "retry", + "time": datetime.now(timezone.utc), + "kwargs": retry_d, + }, + ) + return llm_run + + def _complete_llm_run(self, response: LLMResult, run_id: UUID) -> Run: + llm_run = self._get_run(run_id, run_type={"llm", "chat_model"}) + llm_run.outputs = response.dict() + for i, generations in enumerate(response.generations): + for j, generation in enumerate(generations): + output_generation = llm_run.outputs["generations"][i][j] + if "message" in output_generation: + output_generation["message"] = dumpd( + cast(ChatGeneration, generation).message + ) + llm_run.end_time = datetime.now(timezone.utc) + llm_run.events.append({"name": "end", "time": llm_run.end_time}) + + return llm_run + + def _errored_llm_run(self, error: BaseException, run_id: UUID) -> Run: + llm_run = self._get_run(run_id, run_type={"llm", "chat_model"}) + llm_run.error = self._get_stacktrace(error) + llm_run.end_time = datetime.now(timezone.utc) + llm_run.events.append({"name": "error", "time": llm_run.end_time}) + + return llm_run + + def _create_chain_run( + self, + serialized: Dict[str, Any], + inputs: Dict[str, Any], + run_id: UUID, + tags: Optional[List[str]] = None, + parent_run_id: Optional[UUID] = None, + metadata: Optional[Dict[str, Any]] = None, + run_type: Optional[str] = None, + name: Optional[str] = None, + **kwargs: Any, + ) -> Run: + """Create a chain Run""" + start_time = datetime.now(timezone.utc) + if metadata: + 
kwargs.update({"metadata": metadata}) + return Run( + id=run_id, + parent_run_id=parent_run_id, + serialized=serialized, + inputs=self._get_chain_inputs(inputs), + extra=kwargs, + events=[{"name": "start", "time": start_time}], + start_time=start_time, + child_runs=[], + run_type=run_type or "chain", + name=name, # type: ignore[arg-type] + tags=tags or [], + ) + + def _get_chain_inputs(self, inputs: Any) -> Any: + """Get the inputs for a chain run.""" + if self._schema_format in ("original", "original+chat"): + return inputs if isinstance(inputs, dict) else {"input": inputs} + elif self._schema_format == "streaming_events": + return { + "input": inputs, + } + else: + raise ValueError(f"Invalid format: {self._schema_format}") + + def _get_chain_outputs(self, outputs: Any) -> Any: + """Get the outputs for a chain run.""" + if self._schema_format in ("original", "original+chat"): + return outputs if isinstance(outputs, dict) else {"output": outputs} + elif self._schema_format == "streaming_events": + return { + "output": outputs, + } + else: + raise ValueError(f"Invalid format: {self._schema_format}") + + def _complete_chain_run( + self, + outputs: Dict[str, Any], + run_id: UUID, + inputs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Run: + """Update a chain run with outputs and end time.""" + chain_run = self._get_run(run_id) + chain_run.outputs = self._get_chain_outputs(outputs) + chain_run.end_time = datetime.now(timezone.utc) + chain_run.events.append({"name": "end", "time": chain_run.end_time}) + if inputs is not None: + chain_run.inputs = self._get_chain_inputs(inputs) + return chain_run + + def _errored_chain_run( + self, + error: BaseException, + inputs: Optional[Dict[str, Any]], + run_id: UUID, + **kwargs: Any, + ) -> Run: + chain_run = self._get_run(run_id) + chain_run.error = self._get_stacktrace(error) + chain_run.end_time = datetime.now(timezone.utc) + chain_run.events.append({"name": "error", "time": chain_run.end_time}) + if inputs is not None: + chain_run.inputs = self._get_chain_inputs(inputs) + return chain_run + + def _create_tool_run( + self, + serialized: Dict[str, Any], + input_str: str, + run_id: UUID, + tags: Optional[List[str]] = None, + parent_run_id: Optional[UUID] = None, + metadata: Optional[Dict[str, Any]] = None, + name: Optional[str] = None, + inputs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> Run: + """Create a tool run.""" + start_time = datetime.now(timezone.utc) + if metadata: + kwargs.update({"metadata": metadata}) + + if self._schema_format in ("original", "original+chat"): + inputs = {"input": input_str} + elif self._schema_format == "streaming_events": + inputs = {"input": inputs} + else: + raise AssertionError(f"Invalid format: {self._schema_format}") + + return Run( + id=run_id, + parent_run_id=parent_run_id, + serialized=serialized, + # Wrapping in dict since Run requires a dict object. 
+ inputs=inputs, + extra=kwargs, + events=[{"name": "start", "time": start_time}], + start_time=start_time, + child_runs=[], + run_type="tool", + tags=tags or [], + name=name, # type: ignore[arg-type] + ) + + def _complete_tool_run( + self, + output: Dict[str, Any], + run_id: UUID, + **kwargs: Any, + ) -> Run: + """Update a tool run with outputs and end time.""" + tool_run = self._get_run(run_id, run_type="tool") + tool_run.outputs = {"output": output} + tool_run.end_time = datetime.now(timezone.utc) + tool_run.events.append({"name": "end", "time": tool_run.end_time}) + return tool_run + + def _errored_tool_run( + self, + error: BaseException, + run_id: UUID, + **kwargs: Any, + ) -> Run: + """Update a tool run with error and end time.""" + tool_run = self._get_run(run_id, run_type="tool") + tool_run.error = self._get_stacktrace(error) + tool_run.end_time = datetime.now(timezone.utc) + tool_run.events.append({"name": "error", "time": tool_run.end_time}) + return tool_run + + def _create_retrieval_run( + self, + serialized: Dict[str, Any], + query: str, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + tags: Optional[List[str]] = None, + metadata: Optional[Dict[str, Any]] = None, + name: Optional[str] = None, + **kwargs: Any, + ) -> Run: + """Create a retrieval run.""" + start_time = datetime.now(timezone.utc) + if metadata: + kwargs.update({"metadata": metadata}) + return Run( + id=run_id, + name=name or "Retriever", + parent_run_id=parent_run_id, + serialized=serialized, + inputs={"query": query}, + extra=kwargs, + events=[{"name": "start", "time": start_time}], + start_time=start_time, + tags=tags, + child_runs=[], + run_type="retriever", + ) + + def _complete_retrieval_run( + self, + documents: Sequence[Document], + run_id: UUID, + **kwargs: Any, + ) -> Run: + """Update a retrieval run with outputs and end time.""" + retrieval_run = self._get_run(run_id, run_type="retriever") + retrieval_run.outputs = {"documents": documents} + retrieval_run.end_time = datetime.now(timezone.utc) + retrieval_run.events.append({"name": "end", "time": retrieval_run.end_time}) + return retrieval_run + + def _errored_retrieval_run( + self, + error: BaseException, + run_id: UUID, + **kwargs: Any, + ) -> Run: + retrieval_run = self._get_run(run_id, run_type="retriever") + retrieval_run.error = self._get_stacktrace(error) + retrieval_run.end_time = datetime.now(timezone.utc) + retrieval_run.events.append({"name": "error", "time": retrieval_run.end_time}) + return retrieval_run + + def __deepcopy__(self, memo: dict) -> _TracerCore: + """Deepcopy the tracer.""" + return self + + def __copy__(self) -> _TracerCore: + """Copy the tracer.""" + return self + + def _end_trace(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """End a trace for a run.""" + + def _on_run_create(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process a run upon creation.""" + + def _on_run_update(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process a run upon update.""" + + def _on_llm_start(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the LLM Run upon start.""" + + def _on_llm_new_token( + self, + run: Run, + token: str, + chunk: Optional[Union[GenerationChunk, ChatGenerationChunk]], + ) -> Union[None, Coroutine[Any, Any, None]]: + """Process new LLM token.""" + + def _on_llm_end(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the LLM Run.""" + + def _on_llm_error(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process 
the LLM Run upon error.""" + + def _on_chain_start(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Chain Run upon start.""" + + def _on_chain_end(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Chain Run.""" + + def _on_chain_error(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Chain Run upon error.""" + + def _on_tool_start(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Tool Run upon start.""" + + def _on_tool_end(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Tool Run.""" + + def _on_tool_error(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Tool Run upon error.""" + + def _on_chat_model_start(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Chat Model Run upon start.""" + + def _on_retriever_start(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Retriever Run upon start.""" + + def _on_retriever_end(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Retriever Run.""" + + def _on_retriever_error(self, run: Run) -> Union[None, Coroutine[Any, Any, None]]: + """Process the Retriever Run upon error.""" diff --git a/libs/core/pyproject.toml b/libs/core/pyproject.toml index 353729b2fe902..5b11badd770b2 100644 --- a/libs/core/pyproject.toml +++ b/libs/core/pyproject.toml @@ -1,25 +1,21 @@ - [tool.poetry] name = "gigachain-core" -version = "0.2.0.1" +version = "0.2.10" description = "Building applications with LLMs through composability" authors = [] license = "MIT" readme = "README.md" -repository = "https://github.com/ai-forever/gigachain" -packages = [ - {include = "langchain_core"} -] +repository = "https://github.com/langchain-ai/langchain" + [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -pydantic = ">=1,<3" -langsmith = "^0.1.0" -tenacity = "^8.1.0" +pydantic = [{version = ">=1,<3", python = "<3.12.4"}, {version = "^2.7.4", python=">=3.12.4"}] +langsmith = "^0.1.75" +tenacity = "^8.1.0,!=8.4.0" jsonpatch = "^1.33" PyYAML = ">=5.3" -packaging = "^23.2" -jinja2 = { version = "^3", optional = true } +packaging = ">=23.2,<25" [tool.poetry.group.lint] optional = true @@ -61,7 +57,12 @@ pytest-asyncio = "^0.21.1" grandalf = "^0.8" pytest-profiling = "^1.7.0" responses = "^0.25.0" -numpy = "^1.24.0" + +# Support Python 3.8 and 3.12+. 
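+# (numpy 1.24 is the last series that still supports Python 3.8, and 1.26 is the
+# first with prebuilt wheels for Python 3.12, hence the split constraint below.)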
+numpy = [ + { version = "^1.24.0", python = "<3.12" }, + { version = "^1.26.0", python = ">=3.12" }, +] [tool.poetry.group.test_integration] @@ -69,7 +70,6 @@ optional = true dependencies = {} [tool.poetry.extras] -extended_testing = ["jinja2"] [tool.ruff.lint] select = [ @@ -81,7 +81,6 @@ select = [ [tool.mypy] disallow_untyped_defs = "True" -ignore_missing_imports = "True" exclude = ["notebooks", "examples", "example_data", "langchain_core/pydantic"] [[tool.mypy.overrides]] @@ -115,4 +114,3 @@ markers = [ "compile: mark placeholder test used to compile integration tests without running them", ] asyncio_mode = "auto" - diff --git a/libs/core/tests/unit_tests/documents/test_str.py b/libs/core/tests/unit_tests/documents/test_str.py new file mode 100644 index 0000000000000..fd44d06a98ece --- /dev/null +++ b/libs/core/tests/unit_tests/documents/test_str.py @@ -0,0 +1,20 @@ +from langchain_core.documents import Document + + +def test_str() -> None: + assert str(Document(page_content="Hello, World!")) == "page_content='Hello, World!'" + assert ( + str(Document(page_content="Hello, World!", metadata={"a": 3})) + == "page_content='Hello, World!' metadata={'a': 3}" + ) + + +def test_repr() -> None: + assert ( + repr(Document(page_content="Hello, World!")) + == "Document(page_content='Hello, World!')" + ) + assert ( + repr(Document(page_content="Hello, World!", metadata={"a": 3})) + == "Document(page_content='Hello, World!', metadata={'a': 3})" + ) diff --git a/libs/core/tests/unit_tests/messages/test_utils.py b/libs/core/tests/unit_tests/messages/test_utils.py new file mode 100644 index 0000000000000..0aea8d7e11b3c --- /dev/null +++ b/libs/core/tests/unit_tests/messages/test_utils.py @@ -0,0 +1,337 @@ +from typing import Dict, List, Type + +import pytest + +from langchain_core.messages import ( + AIMessage, + BaseMessage, + HumanMessage, + SystemMessage, + ToolCall, + ToolMessage, +) +from langchain_core.messages.utils import ( + filter_messages, + merge_message_runs, + trim_messages, +) + + +@pytest.mark.parametrize("msg_cls", [HumanMessage, AIMessage, SystemMessage]) +def test_merge_message_runs_str(msg_cls: Type[BaseMessage]) -> None: + messages = [msg_cls("foo"), msg_cls("bar"), msg_cls("baz")] + messages_copy = [m.copy(deep=True) for m in messages] + expected = [msg_cls("foo\nbar\nbaz")] + actual = merge_message_runs(messages) + assert actual == expected + assert messages == messages_copy + + +def test_merge_message_runs_content() -> None: + messages = [ + AIMessage("foo", id="1"), + AIMessage( + [ + {"text": "bar", "type": "text"}, + {"image_url": "...", "type": "image_url"}, + ], + tool_calls=[ToolCall(name="foo_tool", args={"x": 1}, id="tool1")], + id="2", + ), + AIMessage( + "baz", + tool_calls=[ToolCall(name="foo_tool", args={"x": 5}, id="tool2")], + id="3", + ), + ] + messages_copy = [m.copy(deep=True) for m in messages] + expected = [ + AIMessage( + [ + "foo", + {"text": "bar", "type": "text"}, + {"image_url": "...", "type": "image_url"}, + "baz", + ], + tool_calls=[ + ToolCall(name="foo_tool", args={"x": 1}, id="tool1"), + ToolCall(name="foo_tool", args={"x": 5}, id="tool2"), + ], + id="1", + ), + ] + actual = merge_message_runs(messages) + assert actual == expected + invoked = merge_message_runs().invoke(messages) + assert actual == invoked + assert messages == messages_copy + + +def test_merge_messages_tool_messages() -> None: + messages = [ + ToolMessage("foo", tool_call_id="1"), + ToolMessage("bar", tool_call_id="2"), + ] + messages_copy = [m.copy(deep=True) for m in messages] + 
actual = merge_message_runs(messages) + assert actual == messages + assert messages == messages_copy + + +@pytest.mark.parametrize( + "filters", + [ + {"include_names": ["blur"]}, + {"exclude_names": ["blah"]}, + {"include_ids": ["2"]}, + {"exclude_ids": ["1"]}, + {"include_types": "human"}, + {"include_types": ["human"]}, + {"include_types": HumanMessage}, + {"include_types": [HumanMessage]}, + {"exclude_types": "system"}, + {"exclude_types": ["system"]}, + {"exclude_types": SystemMessage}, + {"exclude_types": [SystemMessage]}, + {"include_names": ["blah", "blur"], "exclude_types": [SystemMessage]}, + ], +) +def test_filter_message(filters: Dict) -> None: + messages = [ + SystemMessage("foo", name="blah", id="1"), + HumanMessage("bar", name="blur", id="2"), + ] + messages_copy = [m.copy(deep=True) for m in messages] + expected = messages[1:2] + actual = filter_messages(messages, **filters) + assert expected == actual + invoked = filter_messages(**filters).invoke(messages) + assert invoked == actual + assert messages == messages_copy + + +_MESSAGES_TO_TRIM = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="first"), + AIMessage( + [ + {"type": "text", "text": "This is the FIRST 4 token block."}, + {"type": "text", "text": "This is the SECOND 4 token block."}, + ], + id="second", + ), + HumanMessage("This is a 4 token text.", id="third"), + AIMessage("This is a 4 token text.", id="fourth"), +] + +_MESSAGES_TO_TRIM_COPY = [m.copy(deep=True) for m in _MESSAGES_TO_TRIM] + + +def test_trim_messages_first_30() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="first"), + ] + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="first", + ) + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_first_30_allow_partial() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="first"), + AIMessage( + [{"type": "text", "text": "This is the FIRST 4 token block."}], id="second" + ), + ] + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="first", + allow_partial=True, + ) + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_first_30_allow_partial_end_on_human() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="first"), + ] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="first", + allow_partial=True, + end_on="human", + ) + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_last_30_include_system() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="third"), + AIMessage("This is a 4 token text.", id="fourth"), + ] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + include_system=True, + token_counter=dummy_token_counter, + strategy="last", + ) + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_last_40_include_system_allow_partial() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + AIMessage( + [ + {"type": "text", "text": "This is the SECOND 4 token block."}, + ], + id="second", + ), 
+ HumanMessage("This is a 4 token text.", id="third"), + AIMessage("This is a 4 token text.", id="fourth"), + ] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=40, + token_counter=dummy_token_counter, + strategy="last", + allow_partial=True, + include_system=True, + ) + + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_last_30_include_system_allow_partial_end_on_human() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + AIMessage( + [ + {"type": "text", "text": "This is the SECOND 4 token block."}, + ], + id="second", + ), + HumanMessage("This is a 4 token text.", id="third"), + ] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="last", + allow_partial=True, + include_system=True, + end_on="human", + ) + + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_last_40_include_system_allow_partial_start_on_human() -> None: + expected = [ + SystemMessage("This is a 4 token text."), + HumanMessage("This is a 4 token text.", id="third"), + AIMessage("This is a 4 token text.", id="fourth"), + ] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=30, + token_counter=dummy_token_counter, + strategy="last", + allow_partial=True, + include_system=True, + start_on="human", + ) + + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_allow_partial_text_splitter() -> None: + expected = [ + HumanMessage("a 4 token text.", id="third"), + AIMessage("This is a 4 token text.", id="fourth"), + ] + + def count_words(msgs: List[BaseMessage]) -> int: + count = 0 + for msg in msgs: + if isinstance(msg.content, str): + count += len(msg.content.split(" ")) + else: + count += len( + " ".join(block["text"] for block in msg.content).split(" ") # type: ignore[index] + ) + return count + + def _split_on_space(text: str) -> List[str]: + splits = text.split(" ") + return [s + " " for s in splits[:-1]] + splits[-1:] + + actual = trim_messages( + _MESSAGES_TO_TRIM, + max_tokens=10, + token_counter=count_words, + strategy="last", + allow_partial=True, + text_splitter=_split_on_space, + ) + assert actual == expected + assert _MESSAGES_TO_TRIM == _MESSAGES_TO_TRIM_COPY + + +def test_trim_messages_invoke() -> None: + actual = trim_messages(max_tokens=10, token_counter=dummy_token_counter).invoke( + _MESSAGES_TO_TRIM + ) + expected = trim_messages( + _MESSAGES_TO_TRIM, max_tokens=10, token_counter=dummy_token_counter + ) + assert actual == expected + + +def dummy_token_counter(messages: List[BaseMessage]) -> int: + # treat each message like it adds 3 default tokens at the beginning + # of the message and at the end of the message. 3 + 4 + 3 = 10 tokens + # per message. 
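+    # For example, a message whose content is a list contributes the 4 default
+    # content tokens once per block, so a two-block message counts as
+    # 3 + 2 * 4 + 3 = 14 tokens under this scheme.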
+ + default_content_len = 4 + default_msg_prefix_len = 3 + default_msg_suffix_len = 3 + + count = 0 + for msg in messages: + if isinstance(msg.content, str): + count += ( + default_msg_prefix_len + default_content_len + default_msg_suffix_len + ) + if isinstance(msg.content, list): + count += ( + default_msg_prefix_len + + len(msg.content) * default_content_len + + default_msg_suffix_len + ) + return count diff --git a/libs/core/tests/unit_tests/runnables/test_tracing_interops.py b/libs/core/tests/unit_tests/runnables/test_tracing_interops.py new file mode 100644 index 0000000000000..2f7f7ea252763 --- /dev/null +++ b/libs/core/tests/unit_tests/runnables/test_tracing_interops.py @@ -0,0 +1,201 @@ +import json +import sys +from unittest.mock import MagicMock, patch + +import pytest +from langsmith import Client, traceable +from langsmith.run_helpers import tracing_context + +from langchain_core.runnables.base import RunnableLambda +from langchain_core.tracers.langchain import LangChainTracer + + +def _get_posts(client: Client) -> list: + mock_calls = client.session.request.mock_calls # type: ignore + posts = [] + for call in mock_calls: + if call.args: + if call.args[0] != "POST": + continue + assert call.args[0] == "POST" + assert call.args[1].startswith("https://api.smith.langchain.com") + body = json.loads(call.kwargs["data"]) + if "post" in body: + # Batch request + assert body["post"] + posts.extend(body["post"]) + else: + posts.append(body) + return posts + + +def test_config_traceable_handoff() -> None: + mock_session = MagicMock() + mock_client_ = Client( + session=mock_session, api_key="test", auto_batch_tracing=False + ) + tracer = LangChainTracer(client=mock_client_) + + @traceable + def my_great_great_grandchild_function(a: int) -> int: + return a + 1 + + @RunnableLambda + def my_great_grandchild_function(a: int) -> int: + return my_great_great_grandchild_function(a) + + @RunnableLambda + def my_grandchild_function(a: int) -> int: + return my_great_grandchild_function.invoke(a) + + @traceable + def my_child_function(a: int) -> int: + return my_grandchild_function.invoke(a) * 3 + + @traceable() + def my_function(a: int) -> int: + return my_child_function(a) + + def my_parent_function(a: int) -> int: + return my_function(a) + + my_parent_runnable = RunnableLambda(my_parent_function) + + assert my_parent_runnable.invoke(1, {"callbacks": [tracer]}) == 6 + posts = _get_posts(mock_client_) + # There should have been 6 runs created, + # one for each function invocation + assert len(posts) == 6 + name_to_body = {post["name"]: post for post in posts} + ordered_names = [ + "my_parent_function", + "my_function", + "my_child_function", + "my_grandchild_function", + "my_great_grandchild_function", + "my_great_great_grandchild_function", + ] + trace_id = posts[0]["trace_id"] + last_dotted_order = None + parent_run_id = None + for name in ordered_names: + id_ = name_to_body[name]["id"] + parent_run_id_ = name_to_body[name]["parent_run_id"] + if parent_run_id_ is not None: + assert parent_run_id == parent_run_id_ + assert name in name_to_body + # All within the same trace + assert name_to_body[name]["trace_id"] == trace_id + dotted_order: str = name_to_body[name]["dotted_order"] + assert dotted_order is not None + if last_dotted_order is not None: + assert dotted_order > last_dotted_order + assert dotted_order.startswith(last_dotted_order), ( + "Unexpected dotted order for run" + f" {name}\n{dotted_order}\n{last_dotted_order}" + ) + last_dotted_order = dotted_order + parent_run_id = id_ + + 
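+# Note on the dotted_order assertions above: each segment is formatted as
+# "<UTC start time>Z<run id>", and a child's dotted_order is its parent's
+# dotted_order plus "." plus its own segment, so the startswith() and ">"
+# checks verify the parent/child chain and the start ordering respectively.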
+@pytest.mark.skipif( + sys.version_info < (3, 11), reason="Asyncio context vars require Python 3.11+" +) +async def test_config_traceable_async_handoff() -> None: + mock_session = MagicMock() + mock_client_ = Client( + session=mock_session, api_key="test", auto_batch_tracing=False + ) + tracer = LangChainTracer(client=mock_client_) + + @traceable + def my_great_great_grandchild_function(a: int) -> int: + return a + 1 + + @RunnableLambda + def my_great_grandchild_function(a: int) -> int: + return my_great_great_grandchild_function(a) + + @RunnableLambda # type: ignore + async def my_grandchild_function(a: int) -> int: + return my_great_grandchild_function.invoke(a) + + @traceable + async def my_child_function(a: int) -> int: + return await my_grandchild_function.ainvoke(a) * 3 # type: ignore + + @traceable() + async def my_function(a: int) -> int: + return await my_child_function(a) + + async def my_parent_function(a: int) -> int: + return await my_function(a) + + my_parent_runnable = RunnableLambda(my_parent_function) # type: ignore + result = await my_parent_runnable.ainvoke(1, {"callbacks": [tracer]}) + assert result == 6 + posts = _get_posts(mock_client_) + # There should have been 6 runs created, + # one for each function invocation + assert len(posts) == 6 + name_to_body = {post["name"]: post for post in posts} + ordered_names = [ + "my_parent_function", + "my_function", + "my_child_function", + "my_grandchild_function", + "my_great_grandchild_function", + "my_great_great_grandchild_function", + ] + trace_id = posts[0]["trace_id"] + last_dotted_order = None + parent_run_id = None + for name in ordered_names: + id_ = name_to_body[name]["id"] + parent_run_id_ = name_to_body[name]["parent_run_id"] + if parent_run_id_ is not None: + assert parent_run_id == parent_run_id_ + assert name in name_to_body + # All within the same trace + assert name_to_body[name]["trace_id"] == trace_id + dotted_order: str = name_to_body[name]["dotted_order"] + assert dotted_order is not None + if last_dotted_order is not None: + assert dotted_order > last_dotted_order + assert dotted_order.startswith(last_dotted_order), ( + "Unexpected dotted order for run" + f" {name}\n{dotted_order}\n{last_dotted_order}" + ) + last_dotted_order = dotted_order + parent_run_id = id_ + + +@patch("langchain_core.tracers.langchain.get_client") +@pytest.mark.parametrize("enabled", [None, True, False]) +@pytest.mark.parametrize("env", ["", "true"]) +def test_tracing_enable_disable( + mock_get_client: MagicMock, enabled: bool, env: str +) -> None: + mock_session = MagicMock() + mock_client_ = Client( + session=mock_session, api_key="test", auto_batch_tracing=False + ) + mock_get_client.return_value = mock_client_ + + def my_func(a: int) -> int: + return a + 1 + + env_on = env == "true" + with patch.dict("os.environ", {"LANGSMITH_TRACING": env}): + with tracing_context(enabled=enabled): + RunnableLambda(my_func).invoke(1) + + mock_posts = _get_posts(mock_client_) + if enabled is True: + assert len(mock_posts) == 1 + elif enabled is False: + assert not mock_posts + elif env_on: + assert len(mock_posts) == 1 + else: + assert not mock_posts diff --git a/libs/core/tests/unit_tests/tracers/test_async_base_tracer.py b/libs/core/tests/unit_tests/tracers/test_async_base_tracer.py new file mode 100644 index 0000000000000..f1f04c526cc61 --- /dev/null +++ b/libs/core/tests/unit_tests/tracers/test_async_base_tracer.py @@ -0,0 +1,598 @@ +"""Test Tracer classes.""" + +from __future__ import annotations + +from datetime import datetime, timezone 
+from typing import Any, List +from uuid import uuid4 + +import pytest +from freezegun import freeze_time + +from langchain_core.callbacks import AsyncCallbackManager +from langchain_core.exceptions import TracerException +from langchain_core.messages import HumanMessage +from langchain_core.outputs import LLMResult +from langchain_core.tracers.base import AsyncBaseTracer +from langchain_core.tracers.schemas import Run + +SERIALIZED = {"id": ["llm"]} +SERIALIZED_CHAT = {"id": ["chat_model"]} + + +class FakeAsyncTracer(AsyncBaseTracer): + """Fake tracer to test async based tracers.""" + + def __init__(self) -> None: + """Initialize the tracer.""" + super().__init__() + self.runs: List[Run] = [] + + async def _persist_run(self, run: Run) -> None: + self.runs.append(run) + + +def _compare_run_with_error(run: Any, expected_run: Any) -> None: + if run.child_runs: + assert len(expected_run.child_runs) == len(run.child_runs) + for received, expected in zip(run.child_runs, expected_run.child_runs): + _compare_run_with_error(received, expected) + received = run.dict(exclude={"child_runs"}) + received_err = received.pop("error") + expected = expected_run.dict(exclude={"child_runs"}) + expected_err = expected.pop("error") + + assert received == expected + if expected_err is not None: + assert received_err is not None + assert expected_err in received_err + else: + assert received_err is None + + +@freeze_time("2023-01-01") +async def test_tracer_llm_run() -> None: + """Test tracer on an LLM run.""" + uuid = uuid4() + compare_run = Run( # type: ignore[call-arg] + id=uuid, + parent_run_id=None, + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs={"prompts": []}, + outputs=LLMResult(generations=[[]]), # type: ignore[arg-type] + error=None, + run_type="llm", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + await tracer.on_llm_start(serialized=SERIALIZED, prompts=[], run_id=uuid) + await tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=uuid) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +async def test_tracer_chat_model_run() -> None: + """Test tracer on a Chat Model run.""" + tracer = FakeAsyncTracer() + manager = AsyncCallbackManager(handlers=[tracer]) + run_managers = await manager.on_chat_model_start( + serialized=SERIALIZED_CHAT, messages=[[HumanMessage(content="")]] + ) + compare_run = Run( + id=str(run_managers[0].run_id), # type: ignore[arg-type] + name="chat_model", + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED_CHAT, + inputs=dict(prompts=["Human: "]), + outputs=LLMResult(generations=[[]]), # type: ignore[arg-type] + error=None, + run_type="llm", + trace_id=run_managers[0].run_id, + dotted_order=f"20230101T000000000000Z{run_managers[0].run_id}", + ) + for run_manager in run_managers: + await run_manager.on_llm_end(response=LLMResult(generations=[[]])) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +async def test_tracer_llm_run_errors_no_start() -> None: + """Test tracer on an LLM run without a start.""" + tracer = FakeAsyncTracer() + + with pytest.raises(TracerException): + await 
tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=uuid4()) + + +@freeze_time("2023-01-01") +async def test_tracer_multiple_llm_runs() -> None: + """Test the tracer with multiple runs.""" + uuid = uuid4() + compare_run = Run( + id=uuid, + name="llm", + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=LLMResult(generations=[[]]), # type: ignore[arg-type] + error=None, + run_type="llm", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + num_runs = 10 + for _ in range(num_runs): + await tracer.on_llm_start(serialized=SERIALIZED, prompts=[], run_id=uuid) + await tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=uuid) + + assert tracer.runs == [compare_run] * num_runs + + +@freeze_time("2023-01-01") +async def test_tracer_chain_run() -> None: + """Test tracer on a Chain run.""" + uuid = uuid4() + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "chain"}, + inputs={}, + outputs={}, + error=None, + run_type="chain", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + await tracer.on_chain_start(serialized={"name": "chain"}, inputs={}, run_id=uuid) + await tracer.on_chain_end(outputs={}, run_id=uuid) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +async def test_tracer_tool_run() -> None: + """Test tracer on a Tool run.""" + uuid = uuid4() + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "tool"}, + inputs={"input": "test"}, + outputs={"output": "test"}, + error=None, + run_type="tool", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + await tracer.on_tool_start( + serialized={"name": "tool"}, input_str="test", run_id=uuid + ) + await tracer.on_tool_end("test", run_id=uuid) + assert tracer.runs == [compare_run] + + +@freeze_time("2023-01-01") +async def test_tracer_nested_run() -> None: + """Test tracer on a nested run.""" + tracer = FakeAsyncTracer() + + chain_uuid = uuid4() + tool_uuid = uuid4() + llm_uuid1 = uuid4() + llm_uuid2 = uuid4() + for _ in range(10): + await tracer.on_chain_start( + serialized={"name": "chain"}, inputs={}, run_id=chain_uuid + ) + await tracer.on_tool_start( + serialized={"name": "tool"}, + input_str="test", + run_id=tool_uuid, + parent_run_id=chain_uuid, + ) + await tracer.on_llm_start( + serialized=SERIALIZED, + prompts=[], + run_id=llm_uuid1, + parent_run_id=tool_uuid, + ) + await tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=llm_uuid1) + await tracer.on_tool_end("test", run_id=tool_uuid) + await tracer.on_llm_start( + serialized=SERIALIZED, + prompts=[], + run_id=llm_uuid2, + parent_run_id=chain_uuid, + ) + await tracer.on_llm_end(response=LLMResult(generations=[[]]), 
run_id=llm_uuid2) + await tracer.on_chain_end(outputs={}, run_id=chain_uuid) + + compare_run = Run( # type: ignore[call-arg] + id=str(chain_uuid), # type: ignore[arg-type] + error=None, + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "chain"}, + inputs={}, + outputs={}, + run_type="chain", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}", + child_runs=[ + Run( # type: ignore[call-arg] + id=tool_uuid, + parent_run_id=chain_uuid, + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "tool"}, + inputs=dict(input="test"), + outputs=dict(output="test"), + error=None, + run_type="tool", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{tool_uuid}", + child_runs=[ + Run( # type: ignore[call-arg] + id=str(llm_uuid1), # type: ignore[arg-type] + parent_run_id=str(tool_uuid), # type: ignore[arg-type] + error=None, + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=LLMResult(generations=[[]]), # type: ignore[arg-type] + run_type="llm", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{tool_uuid}.20230101T000000000000Z{llm_uuid1}", + ) + ], + ), + Run( # type: ignore[call-arg] + id=str(llm_uuid2), # type: ignore[arg-type] + parent_run_id=str(chain_uuid), # type: ignore[arg-type] + error=None, + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=LLMResult(generations=[[]]), # type: ignore[arg-type] + run_type="llm", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{llm_uuid2}", + ), + ], + ) + assert tracer.runs[0] == compare_run + assert tracer.runs == [compare_run] * 10 + + +@freeze_time("2023-01-01") +async def test_tracer_llm_run_on_error() -> None: + """Test tracer on an LLM run with an error.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=None, + error=repr(exception), + run_type="llm", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + await tracer.on_llm_start(serialized=SERIALIZED, prompts=[], run_id=uuid) + await tracer.on_llm_error(exception, run_id=uuid) + assert len(tracer.runs) == 1 + _compare_run_with_error(tracer.runs[0], compare_run) + + +@freeze_time("2023-01-01") +async def test_tracer_llm_run_on_error_callback() -> None: + """Test tracer on an LLM run with an error and 
a callback.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + inputs=dict(prompts=[]), + outputs=None, + error=repr(exception), + run_type="llm", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + + class FakeTracerWithLlmErrorCallback(FakeAsyncTracer): + error_run = None + + async def _on_llm_error(self, run: Run) -> None: + self.error_run = run + + tracer = FakeTracerWithLlmErrorCallback() + await tracer.on_llm_start(serialized=SERIALIZED, prompts=[], run_id=uuid) + await tracer.on_llm_error(exception, run_id=uuid) + _compare_run_with_error(tracer.error_run, compare_run) + + +@freeze_time("2023-01-01") +async def test_tracer_chain_run_on_error() -> None: + """Test tracer on a Chain run with an error.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "chain"}, + inputs={}, + outputs=None, + error=repr(exception), + run_type="chain", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + await tracer.on_chain_start(serialized={"name": "chain"}, inputs={}, run_id=uuid) + await tracer.on_chain_error(exception, run_id=uuid) + _compare_run_with_error(tracer.runs[0], compare_run) + + +@freeze_time("2023-01-01") +async def test_tracer_tool_run_on_error() -> None: + """Test tracer on a Tool run with an error.""" + exception = Exception("test") + uuid = uuid4() + + compare_run = Run( # type: ignore[call-arg] + id=str(uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "tool"}, + inputs=dict(input="test"), + outputs=None, + action="{'name': 'tool'}", + error=repr(exception), + run_type="tool", + trace_id=uuid, + dotted_order=f"20230101T000000000000Z{uuid}", + ) + tracer = FakeAsyncTracer() + + await tracer.on_tool_start( + serialized={"name": "tool"}, input_str="test", run_id=uuid + ) + await tracer.on_tool_error(exception, run_id=uuid) + _compare_run_with_error(tracer.runs[0], compare_run) + + +@freeze_time("2023-01-01") +async def test_tracer_nested_runs_on_error() -> None: + """Test tracer on a nested run with an error.""" + exception = Exception("test") + + tracer = FakeAsyncTracer() + chain_uuid = uuid4() + tool_uuid = uuid4() + llm_uuid1 = uuid4() + llm_uuid2 = uuid4() + llm_uuid3 = uuid4() + + for _ in range(3): + await tracer.on_chain_start( + serialized={"name": "chain"}, inputs={}, run_id=chain_uuid + ) + await tracer.on_llm_start( + serialized=SERIALIZED, + prompts=[], + run_id=llm_uuid1, + parent_run_id=chain_uuid, + ) + await tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=llm_uuid1) + await tracer.on_llm_start( + serialized=SERIALIZED, + prompts=[], + run_id=llm_uuid2, + parent_run_id=chain_uuid, + ) + await 
tracer.on_llm_end(response=LLMResult(generations=[[]]), run_id=llm_uuid2) + await tracer.on_tool_start( + serialized={"name": "tool"}, + input_str="test", + run_id=tool_uuid, + parent_run_id=chain_uuid, + ) + await tracer.on_llm_start( + serialized=SERIALIZED, + prompts=[], + run_id=llm_uuid3, + parent_run_id=tool_uuid, + ) + await tracer.on_llm_error(exception, run_id=llm_uuid3) + await tracer.on_tool_error(exception, run_id=tool_uuid) + await tracer.on_chain_error(exception, run_id=chain_uuid) + + compare_run = Run( # type: ignore[call-arg] + id=str(chain_uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "chain"}, + error=repr(exception), + inputs={}, + outputs=None, + run_type="chain", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}", + child_runs=[ + Run( # type: ignore[call-arg] + id=str(llm_uuid1), # type: ignore[arg-type] + parent_run_id=str(chain_uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + error=None, + inputs=dict(prompts=[]), + outputs=LLMResult(generations=[[]], llm_output=None), # type: ignore[arg-type] + run_type="llm", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{llm_uuid1}", + ), + Run( # type: ignore[call-arg] + id=str(llm_uuid2), # type: ignore[arg-type] + parent_run_id=str(chain_uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "end", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + error=None, + inputs=dict(prompts=[]), + outputs=LLMResult(generations=[[]], llm_output=None), # type: ignore[arg-type] + run_type="llm", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{llm_uuid2}", + ), + Run( # type: ignore[call-arg] + id=str(tool_uuid), # type: ignore[arg-type] + parent_run_id=str(chain_uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized={"name": "tool"}, + error=repr(exception), + inputs=dict(input="test"), + outputs=None, + action="{'name': 'tool'}", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{tool_uuid}", + child_runs=[ + Run( # type: ignore[call-arg] + id=str(llm_uuid3), # type: ignore[arg-type] + parent_run_id=str(tool_uuid), # type: ignore[arg-type] + start_time=datetime.now(timezone.utc), + end_time=datetime.now(timezone.utc), + events=[ + {"name": "start", "time": datetime.now(timezone.utc)}, + {"name": "error", "time": datetime.now(timezone.utc)}, + ], + extra={}, + serialized=SERIALIZED, + error=repr(exception), + inputs=dict(prompts=[]), + outputs=None, + run_type="llm", + trace_id=chain_uuid, + dotted_order=f"20230101T000000000000Z{chain_uuid}.20230101T000000000000Z{tool_uuid}.20230101T000000000000Z{llm_uuid3}", + ) + ], + run_type="tool", + ), + ], 
+ ) + assert len(tracer.runs) == 3 + for run in tracer.runs: + _compare_run_with_error(run, compare_run) diff --git a/libs/core/tests/unit_tests/utils/test_env.py b/libs/core/tests/unit_tests/utils/test_env.py new file mode 100644 index 0000000000000..3cf6d027354af --- /dev/null +++ b/libs/core/tests/unit_tests/utils/test_env.py @@ -0,0 +1,64 @@ +import pytest + +from langchain_core.utils.env import get_from_dict_or_env + + +def test_get_from_dict_or_env() -> None: + assert ( + get_from_dict_or_env( + { + "a": "foo", + }, + ["a"], + "__SOME_KEY_IN_ENV", + ) + == "foo" + ) + + assert ( + get_from_dict_or_env( + { + "a": "foo", + }, + ["b", "a"], + "__SOME_KEY_IN_ENV", + ) + == "foo" + ) + + assert ( + get_from_dict_or_env( + { + "a": "foo", + }, + "a", + "__SOME_KEY_IN_ENV", + ) + == "foo" + ) + + assert ( + get_from_dict_or_env( + { + "a": "foo", + }, + "not exists", + "__SOME_KEY_IN_ENV", + default="default", + ) + == "default" + ) + + # Not the most obvious behavior, but + # this is how it works right now + with pytest.raises(ValueError): + assert ( + get_from_dict_or_env( + { + "a": "foo", + }, + "not exists", + "__SOME_KEY_IN_ENV", + ) + is None + ) diff --git a/libs/experimental/extended_testing_deps.txt b/libs/experimental/extended_testing_deps.txt new file mode 100644 index 0000000000000..06ab41ba342f1 --- /dev/null +++ b/libs/experimental/extended_testing_deps.txt @@ -0,0 +1,8 @@ +presidio-anonymizer>=2.2.352,<3 +presidio-analyzer>=2.2.352,<3 +faker>=19.3.1,<20 +vowpal-wabbit-next==0.7.0 +sentence-transformers>=2,<3 +jinja2>=3,<4 +pandas>=2.0.1,<3 +tabulate>=0.9.0,<1 diff --git a/libs/experimental/langchain_experimental/fallacy_removal/base.py b/libs/experimental/langchain_experimental/fallacy_removal/base.py new file mode 100644 index 0000000000000..97df55e798dff --- /dev/null +++ b/libs/experimental/langchain_experimental/fallacy_removal/base.py @@ -0,0 +1,182 @@ +"""Chain for applying removals of logical fallacies.""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from langchain.chains.base import Chain +from langchain.chains.llm import LLMChain +from langchain.schema import BasePromptTemplate +from langchain_core.callbacks.manager import CallbackManagerForChainRun +from langchain_core.language_models import BaseLanguageModel + +from langchain_experimental.fallacy_removal.fallacies import FALLACIES +from langchain_experimental.fallacy_removal.models import LogicalFallacy +from langchain_experimental.fallacy_removal.prompts import ( + FALLACY_CRITIQUE_PROMPT, + FALLACY_REVISION_PROMPT, +) + + +class FallacyChain(Chain): + """Chain for applying logical fallacy evaluations. + + It is modeled after Constitutional AI and in same format, but + applying logical fallacies as generalized rules to remove in output. + + Example: + .. 
code-block:: python + + from langchain_community.llms import OpenAI + from langchain.chains import LLMChain + from langchain_experimental.fallacy import FallacyChain + from langchain_experimental.fallacy_removal.models import LogicalFallacy + + llm = OpenAI() + + qa_prompt = PromptTemplate( + template="Q: {question} A:", + input_variables=["question"], + ) + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + + fallacy_chain = FallacyChain.from_llm( + llm=llm, + chain=qa_chain, + logical_fallacies=[ + LogicalFallacy( + fallacy_critique_request="Tell if this answer meets criteria.", + fallacy_revision_request=\ + "Give an answer that meets better criteria.", + ) + ], + ) + + fallacy_chain.run(question="How do I know if the earth is round?") + """ + + chain: LLMChain + logical_fallacies: List[LogicalFallacy] + fallacy_critique_chain: LLMChain + fallacy_revision_chain: LLMChain + return_intermediate_steps: bool = False + + @classmethod + def get_fallacies(cls, names: Optional[List[str]] = None) -> List[LogicalFallacy]: + if names is None: + return list(FALLACIES.values()) + else: + return [FALLACIES[name] for name in names] + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + chain: LLMChain, + fallacy_critique_prompt: BasePromptTemplate = FALLACY_CRITIQUE_PROMPT, + fallacy_revision_prompt: BasePromptTemplate = FALLACY_REVISION_PROMPT, + **kwargs: Any, + ) -> "FallacyChain": + """Create a chain from an LLM.""" + fallacy_critique_chain = LLMChain(llm=llm, prompt=fallacy_critique_prompt) + fallacy_revision_chain = LLMChain(llm=llm, prompt=fallacy_revision_prompt) + return cls( + chain=chain, + fallacy_critique_chain=fallacy_critique_chain, + fallacy_revision_chain=fallacy_revision_chain, + **kwargs, + ) + + @property + def input_keys(self) -> List[str]: + """Input keys.""" + return self.chain.input_keys + + @property + def output_keys(self) -> List[str]: + """Output keys.""" + if self.return_intermediate_steps: + return ["output", "fallacy_critiques_and_revisions", "initial_output"] + return ["output"] + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, Any]: + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + response = self.chain.run( + **inputs, + callbacks=_run_manager.get_child("original"), + ) + initial_response = response + input_prompt = self.chain.prompt.format(**inputs) + + _run_manager.on_text( + text="Initial response: " + response + "\n\n", + verbose=self.verbose, + color="yellow", + ) + fallacy_critiques_and_revisions = [] + for logical_fallacy in self.logical_fallacies: + # Fallacy critique below + + fallacy_raw_critique = self.fallacy_critique_chain.run( + input_prompt=input_prompt, + output_from_model=response, + fallacy_critique_request=logical_fallacy.fallacy_critique_request, + callbacks=_run_manager.get_child("fallacy_critique"), + ) + fallacy_critique = self._parse_critique( + output_string=fallacy_raw_critique, + ).strip() + + # if fallacy critique contains "No fallacy critique needed" then done + if "no fallacy critique needed" in fallacy_critique.lower(): + fallacy_critiques_and_revisions.append((fallacy_critique, "")) + continue + + fallacy_revision = self.fallacy_revision_chain.run( + input_prompt=input_prompt, + output_from_model=response, + fallacy_critique_request=logical_fallacy.fallacy_critique_request, + fallacy_critique=fallacy_critique, + revision_request=logical_fallacy.fallacy_revision_request, + 
callbacks=_run_manager.get_child("fallacy_revision"), + ).strip() + response = fallacy_revision + fallacy_critiques_and_revisions.append((fallacy_critique, fallacy_revision)) + + _run_manager.on_text( + text=f"Applying {logical_fallacy.name}..." + "\n\n", + verbose=self.verbose, + color="green", + ) + + _run_manager.on_text( + text="Logical Fallacy: " + fallacy_critique + "\n\n", + verbose=self.verbose, + color="blue", + ) + + _run_manager.on_text( + text="Updated response: " + fallacy_revision + "\n\n", + verbose=self.verbose, + color="yellow", + ) + + final_output: Dict[str, Any] = {"output": response} + if self.return_intermediate_steps: + final_output["initial_output"] = initial_response + final_output[ + "fallacy_critiques_and_revisions" + ] = fallacy_critiques_and_revisions + return final_output + + @staticmethod + def _parse_critique(output_string: str) -> str: + if "Fallacy Revision request:" not in output_string: + return output_string + output_string = output_string.split("Fallacy Revision request:")[0] + if "\n\n" in output_string: + output_string = output_string.split("\n\n")[0] + return output_string diff --git a/libs/experimental/langchain_experimental/fallacy_removal/fallacies.py b/libs/experimental/langchain_experimental/fallacy_removal/fallacies.py new file mode 100644 index 0000000000000..efe4838fda762 --- /dev/null +++ b/libs/experimental/langchain_experimental/fallacy_removal/fallacies.py @@ -0,0 +1,209 @@ +"""Classification of logical fallacies in Natural Language Arguments \ +from https://arxiv.org/pdf/2212.07425.pdf +""" + +from typing import Dict + +from langchain_experimental.fallacy_removal.models import LogicalFallacy + +FALLACIES: Dict[str, LogicalFallacy] = { + "adhominem": LogicalFallacy( + name="adhominem", + fallacy_critique_request="Identify any feasible ways in which \ + the assistant’s last response is attacking the character or \ + personal traits of the person making an argument rather than \ + addressing the actual argument and evidence.", + fallacy_revision_request="Please rewrite the assistant response\ + to remove any attacking the character or personal traits of the\ + person making an argument rather than addressing the actual\ + argument and evidence.", + ), + "adpopulum": LogicalFallacy( + name="adpopulum", + fallacy_critique_request="Identify ways in which the assistant’s\ + last response may be asserting that something must be true or \ + correct simply because many people believe it or do it, without \ + actual facts or evidence to support the conclusion.", + fallacy_revision_request="Please rewrite the assistant response \ + to remove any assertion that something must be true or correct \ + simply because many people believe it or do it, without actual \ + facts or evidence to support the conclusion.", + ), + "appealtoemotion": LogicalFallacy( + name="appealtoemotion", + fallacy_critique_request="Identify all ways in which the \ + assistant’s last response is an attempt to win support for an \ + argument by exploiting or manipulating people's emotions rather \ + than using facts and reason.", + fallacy_revision_request="Please rewrite the assistant response \ + to remove any attempt to win support for an argument by \ + exploiting or manipulating people's emotions rather than using \ + facts and reason.", + ), + "fallacyofextension": LogicalFallacy( + name="fallacyofextension", + fallacy_critique_request="Identify any ways in which the \ + assitant's last response is making broad, sweeping generalizations\ + and extending the implications of 
an argument far beyond what the \ + initial premises support.", + fallacy_revision_request="Rewrite the assistant response to remove\ + all broad, sweeping generalizations and extending the implications\ + of an argument far beyond what the initial premises support.", + ), + "intentionalfallacy": LogicalFallacy( + name="intentionalfallacy", + fallacy_critique_request="Identify any way in which the assistant’s\ + last response may be falsely supporting a conclusion by claiming to\ + understand an author or creator's subconscious intentions without \ + clear evidence.", + fallacy_revision_request="Revise the assistant’s last response to \ + remove any false support of a conclusion by claiming to understand\ + an author or creator's subconscious intentions without clear \ + evidence.", + ), + "falsecausality": LogicalFallacy( + name="falsecausality", + fallacy_critique_request="Think carefully about whether the \ + assistant's last response is jumping to conclusions about causation\ + between events or circumstances without adequate evidence to infer \ + a causal relationship.", + fallacy_revision_request="Please write a new version of the \ + assistant’s response that removes jumping to conclusions about\ + causation between events or circumstances without adequate \ + evidence to infer a causal relationship.", + ), + "falsedilemma": LogicalFallacy( + name="falsedilemma", + fallacy_critique_request="Identify any way in which the \ + assistant's last response may be presenting only two possible options\ + or sides to a situation when there are clearly other alternatives \ + that have not been considered or addressed.", + fallacy_revision_request="Amend the assistant’s last response to \ + remove any presentation of only two possible options or sides to a \ + situation when there are clearly other alternatives that have not \ + been considered or addressed.", + ), + "hastygeneralization": LogicalFallacy( + name="hastygeneralization", + fallacy_critique_request="Identify any way in which the assistant’s\ + last response is making a broad inference or generalization to \ + situations, people, or circumstances that are not sufficiently \ + similar based on a specific example or limited evidence.", + fallacy_revision_request="Please rewrite the assistant response to\ + remove a broad inference or generalization to situations, people, \ + or circumstances that are not sufficiently similar based on a \ + specific example or limited evidence.", + ), + "illogicalarrangement": LogicalFallacy( + name="illogicalarrangement", + fallacy_critique_request="Think carefully about any ways in which \ + the assistant's last response is constructing an argument in a \ + flawed, illogical way, so the premises do not connect to or lead\ + to the conclusion properly.", + fallacy_revision_request="Please rewrite the assistant’s response\ + so as to remove any construction of an argument that is flawed and\ + illogical or if the premises do not connect to or lead to the \ + conclusion properly.", + ), + "fallacyofcredibility": LogicalFallacy( + name="fallacyofcredibility", + fallacy_critique_request="Discuss whether the assistant's last \ + response was dismissing or attacking the credibility of the person\ + making an argument rather than directly addressing the argument \ + itself.", + fallacy_revision_request="Revise the assistant’s response so as \ + that it refrains from dismissing or attacking the credibility of\ + the person making an argument rather than directly addressing \ + the argument itself.", + ), + 
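Each registry entry above pairs a critique request with a matching revision request, and `FallacyChain.get_fallacies` (shown earlier in this diff) is a thin selector over this dict. A small sketch of picking out a subset:

```python
from langchain_experimental.fallacy_removal.base import FallacyChain
from langchain_experimental.fallacy_removal.fallacies import FALLACIES

# With no names, every registered fallacy is returned; with names, each one is
# looked up directly in FALLACIES (an unknown name raises KeyError).
all_fallacies = FallacyChain.get_fallacies()
subset = FallacyChain.get_fallacies(names=["adhominem", "falsecausality"])

assert len(all_fallacies) == len(FALLACIES)
assert [f.name for f in subset] == ["adhominem", "falsecausality"]
```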
"circularreasoning": LogicalFallacy( + name="circularreasoning", + fallacy_critique_request="Discuss ways in which the assistant’s\ + last response may be supporting a premise by simply repeating \ + the premise as the conclusion without giving actual proof or \ + evidence.", + fallacy_revision_request="Revise the assistant’s response if \ + possible so that it’s not supporting a premise by simply \ + repeating the premise as the conclusion without giving actual\ + proof or evidence.", + ), + "beggingthequestion": LogicalFallacy( + name="beggingthequestion", + fallacy_critique_request="Discuss ways in which the assistant's\ + last response is restating the conclusion of an argument as a \ + premise without providing actual support for the conclusion in \ + the first place.", + fallacy_revision_request="Write a revision of the assistant’s \ + response that refrains from restating the conclusion of an \ + argument as a premise without providing actual support for the \ + conclusion in the first place.", + ), + "trickquestion": LogicalFallacy( + name="trickquestion", + fallacy_critique_request="Identify ways in which the \ + assistant’s last response is asking a question that \ + contains or assumes information that has not been proven or \ + substantiated.", + fallacy_revision_request="Please write a new assistant \ + response so that it does not ask a question that contains \ + or assumes information that has not been proven or \ + substantiated.", + ), + "overapplier": LogicalFallacy( + name="overapplier", + fallacy_critique_request="Identify ways in which the assistant’s\ + last response is applying a general rule or generalization to a \ + specific case it was not meant to apply to.", + fallacy_revision_request="Please write a new response that does\ + not apply a general rule or generalization to a specific case \ + it was not meant to apply to.", + ), + "equivocation": LogicalFallacy( + name="equivocation", + fallacy_critique_request="Read the assistant’s last response \ + carefully and identify if it is using the same word or phrase \ + in two different senses or contexts within an argument.", + fallacy_revision_request="Rewrite the assistant response so \ + that it does not use the same word or phrase in two different \ + senses or contexts within an argument.", + ), + "amphiboly": LogicalFallacy( + name="amphiboly", + fallacy_critique_request="Critique the assistant’s last response\ + to see if it is constructing sentences such that the grammar \ + or structure is ambiguous, leading to multiple interpretations.", + fallacy_revision_request="Please rewrite the assistant response\ + to remove any construction of sentences where the grammar or \ + structure is ambiguous or leading to multiple interpretations.", + ), + "accent": LogicalFallacy( + name="accent", + fallacy_critique_request="Discuss whether the assitant's response\ + is misrepresenting an argument by shifting the emphasis of a word\ + or phrase to give it a different meaning than intended.", + fallacy_revision_request="Please rewrite the AI model's response\ + so that it is not misrepresenting an argument by shifting the \ + emphasis of a word or phrase to give it a different meaning than\ + intended.", + ), + "composition": LogicalFallacy( + name="composition", + fallacy_critique_request="Discuss whether the assistant's \ + response is erroneously inferring that something is true of \ + the whole based on the fact that it is true of some part or \ + parts.", + fallacy_revision_request="Please rewrite the assitant's 
response\ + so that it is not erroneously inferring that something is true \ + of the whole based on the fact that it is true of some part or \ + parts.", + ), + "division": LogicalFallacy( + name="division", + fallacy_critique_request="Discuss whether the assistant's last \ + response is erroneously inferring that something is true of the \ + parts based on the fact that it is true of the whole.", + fallacy_revision_request="Please rewrite the assitant's response\ + so that it is not erroneously inferring that something is true \ + of the parts based on the fact that it is true of the whole.", + ), +} diff --git a/libs/experimental/langchain_experimental/fallacy_removal/models.py b/libs/experimental/langchain_experimental/fallacy_removal/models.py new file mode 100644 index 0000000000000..78422b91b7995 --- /dev/null +++ b/libs/experimental/langchain_experimental/fallacy_removal/models.py @@ -0,0 +1,10 @@ +"""Models for the Logical Fallacy Chain""" +from langchain_experimental.pydantic_v1 import BaseModel + + +class LogicalFallacy(BaseModel): + """Logical fallacy.""" + + fallacy_critique_request: str + fallacy_revision_request: str + name: str = "Logical Fallacy" diff --git a/libs/experimental/langchain_experimental/fallacy_removal/prompts.py b/libs/experimental/langchain_experimental/fallacy_removal/prompts.py new file mode 100644 index 0000000000000..5c44da3bb1f6d --- /dev/null +++ b/libs/experimental/langchain_experimental/fallacy_removal/prompts.py @@ -0,0 +1,135 @@ +from langchain_core.prompts.few_shot import FewShotPromptTemplate +from langchain_core.prompts.prompt import PromptTemplate + +fallacy_critique_example = PromptTemplate( + template="""Human: {input_prompt} + +Model: {output_from_model} + +Fallacy Critique Request: {fallacy_critique_request} + +Fallacy Critique: {fallacy_critique}""", + input_variables=[ + "input_prompt", + "output_from_model", + "fallacy_critique_request", + "fallacy_critique", + ], +) + +examples = [ + { + "input_prompt": "If everyone says the Earth is round, \ + how do I know that's correct?", + "output_from_model": "The earth is round because your \ + teacher says it is", + "fallacy_critique_request": "Identify specific ways in\ + which the model’s previous response had a logical fallacy. \ + Also point out potential logical fallacies in the human’s \ + questions and responses. Examples of logical fallacies \ + include but are not limited to ad hominem, ad populum, \ + appeal to emotion and false causality.", + "fallacy_critique": "This statement contains the logical \ + fallacy of Ad Verecundiam or Appeal to Authority. It is \ + a fallacy because it asserts something to be true purely \ + based on the authority of the source making the claim, \ + without any actual evidence to support it. Fallacy \ + Critique Needed", + "fallacy_revision_request": "Please rewrite the model \ + response to remove all logical fallacies, and to \ + politely point out any logical fallacies from the \ + human.", + "fallacy_revision": "The earth is round based on \ + evidence from observations of its curvature from high \ + altitudes, photos from space showing its spherical shape, \ + circumnavigation, and the fact that we see its rounded \ + shadow on the moon during lunar eclipses.", + }, + { + "input_prompt": "Should we invest more in our school \ + music program? After all, studies show students \ + involved in music perform better academically.", + "output_from_model": "I don't think we should invest \ + more in the music program. 
Playing the piccolo won't \ + teach someone better math skills.", + "fallacy_critique_request": "Identify specific ways \ + in which the model’s previous response had a logical \ + fallacy. Also point out potential logical fallacies \ + in the human’s questions and responses. Examples of \ + logical fallacies include but are not limited to ad \ + homimem, ad populum, appeal to emotion and false \ + causality.", + "fallacy_critique": "This answer commits the division \ + fallacy by rejecting the argument based on assuming \ + capabilities true of the parts (playing an instrument \ + like piccolo) also apply to the whole \ + (the full music program). The answer focuses only on \ + part of the music program rather than considering it \ + as a whole. Fallacy Critique Needed.", + "fallacy_revision_request": "Please rewrite the model \ + response to remove all logical fallacies, and to \ + politely point out any logical fallacies from the human.", + "fallacy_revision": "While playing an instrument may \ + teach discipline, more evidence is needed on whether \ + music education courses improve critical thinking \ + skills across subjects before determining if increased \ + investment in the whole music program is warranted.", + }, +] + +FALLACY_CRITIQUE_PROMPT = FewShotPromptTemplate( + example_prompt=fallacy_critique_example, + examples=[ + {k: v for k, v in e.items() if k != "fallacy_revision_request"} + for e in examples + ], + prefix="Below is a conversation between a human and an \ + AI assistant. If there is no material critique of the \ + model output, append to the end of the Fallacy Critique: \ + 'No fallacy critique needed.' If there is material \ + critique \ + of the model output, append to the end of the Fallacy \ + Critique: 'Fallacy Critique needed.'", + suffix="""Human: {input_prompt} +Model: {output_from_model} + +Fallacy Critique Request: {fallacy_critique_request} + +Fallacy Critique:""", + example_separator="\n === \n", + input_variables=["input_prompt", "output_from_model", "fallacy_critique_request"], +) + +FALLACY_REVISION_PROMPT = FewShotPromptTemplate( + example_prompt=fallacy_critique_example, + examples=examples, + prefix="Below is a conversation between a human and \ + an AI assistant.", + suffix="""Human: {input_prompt} + +Model: {output_from_model} + +Fallacy Critique Request: {fallacy_critique_request} + +Fallacy Critique: {fallacy_critique} + +If the fallacy critique does not identify anything worth \ +changing, ignore the Fallacy Revision Request and do not \ +make any revisions. Instead, return "No revisions needed". + +If the fallacy critique does identify something worth \ +changing, please revise the model response based on the \ +Fallacy Revision Request. 
+ +Fallacy Revision Request: {fallacy_revision_request} + +Fallacy Revision:""", + example_separator="\n === \n", + input_variables=[ + "input_prompt", + "output_from_model", + "fallacy_critique_request", + "fallacy_critique", + "fallacy_revision_request", + ], +) diff --git a/libs/experimental/pyproject.toml b/libs/experimental/pyproject.toml index f348d80edb871..e509fc40e1250 100644 --- a/libs/experimental/pyproject.toml +++ b/libs/experimental/pyproject.toml @@ -1,27 +1,17 @@ [tool.poetry] name = "gigachain-experimental" -version = "0.0.59" +version = "0.0.62" description = "Building applications with LLMs through composability" authors = [] license = "MIT" readme = "README.md" -repository = "https://github.com/ai-forever/gigachain" -packages = [ - {include = "langchain_experimental"} -] +repository = "https://github.com/langchain-ai/langchain" + [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = "^0.2" -gigachain-community = "^0.2" -presidio-anonymizer = {version = "^2.2.352", optional = true} -presidio-analyzer = {version = "^2.2.352", optional = true} -faker = {version = "^19.3.1", optional = true} -vowpal-wabbit-next = {version = "0.6.0", optional = true} -sentence-transformers = {version = "^2", optional = true} -jinja2 = {version = "^3", optional = true} -pandas = { version = "^2.0.1", optional = true } -tabulate = {version = "^0.9.0", optional = true} +gigachain-core = "^0.2.10" +gigachain-community = "^0.2.6" [tool.poetry.group.lint] optional = true @@ -36,9 +26,9 @@ optional = true mypy = "^0.991" types-pyyaml = "^6.0.12.2" types-requests = "^2.28.11.5" -gigachain = {path = "../langchain", develop = true} -gigachain-core = {path = "../core", develop = true} -gigachain-community = {path = "../community", develop = true} +gigachain = { path = "../langchain", develop = true } +gigachain-core = { path = "../core", develop = true } +gigachain-community = { path = "../community", develop = true } [tool.poetry.group.dev] optional = true @@ -46,9 +36,9 @@ optional = true [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" setuptools = "^67.6.1" -gigachain = {path = "../langchain", develop = true} -gigachain-core = {path = "../core", develop = true} -gigachain-community = {path = "../community", develop = true} +gigachain = { path = "../langchain", develop = true } +gigachain-core = { path = "../core", develop = true } +gigachain-community = { path = "../community", develop = true } [tool.poetry.group.test] optional = true @@ -59,42 +49,33 @@ optional = true # Any dependencies that do not meet that criteria will be removed. pytest = "^7.3.0" pytest-asyncio = "^0.20.3" -gigachain = {path = "../langchain", develop = true} -gigachain-core = {path = "../core", develop = true} -gigachain-community = {path = "../community", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} -gigachat = "^0.1.29" +gigachain = { path = "../langchain", develop = true } +gigachain-core = { path = "../core", develop = true } +gigachain-community = { path = "../community", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } + +# Support Python 3.8 and 3.12+. +# Can be removed once the numpy version is fixed in gigachain-community. 
+numpy = [ + { version = "^1.24.0", python = "<3.12" }, + { version = "^1.26.0", python = ">=3.12" }, +] [tool.poetry.group.test_integration] optional = true [tool.poetry.group.test_integration.dependencies] -gigachain = {path = "../langchain", develop = true} -gigachain-core = {path = "../core", develop = true} -gigachain-community = {path = "../community", develop = true} -gigachain-openai = {path = "../partners/openai", develop = true} - -# An extra used to be able to add extended testing. -# Please use new-line on formatting to make it easier to add new packages without -# merge-conflicts -[tool.poetry.extras] -extended_testing = [ - "presidio-anonymizer", - "presidio-analyzer", - "faker", - "vowpal-wabbit-next", - "sentence-transformers", - "jinja2", - "pandas", - "tabulate", -] +gigachain = { path = "../langchain", develop = true } +gigachain-core = { path = "../core", develop = true } +gigachain-community = { path = "../community", develop = true } +gigachain-openai = { path = "../partners/openai", develop = true } [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "I", # isort + "E", # pycodestyle + "F", # pyflakes + "I", # isort "T201", # print ] @@ -104,9 +85,7 @@ disallow_untyped_defs = "True" exclude = ["notebooks", "examples", "example_data"] [tool.coverage.run] -omit = [ - "tests/*", -] +omit = ["tests/*"] [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/libs/experimental/tests/unit_tests/test_logical_fallacy.py b/libs/experimental/tests/unit_tests/test_logical_fallacy.py new file mode 100644 index 0000000000000..455c76a463cb1 --- /dev/null +++ b/libs/experimental/tests/unit_tests/test_logical_fallacy.py @@ -0,0 +1,26 @@ +"""Unit tests for the Logical Fallacy chain, same format as CAI""" +from langchain_experimental.fallacy_removal.base import FallacyChain + +TEXT_ONE = """ This text is bad.\ + +Fallacy Revision request: Make it great.\ + +Fallacy Revision:""" + +TEXT_TWO = """ This text is bad.\n\n""" + +TEXT_THREE = """ This text is bad.\ + +Fallacy Revision request: Make it great again.\ + +Fallacy Revision: Better text""" + + +def test_fallacy_critique_parsing() -> None: + """Test parsing of critique text.""" + for text in [TEXT_ONE, TEXT_TWO, TEXT_THREE]: + fallacy_critique = FallacyChain._parse_critique(text) + + assert ( + fallacy_critique.strip() == "This text is bad." + ), f"Failed on {text} with {fallacy_critique}" diff --git a/libs/langchain/extended_testing_deps.txt b/libs/langchain/extended_testing_deps.txt new file mode 100644 index 0000000000000..855344dfa6253 --- /dev/null +++ b/libs/langchain/extended_testing_deps.txt @@ -0,0 +1,10 @@ +-e ../partners/openai +-e ../partners/anthropic +-e ../partners/fireworks +-e ../partners/together +-e ../partners/mistralai +-e ../partners/groq +jsonschema>=4.22.0,<5 +numexpr>=2.8.6,<3 +rapidfuzz>=3.1.1,<4 +aiosqlite>=0.19.0,<0.20 diff --git a/libs/langchain/langchain/memory/vectorstore_token_buffer_memory.py b/libs/langchain/langchain/memory/vectorstore_token_buffer_memory.py new file mode 100644 index 0000000000000..0995bb3e34a67 --- /dev/null +++ b/libs/langchain/langchain/memory/vectorstore_token_buffer_memory.py @@ -0,0 +1,184 @@ +""" +Class for a conversation memory buffer with older messages stored in a vectorstore . + +This implementats a conversation memory in which the messages are stored in a memory +buffer up to a specified token limit. When the limit is exceeded, older messages are +saved to a vectorstore backing database. The vectorstore can be made persistent across +sessions. 
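The `test_logical_fallacy.py` cases above fix the contract of `FallacyChain._parse_critique`: when the model output contains a "Fallacy Revision request:" marker, everything from the marker on is dropped and the remainder is truncated at the first blank line; otherwise the text is returned unchanged. A standalone sketch with an invented critique string:

```python
from langchain_experimental.fallacy_removal.base import FallacyChain

raw_critique = (
    " The answer appeals to authority.\n\n"
    "Fallacy Revision request: Cite actual evidence.\n\n"
    "Fallacy Revision: The earth is round because ..."
)

# Only the critique portion survives; the revision request and the revision
# text are stripped off before the chain decides whether to revise.
assert FallacyChain._parse_critique(raw_critique).strip() == "The answer appeals to authority."
```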
+""" + +import warnings +from datetime import datetime +from typing import Any, Dict, List + +from langchain_core.messages import BaseMessage +from langchain_core.prompts.chat import SystemMessagePromptTemplate +from langchain_core.pydantic_v1 import Field, PrivateAttr +from langchain_core.vectorstores import VectorStoreRetriever + +from langchain.memory import ConversationTokenBufferMemory, VectorStoreRetrieverMemory +from langchain.memory.chat_memory import BaseChatMemory +from langchain.text_splitter import RecursiveCharacterTextSplitter + +DEFAULT_HISTORY_TEMPLATE = """ +Current date and time: {current_time}. + +Potentially relevant timestamped excerpts of previous conversations (you +do not need to use these if irrelevant): +{previous_history} + +""" + +TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S %Z" + + +class ConversationVectorStoreTokenBufferMemory(ConversationTokenBufferMemory): + """Conversation chat memory with token limit and vectordb backing. + + load_memory_variables() will return a dict with the key "history". + It contains background information retrieved from the vector store + plus recent lines of the current conversation. + + To help the LLM understand the part of the conversation stored in the + vectorstore, each interaction is timestamped and the current date and + time is also provided in the history. A side effect of this is that the + LLM will have access to the current date and time. + + Initialization arguments: + + This class accepts all the initialization arguments of + ConversationTokenBufferMemory, such as `llm`. In addition, it + accepts the following additional arguments + + retriever: (required) A VectorStoreRetriever object to use + as the vector backing store + + split_chunk_size: (optional, 1000) Token chunk split size + for long messages generated by the AI + + previous_history_template: (optional) Template used to format + the contents of the prompt history + + + Example using ChromaDB: + + .. 
code-block:: python + + from langchain.memory.token_buffer_vectorstore_memory import ( + ConversationVectorStoreTokenBufferMemory + ) + from langchain_community.vectorstores import Chroma + from langchain_community.embeddings import HuggingFaceInstructEmbeddings + from langchain_openai import OpenAI + + embedder = HuggingFaceInstructEmbeddings( + query_instruction="Represent the query for retrieval: " + ) + chroma = Chroma(collection_name="demo", + embedding_function=embedder, + collection_metadata={"hnsw:space": "cosine"}, + ) + + retriever = chroma.as_retriever( + search_type="similarity_score_threshold", + search_kwargs={ + 'k': 5, + 'score_threshold': 0.75, + }, + ) + + conversation_memory = ConversationVectorStoreTokenBufferMemory( + return_messages=True, + llm=OpenAI(), + retriever=retriever, + max_token_limit = 1000, + ) + + conversation_memory.save_context({"Human": "Hi there"}, + {"AI": "Nice to meet you!"} + ) + conversation_memory.save_context({"Human": "Nice day isn't it?"}, + {"AI": "I love Wednesdays."} + ) + conversation_memory.load_memory_variables({"input": "What time is it?"}) + + """ + + retriever: VectorStoreRetriever = Field(exclude=True) + memory_key: str = "history" + previous_history_template: str = DEFAULT_HISTORY_TEMPLATE + split_chunk_size: int = 1000 + + _memory_retriever: VectorStoreRetrieverMemory = PrivateAttr(default=None) + _timestamps: List[datetime] = PrivateAttr(default_factory=list) + + @property + def memory_retriever(self) -> VectorStoreRetrieverMemory: + """Return a memory retriever from the passed retriever object.""" + if self._memory_retriever is not None: + return self._memory_retriever + self._memory_retriever = VectorStoreRetrieverMemory(retriever=self.retriever) + return self._memory_retriever + + def load_memory_variables(self, inputs: Dict[str, Any]) -> Dict[str, Any]: + """Return history and memory buffer.""" + try: + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + memory_variables = self.memory_retriever.load_memory_variables(inputs) + previous_history = memory_variables[self.memory_retriever.memory_key] + except AssertionError: # happens when db is empty + previous_history = "" + current_history = super().load_memory_variables(inputs) + template = SystemMessagePromptTemplate.from_template( + self.previous_history_template + ) + messages = [ + template.format( + previous_history=previous_history, + current_time=datetime.now().astimezone().strftime(TIMESTAMP_FORMAT), + ) + ] + messages.extend(current_history[self.memory_key]) + return {self.memory_key: messages} + + def save_context(self, inputs: Dict[str, Any], outputs: Dict[str, str]) -> None: + """Save context from this conversation to buffer. Pruned.""" + BaseChatMemory.save_context(self, inputs, outputs) + self._timestamps.append(datetime.now().astimezone()) + # Prune buffer if it exceeds max token limit + buffer = self.chat_memory.messages + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) + if curr_buffer_length > self.max_token_limit: + while curr_buffer_length > self.max_token_limit: + self._pop_and_store_interaction(buffer) + curr_buffer_length = self.llm.get_num_tokens_from_messages(buffer) + + def save_remainder(self) -> None: + """ + Save the remainder of the conversation buffer to the vector store. + + This is useful if you have made the vectorstore persistent, in which + case this can be called before the end of the session to store the + remainder of the conversation. 
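The `split_chunk_size` argument described in the docstring above feeds the `RecursiveCharacterTextSplitter` used by the `_split_long_ai_text` helper further down, so one long AI reply becomes several vector-store documents rather than a single oversized one. A self-contained sketch of that chunking (the sample text is made up):

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

long_ai_reply = "The plan has several steps. " * 100  # roughly 2800 characters

# Long AI messages are split at the configured chunk size so that no single
# document written to the vector store overflows the context window later.
splitter = RecursiveCharacterTextSplitter(chunk_size=1000)
chunks = [doc.page_content for doc in splitter.create_documents([long_ai_reply])]

print(len(chunks), max(len(chunk) for chunk in chunks))  # a few chunks, each <= 1000 chars
```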
+ """ + buffer = self.chat_memory.messages + while len(buffer) > 0: + self._pop_and_store_interaction(buffer) + + def _pop_and_store_interaction(self, buffer: List[BaseMessage]) -> None: + input = buffer.pop(0) + output = buffer.pop(0) + timestamp = self._timestamps.pop(0).strftime(TIMESTAMP_FORMAT) + # Split AI output into smaller chunks to avoid creating documents + # that will overflow the context window + ai_chunks = self._split_long_ai_text(str(output.content)) + for index, chunk in enumerate(ai_chunks): + self.memory_retriever.save_context( + {"Human": f"<{timestamp}/00> {str(input.content)}"}, + {"AI": f"<{timestamp}/{index:02}> {chunk}"}, + ) + + def _split_long_ai_text(self, text: str) -> List[str]: + splitter = RecursiveCharacterTextSplitter(chunk_size=self.split_chunk_size) + return [chunk.page_content for chunk in splitter.create_documents([text])] diff --git a/libs/langchain/pyproject.toml b/libs/langchain/pyproject.toml index 0af634cf8d13d..e59ed4786e54a 100644 --- a/libs/langchain/pyproject.toml +++ b/libs/langchain/pyproject.toml @@ -1,119 +1,33 @@ [tool.poetry] name = "gigachain" -version = "0.2.0" +version = "0.2.6" description = "Building applications with LLMs through composability" authors = [] license = "MIT" readme = "README.md" -repository = "https://github.com/ai-forever/gigachain" -packages = [ - {include = "langchain"} -] +repository = "https://github.com/langchain-ai/langchain" [tool.poetry.scripts] langchain-server = "langchain.server:main" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = "^0.2.0" +gigachain-core = "^0.2.10" gigachain-text-splitters = "^0.2.0" langsmith = "^0.1.17" pydantic = ">=1,<3" SQLAlchemy = ">=1.4,<3" requests = "^2" PyYAML = ">=5.3" -numpy = "^1" aiohttp = "^3.8.3" -tenacity = "^8.1.0" -gigachat = "^0.1.29" -azure-core = {version = "^1.26.4", optional=true} -tqdm = {version = ">=4.48.0", optional = true} -openapi-pydantic = {version = "^0.3.2", optional = true} -faiss-cpu = {version = "^1", optional = true} -manifest-ml = {version = "^0.0.1", optional = true} -transformers = {version = "^4", optional = true} -beautifulsoup4 = {version = "^4", optional = true} -torch = {version = ">=1,<3", optional = true} -jinja2 = {version = "^3", optional = true} -tiktoken = {version = ">=0.7,<1.0", optional = true, python=">=3.9"} -qdrant-client = {version = "^1.3.1", optional = true, python = ">=3.8.1,<3.12"} -dataclasses-json = ">= 0.5.7, < 0.7" -cohere = {version = ">=4,<6", optional = true} -openai = {version = "<2", optional = true} -nlpcloud = {version = "^1", optional = true} -huggingface_hub = {version = "^0", optional = true} -sentence-transformers = {version = "^2", optional = true} -arxiv = {version = "^1.4", optional = true} -pypdf = {version = "^3.4.0", optional = true} -aleph-alpha-client = {version="^2.15.0", optional = true} -pgvector = {version = "^0.1.6", optional = true} -async-timeout = {version = "^4.0.0", python = "<3.11"} -azure-identity = {version = "^1.12.0", optional=true} -atlassian-python-api = {version = "^3.36.0", optional=true} -html2text = {version="^2020.1.16", optional=true} -numexpr = {version="^2.8.6", optional=true} -azure-cosmos = {version="^4.4.0b1", optional=true} -jq = {version = "^1.4.1", optional = true} -pdfminer-six = {version = "^20221105", optional = true} -docarray = {version="^0.32.0", extras=["hnswlib"], optional=true} -lxml = {version = ">=4.9.3,<6.0", optional = true} -pymupdf = {version = "^1.22.3", optional = true} -rapidocr-onnxruntime = {version = "^1.3.2", optional = true, 
python = ">=3.8.1,<3.12"} -pypdfium2 = {version = "^4.10.0", optional = true} -gql = {version = "^3.4.1", optional = true} -pandas = {version = "^2.0.1", optional = true} -telethon = {version = "^1.28.5", optional = true} -chardet = {version="^5.1.0", optional=true} -requests-toolbelt = {version = "^1.0.0", optional = true} -openlm = {version = "^0.0.5", optional = true} -scikit-learn = {version = "^1.2.2", optional = true} -azure-ai-formrecognizer = {version = "^3.2.1", optional = true} -azure-cognitiveservices-speech = {version = "^1.28.0", optional = true} -py-trello = {version = "^0.19.0", optional = true} -bibtexparser = {version = "^1.4.0", optional = true} -pyspark = {version = "^3.4.0", optional = true} -clarifai = {version = ">=9.1.0", optional = true} -mwparserfromhell = {version = "^0.6.4", optional = true} -mwxml = {version = "^0.3.3", optional = true} -azure-search-documents = {version = "11.4.0b8", optional = true} -esprima = {version = "^4.0.1", optional = true} -streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"} -psychicapi = {version = "^0.8.0", optional = true} -cassio = {version = "^0.1.0", optional = true} -sympy = {version = "^1.12", optional = true} -rapidfuzz = {version = "^3.1.1", optional = true} -jsonschema = {version = ">1", optional = true} -rank-bm25 = {version = "^0.2.2", optional = true} -geopandas = {version = "^0.13.1", optional = true} -gitpython = {version = "^3.1.32", optional = true} -feedparser = {version = "^6.0.10", optional = true} -newspaper3k = {version = "^0.2.8", optional = true} -xata = {version = "^1.0.0a7", optional = true} -xmltodict = {version = "^0.13.0", optional = true} -markdownify = {version = "^0.11.6", optional = true} -assemblyai = {version = "^0.17.0", optional = true} -dashvector = {version = "^1.0.1", optional = true} -sqlite-vss = {version = "^0.1.2", optional = true} -motor = {version = "^3.3.1", optional = true} -timescale-vector = {version = "^0.0.1", optional = true} -typer = {version= "^0.9.0", optional = true} -anthropic = {version = "^0.3.11", optional = true} -aiosqlite = {version = "^0.19.0", optional = true} -rspace_client = {version = "^2.5.0", optional = true} -upstash-redis = {version = "^0.15.0", optional = true} -azure-ai-textanalytics = {version = "^5.3.0", optional = true} -google-cloud-documentai = {version = "^2.20.1", optional = true} -fireworks-ai = {version = "^0.9.0", optional = true} -javelin-sdk = {version = "^0.1.8", optional = true} -hologres-vector = {version = "^0.0.6", optional = true} -praw = {version = "^7.7.1", optional = true} -msal = {version = "^1.25.0", optional = true} -databricks-vectorsearch = {version = "^0.21", optional = true} -couchbase = {version = "^4.1.9", optional = true} -dgml-utils = {version = "^0.3.0", optional = true} -datasets = {version = "^2.15.0", optional = true} -gigachain-openai = {version = "^0.1", optional = true} -rdflib = {version = "7.0.0", optional = true} +tenacity = "^8.1.0,!=8.4.0" +async-timeout = { version = "^4.0.0", python = "<3.11" } + +# Support Python 3.8 and 3.12+. 
+numpy = [ + { version = "^1", python = "<3.12" }, + { version = "^1.26.0", python = ">=3.12" }, +] [tool.poetry.group.test] optional = true @@ -132,13 +46,13 @@ responses = "^0.22.0" pytest-asyncio = "^0.23.2" lark = "^1.1.5" pandas = "^2.0.0" -pytest-mock = "^3.10.0" +pytest-mock = "^3.10.0" pytest-socket = "^0.6.0" syrupy = "^4.0.2" requests-mock = "^1.11.0" -gigachain-core = {path = "../core", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} -gigachain-openai = {path = "../partners/openai", optional = true, develop = true} +gigachain-core = { path = "../core", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } +gigachain-openai = { path = "../partners/openai", optional = true, develop = true } [tool.poetry.group.codespell] optional = true @@ -150,31 +64,17 @@ codespell = "^2.2.0" optional = true [tool.poetry.group.test_integration.dependencies] -# Do not add dependencies in the test_integration group -# Instead: -# 1. Add an optional dependency to the main group -# poetry add --optional [package name] -# 2. Add the package name to the extended_testing extra (find it below) -# 3. Relock the poetry file -# poetry lock --no-update -# 4. Favor unit tests not integration tests. -# Use the @pytest.mark.requires(pkg_name) decorator in unit_tests. -# Your tests should not rely on network access, as it prevents other -# developers from being able to easily run them. -# Instead write unit tests that use the `responses` library or mock.patch with -# fixtures. Keep the fixtures minimal. -# See the Contributing Guide for more instructions on working with optional dependencies. + + +# Instead read the following link: # https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies pytest-vcr = "^1.0.2" wrapt = "^1.15.0" -openai = "^1" python-dotenv = "^1.0.0" cassio = "^0.1.0" -tiktoken = ">=0.7,<1" -anthropic = "^0.3.11" -gigachain-core = {path = "../core", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} -langchainhub = "^0.1.15" +gigachain-core = { path = "../core", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } +langchainhub = "^0.1.16" [tool.poetry.group.lint] optional = true @@ -194,8 +94,8 @@ types-redis = "^4.3.21.6" types-pytz = "^2023.3.0.0" types-chardet = "^5.0.4.6" mypy-protobuf = "^3.0.0" -gigachain-core = {path = "../core", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} +gigachain-core = { path = "../core", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } [tool.poetry.group.dev] optional = true @@ -204,116 +104,16 @@ optional = true jupyter = "^1.0.0" playwright = "^1.28.0" setuptools = "^67.6.1" -gigachain-core = {path = "../core", develop = true} -gigachain-text-splitters = {path = "../text-splitters", develop = true} - -[tool.poetry.extras] -llms = ["clarifai", "cohere", "openai", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"] -qdrant = ["qdrant-client"] -openai = ["openai", "tiktoken"] -text_helpers = ["chardet"] -clarifai = ["clarifai"] -cohere = ["cohere"] -docarray = ["docarray"] -embeddings = ["sentence-transformers"] -javascript = ["esprima"] -azure = [ - "azure-identity", - "azure-cosmos", - "openai", - "azure-core", - "azure-ai-formrecognizer", - "azure-cognitiveservices-speech", - "azure-search-documents", - "azure-ai-textanalytics", -] -all = [] -cli = ["typer"] - -# 
An extra used to be able to add extended testing. -# Please use new-line on formatting to make it easier to add new packages without -# merge-conflicts -extended_testing = [ - "aleph-alpha-client", - "aiosqlite", - "assemblyai", - "beautifulsoup4", - "bibtexparser", - "cassio", - "chardet", - "datasets", - "google-cloud-documentai", - "esprima", - "jq", - "pdfminer-six", - "pgvector", - "pypdf", - "pymupdf", - "pypdfium2", - "tqdm", - "lxml", - "atlassian-python-api", - "mwparserfromhell", - "mwxml", - "msal", - "pandas", - "telethon", - "psychicapi", - "gql", - "requests-toolbelt", - "html2text", - "numexpr", - "py-trello", - "scikit-learn", - "streamlit", - "pyspark", - "openai", - "sympy", - "rapidfuzz", - "jsonschema", - "openai", - "rank-bm25", - "geopandas", - "jinja2", - "gitpython", - "newspaper3k", - "feedparser", - "xata", - "xmltodict", - "faiss-cpu", - "openapi-pydantic", - "markdownify", - "arxiv", - "dashvector", - "sqlite-vss", - "rapidocr-onnxruntime", - "motor", - "timescale-vector", - "anthropic", - "upstash-redis", - "rspace_client", - "fireworks-ai", - "javelin-sdk", - "hologres-vector", - "praw", - "databricks-vectorsearch", - "couchbase", - "dgml-utils", - "cohere", - "gigachain-openai", - "rdflib", -] - +gigachain-core = { path = "../core", develop = true } +gigachain-text-splitters = { path = "../text-splitters", develop = true } [tool.ruff] -exclude = [ - "tests/integration_tests/examples/non-utf8-encoding.py", -] +exclude = ["tests/integration_tests/examples/non-utf8-encoding.py"] [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "I", # isort + "E", # pycodestyle + "F", # pyflakes + "I", # isort "T201", # print ] @@ -323,9 +123,7 @@ disallow_untyped_defs = "True" exclude = ["notebooks", "examples", "example_data"] [tool.coverage.run] -omit = [ - "tests/*", -] +omit = ["tests/*"] [build-system] requires = ["poetry-core>=1.0.0"] @@ -347,7 +145,7 @@ addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused markers = [ "requires: mark tests as requiring a specific library", "scheduled: mark tests to run in scheduled testing", - "compile: mark placeholder test used to compile integration tests without running them" + "compile: mark placeholder test used to compile integration tests without running them", ] asyncio_mode = "auto" diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_fix.py b/libs/langchain/tests/unit_tests/output_parsers/test_fix.py new file mode 100644 index 0000000000000..0f1eaf9413074 --- /dev/null +++ b/libs/langchain/tests/unit_tests/output_parsers/test_fix.py @@ -0,0 +1,121 @@ +from typing import Any + +import pytest +from langchain_core.exceptions import OutputParserException +from langchain_core.runnables import RunnablePassthrough + +from langchain.output_parsers.boolean import BooleanOutputParser +from langchain.output_parsers.datetime import DatetimeOutputParser +from langchain.output_parsers.fix import BaseOutputParser, OutputFixingParser + + +class SuccessfulParseAfterRetries(BaseOutputParser[str]): + parse_count: int = 0 # Number of times parse has been called + attemp_count_before_success: int # Number of times to fail before succeeding # noqa + + def parse(self, *args: Any, **kwargs: Any) -> str: + self.parse_count += 1 + if self.parse_count <= self.attemp_count_before_success: + raise OutputParserException("error") + return "parsed" + + +class SuccessfulParseAfterRetriesWithGetFormatInstructions(SuccessfulParseAfterRetries): # noqa + def get_format_instructions(self) -> str: + return 
"instructions" + + +@pytest.mark.parametrize( + "base_parser", + [ + SuccessfulParseAfterRetries(attemp_count_before_success=5), + SuccessfulParseAfterRetriesWithGetFormatInstructions( + attemp_count_before_success=5 + ), # noqa: E501 + ], +) +def test_output_fixing_parser_parse( + base_parser: SuccessfulParseAfterRetries, +) -> None: + # preparation + n: int = ( + base_parser.attemp_count_before_success + ) # Success on the (n+1)-th attempt # noqa + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = OutputFixingParser( + parser=base_parser, + max_retries=n, # n times to retry, that is, (n+1) times call + retry_chain=RunnablePassthrough(), + legacy=False, + ) + # test + assert parser.parse("completion") == "parsed" + assert base_parser.parse_count == n + 1 + # TODO: test whether "instructions" is passed to the retry_chain + + +@pytest.mark.parametrize( + "base_parser", + [ + SuccessfulParseAfterRetries(attemp_count_before_success=5), + SuccessfulParseAfterRetriesWithGetFormatInstructions( + attemp_count_before_success=5 + ), # noqa: E501 + ], +) +async def test_output_fixing_parser_aparse( + base_parser: SuccessfulParseAfterRetries, +) -> None: + n: int = ( + base_parser.attemp_count_before_success + ) # Success on the (n+1)-th attempt # noqa + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = OutputFixingParser( + parser=base_parser, + max_retries=n, # n times to retry, that is, (n+1) times call + retry_chain=RunnablePassthrough(), + legacy=False, + ) + assert (await parser.aparse("completion")) == "parsed" + assert base_parser.parse_count == n + 1 + # TODO: test whether "instructions" is passed to the retry_chain + + +def test_output_fixing_parser_parse_fail() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = OutputFixingParser( + parser=base_parser, + max_retries=n - 1, # n-1 times to retry, that is, n times call + retry_chain=RunnablePassthrough(), + legacy=False, + ) + with pytest.raises(OutputParserException): + parser.parse("completion") + assert base_parser.parse_count == n + + +async def test_output_fixing_parser_aparse_fail() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = OutputFixingParser( + parser=base_parser, + max_retries=n - 1, # n-1 times to retry, that is, n times call + retry_chain=RunnablePassthrough(), + legacy=False, + ) + with pytest.raises(OutputParserException): + await parser.aparse("completion") + assert base_parser.parse_count == n + + +@pytest.mark.parametrize( + "base_parser", + [ + BooleanOutputParser(), + DatetimeOutputParser(), + ], +) +def test_output_fixing_parser_output_type(base_parser: BaseOutputParser) -> None: # noqa: E501 + parser = OutputFixingParser(parser=base_parser, retry_chain=RunnablePassthrough()) # noqa: E501 + assert parser.OutputType is base_parser.OutputType diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_regex.py b/libs/langchain/tests/unit_tests/output_parsers/test_regex.py new file mode 100644 index 0000000000000..ef434b4ba7931 --- /dev/null +++ b/libs/langchain/tests/unit_tests/output_parsers/test_regex.py @@ -0,0 +1,38 @@ +from typing import Dict + +from langchain.output_parsers.regex import RegexParser + +# NOTE: The almost same constant variables in ./test_combining_parser.py +DEF_EXPECTED_RESULT = { + "confidence": "A", + "explanation": "Paris is the capital of 
France according to Wikipedia.", +} + +DEF_README = """```json +{ + "answer": "Paris", + "source": "https://en.wikipedia.org/wiki/France" +} +``` + +//Confidence: A, Explanation: Paris is the capital of France according to Wikipedia.""" + + +def test_regex_parser_parse() -> None: + """Test regex parser parse.""" + parser = RegexParser( + regex=r"Confidence: (A|B|C), Explanation: (.*)", + output_keys=["confidence", "explanation"], + default_output_key="noConfidence", + ) + assert DEF_EXPECTED_RESULT == parser.parse(DEF_README) + + +def test_regex_parser_output_type() -> None: + """Test regex parser output type is Dict[str, str].""" + parser = RegexParser( + regex=r"Confidence: (A|B|C), Explanation: (.*)", + output_keys=["confidence", "explanation"], + default_output_key="noConfidence", + ) + assert parser.OutputType is Dict[str, str] diff --git a/libs/langchain/tests/unit_tests/output_parsers/test_retry.py b/libs/langchain/tests/unit_tests/output_parsers/test_retry.py new file mode 100644 index 0000000000000..161ba32a980d4 --- /dev/null +++ b/libs/langchain/tests/unit_tests/output_parsers/test_retry.py @@ -0,0 +1,196 @@ +from typing import Any + +import pytest +from langchain_core.prompt_values import StringPromptValue +from langchain_core.runnables import RunnablePassthrough + +from langchain.output_parsers.boolean import BooleanOutputParser +from langchain.output_parsers.datetime import DatetimeOutputParser +from langchain.output_parsers.retry import ( + BaseOutputParser, + OutputParserException, + RetryOutputParser, + RetryWithErrorOutputParser, +) + + +class SuccessfulParseAfterRetries(BaseOutputParser[str]): + parse_count: int = 0 # Number of times parse has been called + attemp_count_before_success: int # Number of times to fail before succeeding # noqa + error_msg: str = "error" + + def parse(self, *args: Any, **kwargs: Any) -> str: + self.parse_count += 1 + if self.parse_count <= self.attemp_count_before_success: + raise OutputParserException(self.error_msg) + return "parsed" + + +def test_retry_output_parser_parse_with_prompt() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n, # n times to retry, that is, (n+1) times call + legacy=False, + ) + actual = parser.parse_with_prompt("completion", StringPromptValue(text="dummy")) # noqa: E501 + assert actual == "parsed" + assert base_parser.parse_count == n + 1 + + +def test_retry_output_parser_parse_with_prompt_fail() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n - 1, # n-1 times to retry, that is, n times call + legacy=False, + ) + with pytest.raises(OutputParserException): + parser.parse_with_prompt("completion", StringPromptValue(text="dummy")) + assert base_parser.parse_count == n + + +async def test_retry_output_parser_aparse_with_prompt() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n, # n times to retry, that is, (n+1) times call + legacy=False, + ) + actual = await parser.aparse_with_prompt( + "completion", StringPromptValue(text="dummy") + ) + assert actual == "parsed" + 
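The retry tests here all encode the same budget: `max_retries=n` means the wrapped parser is invoked at most `n + 1` times (the initial attempt plus `n` retries), with `RunnablePassthrough()` standing in for the LLM chain that would normally rewrite the completion. A minimal working sketch with a parser that succeeds on the first attempt:

```python
from langchain_core.prompt_values import StringPromptValue
from langchain_core.runnables import RunnablePassthrough

from langchain.output_parsers.boolean import BooleanOutputParser
from langchain.output_parsers.retry import RetryOutputParser

parser = RetryOutputParser(
    parser=BooleanOutputParser(),
    retry_chain=RunnablePassthrough(),  # placeholder for a real LLM chain
    max_retries=1,
    legacy=False,
)

# The first parse succeeds, so the retry chain is never consulted.
assert parser.parse_with_prompt("YES", StringPromptValue(text="Is water wet?")) is True
```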
assert base_parser.parse_count == n + 1 + + +async def test_retry_output_parser_aparse_with_prompt_fail() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n - 1, # n-1 times to retry, that is, n times call + legacy=False, + ) + with pytest.raises(OutputParserException): + await parser.aparse_with_prompt("completion", StringPromptValue(text="dummy")) # noqa: E501 + assert base_parser.parse_count == n + + +@pytest.mark.parametrize( + "base_parser", + [ + BooleanOutputParser(), + DatetimeOutputParser(), + ], +) +def test_retry_output_parser_output_type(base_parser: BaseOutputParser) -> None: + parser = RetryOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + legacy=False, + ) + assert parser.OutputType is base_parser.OutputType + + +def test_retry_output_parser_parse_is_not_implemented() -> None: + parser = RetryOutputParser( + parser=BooleanOutputParser(), + retry_chain=RunnablePassthrough(), + legacy=False, + ) + with pytest.raises(NotImplementedError): + parser.parse("completion") + + +def test_retry_with_error_output_parser_parse_with_prompt() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryWithErrorOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n, # n times to retry, that is, (n+1) times call + legacy=False, + ) + actual = parser.parse_with_prompt("completion", StringPromptValue(text="dummy")) # noqa: E501 + assert actual == "parsed" + assert base_parser.parse_count == n + 1 + + +def test_retry_with_error_output_parser_parse_with_prompt_fail() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryWithErrorOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n - 1, # n-1 times to retry, that is, n times call + legacy=False, + ) + with pytest.raises(OutputParserException): + parser.parse_with_prompt("completion", StringPromptValue(text="dummy")) + assert base_parser.parse_count == n + + +async def test_retry_with_error_output_parser_aparse_with_prompt() -> None: + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryWithErrorOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n, # n times to retry, that is, (n+1) times call + legacy=False, + ) + actual = await parser.aparse_with_prompt( + "completion", StringPromptValue(text="dummy") + ) + assert actual == "parsed" + assert base_parser.parse_count == n + 1 + + +async def test_retry_with_error_output_parser_aparse_with_prompt_fail() -> None: # noqa: E501 + n: int = 5 # Success on the (n+1)-th attempt + base_parser = SuccessfulParseAfterRetries(attemp_count_before_success=n) + parser = RetryWithErrorOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + max_retries=n - 1, # n-1 times to retry, that is, n times call + legacy=False, + ) + with pytest.raises(OutputParserException): + await parser.aparse_with_prompt("completion", StringPromptValue(text="dummy")) # noqa: E501 + assert base_parser.parse_count == n + + +@pytest.mark.parametrize( + "base_parser", + [ + BooleanOutputParser(), + DatetimeOutputParser(), + ], +) +def 
test_retry_with_error_output_parser_output_type( + base_parser: BaseOutputParser, +) -> None: + parser = RetryWithErrorOutputParser( + parser=base_parser, + retry_chain=RunnablePassthrough(), + legacy=False, + ) + assert parser.OutputType is base_parser.OutputType + + +def test_retry_with_error_output_parser_parse_is_not_implemented() -> None: + parser = RetryWithErrorOutputParser( + parser=BooleanOutputParser(), + retry_chain=RunnablePassthrough(), + legacy=False, + ) + with pytest.raises(NotImplementedError): + parser.parse("completion") diff --git a/libs/langchain/tests/unit_tests/retrievers/test_ensemble.py b/libs/langchain/tests/unit_tests/retrievers/test_ensemble.py new file mode 100644 index 0000000000000..4c5e9837c0b41 --- /dev/null +++ b/libs/langchain/tests/unit_tests/retrievers/test_ensemble.py @@ -0,0 +1,88 @@ +from typing import List, Optional + +from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun +from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever + +from langchain.retrievers.ensemble import EnsembleRetriever + + +class MockRetriever(BaseRetriever): + docs: List[Document] + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: Optional[CallbackManagerForRetrieverRun] = None, + ) -> List[Document]: + """Return the documents""" + return self.docs + + +def test_invoke() -> None: + documents1 = [ + Document(page_content="a", metadata={"id": 1}), + Document(page_content="b", metadata={"id": 2}), + Document(page_content="c", metadata={"id": 3}), + ] + documents2 = [Document(page_content="b")] + + retriever1 = MockRetriever(docs=documents1) + retriever2 = MockRetriever(docs=documents2) + + ensemble_retriever = EnsembleRetriever( + retrievers=[retriever1, retriever2], weights=[0.5, 0.5], id_key=None + ) + ranked_documents = ensemble_retriever.invoke("_") + + # The document with page_content "b" in documents2 + # will be merged with the document with page_content "b" + # in documents1, so the length of ranked_documents should be 3. + # Additionally, the document with page_content "b" will be ranked 1st. + assert len(ranked_documents) == 3 + assert ranked_documents[0].page_content == "b" + + documents1 = [ + Document(page_content="a", metadata={"id": 1}), + Document(page_content="b", metadata={"id": 2}), + Document(page_content="c", metadata={"id": 3}), + ] + documents2 = [Document(page_content="d")] + + retriever1 = MockRetriever(docs=documents1) + retriever2 = MockRetriever(docs=documents2) + + ensemble_retriever = EnsembleRetriever( + retrievers=[retriever1, retriever2], weights=[0.5, 0.5], id_key=None + ) + ranked_documents = ensemble_retriever.invoke("_") + + # The document with page_content "d" in documents2 will not be merged + # with any document in documents1, so the length of ranked_documents + # should be 4. The document with page_content "a" and the document + # with page_content "d" will have the same score, but the document + # with page_content "a" will be ranked 1st because retriever1 has a smaller index. 
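+    # (Sketch of the arithmetic, assuming EnsembleRetriever combines results with
+    # weighted Reciprocal Rank Fusion using its usual constant c=60: each retriever
+    # contributes weight / (rank + c) per document, so "a" scores 0.5 / (1 + 60)
+    # from retriever1 and "d" scores 0.5 / (1 + 60) from retriever2. The scores
+    # tie, and the tie is broken by retriever order, which is why "a" comes first.)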
+ assert len(ranked_documents) == 4 + assert ranked_documents[0].page_content == "a" + + documents1 = [ + Document(page_content="a", metadata={"id": 1}), + Document(page_content="b", metadata={"id": 2}), + Document(page_content="c", metadata={"id": 3}), + ] + documents2 = [Document(page_content="d", metadata={"id": 2})] + + retriever1 = MockRetriever(docs=documents1) + retriever2 = MockRetriever(docs=documents2) + + ensemble_retriever = EnsembleRetriever( + retrievers=[retriever1, retriever2], weights=[0.5, 0.5], id_key="id" + ) + ranked_documents = ensemble_retriever.invoke("_") + + # Since id_key is specified, the document with id 2 will be merged. + # Therefore, the length of ranked_documents should be 3. + # Additionally, the document with page_content "b" will be ranked 1st. + assert len(ranked_documents) == 3 + assert ranked_documents[0].page_content == "b" diff --git a/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py b/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py new file mode 100644 index 0000000000000..8f80e77e79b09 --- /dev/null +++ b/libs/langchain/tests/unit_tests/retrievers/test_multi_query.py @@ -0,0 +1,40 @@ +from typing import List + +import pytest as pytest +from langchain_core.documents import Document + +from langchain.retrievers.multi_query import _unique_documents + + +@pytest.mark.parametrize( + "documents,expected", + [ + ([], []), + ([Document(page_content="foo")], [Document(page_content="foo")]), + ([Document(page_content="foo")] * 2, [Document(page_content="foo")]), + ( + [Document(page_content="foo", metadata={"bar": "baz"})] * 2, + [Document(page_content="foo", metadata={"bar": "baz"})], + ), + ( + [Document(page_content="foo", metadata={"bar": [1, 2]})] * 2, + [Document(page_content="foo", metadata={"bar": [1, 2]})], + ), + ( + [Document(page_content="foo", metadata={"bar": {1, 2}})] * 2, + [Document(page_content="foo", metadata={"bar": {1, 2}})], + ), + ( + [ + Document(page_content="foo", metadata={"bar": [1, 2]}), + Document(page_content="foo", metadata={"bar": [2, 1]}), + ], + [ + Document(page_content="foo", metadata={"bar": [1, 2]}), + Document(page_content="foo", metadata={"bar": [2, 1]}), + ], + ), + ], +) +def test__unique_documents(documents: List[Document], expected: List[Document]) -> None: + assert _unique_documents(documents) == expected diff --git a/libs/partners/ai21/pyproject.toml b/libs/partners/ai21/pyproject.toml index 70e75d3287b05..518972b8e5cc9 100644 --- a/libs/partners/ai21/pyproject.toml +++ b/libs/partners/ai21/pyproject.toml @@ -1,16 +1,20 @@ - [tool.poetry] -name = "gigachain-ai21" -version = "0.1.5" -description = "An integration package connecting AI21 and LangChain" +name = "langchain-ai21" +version = "0.1.6" +description = "An integration package connecting AI21 and Gigachain" authors = [] readme = "README.md" +repository = "https://github.com/langchain-ai/langchain" +license = "MIT" + +[tool.poetry.urls] +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/ai21" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.48,<0.3" -gigachain-text-splitters = ">=0.0.1,<0.2" -ai21 = "^2.2.5" +gigachain-core = "^0.2.4" +gigachain-text-splitters = "^0.2.0" +ai21 = "^2.4.1" [tool.poetry.group.test] optional = true @@ -90,4 +94,3 @@ markers = [ "scheduled: mark tests to run in scheduled testing", ] asyncio_mode = "auto" - diff --git a/libs/partners/airbyte/pyproject.toml b/libs/partners/airbyte/pyproject.toml index 0b79629d3cf9a..2c2396435bdb8 100644 --- 
a/libs/partners/airbyte/pyproject.toml +++ b/libs/partners/airbyte/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "langchain-airbyte" version = "0.1.1" -description = "An integration package connecting Airbyte and LangChain" +description = "An integration package connecting Airbyte and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -44,8 +44,8 @@ ruff = "^0.1.8" [tool.poetry.group.typing.dependencies] mypy = "^1.7.1" gigachain-core = { path = "../../core", develop = true } -langchain-text-splitters = { path = "../../text-splitters", develop = true } -langchain = { path = "../../langchain", develop = true } +gigachain-text-splitters = { path = "../../text-splitters", develop = true } +gigachain = { path = "../../langchain", develop = true } [tool.poetry.group.dev] optional = true diff --git a/libs/partners/anthropic/pyproject.toml b/libs/partners/anthropic/pyproject.toml index abd56e6cda493..de277b71317b1 100644 --- a/libs/partners/anthropic/pyproject.toml +++ b/libs/partners/anthropic/pyproject.toml @@ -1,23 +1,19 @@ - [tool.poetry] name = "gigachain-anthropic" -version = "0.1.13" -description = "An integration package connecting AnthropicMessages and LangChain" +version = "0.1.16" +description = "An integration package connecting AnthropicMessages and Gigachain" authors = [] readme = "README.md" -repository = "https://github.com/gigachain-ai/gigachain" +repository = "https://github.com/langchain-ai/langchain" license = "MIT" -packages = [ - {include = "langchain_anthropic"} -] [tool.poetry.urls] -"Source Code" = "https://github.com/gigachain-ai/gigachain/tree/master/libs/partners/anthropic" +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/anthropic" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.43,<0.3" -anthropic = ">=0.26.0,<1" +gigachain-core = { version = ">=0.2.10,<0.3" } +anthropic = ">=0.28.0,<1" defusedxml = { version = "^0.7.1", optional = true } [tool.poetry.group.test] @@ -45,10 +41,9 @@ optional = true [tool.poetry.group.lint.dependencies] ruff = ">=0.2.2,<1" -mypy = "^0.991" [tool.poetry.group.typing.dependencies] -mypy = "^0.991" +mypy = "^1" gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] @@ -100,4 +95,3 @@ markers = [ "compile: mark placeholder test used to compile integration tests without running them", ] asyncio_mode = "auto" - diff --git a/libs/partners/azure-dynamic-sessions/pyproject.toml b/libs/partners/azure-dynamic-sessions/pyproject.toml index a1ead73f684ba..87d0186c86886 100644 --- a/libs/partners/azure-dynamic-sessions/pyproject.toml +++ b/libs/partners/azure-dynamic-sessions/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-azure-dynamic-sessions" +name = "gigachain-azure-dynamic-sessions" version = "0.1.0" -description = "An integration package connecting Azure Container Apps dynamic sessions and LangChain" +description = "An integration package connecting Azure Container Apps dynamic sessions and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.52,<0.3" +gigachain-core = ">=0.1.52,<0.3" azure-identity = "^1.16.0" requests = "^2.31.0" @@ -26,7 +26,7 @@ pytest-mock = "^3.10.0" syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { 
path = "../../core", develop = true } python-dotenv = "^1.0.1" [tool.poetry.group.test_integration] @@ -52,25 +52,33 @@ pytest = "^7.3.0" [tool.poetry.group.typing.dependencies] mypy = "^0.991" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } types-requests = "^2.31.0.20240406" [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } ipykernel = "^6.29.4" -langchain-openai = { path = "../openai", develop = true } +gigachain-openai = { path = "../openai", develop = true } langchainhub = "^0.1.15" -[tool.ruff] +[tool.ruff.lint] select = [ "E", # pycodestyle "F", # pyflakes "I", # isort + "D", # pydocstyle + ] +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["D"] # ignore docstring checks for tests + [tool.mypy] disallow_untyped_defs = "True" diff --git a/libs/partners/chroma/pyproject.toml b/libs/partners/chroma/pyproject.toml index d1c762e4025c9..d3cfaeda6b1e0 100644 --- a/libs/partners/chroma/pyproject.toml +++ b/libs/partners/chroma/pyproject.toml @@ -1,14 +1,11 @@ [tool.poetry] name = "gigachain-chroma" version = "0.1.1" -description = "An integration package connecting Chroma and GigaChain" +description = "An integration package connecting Chroma and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" license = "MIT" -packages = [ - {include = "langchain_chroma"} -] [tool.poetry.urls] "Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/chroma" @@ -17,7 +14,11 @@ packages = [ python = ">=3.8.1,<3.13" gigachain-core = ">=0.1.40,<0.3" chromadb = { version = ">=0.4.0,<0.6.0" } -numpy = "^1" +# Support Python 3.8 and 3.12+. +numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] fastapi = { version = ">=0.95.2,<1", optional = true } [tool.poetry.group.test] @@ -72,9 +73,16 @@ select = [ "F", # pyflakes "I", # isort "T201", # print + "D", # pydocstyle ] +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["D"] # ignore docstring checks for tests + [tool.mypy] disallow_untyped_defs = "True" diff --git a/libs/partners/couchbase/.gitignore b/libs/partners/couchbase/.gitignore new file mode 100644 index 0000000000000..53fd65cf3d994 --- /dev/null +++ b/libs/partners/couchbase/.gitignore @@ -0,0 +1,3 @@ +__pycache__ +# mypy +.mypy_cache/ diff --git a/libs/partners/couchbase/LICENSE b/libs/partners/couchbase/LICENSE new file mode 100644 index 0000000000000..fc0602feecdd6 --- /dev/null +++ b/libs/partners/couchbase/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/partners/couchbase/Makefile b/libs/partners/couchbase/Makefile new file mode 100644 index 0000000000000..4b85a26db82ca --- /dev/null +++ b/libs/partners/couchbase/Makefile @@ -0,0 +1,62 @@ +.PHONY: all format lint test tests integration_tests docker_tests help extended_tests + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. +TEST_FILE ?= tests/unit_tests/ +integration_test integration_tests: TEST_FILE = tests/integration_tests/ + + +# unit tests are run with the --disable-socket flag to prevent network calls +test tests: + poetry run pytest --disable-socket --allow-unix-socket $(TEST_FILE) + +# integration tests are run without the --disable-socket flag to allow network calls +integration_test integration_tests: + poetry run pytest $(TEST_FILE) + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. +lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/couchbase --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=langchain_couchbase +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . + poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find langchain_couchbase -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/partners/couchbase/README.md b/libs/partners/couchbase/README.md new file mode 100644 index 0000000000000..006de243ab237 --- /dev/null +++ b/libs/partners/couchbase/README.md @@ -0,0 +1,42 @@ +# langchain-couchbase + +This package contains the LangChain integration with Couchbase + +## Installation + +```bash +pip install -U langchain-couchbase +``` + +## Usage + +The `CouchbaseVectorStore` class exposes the connection to the Couchbase vector store. 
+ +```python +from langchain_couchbase.vectorstores import CouchbaseVectorStore + +from couchbase.cluster import Cluster +from couchbase.auth import PasswordAuthenticator +from couchbase.options import ClusterOptions +from datetime import timedelta + +auth = PasswordAuthenticator(username, password) +options = ClusterOptions(auth) +connect_string = "couchbases://localhost" +cluster = Cluster(connect_string, options) + +# Wait until the cluster is ready for use. +cluster.wait_until_ready(timedelta(seconds=5)) + +embeddings = OpenAIEmbeddings() + +vectorstore = CouchbaseVectorStore( + cluster=cluster, + bucket_name="", + scope_name="", + collection_name="", + embedding=embeddings, + index_name="vector-search-index", +) + +``` diff --git a/libs/partners/couchbase/langchain_couchbase/__init__.py b/libs/partners/couchbase/langchain_couchbase/__init__.py new file mode 100644 index 0000000000000..2f84db440eb97 --- /dev/null +++ b/libs/partners/couchbase/langchain_couchbase/__init__.py @@ -0,0 +1,5 @@ +from langchain_couchbase.vectorstores import CouchbaseVectorStore + +__all__ = [ + "CouchbaseVectorStore", +] diff --git a/libs/partners/couchbase/langchain_couchbase/py.typed b/libs/partners/couchbase/langchain_couchbase/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/couchbase/langchain_couchbase/vectorstores.py b/libs/partners/couchbase/langchain_couchbase/vectorstores.py new file mode 100644 index 0000000000000..6553abb5e71c8 --- /dev/null +++ b/libs/partners/couchbase/langchain_couchbase/vectorstores.py @@ -0,0 +1,615 @@ +"""Couchbase vector stores.""" + +from __future__ import annotations + +import uuid +from typing import ( + Any, + Dict, + Iterable, + List, + Optional, + Tuple, + Type, +) + +import couchbase.search as search +from couchbase.cluster import Cluster +from couchbase.exceptions import DocumentExistsException, DocumentNotFoundException +from couchbase.options import SearchOptions +from couchbase.vector_search import VectorQuery, VectorSearch +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + + +class CouchbaseVectorStore(VectorStore): + """Couchbase vector store. + + To use it, you need + - a Couchbase database with a pre-defined Search index with support for + vector fields + + Example: + .. code-block:: python + + from langchain_couchbase import CouchbaseVectorStore + from langchain_openai import OpenAIEmbeddings + + from couchbase.cluster import Cluster + from couchbase.auth import PasswordAuthenticator + from couchbase.options import ClusterOptions + from datetime import timedelta + + auth = PasswordAuthenticator(username, password) + options = ClusterOptions(auth) + connect_string = "couchbases://localhost" + cluster = Cluster(connect_string, options) + + # Wait until the cluster is ready for use. 
+ cluster.wait_until_ready(timedelta(seconds=5)) + + embeddings = OpenAIEmbeddings() + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + bucket_name="", + scope_name="", + collection_name="", + embedding=embeddings, + index_name="vector-index", + ) + + vectorstore.add_texts(["hello", "world"]) + results = vectorstore.similarity_search("ola", k=1) + """ + + # Default batch size + DEFAULT_BATCH_SIZE = 100 + _metadata_key = "metadata" + _default_text_key = "text" + _default_embedding_key = "embedding" + + def _check_bucket_exists(self) -> bool: + """Check if the bucket exists in the linked Couchbase cluster""" + bucket_manager = self._cluster.buckets() + try: + bucket_manager.get_bucket(self._bucket_name) + return True + except Exception: + return False + + def _check_scope_and_collection_exists(self) -> bool: + """Check if the scope and collection exists in the linked Couchbase bucket + Raises a ValueError if either is not found""" + scope_collection_map: Dict[str, Any] = {} + + # Get a list of all scopes in the bucket + for scope in self._bucket.collections().get_all_scopes(): + scope_collection_map[scope.name] = [] + + # Get a list of all the collections in the scope + for collection in scope.collections: + scope_collection_map[scope.name].append(collection.name) + + # Check if the scope exists + if self._scope_name not in scope_collection_map.keys(): + raise ValueError( + f"Scope {self._scope_name} not found in Couchbase " + f"bucket {self._bucket_name}" + ) + + # Check if the collection exists in the scope + if self._collection_name not in scope_collection_map[self._scope_name]: + raise ValueError( + f"Collection {self._collection_name} not found in scope " + f"{self._scope_name} in Couchbase bucket {self._bucket_name}" + ) + + return True + + def _check_index_exists(self) -> bool: + """Check if the Search index exists in the linked Couchbase cluster + Raises a ValueError if the index does not exist""" + if self._scoped_index: + all_indexes = [ + index.name for index in self._scope.search_indexes().get_all_indexes() + ] + if self._index_name not in all_indexes: + raise ValueError( + f"Index {self._index_name} does not exist. " + " Please create the index before searching." + ) + else: + all_indexes = [ + index.name for index in self._cluster.search_indexes().get_all_indexes() + ] + if self._index_name not in all_indexes: + raise ValueError( + f"Index {self._index_name} does not exist. " + " Please create the index before searching." + ) + + return True + + def __init__( + self, + cluster: Cluster, + bucket_name: str, + scope_name: str, + collection_name: str, + embedding: Embeddings, + index_name: str, + *, + text_key: Optional[str] = _default_text_key, + embedding_key: Optional[str] = _default_embedding_key, + scoped_index: bool = True, + ) -> None: + """ + Initialize the Couchbase Vector Store. + + Args: + + cluster (Cluster): couchbase cluster object with active connection. + bucket_name (str): name of bucket to store documents in. + scope_name (str): name of scope in the bucket to store documents in. + collection_name (str): name of collection in the scope to store documents in + embedding (Embeddings): embedding function to use. + index_name (str): name of the Search index to use. + text_key (optional[str]): key in document to use as text. + Set to text by default. + embedding_key (optional[str]): key in document to use for the embeddings. + Set to embedding by default. + scoped_index (optional[bool]): specify whether the index is a scoped index. + Set to True by default. 
+ """ + if not isinstance(cluster, Cluster): + raise ValueError( + f"cluster should be an instance of couchbase.Cluster, " + f"got {type(cluster)}" + ) + + self._cluster = cluster + + if not embedding: + raise ValueError("Embeddings instance must be provided.") + + if not bucket_name: + raise ValueError("bucket_name must be provided.") + + if not scope_name: + raise ValueError("scope_name must be provided.") + + if not collection_name: + raise ValueError("collection_name must be provided.") + + if not index_name: + raise ValueError("index_name must be provided.") + + self._bucket_name = bucket_name + self._scope_name = scope_name + self._collection_name = collection_name + self._embedding_function = embedding + self._text_key = text_key + self._embedding_key = embedding_key + self._index_name = index_name + self._scoped_index = scoped_index + + # Check if the bucket exists + if not self._check_bucket_exists(): + raise ValueError( + f"Bucket {self._bucket_name} does not exist. " + " Please create the bucket before searching." + ) + + try: + self._bucket = self._cluster.bucket(self._bucket_name) + self._scope = self._bucket.scope(self._scope_name) + self._collection = self._scope.collection(self._collection_name) + except Exception as e: + raise ValueError( + "Error connecting to couchbase. " + "Please check the connection and credentials." + ) from e + + # Check if the scope and collection exists. Throws ValueError if they don't + try: + self._check_scope_and_collection_exists() + except Exception as e: + raise e + + # Check if the index exists. Throws ValueError if it doesn't + try: + self._check_index_exists() + except Exception as e: + raise e + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + batch_size: Optional[int] = None, + **kwargs: Any, + ) -> List[str]: + """Run texts through the embeddings and persist in vectorstore. + + If the document IDs are passed, the existing documents (if any) will be + overwritten with the new ones. + + Args: + texts (Iterable[str]): Iterable of strings to add to the vectorstore. + metadatas (Optional[List[Dict]]): Optional list of metadatas associated + with the texts. + ids (Optional[List[str]]): Optional list of ids associated with the texts. + IDs have to be unique strings across the collection. + If it is not specified uuids are generated and used as ids. + batch_size (Optional[int]): Optional batch size for bulk insertions. + Default is 100. + + Returns: + List[str]:List of ids from adding the texts into the vectorstore. 
+ """ + + if not batch_size: + batch_size = self.DEFAULT_BATCH_SIZE + doc_ids: List[str] = [] + + if ids is None: + ids = [uuid.uuid4().hex for _ in texts] + + if metadatas is None: + metadatas = [{} for _ in texts] + + embedded_texts = self._embedding_function.embed_documents(list(texts)) + + documents_to_insert = [ + { + id: { + self._text_key: text, + self._embedding_key: vector, + self._metadata_key: metadata, + } + for id, text, vector, metadata in zip( + ids, texts, embedded_texts, metadatas + ) + } + ] + + # Insert in batches + for i in range(0, len(documents_to_insert), batch_size): + batch = documents_to_insert[i : i + batch_size] + try: + result = self._collection.upsert_multi(batch[0]) + if result.all_ok: + doc_ids.extend(batch[0].keys()) + except DocumentExistsException as e: + raise ValueError(f"Document already exists: {e}") + + return doc_ids + + def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]: + """Delete documents from the vector store by ids. + + Args: + ids (List[str]): List of IDs of the documents to delete. + batch_size (Optional[int]): Optional batch size for bulk deletions. + + Returns: + bool: True if all the documents were deleted successfully, False otherwise. + + """ + + if ids is None: + raise ValueError("No document ids provided to delete.") + + batch_size = kwargs.get("batch_size", self.DEFAULT_BATCH_SIZE) + deletion_status = True + + # Delete in batches + for i in range(0, len(ids), batch_size): + batch = ids[i : i + batch_size] + try: + result = self._collection.remove_multi(batch) + except DocumentNotFoundException as e: + deletion_status = False + raise ValueError(f"Document not found: {e}") + + deletion_status &= result.all_ok + + return deletion_status + + @property + def embeddings(self) -> Embeddings: + """Return the query embedding object.""" + return self._embedding_function + + def _format_metadata(self, row_fields: Dict[str, Any]) -> Dict[str, Any]: + """Helper method to format the metadata from the Couchbase Search API. + Args: + row_fields (Dict[str, Any]): The fields to format. + + Returns: + Dict[str, Any]: The formatted metadata. + """ + metadata = {} + for key, value in row_fields.items(): + # Couchbase Search returns the metadata key with a prefix + # `metadata.` We remove it to get the original metadata key + if key.startswith(self._metadata_key): + new_key = key.split(self._metadata_key + ".")[-1] + metadata[new_key] = value + else: + metadata[key] = value + + return metadata + + def similarity_search( + self, + query: str, + k: int = 4, + search_options: Optional[Dict[str, Any]] = {}, + **kwargs: Any, + ) -> List[Document]: + """Return documents most similar to embedding vector with their scores. + + Args: + query (str): Query to look up for similar documents + k (int): Number of Documents to return. + Defaults to 4. + search_options (Optional[Dict[str, Any]]): Optional search options that are + passed to Couchbase search. + Defaults to empty dictionary + fields (Optional[List[str]]): Optional list of fields to include in the + metadata of results. Note that these need to be stored in the index. + If nothing is specified, defaults to all the fields stored in the index. + + Returns: + List of Documents most similar to the query. 
+ """ + query_embedding = self.embeddings.embed_query(query) + docs_with_scores = self.similarity_search_with_score_by_vector( + query_embedding, k, search_options, **kwargs + ) + return [doc for doc, _ in docs_with_scores] + + def similarity_search_with_score_by_vector( + self, + embedding: List[float], + k: int = 4, + search_options: Optional[Dict[str, Any]] = {}, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return docs most similar to embedding vector with their scores. + + Args: + embedding (List[float]): Embedding vector to look up documents similar to. + k (int): Number of Documents to return. + Defaults to 4. + search_options (Optional[Dict[str, Any]]): Optional search options that are + passed to Couchbase search. + Defaults to empty dictionary. + fields (Optional[List[str]]): Optional list of fields to include in the + metadata of results. Note that these need to be stored in the index. + If nothing is specified, defaults to all the fields stored in the index. + + Returns: + List of (Document, score) that are the most similar to the query vector. + """ + + fields = kwargs.get("fields", ["*"]) + + # Document text field needs to be returned from the search + if fields != ["*"] and self._text_key not in fields: + fields.append(self._text_key) + + search_req = search.SearchRequest.create( + VectorSearch.from_vector_query( + VectorQuery( + self._embedding_key, + embedding, + k, + ) + ) + ) + try: + if self._scoped_index: + search_iter = self._scope.search( + self._index_name, + search_req, + SearchOptions( + limit=k, + fields=fields, + raw=search_options, + ), + ) + + else: + search_iter = self._cluster.search( + self._index_name, + search_req, + SearchOptions(limit=k, fields=fields, raw=search_options), + ) + + docs_with_score = [] + + # Parse the results + for row in search_iter.rows(): + text = row.fields.pop(self._text_key, "") + + # Format the metadata from Couchbase + metadata = self._format_metadata(row.fields) + + score = row.score + doc = Document(page_content=text, metadata=metadata) + docs_with_score.append((doc, score)) + + except Exception as e: + raise ValueError(f"Search failed with error: {e}") + + return docs_with_score + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + search_options: Optional[Dict[str, Any]] = {}, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Return documents that are most similar to the query with their scores. + + Args: + query (str): Query to look up for similar documents + k (int): Number of Documents to return. + Defaults to 4. + search_options (Optional[Dict[str, Any]]): Optional search options that are + passed to Couchbase search. + Defaults to empty dictionary. + fields (Optional[List[str]]): Optional list of fields to include in the + metadata of results. Note that these need to be stored in the index. + If nothing is specified, defaults to text and metadata fields. + + Returns: + List of (Document, score) that are most similar to the query. + """ + query_embedding = self.embeddings.embed_query(query) + docs_with_score = self.similarity_search_with_score_by_vector( + query_embedding, k, search_options, **kwargs + ) + return docs_with_score + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + search_options: Optional[Dict[str, Any]] = {}, + **kwargs: Any, + ) -> List[Document]: + """Return documents that are most similar to the vector embedding. + + Args: + embedding (List[float]): Embedding to look up documents similar to. 
+ k (int): Number of Documents to return. + Defaults to 4. + search_options (Optional[Dict[str, Any]]): Optional search options that are + passed to Couchbase search. + Defaults to empty dictionary. + fields (Optional[List[str]]): Optional list of fields to include in the + metadata of results. Note that these need to be stored in the index. + If nothing is specified, defaults to document text and metadata fields. + + Returns: + List of Documents most similar to the query. + """ + docs_with_score = self.similarity_search_with_score_by_vector( + embedding, k, search_options, **kwargs + ) + return [doc for doc, _ in docs_with_score] + + @classmethod + def _from_kwargs( + cls: Type[CouchbaseVectorStore], + embedding: Embeddings, + **kwargs: Any, + ) -> CouchbaseVectorStore: + """Initialize the Couchbase vector store from keyword arguments for the + vector store. + + Args: + embedding: Embedding object to use to embed text. + **kwargs: Keyword arguments to initialize the vector store with. + Accepted arguments are: + - cluster + - bucket_name + - scope_name + - collection_name + - index_name + - text_key + - embedding_key + - scoped_index + + """ + cluster = kwargs.get("cluster", None) + bucket_name = kwargs.get("bucket_name", None) + scope_name = kwargs.get("scope_name", None) + collection_name = kwargs.get("collection_name", None) + index_name = kwargs.get("index_name", None) + text_key = kwargs.get("text_key", cls._default_text_key) + embedding_key = kwargs.get("embedding_key", cls._default_embedding_key) + scoped_index = kwargs.get("scoped_index", True) + + return cls( + embedding=embedding, + cluster=cluster, + bucket_name=bucket_name, + scope_name=scope_name, + collection_name=collection_name, + index_name=index_name, + text_key=text_key, + embedding_key=embedding_key, + scoped_index=scoped_index, + ) + + @classmethod + def from_texts( + cls: Type[CouchbaseVectorStore], + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + **kwargs: Any, + ) -> CouchbaseVectorStore: + """Construct a Couchbase vector store from a list of texts. + + Example: + .. code-block:: python + + from langchain_couchbase import CouchbaseVectorStore + from langchain_openai import OpenAIEmbeddings + + from couchbase.cluster import Cluster + from couchbase.auth import PasswordAuthenticator + from couchbase.options import ClusterOptions + from datetime import timedelta + + auth = PasswordAuthenticator(username, password) + options = ClusterOptions(auth) + connect_string = "couchbases://localhost" + cluster = Cluster(connect_string, options) + + # Wait until the cluster is ready for use. + cluster.wait_until_ready(timedelta(seconds=5)) + + embeddings = OpenAIEmbeddings() + + texts = ["hello", "world"] + + vectorstore = CouchbaseVectorStore.from_texts( + texts, + embedding=embeddings, + cluster=cluster, + bucket_name="", + scope_name="", + collection_name="", + index_name="vector-index", + ) + + Args: + texts (List[str]): list of texts to add to the vector store. + embedding (Embeddings): embedding function to use. + metadatas (optional[List[Dict]): list of metadatas to add to documents. + **kwargs: Keyword arguments used to initialize the vector store with and/or + passed to `add_texts` method. Check the constructor and/or `add_texts` + for the list of accepted arguments. + + Returns: + A Couchbase vector store. 
+ + """ + vector_store = cls._from_kwargs(embedding, **kwargs) + batch_size = kwargs.get("batch_size", vector_store.DEFAULT_BATCH_SIZE) + ids = kwargs.get("ids", None) + vector_store.add_texts( + texts, metadatas=metadatas, ids=ids, batch_size=batch_size + ) + + return vector_store diff --git a/libs/partners/couchbase/poetry.lock b/libs/partners/couchbase/poetry.lock new file mode 100644 index 0000000000000..5182aaf4c870c --- /dev/null +++ b/libs/partners/couchbase/poetry.lock @@ -0,0 +1,771 @@ +# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. + +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = 
"charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + 
{file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = 
"sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "codespell" +version = "2.2.6" +description = "Codespell" +optional = false +python-versions = ">=3.8" +files = [ + {file = "codespell-2.2.6-py3-none-any.whl", hash = "sha256:9ee9a3e5df0990604013ac2a9f22fa8e57669c827124a2e961fe8a1da4cacc07"}, + {file = "codespell-2.2.6.tar.gz", hash = "sha256:a8c65d8eb3faa03deabab6b3bbe798bea72e1799c7e9e955d57eca4096abcff9"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pre-commit", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli", "twine"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "couchbase" +version = "4.2.1" +description = "Python Client for Couchbase" +optional = false +python-versions = ">=3.7" +files = [ + {file = "couchbase-4.2.1-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:7ad4c4462879f456a9067ac1788e62d852509439bac3538b9bc459a754666481"}, + {file = "couchbase-4.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:06d91891c599ba0f5052e594ac025a2ca6ab7885e528b854ac9c125df7c74146"}, + {file = "couchbase-4.2.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0191d4a631ead533551cb9a214704ad5f3dfff2029e21a23b57725a0b5666b25"}, + {file = "couchbase-4.2.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b206790d6834a18c5e457f9a70f44774f476f3acccf9f22e8c1b5283a5bd03fa"}, + {file = "couchbase-4.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c5ca571b9ce017ecbd447de12cd46e213f93e0664bec6fca0a06e1768db1a4f8"}, + {file = "couchbase-4.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:675c615cfd4b04e73e94cf03c786da5105d94527f5c3a087813dba477a1379e9"}, + {file = "couchbase-4.2.1-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:4cd09eedf162dc28386d9c6490e832c25068406c0f5d70a0417c0b1445394651"}, + {file = "couchbase-4.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfebb11551c6d947ce6297ab02b5006b1ac8739dda3e10d41896db0dc8672915"}, + {file = "couchbase-4.2.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:39e742ccfe90a0e59e6e1b0e12f0fe224a736c0207b218ef48048052f926e1c6"}, + {file = "couchbase-4.2.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f9ba24efddf47f30603275f5433434d8759a55233c78b3e4bc613c502ac429e9"}, + {file = "couchbase-4.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:adfca3929f07fb4385dc52f08d3a60634012f364b176f95ab023cdd1bb7fe9c0"}, + {file = "couchbase-4.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:e1c68b28c6f0475961afb9fe626ad2bac8a5643b53f719675386f060db4b6e19"}, + {file = "couchbase-4.2.1-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:137512462426cd495954c1815d78115d109308a4d9f8843b638285104388a359"}, + {file = "couchbase-4.2.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5987e5edcce7696e5f75b35be91f44fa69fb5eb95dba0957ad66f789affcdb36"}, + {file = "couchbase-4.2.1-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:080cb0fc333bd4a641ede4ee14ff0c7dbe95067fbb280826ea546681e0b9f9e3"}, + {file = "couchbase-4.2.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e317c2628a4a917083e8e7ce8e2662432b6a12ebac65fc00de6da2b37ab5975c"}, + {file = "couchbase-4.2.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:de7f8699ae344e2e96706ee0eac67e96bfdd3412fb18dcfb81d8ba5837dd3dfb"}, + {file = "couchbase-4.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:82b9deb8b1fe8e8d7dde9c232ac5f4c11ff0f067930837af0e7769706e6a9453"}, + {file = "couchbase-4.2.1-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:44502d069ea17a8d692b7c88d84bc0df2cf4e944cde337c8eb3175bc0b835bb9"}, + {file = "couchbase-4.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = 
"sha256:c0f131b816a7d91b755232872ba10f6d6ca5a715e595ee9534478bc97a518ae8"}, + {file = "couchbase-4.2.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e9b9deb312bbe5f9a8e63828f9de877714c4b09b7d88f7dc87b60e5ffb2a13e6"}, + {file = "couchbase-4.2.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:71e8da251850d795975c3569c01d35ba1a556825dc7d9549ff9918d148255804"}, + {file = "couchbase-4.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d04492144ce520c612a2f8f265278c9f0cdf62fdd6f703e7a3210a7476b228f6"}, + {file = "couchbase-4.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f91b7699ea7b8253cf34c9fb6e191de9b2edfd7aa4d6f97b29c10b9a1670444"}, + {file = "couchbase-4.2.1.tar.gz", hash = "sha256:dc1c60d3f2fc179db8225aac4cc30d601d73cf2535aaf023d607e86be2d7dd78"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain-core" +version = "0.2.1" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [] +develop = true + +[package.dependencies] +jsonpatch = "^1.33" +langsmith = "^0.1.0" +packaging = "^23.2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +tenacity = "^8.1.0" + +[package.extras] +extended-testing = ["jinja2 
(>=3,<4)"] + +[package.source] +type = "directory" +url = "../../core" + +[[package]] +name = "langsmith" +version = "0.1.62" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langsmith-0.1.62-py3-none-any.whl", hash = "sha256:3a9f112643f64d736b8c875390c750fe6485804ea53aeae4edebce0afa4383a5"}, + {file = "langsmith-0.1.62.tar.gz", hash = "sha256:7ef894c14e6d4175fce88ec3bcd5a9c8cf9a456ea77e26e361f519ad082f34a8"}, +] + +[package.dependencies] +orjson = ">=3.9.14,<4.0.0" +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "mypy" +version = "1.10.0" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "mypy-1.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2"}, + {file = "mypy-1.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99"}, + {file = "mypy-1.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2"}, + {file = "mypy-1.10.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9"}, + {file = "mypy-1.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051"}, + {file = "mypy-1.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1"}, + {file = "mypy-1.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee"}, + {file = "mypy-1.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de"}, + {file = "mypy-1.10.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7"}, + {file = "mypy-1.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53"}, + {file = "mypy-1.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b"}, + {file = "mypy-1.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30"}, + {file = "mypy-1.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e"}, + {file = "mypy-1.10.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5"}, + {file = "mypy-1.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda"}, + {file = "mypy-1.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0"}, + {file = "mypy-1.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727"}, + {file = "mypy-1.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4"}, + {file = "mypy-1.10.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = 
"sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061"}, + {file = "mypy-1.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f"}, + {file = "mypy-1.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976"}, + {file = "mypy-1.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec"}, + {file = "mypy-1.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821"}, + {file = "mypy-1.10.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746"}, + {file = "mypy-1.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a"}, + {file = "mypy-1.10.0-py3-none-any.whl", hash = "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee"}, + {file = "mypy-1.10.0.tar.gz", hash = "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131"}, +] + +[package.dependencies] +mypy-extensions = ">=1.0.0" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=4.1.0" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +mypyc = ["setuptools (>=50)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." +optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "orjson" +version = "3.10.3" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.10.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9fb6c3f9f5490a3eb4ddd46fc1b6eadb0d6fc16fb3f07320149c3286a1409dd8"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:252124b198662eee80428f1af8c63f7ff077c88723fe206a25df8dc57a57b1fa"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f3e87733823089a338ef9bbf363ef4de45e5c599a9bf50a7a9b82e86d0228da"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8334c0d87103bb9fbbe59b78129f1f40d1d1e8355bbed2ca71853af15fa4ed3"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1952c03439e4dce23482ac846e7961f9d4ec62086eb98ae76d97bd41d72644d7"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0403ed9c706dcd2809f1600ed18f4aae50be263bd7112e54b50e2c2bc3ebd6d"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:382e52aa4270a037d41f325e7d1dfa395b7de0c367800b6f337d8157367bf3a7"}, + {file = "orjson-3.10.3-cp310-none-win32.whl", hash = "sha256:be2aab54313752c04f2cbaab4515291ef5af8c2256ce22abc007f89f42f49109"}, + {file = "orjson-3.10.3-cp310-none-win_amd64.whl", hash = "sha256:416b195f78ae461601893f482287cee1e3059ec49b4f99479aedf22a20b1098b"}, + 
{file = "orjson-3.10.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:73100d9abbbe730331f2242c1fc0bcb46a3ea3b4ae3348847e5a141265479700"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a12eee96e3ab828dbfcb4d5a0023aa971b27143a1d35dc214c176fdfb29b3"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520de5e2ef0b4ae546bea25129d6c7c74edb43fc6cf5213f511a927f2b28148b"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccaa0a401fc02e8828a5bedfd80f8cd389d24f65e5ca3954d72c6582495b4bcf"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a7bc9e8bc11bac40f905640acd41cbeaa87209e7e1f57ade386da658092dc16"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3582b34b70543a1ed6944aca75e219e1192661a63da4d039d088a09c67543b08"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c23dfa91481de880890d17aa7b91d586a4746a4c2aa9a145bebdbaf233768d5"}, + {file = "orjson-3.10.3-cp311-none-win32.whl", hash = "sha256:1770e2a0eae728b050705206d84eda8b074b65ee835e7f85c919f5705b006c9b"}, + {file = "orjson-3.10.3-cp311-none-win_amd64.whl", hash = "sha256:93433b3c1f852660eb5abdc1f4dd0ced2be031ba30900433223b28ee0140cde5"}, + {file = "orjson-3.10.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a39aa73e53bec8d410875683bfa3a8edf61e5a1c7bb4014f65f81d36467ea098"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0943a96b3fa09bee1afdfccc2cb236c9c64715afa375b2af296c73d91c23eab2"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e852baafceff8da3c9defae29414cc8513a1586ad93e45f27b89a639c68e8176"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18566beb5acd76f3769c1d1a7ec06cdb81edc4d55d2765fb677e3eaa10fa99e0"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd2218d5a3aa43060efe649ec564ebedec8ce6ae0a43654b81376216d5ebd42"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf20465e74c6e17a104ecf01bf8cd3b7b252565b4ccee4548f18b012ff2f8069"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba7f67aa7f983c4345eeda16054a4677289011a478ca947cd69c0a86ea45e534"}, + {file = "orjson-3.10.3-cp312-none-win32.whl", hash = "sha256:17e0713fc159abc261eea0f4feda611d32eabc35708b74bef6ad44f6c78d5ea0"}, + {file = "orjson-3.10.3-cp312-none-win_amd64.whl", hash = "sha256:4c895383b1ec42b017dd2c75ae8a5b862fc489006afde06f14afbdd0309b2af0"}, + {file = "orjson-3.10.3-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:be2719e5041e9fb76c8c2c06b9600fe8e8584e6980061ff88dcbc2691a16d20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0175a5798bdc878956099f5c54b9837cb62cfbf5d0b86ba6d77e43861bcec2"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:978be58a68ade24f1af7758626806e13cff7748a677faf95fbb298359aa1e20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16bda83b5c61586f6f788333d3cf3ed19015e3b9019188c56983b5a299210eb5"}, + {file = 
"orjson-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ad1f26bea425041e0a1adad34630c4825a9e3adec49079b1fb6ac8d36f8b754"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9e253498bee561fe85d6325ba55ff2ff08fb5e7184cd6a4d7754133bd19c9195"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a62f9968bab8a676a164263e485f30a0b748255ee2f4ae49a0224be95f4532b"}, + {file = "orjson-3.10.3-cp38-none-win32.whl", hash = "sha256:8d0b84403d287d4bfa9bf7d1dc298d5c1c5d9f444f3737929a66f2fe4fb8f134"}, + {file = "orjson-3.10.3-cp38-none-win_amd64.whl", hash = "sha256:8bc7a4df90da5d535e18157220d7915780d07198b54f4de0110eca6b6c11e290"}, + {file = "orjson-3.10.3-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9059d15c30e675a58fdcd6f95465c1522b8426e092de9fff20edebfdc15e1cb0"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d40c7f7938c9c2b934b297412c067936d0b54e4b8ab916fd1a9eb8f54c02294"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a654ec1de8fdaae1d80d55cee65893cb06494e124681ab335218be6a0691e7"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:831c6ef73f9aa53c5f40ae8f949ff7681b38eaddb6904aab89dca4d85099cb78"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99b880d7e34542db89f48d14ddecbd26f06838b12427d5a25d71baceb5ba119d"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e5e176c994ce4bd434d7aafb9ecc893c15f347d3d2bbd8e7ce0b63071c52e25"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b69a58a37dab856491bf2d3bbf259775fdce262b727f96aafbda359cb1d114d8"}, + {file = "orjson-3.10.3-cp39-none-win32.whl", hash = "sha256:b8d4d1a6868cde356f1402c8faeb50d62cee765a1f7ffcfd6de732ab0581e063"}, + {file = "orjson-3.10.3-cp39-none-win_amd64.whl", hash = "sha256:5102f50c5fc46d94f2033fe00d392588564378260d64377aec702f21a7a22912"}, + {file = "orjson-3.10.3.tar.gz", hash = "sha256:2b166507acae7ba2f7c315dcf185a9111ad5e992ac81f2d507aac39193c2c818"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "pydantic" +version = "2.7.1" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, + {file = "pydantic-2.7.1.tar.gz", hash = 
"sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.18.2" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.18.2" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, + {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, + {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, + {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, + {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, + {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, + {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, + {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, + {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, + {file = 
"pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, + {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, + {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, + {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, + {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, + {file = 
"pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, + {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", 
"xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.23.7" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"}, + {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + +[[package]] +name = "pytest-socket" +version = "0.7.0" +description = "Pytest Plugin to disable socket calls during tests" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "pytest_socket-0.7.0-py3-none-any.whl", hash = "sha256:7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45"}, + {file = "pytest_socket-0.7.0.tar.gz", hash = "sha256:71ab048cbbcb085c15a4423b73b619a8b35d6a307f46f78ea46be51b1b7e11b3"}, +] + +[package.dependencies] +pytest = ">=6.2.5" + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = 
"PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.32.2" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "ruff" +version = "0.1.15" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, +] + +[[package]] +name = "syrupy" +version = "4.6.1" +description = "Pytest Snapshot Test Utility" +optional = false +python-versions = ">=3.8.1,<4" +files = [ + {file = "syrupy-4.6.1-py3-none-any.whl", hash = "sha256:203e52f9cb9fa749cf683f29bd68f02c16c3bc7e7e5fe8f2fc59bdfe488ce133"}, + {file = "syrupy-4.6.1.tar.gz", hash = "sha256:37a835c9ce7857eeef86d62145885e10b3cb9615bc6abeb4ce404b3f18e1bb36"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9.0.0" + +[[package]] +name = "tenacity" +version = "8.3.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, + {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] 
+ +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "typing-extensions" +version = "4.11.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"}, + {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "d27ea82fa58fa4e03d47f03b6644b8da6a1b1792014e6b68abfe88cc9f45c9b3" diff --git a/libs/partners/couchbase/pyproject.toml b/libs/partners/couchbase/pyproject.toml new file mode 100644 index 0000000000000..afdbba9a4b49e --- /dev/null +++ b/libs/partners/couchbase/pyproject.toml @@ -0,0 +1,92 @@ +[tool.poetry] +name = "gigachain-couchbase" +version = "0.0.1" +description = "An integration package connecting Couchbase and Gigachain" +authors = [] +readme = "README.md" +repository = "https://github.com/langchain-ai/langchain" +license = "MIT" + +[tool.poetry.urls] +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/couchbase" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +gigachain-core = ">=0.2.0,<0.3" +couchbase = "^4.2.1" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.4.3" +pytest-asyncio = "^0.23.2" +pytest-socket = "^0.7.0" +gigachain-core = {path = "../../core", develop = true} +syrupy = "^4.0.2" + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.6" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.8" + +[tool.poetry.group.typing.dependencies] +mypy = "^1.7.1" +gigachain-core = {path = "../../core", develop = true} + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +gigachain-core = {path = "../../core", develop = true} + +[tool.ruff.lint] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "T201", # print +] + +[tool.mypy] +disallow_untyped_defs = "True" +ignore_missing_imports = "True" + +[tool.coverage.run] +omit = [ + "tests/*", +] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown 
marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config: any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. +# +# https://github.com/tophat/syrupy +# --snapshot-warn-unused Prints a warning on unused snapshots rather than failing the test suite. +addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" +# Registering custom markers. +# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers +markers = [ +    "compile: mark placeholder test used to compile integration tests without running them", +] +asyncio_mode = "auto" diff --git a/libs/partners/couchbase/scripts/check_imports.py b/libs/partners/couchbase/scripts/check_imports.py new file mode 100644 index 0000000000000..365f5fa118da4 --- /dev/null +++ b/libs/partners/couchbase/scripts/check_imports.py @@ -0,0 +1,17 @@ +import sys +import traceback +from importlib.machinery import SourceFileLoader + +if __name__ == "__main__": +    files = sys.argv[1:] +    has_failure = False +    for file in files: +        try: +            SourceFileLoader("x", file).load_module() +        except Exception: +            has_failure = True +            print(file)  # noqa: T201 +            traceback.print_exc() +            print()  # noqa: T201 + +    sys.exit(1 if has_failure else 0) diff --git a/libs/partners/couchbase/scripts/check_pydantic.sh b/libs/partners/couchbase/scripts/check_pydantic.sh new file mode 100755 index 0000000000000..06b5bb81ae236 --- /dev/null +++ b/libs/partners/couchbase/scripts/check_pydantic.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This script searches for lines starting with "import pydantic" or "from pydantic" +# in tracked files within a Git repository. +# +# Usage: ./scripts/check_pydantic.sh /path/to/repository + +# Check if a path argument is provided +if [ $# -ne 1 ]; then +  echo "Usage: $0 /path/to/repository" +  exit 1 +fi + +repository_path="$1" + +# Search for lines matching the pattern within the specified repository +result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') + +# Check if any matching lines were found +if [ -n "$result" ]; then +  echo "ERROR: The following lines need to be updated:" +  echo "$result" +  echo "Please replace the code with an import from langchain_core.pydantic_v1." +  echo "For example, replace 'from pydantic import BaseModel'" +  echo "with 'from langchain_core.pydantic_v1 import BaseModel'" +  exit 1 +fi diff --git a/libs/partners/couchbase/scripts/lint_imports.sh b/libs/partners/couchbase/scripts/lint_imports.sh new file mode 100755 index 0000000000000..19ccec1480c01 --- /dev/null +++ b/libs/partners/couchbase/scripts/lint_imports.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set -eu + +# Initialize a variable to keep track of errors +errors=0 + +# make sure not importing from langchain, langchain_experimental, or langchain_community +git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) +git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1)) +git --no-pager grep '^from langchain_community\.' .
&& errors=$((errors+1)) + +# Decide on an exit status based on the errors +if [ "$errors" -gt 0 ]; then + exit 1 +else + exit 0 +fi diff --git a/libs/partners/couchbase/tests/__init__.py b/libs/partners/couchbase/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/couchbase/tests/integration_tests/__init__.py b/libs/partners/couchbase/tests/integration_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/couchbase/tests/integration_tests/test_compile.py b/libs/partners/couchbase/tests/integration_tests/test_compile.py new file mode 100644 index 0000000000000..33ecccdfa0fbd --- /dev/null +++ b/libs/partners/couchbase/tests/integration_tests/test_compile.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.compile +def test_placeholder() -> None: + """Used for compiling integration tests without running any real tests.""" + pass diff --git a/libs/partners/couchbase/tests/integration_tests/test_vector_store.py b/libs/partners/couchbase/tests/integration_tests/test_vector_store.py new file mode 100644 index 0000000000000..4cad73481d032 --- /dev/null +++ b/libs/partners/couchbase/tests/integration_tests/test_vector_store.py @@ -0,0 +1,366 @@ +"""Test Couchbase Vector Store functionality""" + +import os +import time +from typing import Any + +import pytest +from langchain_core.documents import Document + +from langchain_couchbase import CouchbaseVectorStore +from tests.utils import ( + ConsistentFakeEmbeddings, +) + +CONNECTION_STRING = os.getenv("COUCHBASE_CONNECTION_STRING", "") +BUCKET_NAME = os.getenv("COUCHBASE_BUCKET_NAME", "") +SCOPE_NAME = os.getenv("COUCHBASE_SCOPE_NAME", "") +COLLECTION_NAME = os.getenv("COUCHBASE_COLLECTION_NAME", "") +USERNAME = os.getenv("COUCHBASE_USERNAME", "") +PASSWORD = os.getenv("COUCHBASE_PASSWORD", "") +INDEX_NAME = os.getenv("COUCHBASE_INDEX_NAME", "") +SLEEP_DURATION = 1 + + +def set_all_env_vars() -> bool: + return all( + [ + CONNECTION_STRING, + BUCKET_NAME, + SCOPE_NAME, + COLLECTION_NAME, + USERNAME, + PASSWORD, + INDEX_NAME, + ] + ) + + +def get_cluster() -> Any: + """Get a couchbase cluster object""" + from datetime import timedelta + + from couchbase.auth import PasswordAuthenticator + from couchbase.cluster import Cluster + from couchbase.options import ClusterOptions + + auth = PasswordAuthenticator(USERNAME, PASSWORD) + options = ClusterOptions(auth) + connect_string = CONNECTION_STRING + cluster = Cluster(connect_string, options) + + # Wait until the cluster is ready for use. 
+ cluster.wait_until_ready(timedelta(seconds=5)) + + return cluster + + +@pytest.fixture() +def cluster() -> Any: + """Get a couchbase cluster object""" + return get_cluster() + + +def delete_documents( + cluster: Any, bucket_name: str, scope_name: str, collection_name: str +) -> None: + """Delete all the documents in the collection""" + query = f"DELETE FROM `{bucket_name}`.`{scope_name}`.`{collection_name}`" + cluster.query(query).execute() + + +@pytest.mark.skipif( + not set_all_env_vars(), reason="Missing Couchbase environment variables" +) +class TestCouchbaseVectorStore: + @classmethod + def setup_method(self) -> None: + cluster = get_cluster() + # Delete all the documents in the collection + delete_documents(cluster, BUCKET_NAME, SCOPE_NAME, COLLECTION_NAME) + + def test_from_documents(self, cluster: Any) -> None: + """Test end to end search using a list of documents.""" + + documents = [ + Document(page_content="foo", metadata={"page": 1}), + Document(page_content="bar", metadata={"page": 2}), + Document(page_content="baz", metadata={"page": 3}), + ] + + vectorstore = CouchbaseVectorStore.from_documents( + documents, + ConsistentFakeEmbeddings(), + cluster=cluster, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + index_name=INDEX_NAME, + ) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("baz", k=1) + assert output[0].page_content == "baz" + assert output[0].metadata["page"] == 3 + + def test_from_texts(self, cluster: Any) -> None: + """Test end to end search using a list of texts.""" + + texts = [ + "foo", + "bar", + "baz", + ] + + vectorstore = CouchbaseVectorStore.from_texts( + texts, + ConsistentFakeEmbeddings(), + cluster=cluster, + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("foo", k=1) + assert len(output) == 1 + assert output[0].page_content == "foo" + + def test_from_texts_with_metadatas(self, cluster: Any) -> None: + """Test end to end search using a list of texts and metadatas.""" + + texts = [ + "foo", + "bar", + "baz", + ] + + metadatas = [{"a": 1}, {"b": 2}, {"c": 3}] + + vectorstore = CouchbaseVectorStore.from_texts( + texts, + ConsistentFakeEmbeddings(), + metadatas=metadatas, + cluster=cluster, + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("baz", k=1) + assert output[0].page_content == "baz" + assert output[0].metadata["c"] == 3 + + def test_add_texts_with_ids_and_metadatas(self, cluster: Any) -> None: + """Test end to end search by adding a list of texts, ids and metadatas.""" + + texts = [ + "foo", + "bar", + "baz", + ] + + ids = ["a", "b", "c"] + + metadatas = [{"a": 1}, {"b": 2}, {"c": 3}] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + results = vectorstore.add_texts( + texts, + ids=ids, + metadatas=metadatas, + ) + assert results == ids + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("foo", k=1) + assert output[0].page_content == "foo" + assert 
output[0].metadata["a"] == 1 + + def test_delete_texts_with_ids(self, cluster: Any) -> None: + """Test deletion of documents by ids.""" + texts = [ + "foo", + "bar", + "baz", + ] + + ids = ["a", "b", "c"] + + metadatas = [{"a": 1}, {"b": 2}, {"c": 3}] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + results = vectorstore.add_texts( + texts, + ids=ids, + metadatas=metadatas, + ) + assert results == ids + assert vectorstore.delete(ids) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("foo", k=1) + assert len(output) == 0 + + def test_similarity_search_with_scores(self, cluster: Any) -> None: + """Test similarity search with scores.""" + + texts = ["foo", "bar", "baz"] + + metadatas = [{"a": 1}, {"b": 2}, {"c": 3}] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + vectorstore.add_texts(texts, metadatas=metadatas) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search_with_score("foo", k=2) + + assert len(output) == 2 + assert output[0][0].page_content == "foo" + + # check if the scores are sorted + assert output[0][0].metadata["a"] == 1 + assert output[0][1] > output[1][1] + + def test_similarity_search_by_vector(self, cluster: Any) -> None: + """Test similarity search by vector.""" + + texts = ["foo", "bar", "baz"] + + metadatas = [{"a": 1}, {"b": 2}, {"c": 3}] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + vectorstore.add_texts(texts, metadatas=metadatas) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + vector = ConsistentFakeEmbeddings().embed_query("foo") + vector_output = vectorstore.similarity_search_by_vector(vector, k=1) + + assert vector_output[0].page_content == "foo" + + similarity_output = vectorstore.similarity_search("foo", k=1) + + assert similarity_output == vector_output + + def test_output_fields(self, cluster: Any) -> None: + """Test that output fields are set correctly.""" + + texts = [ + "foo", + "bar", + "baz", + ] + + metadatas = [{"page": 1, "a": 1}, {"page": 2, "b": 2}, {"page": 3, "c": 3}] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + ids = vectorstore.add_texts(texts, metadatas) + assert len(ids) == len(texts) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + output = vectorstore.similarity_search("foo", k=1, fields=["metadata.page"]) + assert output[0].page_content == "foo" + assert output[0].metadata["page"] == 1 + assert "a" not in output[0].metadata + + def test_hybrid_search(self, cluster: Any) -> None: + """Test hybrid search.""" + + texts = [ + "foo", + "bar", + "baz", + ] + + metadatas = [ + {"section": "index"}, + {"section": "glossary"}, + {"section": "appendix"}, + ] + + vectorstore = CouchbaseVectorStore( + cluster=cluster, + embedding=ConsistentFakeEmbeddings(), + index_name=INDEX_NAME, + 
bucket_name=BUCKET_NAME, + scope_name=SCOPE_NAME, + collection_name=COLLECTION_NAME, + ) + + vectorstore.add_texts(texts, metadatas=metadatas) + + # Wait for the documents to be indexed + time.sleep(SLEEP_DURATION) + + result, score = vectorstore.similarity_search_with_score("foo", k=1)[0] + + # Wait for the documents to be indexed for hybrid search + time.sleep(SLEEP_DURATION) + + hybrid_result, hybrid_score = vectorstore.similarity_search_with_score( + "foo", + k=1, + search_options={"query": {"match": "index", "field": "metadata.section"}}, + )[0] + + assert result == hybrid_result + assert score <= hybrid_score diff --git a/libs/partners/couchbase/tests/unit_tests/__init__.py b/libs/partners/couchbase/tests/unit_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/couchbase/tests/unit_tests/test_imports.py b/libs/partners/couchbase/tests/unit_tests/test_imports.py new file mode 100644 index 0000000000000..ec771d0ba2a17 --- /dev/null +++ b/libs/partners/couchbase/tests/unit_tests/test_imports.py @@ -0,0 +1,9 @@ +from langchain_couchbase import __all__ + +EXPECTED_ALL = [ + "CouchbaseVectorStore", +] + + +def test_all_imports() -> None: + assert sorted(EXPECTED_ALL) == sorted(__all__) diff --git a/libs/partners/couchbase/tests/unit_tests/test_vectorstore.py b/libs/partners/couchbase/tests/unit_tests/test_vectorstore.py new file mode 100644 index 0000000000000..8b137891791fe --- /dev/null +++ b/libs/partners/couchbase/tests/unit_tests/test_vectorstore.py @@ -0,0 +1 @@ + diff --git a/libs/partners/couchbase/tests/utils.py b/libs/partners/couchbase/tests/utils.py new file mode 100644 index 0000000000000..d12df58d92b3f --- /dev/null +++ b/libs/partners/couchbase/tests/utils.py @@ -0,0 +1,55 @@ +"""Fake Embedding class for testing purposes.""" + +from typing import List + +from langchain_core.embeddings import Embeddings + +fake_texts = ["foo", "bar", "baz"] + + +class FakeEmbeddings(Embeddings): + """Fake embeddings functionality for testing.""" + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return simple embeddings. + Embeddings encode each text as its index.""" + return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))] + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> List[float]: + """Return constant query embeddings. + Embeddings are identical to embed_documents(texts)[0]. 
+ Distance to each text will be that text's index, + as it was passed to embed_documents.""" + return [float(1.0)] * 9 + [float(0.0)] + + async def aembed_query(self, text: str) -> List[float]: + return self.embed_query(text) + + +class ConsistentFakeEmbeddings(FakeEmbeddings): + """Fake embeddings which remember all the texts seen so far to return consistent + vectors for the same texts.""" + + def __init__(self, dimensionality: int = 10) -> None: + self.known_texts: List[str] = [] + self.dimensionality = dimensionality + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return consistent embeddings for each text seen so far.""" + out_vectors = [] + for text in texts: + if text not in self.known_texts: + self.known_texts.append(text) + vector = [float(1.0)] * (self.dimensionality - 1) + [ + float(self.known_texts.index(text)) + ] + out_vectors.append(vector) + return out_vectors + + def embed_query(self, text: str) -> List[float]: + """Return consistent embeddings for the text, if seen before, or a constant + one if the text is unknown.""" + return self.embed_documents([text])[0] diff --git a/libs/partners/exa/pyproject.toml b/libs/partners/exa/pyproject.toml index 96c66afcda88d..d4809b2a69003 100644 --- a/libs/partners/exa/pyproject.toml +++ b/libs/partners/exa/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-exa" +name = "gigachain-exa" version = "0.1.0" -description = "An integration package connecting Exa and LangChain" +description = "An integration package connecting Exa and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" diff --git a/libs/partners/fireworks/pyproject.toml b/libs/partners/fireworks/pyproject.toml index f3557d37f85cd..cc11d25044c58 100644 --- a/libs/partners/fireworks/pyproject.toml +++ b/libs/partners/fireworks/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-fireworks" +name = "gigachain-fireworks" version = "0.1.3" -description = "An integration package connecting Fireworks and LangChain" +description = "An integration package connecting Fireworks and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.52,<0.3" +gigachain-core = ">=0.2.2,<0.3" fireworks-ai = ">=0.13.0" openai = "^1.10.0" requests = "^2" diff --git a/libs/partners/groq/pyproject.toml b/libs/partners/groq/pyproject.toml index c18d168cbd834..20e81c2f26f0b 100644 --- a/libs/partners/groq/pyproject.toml +++ b/libs/partners/groq/pyproject.toml @@ -1,18 +1,18 @@ [tool.poetry] name = "gigachain-groq" -version = "0.1.4" -description = "An integration package connecting Groq and LangChain" +version = "0.1.5" +description = "An integration package connecting Groq and Gigachain" authors = [] readme = "README.md" -repository = "https://github.com/gigachain-ai/gigachain" +repository = "https://github.com/langchain-ai/langchain" license = "MIT" [tool.poetry.urls] -"Source Code" = "https://github.com/gigachain-ai/gigachain/tree/master/libs/partners/groq" +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/groq" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.45,<0.3" +gigachain-core = ">=0.2.2,<0.3" groq = ">=0.4.1,<1" [tool.poetry.group.test] @@ -92,5 +92,6 @@ filterwarnings = [ 'ignore:The method `ChatGroq.with_structured_output` is in beta', # Maintain support for pydantic 1.X 
'default:The `dict` method is deprecated; use `model_dump` instead:DeprecationWarning', + "ignore:tool_choice='any' is not currently supported. Converting to 'auto'.", ] asyncio_mode = "auto" diff --git a/libs/partners/huggingface/pyproject.toml b/libs/partners/huggingface/pyproject.toml index 918f660d40c29..fbe59d8f29028 100644 --- a/libs/partners/huggingface/pyproject.toml +++ b/libs/partners/huggingface/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-huggingface" -version = "0.0.1" -description = "An integration package connecting Hugging Face and LangChain" +name = "gigachain-huggingface" +version = "0.0.3" +description = "An integration package connecting Hugging Face and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,11 +12,10 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.52,<0.3" +gigachain-core = ">=0.1.52,<0.3" tokenizers = ">=0.19.1" transformers = ">=4.39.0" sentence-transformers = ">=2.6.0" -text-generation = "^0.7.0" huggingface-hub = ">=0.23.0" [tool.poetry.group.test] @@ -25,9 +24,18 @@ optional = true [tool.poetry.group.test.dependencies] pytest = "^7.3.0" pytest-asyncio = "^0.21.1" -langchain-core = { path = "../../core", develop = true } -langchain-standard-tests = { path = "../../standard-tests", develop = true } -langchain-community = { path = "../../community", develop = true } +gigachain-core = { path = "../../core", develop = true } +gigachain-standard-tests = { path = "../../standard-tests", develop = true } +gigachain-community = { path = "../../community", develop = true } +# Support Python 3.8 and 3.12+. +scipy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.7.0", python = ">=3.12"} +] +numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] [tool.poetry.group.codespell] optional = true @@ -43,13 +51,13 @@ ruff = "^0.1.5" [tool.poetry.group.typing.dependencies] mypy = "^1" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } ipykernel = "^6.29.2" [tool.poetry.group.test_integration] diff --git a/libs/partners/ibm/pyproject.toml b/libs/partners/ibm/pyproject.toml index 7ffa56c5228bb..3a266b5b3f93e 100644 --- a/libs/partners/ibm/pyproject.toml +++ b/libs/partners/ibm/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-ibm" +name = "gigachain-ibm" version = "0.1.7" -description = "An integration package connecting IBM watsonx.ai and LangChain" +description = "An integration package connecting IBM watsonx.ai and Gigachain" authors = ["IBM"] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" diff --git a/libs/partners/milvus/.gitignore b/libs/partners/milvus/.gitignore new file mode 100644 index 0000000000000..bee8a64b79a99 --- /dev/null +++ b/libs/partners/milvus/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/libs/partners/milvus/LICENSE b/libs/partners/milvus/LICENSE new file mode 100644 index 0000000000000..426b65090341f --- /dev/null +++ b/libs/partners/milvus/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 LangChain, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/libs/partners/milvus/Makefile b/libs/partners/milvus/Makefile new file mode 100644 index 0000000000000..263896e6e0a0b --- /dev/null +++ b/libs/partners/milvus/Makefile @@ -0,0 +1,57 @@ +.PHONY: all format lint test tests integration_tests docker_tests help extended_tests + +# Default target executed when no arguments are given to make. +all: help + +# Define a variable for the test file path. +TEST_FILE ?= tests/unit_tests/ +integration_test integration_tests: TEST_FILE=tests/integration_tests/ + +test tests integration_test integration_tests: + poetry run pytest $(TEST_FILE) + + +###################### +# LINTING AND FORMATTING +###################### + +# Define a variable for Python and notebook files. +PYTHON_FILES=. +MYPY_CACHE=.mypy_cache +lint format: PYTHON_FILES=. +lint_diff format_diff: PYTHON_FILES=$(shell git diff --relative=libs/partners/milvus --name-only --diff-filter=d master | grep -E '\.py$$|\.ipynb$$') +lint_package: PYTHON_FILES=langchain_milvus +lint_tests: PYTHON_FILES=tests +lint_tests: MYPY_CACHE=.mypy_cache_test + +lint lint_diff lint_package lint_tests: + poetry run ruff . + poetry run ruff format $(PYTHON_FILES) --diff + poetry run ruff --select I $(PYTHON_FILES) + mkdir $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) + +format format_diff: + poetry run ruff format $(PYTHON_FILES) + poetry run ruff --select I --fix $(PYTHON_FILES) + +spell_check: + poetry run codespell --toml pyproject.toml + +spell_fix: + poetry run codespell --toml pyproject.toml -w + +check_imports: $(shell find langchain_milvus -name '*.py') + poetry run python ./scripts/check_imports.py $^ + +###################### +# HELP +###################### + +help: + @echo '----' + @echo 'check_imports - check imports' + @echo 'format - run code formatters' + @echo 'lint - run linters' + @echo 'test - run unit tests' + @echo 'tests - run unit tests' + @echo 'test TEST_FILE= - run all tests in file' diff --git a/libs/partners/milvus/README.md b/libs/partners/milvus/README.md new file mode 100644 index 0000000000000..80820f32d1b6a --- /dev/null +++ b/libs/partners/milvus/README.md @@ -0,0 +1,42 @@ +# langchain-milvus + +This is a library integration with [Milvus](https://milvus.io/) and [Zilliz Cloud](https://zilliz.com/cloud). 
+ +## Installation + +```bash +pip install -U langchain-milvus +``` + +## Milvus vector database + +See a [usage example](https://python.langchain.com/v0.2/docs/integrations/vectorstores/milvus/) + +```python +from langchain_milvus import Milvus +``` + +## Milvus hybrid search + +See a [usage example](https://python.langchain.com/v0.2/docs/integrations/retrievers/milvus_hybrid_search/). + +```python +from langchain_milvus import MilvusCollectionHybridSearchRetriever +``` + + +## Zilliz Cloud vector database + +See a [usage example](https://python.langchain.com/v0.2/docs/integrations/vectorstores/zilliz/). + +```python +from langchain_milvus import Zilliz +``` + +## Zilliz Cloud Pipeline Retriever + +See a [usage example](https://python.langchain.com/v0.2/docs/integrations/retrievers/zilliz_cloud_pipeline/). + +```python +from langchain_milvus import ZillizCloudPipelineRetriever +``` \ No newline at end of file diff --git a/libs/partners/milvus/langchain_milvus/__init__.py b/libs/partners/milvus/langchain_milvus/__init__.py new file mode 100644 index 0000000000000..b19bc1d7e697a --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/__init__.py @@ -0,0 +1,12 @@ +from langchain_milvus.retrievers import ( + MilvusCollectionHybridSearchRetriever, + ZillizCloudPipelineRetriever, +) +from langchain_milvus.vectorstores import Milvus, Zilliz + +__all__ = [ + "Milvus", + "Zilliz", + "ZillizCloudPipelineRetriever", + "MilvusCollectionHybridSearchRetriever", +] diff --git a/libs/partners/milvus/langchain_milvus/py.typed b/libs/partners/milvus/langchain_milvus/py.typed new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/langchain_milvus/retrievers/__init__.py b/libs/partners/milvus/langchain_milvus/retrievers/__init__.py new file mode 100644 index 0000000000000..1edac3cb5af73 --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/retrievers/__init__.py @@ -0,0 +1,8 @@ +from langchain_milvus.retrievers.milvus_hybrid_search import ( + MilvusCollectionHybridSearchRetriever, +) +from langchain_milvus.retrievers.zilliz_cloud_pipeline_retriever import ( + ZillizCloudPipelineRetriever, +) + +__all__ = ["ZillizCloudPipelineRetriever", "MilvusCollectionHybridSearchRetriever"] diff --git a/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py b/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py new file mode 100644 index 0000000000000..6b6e692dd2c21 --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/retrievers/milvus_hybrid_search.py @@ -0,0 +1,160 @@ +from typing import Any, Dict, List, Optional, Union + +from langchain_core.callbacks import CallbackManagerForRetrieverRun +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.retrievers import BaseRetriever +from pymilvus import AnnSearchRequest, Collection +from pymilvus.client.abstract import BaseRanker, SearchResult # type: ignore + +from langchain_milvus.utils.sparse import BaseSparseEmbedding + + +class MilvusCollectionHybridSearchRetriever(BaseRetriever): + """This is a hybrid search retriever + that uses Milvus Collection to retrieve documents based on multiple fields. + For more information, please refer to: + https://milvus.io/docs/release_notes.md#Multi-Embedding---Hybrid-Search + """ + + collection: Collection + """Milvus Collection object.""" + rerank: BaseRanker + """Milvus ranker object. 
Such as WeightedRanker or RRFRanker.""" + anns_fields: List[str] + """The names of vector fields that are used for ANNS search.""" + field_embeddings: List[Union[Embeddings, BaseSparseEmbedding]] + """The embedding functions of each vector fields, + which can be either Embeddings or BaseSparseEmbedding.""" + field_search_params: Optional[List[Dict]] = None + """The search parameters of each vector fields. + If not specified, the default search parameters will be used.""" + field_limits: Optional[List[int]] = None + """Limit number of results for each ANNS field. + If not specified, the default top_k will be used.""" + field_exprs: Optional[List[Optional[str]]] = None + """The boolean expression for filtering the search results.""" + top_k: int = 4 + """Final top-K number of documents to retrieve.""" + text_field: str = "text" + """The text field name, + which will be used as the `page_content` of a `Document` object.""" + output_fields: Optional[List[str]] = None + """Final output fields of the documents. + If not specified, all fields except the vector fields will be used as output fields, + which will be the `metadata` of a `Document` object.""" + + def __init__(self, **kwargs: Any): + super().__init__(**kwargs) + + # If some parameters are not specified, set default values + if self.field_search_params is None: + default_search_params = { + "metric_type": "L2", + "params": {"nprobe": 10}, + } + self.field_search_params = [default_search_params] * len(self.anns_fields) + if self.field_limits is None: + self.field_limits = [self.top_k] * len(self.anns_fields) + if self.field_exprs is None: + self.field_exprs = [None] * len(self.anns_fields) + + # Check the fields + self._validate_fields_num() + self.output_fields = self._get_output_fields() + self._validate_fields_name() + + # Load collection + self.collection.load() + + def _validate_fields_num(self) -> None: + assert ( + len(self.anns_fields) >= 2 + ), "At least two fields are required for hybrid search." + lengths = [len(self.anns_fields)] + if self.field_limits is not None: + lengths.append(len(self.field_limits)) + if self.field_exprs is not None: + lengths.append(len(self.field_exprs)) + + if not all(length == lengths[0] for length in lengths): + raise ValueError("All field-related lists must have the same length.") + + if len(self.field_search_params) != len(self.anns_fields): # type: ignore[arg-type] + raise ValueError( + "field_search_params must have the same length as anns_fields." + ) + + def _validate_fields_name(self) -> None: + collection_fields = [x.name for x in self.collection.schema.fields] + for field in self.anns_fields: + assert ( + field in collection_fields + ), f"{field} is not a valid field in the collection." + assert ( + self.text_field in collection_fields + ), f"{self.text_field} is not a valid field in the collection." + for field in self.output_fields: # type: ignore[union-attr] + assert ( + field in collection_fields + ), f"{field} is not a valid field in the collection." 
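# A hedged usage sketch for this retriever (illustrative only). It assumes a
# running Milvus 2.4+ server and an existing, already-populated collection named
# "hybrid_demo" with a scalar "text" field plus "dense_vector" and
# "sparse_vector" vector fields; those names, the OpenAI embeddings and the tiny
# BM25 corpus are assumptions, not requirements of this module.
from langchain_milvus import MilvusCollectionHybridSearchRetriever
from langchain_milvus.utils.sparse import BM25SparseEmbedding
from langchain_openai import OpenAIEmbeddings
from pymilvus import Collection, WeightedRanker, connections

connections.connect(uri="http://localhost:19530")
collection = Collection("hybrid_demo")  # must already exist and contain data

dense_embedding = OpenAIEmbeddings()
sparse_embedding = BM25SparseEmbedding(corpus=["hybrid search demo", "hello world"])

retriever = MilvusCollectionHybridSearchRetriever(
    collection=collection,
    rerank=WeightedRanker(0.6, 0.4),  # weight dense vs. sparse scores
    anns_fields=["dense_vector", "sparse_vector"],
    field_embeddings=[dense_embedding, sparse_embedding],
    field_search_params=[
        {"metric_type": "L2", "params": {"nprobe": 10}},
        {"metric_type": "IP", "params": {}},
    ],
    top_k=3,
    text_field="text",
)
docs = retriever.invoke("What is hybrid search?")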
+ + def _get_output_fields(self) -> List[str]: + if self.output_fields: + return self.output_fields + output_fields = [x.name for x in self.collection.schema.fields] + for field in self.anns_fields: + if field in output_fields: + output_fields.remove(field) + if self.text_field not in output_fields: + output_fields.append(self.text_field) + return output_fields + + def _build_ann_search_requests(self, query: str) -> List[AnnSearchRequest]: + search_requests = [] + for ann_field, embedding, param, limit, expr in zip( + self.anns_fields, + self.field_embeddings, + self.field_search_params, # type: ignore[arg-type] + self.field_limits, # type: ignore[arg-type] + self.field_exprs, # type: ignore[arg-type] + ): + request = AnnSearchRequest( + data=[embedding.embed_query(query)], + anns_field=ann_field, + param=param, + limit=limit, + expr=expr, + ) + search_requests.append(request) + return search_requests + + def _parse_document(self, data: dict) -> Document: + return Document( + page_content=data.pop(self.text_field), + metadata=data, + ) + + def _process_search_result( + self, search_results: List[SearchResult] + ) -> List[Document]: + documents = [] + for result in search_results[0]: + data = {x: result.entity.get(x) for x in self.output_fields} # type: ignore[union-attr] + doc = self._parse_document(data) + documents.append(doc) + return documents + + def _get_relevant_documents( + self, + query: str, + *, + run_manager: CallbackManagerForRetrieverRun, + **kwargs: Any, + ) -> List[Document]: + requests = self._build_ann_search_requests(query) + search_result = self.collection.hybrid_search( + requests, self.rerank, limit=self.top_k, output_fields=self.output_fields + ) + documents = self._process_search_result(search_result) + return documents diff --git a/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py b/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py new file mode 100644 index 0000000000000..6fbccfa47fa7c --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/retrievers/zilliz_cloud_pipeline_retriever.py @@ -0,0 +1,215 @@ +from typing import Any, Dict, List, Optional + +import requests +from langchain_core.callbacks.manager import CallbackManagerForRetrieverRun +from langchain_core.documents import Document +from langchain_core.retrievers import BaseRetriever + + +class ZillizCloudPipelineRetriever(BaseRetriever): + """`Zilliz Cloud Pipeline` retriever + + Args: + pipeline_ids (dict): A dictionary of pipeline ids. + Valid keys: "ingestion", "search", "deletion". + token (str): Zilliz Cloud's token. Defaults to "". + cloud_region (str='gcp-us-west1'): The region of Zilliz Cloud's cluster. + Defaults to 'gcp-us-west1'. + """ + + pipeline_ids: Dict + token: str = "" + cloud_region: str = "gcp-us-west1" + + def _get_relevant_documents( + self, + query: str, + top_k: int = 10, + offset: int = 0, + output_fields: List = [], + filter: str = "", + *, + run_manager: CallbackManagerForRetrieverRun, + ) -> List[Document]: + """ + Get documents relevant to a query. + + Args: + query (str): String to find relevant documents for + top_k (int=10): The number of results. Defaults to 10. + offset (int=0): The number of records to skip in the search result. + Defaults to 0. + output_fields (list=[]): The extra fields to present in output. + filter (str=""): The Milvus expression to filter search results. + Defaults to "". + run_manager (CallBackManagerForRetrieverRun): The callbacks handler to use. 
+ + Returns: + List of relevant documents + """ + if "search" in self.pipeline_ids: + search_pipe_id = self.pipeline_ids.get("search") + else: + raise Exception( + "A search pipeline id must be provided in pipeline_ids to " + "get relevant documents." + ) + domain = ( + f"https://controller.api.{self.cloud_region}.zillizcloud.com/v1/pipelines" + ) + headers = { + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + "Content-Type": "application/json", + } + url = f"{domain}/{search_pipe_id}/run" + + params = { + "data": {"query_text": query}, + "params": { + "limit": top_k, + "offset": offset, + "outputFields": output_fields, + "filter": filter, + }, + } + + response = requests.post(url, headers=headers, json=params) + if response.status_code != 200: + raise RuntimeError(response.text) + response_dict = response.json() + if response_dict["code"] != 200: + raise RuntimeError(response_dict) + response_data = response_dict["data"] + search_results = response_data["result"] + return [ + Document( + page_content=result.pop("text") + if "text" in result + else result.pop("chunk_text"), + metadata=result, + ) + for result in search_results + ] + + def add_texts( + self, texts: List[str], metadata: Optional[Dict[str, Any]] = None + ) -> Dict: + """ + Add documents to store. + Only supported by a text ingestion pipeline in Zilliz Cloud. + + Args: + texts (List[str]): A list of text strings. + metadata (Dict[str, Any]): A key-value dictionary of metadata will + be inserted as preserved fields required by ingestion pipeline. + Defaults to None. + """ + if "ingestion" in self.pipeline_ids: + ingeset_pipe_id = self.pipeline_ids.get("ingestion") + else: + raise Exception( + "An ingestion pipeline id must be provided in pipeline_ids to" + " add documents." + ) + domain = ( + f"https://controller.api.{self.cloud_region}.zillizcloud.com/v1/pipelines" + ) + headers = { + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + "Content-Type": "application/json", + } + url = f"{domain}/{ingeset_pipe_id}/run" + + metadata = {} if metadata is None else metadata + params = {"data": {"text_list": texts}} + params["data"].update(metadata) + + response = requests.post(url, headers=headers, json=params) + if response.status_code != 200: + raise Exception(response.text) + response_dict = response.json() + if response_dict["code"] != 200: + raise Exception(response_dict) + response_data = response_dict["data"] + return response_data + + def add_doc_url( + self, doc_url: str, metadata: Optional[Dict[str, Any]] = None + ) -> Dict: + """ + Add a document from url. + Only supported by a document ingestion pipeline in Zilliz Cloud. + + Args: + doc_url: A document url. + metadata (Dict[str, Any]): A key-value dictionary of metadata will + be inserted as preserved fields required by ingestion pipeline. + Defaults to None. + """ + if "ingestion" in self.pipeline_ids: + ingest_pipe_id = self.pipeline_ids.get("ingestion") + else: + raise Exception( + "An ingestion pipeline id must be provided in pipeline_ids to " + "add documents." 
+ ) + domain = ( + f"https://controller.api.{self.cloud_region}.zillizcloud.com/v1/pipelines" + ) + headers = { + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + "Content-Type": "application/json", + } + url = f"{domain}/{ingest_pipe_id}/run" + + params = {"data": {"doc_url": doc_url}} + metadata = {} if metadata is None else metadata + params["data"].update(metadata) + + response = requests.post(url, headers=headers, json=params) + if response.status_code != 200: + raise Exception(response.text) + response_dict = response.json() + if response_dict["code"] != 200: + raise Exception(response_dict) + response_data = response_dict["data"] + return response_data + + def delete(self, key: str, value: Any) -> Dict: + """ + Delete documents. Only supported by a deletion pipeline in Zilliz Cloud. + + Args: + key: input name to run the deletion pipeline + value: input value to run deletion pipeline + """ + if "deletion" in self.pipeline_ids: + deletion_pipe_id = self.pipeline_ids.get("deletion") + else: + raise Exception( + "A deletion pipeline id must be provided in pipeline_ids to " + "add documents." + ) + domain = ( + f"https://controller.api.{self.cloud_region}.zillizcloud.com/v1/pipelines" + ) + headers = { + "Authorization": f"Bearer {self.token}", + "Accept": "application/json", + "Content-Type": "application/json", + } + url = f"{domain}/{deletion_pipe_id}/run" + + params = {"data": {key: value}} + + response = requests.post(url, headers=headers, json=params) + if response.status_code != 200: + raise Exception(response.text) + response_dict = response.json() + if response_dict["code"] != 200: + raise Exception(response_dict) + response_data = response_dict["data"] + return response_data diff --git a/libs/partners/milvus/langchain_milvus/utils/__init__.py b/libs/partners/milvus/langchain_milvus/utils/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/langchain_milvus/utils/sparse.py b/libs/partners/milvus/langchain_milvus/utils/sparse.py new file mode 100644 index 0000000000000..027c978c65d75 --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/utils/sparse.py @@ -0,0 +1,54 @@ +from abc import ABC, abstractmethod +from typing import Dict, List + +from scipy.sparse import csr_array # type: ignore + + +class BaseSparseEmbedding(ABC): + """Interface for Sparse embedding models. + You can inherit from it and implement your custom sparse embedding model. + """ + + @abstractmethod + def embed_query(self, query: str) -> Dict[int, float]: + """Embed query text.""" + + @abstractmethod + def embed_documents(self, texts: List[str]) -> List[Dict[int, float]]: + """Embed search docs.""" + + +class BM25SparseEmbedding(BaseSparseEmbedding): + """This is a class that inherits BaseSparseEmbedding + and implements a sparse vector embedding model based on BM25. + This class uses the BM25 model in Milvus model to implement sparse vector embedding. + This model requires pymilvus[model] to be installed. 
+ `pip install pymilvus[model]` + For more information please refer to: + https://milvus.io/docs/embed-with-bm25.md + """ + + def __init__(self, corpus: List[str], language: str = "en"): + from pymilvus.model.sparse import BM25EmbeddingFunction # type: ignore + from pymilvus.model.sparse.bm25.tokenizers import ( # type: ignore + build_default_analyzer, + ) + + self.analyzer = build_default_analyzer(language=language) + self.bm25_ef = BM25EmbeddingFunction(self.analyzer, num_workers=1) + self.bm25_ef.fit(corpus) + + def embed_query(self, text: str) -> Dict[int, float]: + return self._sparse_to_dict(self.bm25_ef.encode_queries([text])) + + def embed_documents(self, texts: List[str]) -> List[Dict[int, float]]: + sparse_arrays = self.bm25_ef.encode_documents(texts) + return [self._sparse_to_dict(sparse_array) for sparse_array in sparse_arrays] + + def _sparse_to_dict(self, sparse_array: csr_array) -> Dict[int, float]: + row_indices, col_indices = sparse_array.nonzero() + non_zero_values = sparse_array.data + result_dict = {} + for col_index, value in zip(col_indices, non_zero_values): + result_dict[col_index] = value + return result_dict diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/__init__.py b/libs/partners/milvus/langchain_milvus/vectorstores/__init__.py new file mode 100644 index 0000000000000..5c6f304db98cb --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/vectorstores/__init__.py @@ -0,0 +1,7 @@ +from langchain_milvus.vectorstores.milvus import Milvus +from langchain_milvus.vectorstores.zilliz import Zilliz + +__all__ = [ + "Milvus", + "Zilliz", +] diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py new file mode 100644 index 0000000000000..78bf7a98527de --- /dev/null +++ b/libs/partners/milvus/langchain_milvus/vectorstores/milvus.py @@ -0,0 +1,1146 @@ +from __future__ import annotations + +import logging +from typing import Any, Iterable, List, Optional, Tuple, Union +from uuid import uuid4 + +import numpy as np +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings +from langchain_core.vectorstores import VectorStore + +logger = logging.getLogger(__name__) + +DEFAULT_MILVUS_CONNECTION = { + "uri": "http://localhost:19530", +} + +Matrix = Union[List[List[float]], List[np.ndarray], np.ndarray] + + +def cosine_similarity(X: Matrix, Y: Matrix) -> np.ndarray: + """Row-wise cosine similarity between two equal-width matrices.""" + if len(X) == 0 or len(Y) == 0: + return np.array([]) + + X = np.array(X) + Y = np.array(Y) + if X.shape[1] != Y.shape[1]: + raise ValueError( + f"Number of columns in X and Y must be the same. X has shape {X.shape} " + f"and Y has shape {Y.shape}." + ) + try: + import simsimd as simd # type: ignore + + X = np.array(X, dtype=np.float32) + Y = np.array(Y, dtype=np.float32) + Z = 1 - np.array(simd.cdist(X, Y, metric="cosine")) + return Z + except ImportError: + logger.debug( + "Unable to import simsimd, defaulting to NumPy implementation. If you want " + "to use simsimd please install with `pip install simsimd`." + ) + X_norm = np.linalg.norm(X, axis=1) + Y_norm = np.linalg.norm(Y, axis=1) + # Ignore divide by zero errors run time warnings as those are handled below. 
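# Zero-norm rows would otherwise produce divide-by-zero NaN/inf similarities;
# those entries are explicitly zeroed out right after the division below.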
+ with np.errstate(divide="ignore", invalid="ignore"): + similarity = np.dot(X, Y.T) / np.outer(X_norm, Y_norm) + similarity[np.isnan(similarity) | np.isinf(similarity)] = 0.0 + return similarity + + +def maximal_marginal_relevance( + query_embedding: np.ndarray, + embedding_list: list, + lambda_mult: float = 0.5, + k: int = 4, +) -> List[int]: + """Calculate maximal marginal relevance.""" + if min(k, len(embedding_list)) <= 0: + return [] + if query_embedding.ndim == 1: + query_embedding = np.expand_dims(query_embedding, axis=0) + similarity_to_query = cosine_similarity(query_embedding, embedding_list)[0] + most_similar = int(np.argmax(similarity_to_query)) + idxs = [most_similar] + selected = np.array([embedding_list[most_similar]]) + while len(idxs) < min(k, len(embedding_list)): + best_score = -np.inf + idx_to_add = -1 + similarity_to_selected = cosine_similarity(embedding_list, selected) + for i, query_score in enumerate(similarity_to_query): + if i in idxs: + continue + redundant_score = max(similarity_to_selected[i]) + equation_score = ( + lambda_mult * query_score - (1 - lambda_mult) * redundant_score + ) + if equation_score > best_score: + best_score = equation_score + idx_to_add = i + idxs.append(idx_to_add) + selected = np.append(selected, [embedding_list[idx_to_add]], axis=0) + return idxs + + +class Milvus(VectorStore): + """`Milvus` vector store. + + You need to install `pymilvus` and run Milvus. + + See the following documentation for how to run a Milvus instance: + https://milvus.io/docs/install_standalone-docker.md + + If looking for a hosted Milvus, take a look at this documentation: + https://zilliz.com/cloud and make use of the Zilliz vectorstore found in + this project. + + IF USING L2/IP metric, IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA. + + Args: + embedding_function (Embeddings): Function used to embed the text. + collection_name (str): Which Milvus collection to use. Defaults to + "LangChainCollection". + collection_description (str): The description of the collection. Defaults to + "". + collection_properties (Optional[dict[str, any]]): The collection properties. + Defaults to None. + If set, will override collection existing properties. + For example: {"collection.ttl.seconds": 60}. + connection_args (Optional[dict[str, any]]): The connection args used for + this class comes in the form of a dict. + consistency_level (str): The consistency level to use for a collection. + Defaults to "Session". + index_params (Optional[dict]): Which index params to use. Defaults to + HNSW/AUTOINDEX depending on service. + search_params (Optional[dict]): Which search params to use. Defaults to + default of index. + drop_old (Optional[bool]): Whether to drop the current collection. Defaults + to False. + auto_id (bool): Whether to enable auto id for primary key. Defaults to False. + If False, you needs to provide text ids (string less than 65535 bytes). + If True, Milvus will generate unique integers as primary keys. + primary_field (str): Name of the primary key field. Defaults to "pk". + text_field (str): Name of the text field. Defaults to "text". + vector_field (str): Name of the vector field. Defaults to "vector". + metadata_field (str): Name of the metadta field. Defaults to None. + When metadata_field is specified, + the document's metadata will store as json. + + The connection args used for this class comes in the form of a dict, + here are a few of the options: + address (str): The actual address of Milvus + instance. 
Example address: "localhost:19530" + uri (str): The uri of Milvus instance. Example uri: + "http://randomwebsite:19530", + "tcp:foobarsite:19530", + "https://ok.s3.south.com:19530". + or "path/to/local/directory/milvus_demo.db" for Milvus Lite. + host (str): The host of Milvus instance. Default at "localhost", + PyMilvus will fill in the default host if only port is provided. + port (str/int): The port of Milvus instance. Default at 19530, PyMilvus + will fill in the default port if only host is provided. + user (str): Use which user to connect to Milvus instance. If user and + password are provided, we will add related header in every RPC call. + password (str): Required when user is provided. The password + corresponding to the user. + secure (bool): Default is false. If set to true, tls will be enabled. + client_key_path (str): If use tls two-way authentication, need to + write the client.key path. + client_pem_path (str): If use tls two-way authentication, need to + write the client.pem path. + ca_pem_path (str): If use tls two-way authentication, need to write + the ca.pem path. + server_pem_path (str): If use tls one-way authentication, need to + write the server.pem path. + server_name (str): If use tls, need to write the common name. + + Example: + .. code-block:: python + + from langchain_milvus.vectorstores import Milvus + from langchain_openai.embeddings import OpenAIEmbeddings + + embedding = OpenAIEmbeddings() + # Connect to a milvus instance on localhost + milvus_store = Milvus( + embedding_function = Embeddings, + collection_name = "LangChainCollection", + drop_old = True, + auto_id = True + ) + + Raises: + ValueError: If the pymilvus python package is not installed. + """ + + def __init__( + self, + embedding_function: Embeddings, + collection_name: str = "LangChainCollection", + collection_description: str = "", + collection_properties: Optional[dict[str, Any]] = None, + connection_args: Optional[dict[str, Any]] = None, + consistency_level: str = "Session", + index_params: Optional[dict] = None, + search_params: Optional[dict] = None, + drop_old: Optional[bool] = False, + auto_id: bool = False, + *, + primary_field: str = "pk", + text_field: str = "text", + vector_field: str = "vector", + metadata_field: Optional[str] = None, + partition_key_field: Optional[str] = None, + partition_names: Optional[list] = None, + replica_number: int = 1, + timeout: Optional[float] = None, + num_shards: Optional[int] = None, + ): + """Initialize the Milvus vector store.""" + try: + from pymilvus import Collection, utility + except ImportError: + raise ValueError( + "Could not import pymilvus python package. " + "Please install it with `pip install pymilvus`." + ) + + # Default search params when one is not provided. 
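# Each entry below maps a Milvus index type to the search parameters used for it;
# _create_search_params() later picks the entry matching the collection's index
# and overwrites "metric_type" with the metric the index was actually built with.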
+ self.default_search_params = { + "IVF_FLAT": {"metric_type": "L2", "params": {"nprobe": 10}}, + "IVF_SQ8": {"metric_type": "L2", "params": {"nprobe": 10}}, + "IVF_PQ": {"metric_type": "L2", "params": {"nprobe": 10}}, + "HNSW": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_FLAT": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_SQ": {"metric_type": "L2", "params": {"ef": 10}}, + "RHNSW_PQ": {"metric_type": "L2", "params": {"ef": 10}}, + "IVF_HNSW": {"metric_type": "L2", "params": {"nprobe": 10, "ef": 10}}, + "ANNOY": {"metric_type": "L2", "params": {"search_k": 10}}, + "SCANN": {"metric_type": "L2", "params": {"search_k": 10}}, + "AUTOINDEX": {"metric_type": "L2", "params": {}}, + "GPU_CAGRA": { + "metric_type": "L2", + "params": { + "itopk_size": 128, + "search_width": 4, + "min_iterations": 0, + "max_iterations": 0, + "team_size": 0, + }, + }, + "GPU_IVF_FLAT": {"metric_type": "L2", "params": {"nprobe": 10}}, + "GPU_IVF_PQ": {"metric_type": "L2", "params": {"nprobe": 10}}, + } + + self.embedding_func = embedding_function + self.collection_name = collection_name + self.collection_description = collection_description + self.collection_properties = collection_properties + self.index_params = index_params + self.search_params = search_params + self.consistency_level = consistency_level + self.auto_id = auto_id + + # In order for a collection to be compatible, pk needs to be varchar + self._primary_field = primary_field + # In order for compatibility, the text field will need to be called "text" + self._text_field = text_field + # In order for compatibility, the vector field needs to be called "vector" + self._vector_field = vector_field + self._metadata_field = metadata_field + self._partition_key_field = partition_key_field + self.fields: list[str] = [] + self.partition_names = partition_names + self.replica_number = replica_number + self.timeout = timeout + self.num_shards = num_shards + + # Create the connection to the server + if connection_args is None: + connection_args = DEFAULT_MILVUS_CONNECTION + self.alias = self._create_connection_alias(connection_args) + self.col: Optional[Collection] = None + + # Grab the existing collection if it exists + if utility.has_collection(self.collection_name, using=self.alias): + self.col = Collection( + self.collection_name, + using=self.alias, + ) + if self.collection_properties is not None: + self.col.set_properties(self.collection_properties) + # If need to drop old, drop it + if drop_old and isinstance(self.col, Collection): + self.col.drop() + self.col = None + + # Initialize the vector store + self._init( + partition_names=partition_names, + replica_number=replica_number, + timeout=timeout, + ) + + @property + def embeddings(self) -> Embeddings: + return self.embedding_func + + def _create_connection_alias(self, connection_args: dict) -> str: + """Create the connection to the Milvus server.""" + from pymilvus import MilvusException, connections + + # Grab the connection arguments that are used for checking existing connection + host: str = connection_args.get("host", None) + port: Union[str, int] = connection_args.get("port", None) + address: str = connection_args.get("address", None) + uri: str = connection_args.get("uri", None) + user = connection_args.get("user", None) + + # Order of use is host/port, uri, address + if host is not None and port is not None: + given_address = str(host) + ":" + str(port) + elif uri is not None: + if uri.startswith("https://"): + given_address = uri.split("https://")[1] + elif 
uri.startswith("http://"): + given_address = uri.split("http://")[1] + else: + given_address = uri # Milvus lite + elif address is not None: + given_address = address + else: + given_address = None + logger.debug("Missing standard address type for reuse attempt") + + # User defaults to empty string when getting connection info + if user is not None: + tmp_user = user + else: + tmp_user = "" + + # If a valid address was given, then check if a connection exists + if given_address is not None: + for con in connections.list_connections(): + addr = connections.get_connection_addr(con[0]) + if ( + con[1] + and ("address" in addr) + and (addr["address"] == given_address) + and ("user" in addr) + and (addr["user"] == tmp_user) + ): + logger.debug("Using previous connection: %s", con[0]) + return con[0] + + # Generate a new connection if one doesn't exist + alias = uuid4().hex + try: + connections.connect(alias=alias, **connection_args) + logger.debug("Created new connection using: %s", alias) + return alias + except MilvusException as e: + logger.error("Failed to create new connection using: %s", alias) + raise e + + def _init( + self, + embeddings: Optional[list] = None, + metadatas: Optional[list[dict]] = None, + partition_names: Optional[list] = None, + replica_number: int = 1, + timeout: Optional[float] = None, + ) -> None: + if embeddings is not None: + self._create_collection(embeddings, metadatas) + self._extract_fields() + self._create_index() + self._create_search_params() + self._load( + partition_names=partition_names, + replica_number=replica_number, + timeout=timeout, + ) + + def _create_collection( + self, embeddings: list, metadatas: Optional[list[dict]] = None + ) -> None: + from pymilvus import ( + Collection, + CollectionSchema, + DataType, + FieldSchema, + MilvusException, + ) + from pymilvus.orm.types import infer_dtype_bydata # type: ignore + + # Determine embedding dim + dim = len(embeddings[0]) + fields = [] + if self._metadata_field is not None: + fields.append(FieldSchema(self._metadata_field, DataType.JSON)) + else: + # Determine metadata schema + if metadatas: + # Create FieldSchema for each entry in metadata. 
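# Only the first metadata dict is inspected when building the schema, so every
# later row must supply the same keys with compatible value types.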
+ for key, value in metadatas[0].items(): + # Infer the corresponding datatype of the metadata + dtype = infer_dtype_bydata(value) + # Datatype isn't compatible + if dtype == DataType.UNKNOWN or dtype == DataType.NONE: + logger.error( + ( + "Failure to create collection, " + "unrecognized dtype for key: %s" + ), + key, + ) + raise ValueError(f"Unrecognized datatype for {key}.") + # Dataype is a string/varchar equivalent + elif dtype == DataType.VARCHAR: + fields.append( + FieldSchema(key, DataType.VARCHAR, max_length=65_535) + ) + else: + fields.append(FieldSchema(key, dtype)) + + # Create the text field + fields.append( + FieldSchema(self._text_field, DataType.VARCHAR, max_length=65_535) + ) + # Create the primary key field + if self.auto_id: + fields.append( + FieldSchema( + self._primary_field, DataType.INT64, is_primary=True, auto_id=True + ) + ) + else: + fields.append( + FieldSchema( + self._primary_field, + DataType.VARCHAR, + is_primary=True, + auto_id=False, + max_length=65_535, + ) + ) + # Create the vector field, supports binary or float vectors + fields.append( + FieldSchema(self._vector_field, infer_dtype_bydata(embeddings[0]), dim=dim) + ) + + # Create the schema for the collection + schema = CollectionSchema( + fields, + description=self.collection_description, + partition_key_field=self._partition_key_field, + ) + + # Create the collection + try: + if self.num_shards is not None: + # Issue with defaults: + # https://github.com/milvus-io/pymilvus/blob/59bf5e811ad56e20946559317fed855330758d9c/pymilvus/client/prepare.py#L82-L85 + self.col = Collection( + name=self.collection_name, + schema=schema, + consistency_level=self.consistency_level, + using=self.alias, + num_shards=self.num_shards, + ) + else: + self.col = Collection( + name=self.collection_name, + schema=schema, + consistency_level=self.consistency_level, + using=self.alias, + ) + # Set the collection properties if they exist + if self.collection_properties is not None: + self.col.set_properties(self.collection_properties) + except MilvusException as e: + logger.error( + "Failed to create collection: %s error: %s", self.collection_name, e + ) + raise e + + def _extract_fields(self) -> None: + """Grab the existing fields from the Collection""" + from pymilvus import Collection + + if isinstance(self.col, Collection): + schema = self.col.schema + for x in schema.fields: + self.fields.append(x.name) + + def _get_index(self) -> Optional[dict[str, Any]]: + """Return the vector index information if it exists""" + from pymilvus import Collection + + if isinstance(self.col, Collection): + for x in self.col.indexes: + if x.field_name == self._vector_field: + return x.to_dict() + return None + + def _create_index(self) -> None: + """Create a index on the collection""" + from pymilvus import Collection, MilvusException + + if isinstance(self.col, Collection) and self._get_index() is None: + try: + # If no index params, use a default HNSW based one + if self.index_params is None: + self.index_params = { + "metric_type": "L2", + "index_type": "HNSW", + "params": {"M": 8, "efConstruction": 64}, + } + + try: + self.col.create_index( + self._vector_field, + index_params=self.index_params, + using=self.alias, + ) + + # If default did not work, most likely on Zilliz Cloud + except MilvusException: + # Use AUTOINDEX based index + self.index_params = { + "metric_type": "L2", + "index_type": "AUTOINDEX", + "params": {}, + } + self.col.create_index( + self._vector_field, + index_params=self.index_params, + using=self.alias, + ) + 
logger.debug( + "Successfully created an index on collection: %s", + self.collection_name, + ) + + except MilvusException as e: + logger.error( + "Failed to create an index on collection: %s", self.collection_name + ) + raise e + + def _create_search_params(self) -> None: + """Generate search params based on the current index type""" + from pymilvus import Collection + + if isinstance(self.col, Collection) and self.search_params is None: + index = self._get_index() + if index is not None: + index_type: str = index["index_param"]["index_type"] + metric_type: str = index["index_param"]["metric_type"] + self.search_params = self.default_search_params[index_type] + self.search_params["metric_type"] = metric_type + + def _load( + self, + partition_names: Optional[list] = None, + replica_number: int = 1, + timeout: Optional[float] = None, + ) -> None: + """Load the collection if available.""" + from pymilvus import Collection, utility + from pymilvus.client.types import LoadState # type: ignore + + timeout = self.timeout or timeout + if ( + isinstance(self.col, Collection) + and self._get_index() is not None + and utility.load_state(self.collection_name, using=self.alias) + == LoadState.NotLoad + ): + self.col.load( + partition_names=partition_names, + replica_number=replica_number, + timeout=timeout, + ) + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[dict]] = None, + timeout: Optional[float] = None, + batch_size: int = 1000, + *, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> List[str]: + """Insert text data into Milvus. + + Inserting data when the collection has not be made yet will result + in creating a new Collection. The data of the first entity decides + the schema of the new collection, the dim is extracted from the first + embedding and the columns are decided by the first metadata dict. + Metadata keys will need to be present for all inserted values. At + the moment there is no None equivalent in Milvus. + + Args: + texts (Iterable[str]): The texts to embed, it is assumed + that they all fit in memory. + metadatas (Optional[List[dict]]): Metadata dicts attached to each of + the texts. Defaults to None. + should be less than 65535 bytes. Required and work when auto_id is False. + timeout (Optional[float]): Timeout for each batch insert. Defaults + to None. + batch_size (int, optional): Batch size to use for insertion. + Defaults to 1000. + ids (Optional[List[str]]): List of text ids. The length of each item + + Raises: + MilvusException: Failure to add texts + + Returns: + List[str]: The resulting keys for each inserted element. + """ + from pymilvus import Collection, MilvusException + + texts = list(texts) + if not self.auto_id: + assert isinstance( + ids, list + ), "A list of valid ids are required when auto_id is False." + assert len(set(ids)) == len( + texts + ), "Different lengths of texts and unique ids are provided." + assert all( + len(x.encode()) <= 65_535 for x in ids + ), "Each id should be a string less than 65535 bytes." 
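# Embed all texts up front; fall back to per-text embed_query() when the
# embedding class does not implement embed_documents().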
+ + try: + embeddings = self.embedding_func.embed_documents(texts) + except NotImplementedError: + embeddings = [self.embedding_func.embed_query(x) for x in texts] + + if len(embeddings) == 0: + logger.debug("Nothing to insert, skipping.") + return [] + + # If the collection hasn't been initialized yet, perform all steps to do so + if not isinstance(self.col, Collection): + kwargs = {"embeddings": embeddings, "metadatas": metadatas} + if self.partition_names: + kwargs["partition_names"] = self.partition_names + if self.replica_number: + kwargs["replica_number"] = self.replica_number + if self.timeout: + kwargs["timeout"] = self.timeout + self._init(**kwargs) + + # Dict to hold all insert columns + insert_dict: dict[str, list] = { + self._text_field: texts, + self._vector_field: embeddings, + } + + if not self.auto_id: + insert_dict[self._primary_field] = ids # type: ignore[assignment] + + if self._metadata_field is not None: + for d in metadatas: # type: ignore[union-attr] + insert_dict.setdefault(self._metadata_field, []).append(d) + else: + # Collect the metadata into the insert dict. + if metadatas is not None: + for d in metadatas: + for key, value in d.items(): + keys = ( + [x for x in self.fields if x != self._primary_field] + if self.auto_id + else [x for x in self.fields] + ) + if key in keys: + insert_dict.setdefault(key, []).append(value) + + # Total insert count + vectors: list = insert_dict[self._vector_field] + total_count = len(vectors) + + pks: list[str] = [] + + assert isinstance(self.col, Collection) + for i in range(0, total_count, batch_size): + # Grab end index + end = min(i + batch_size, total_count) + # Convert dict to list of lists batch for insertion + insert_list = [ + insert_dict[x][i:end] for x in self.fields if x in insert_dict + ] + # Insert into the collection. + try: + res: Collection + timeout = self.timeout or timeout + res = self.col.insert(insert_list, timeout=timeout, **kwargs) + pks.extend(res.primary_keys) + except MilvusException as e: + logger.error( + "Failed to insert batch starting at entity: %s/%s", i, total_count + ) + raise e + return pks + + def similarity_search( + self, + query: str, + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a similarity search against the query string. + + Args: + query (str): The text to search. + k (int, optional): How many results to return. Defaults to 4. + param (dict, optional): The search params for the index type. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (int, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[Document]: Document results for search. + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + timeout = self.timeout or timeout + res = self.similarity_search_with_score( + query=query, k=k, param=param, expr=expr, timeout=timeout, **kwargs + ) + return [doc for doc, _ in res] + + def similarity_search_by_vector( + self, + embedding: List[float], + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a similarity search against the query string. + + Args: + embedding (List[float]): The embedding vector to search. + k (int, optional): How many results to return. Defaults to 4. 
+ param (dict, optional): The search params for the index type. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (int, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[Document]: Document results for search. + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + timeout = self.timeout or timeout + res = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, param=param, expr=expr, timeout=timeout, **kwargs + ) + return [doc for doc, _ in res] + + def similarity_search_with_score( + self, + query: str, + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Perform a search on a query string and return results with score. + + For more information about the search parameters, take a look at the pymilvus + documentation found here: + https://milvus.io/api-reference/pymilvus/v2.2.6/Collection/search().md + + Args: + query (str): The text being searched. + k (int, optional): The amount of results to return. Defaults to 4. + param (dict): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (float, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[float], List[Tuple[Document, any, any]]: + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + + # Embed the query text. + embedding = self.embedding_func.embed_query(query) + timeout = self.timeout or timeout + res = self.similarity_search_with_score_by_vector( + embedding=embedding, k=k, param=param, expr=expr, timeout=timeout, **kwargs + ) + return res + + def similarity_search_with_score_by_vector( + self, + embedding: List[float], + k: int = 4, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Tuple[Document, float]]: + """Perform a search on a query string and return results with score. + + For more information about the search parameters, take a look at the pymilvus + documentation found here: + https://milvus.io/api-reference/pymilvus/v2.2.6/Collection/search().md + + Args: + embedding (List[float]): The embedding vector being searched. + k (int, optional): The amount of results to return. Defaults to 4. + param (dict): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (float, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[Tuple[Document, float]]: Result doc and score. + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + + if param is None: + param = self.search_params + + # Determine result metadata fields with PK. + output_fields = self.fields[:] + output_fields.remove(self._vector_field) + timeout = self.timeout or timeout + # Perform the search. + res = self.col.search( + data=[embedding], + anns_field=self._vector_field, + param=param, + limit=k, + expr=expr, + output_fields=output_fields, + timeout=timeout, + **kwargs, + ) + # Organize results. 
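# Each hit's requested output fields are parsed into a Document and paired with
# the raw score returned by Milvus (hits arrive ordered best-first).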
+ ret = [] + for result in res[0]: + data = {x: result.entity.get(x) for x in output_fields} + doc = self._parse_document(data) + pair = (doc, result.score) + ret.append(pair) + + return ret + + def max_marginal_relevance_search( + self, + query: str, + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a search and return results that are reordered by MMR. + + Args: + query (str): The text being searched. + k (int, optional): How many results to give. Defaults to 4. + fetch_k (int, optional): Total results to select k from. + Defaults to 20. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5 + param (dict, optional): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (float, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + + Returns: + List[Document]: Document results for search. + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + + embedding = self.embedding_func.embed_query(query) + timeout = self.timeout or timeout + return self.max_marginal_relevance_search_by_vector( + embedding=embedding, + k=k, + fetch_k=fetch_k, + lambda_mult=lambda_mult, + param=param, + expr=expr, + timeout=timeout, + **kwargs, + ) + + def max_marginal_relevance_search_by_vector( + self, + embedding: list[float], + k: int = 4, + fetch_k: int = 20, + lambda_mult: float = 0.5, + param: Optional[dict] = None, + expr: Optional[str] = None, + timeout: Optional[float] = None, + **kwargs: Any, + ) -> List[Document]: + """Perform a search and return results that are reordered by MMR. + + Args: + embedding (str): The embedding vector being searched. + k (int, optional): How many results to give. Defaults to 4. + fetch_k (int, optional): Total results to select k from. + Defaults to 20. + lambda_mult: Number between 0 and 1 that determines the degree + of diversity among the results with 0 corresponding + to maximum diversity and 1 to minimum diversity. + Defaults to 0.5 + param (dict, optional): The search params for the specified index. + Defaults to None. + expr (str, optional): Filtering expression. Defaults to None. + timeout (float, optional): How long to wait before timeout error. + Defaults to None. + kwargs: Collection.search() keyword arguments. + + Returns: + List[Document]: Document results for search. + """ + if self.col is None: + logger.debug("No existing collection to search.") + return [] + + if param is None: + param = self.search_params + + # Determine result metadata fields. + output_fields = self.fields[:] + output_fields.remove(self._vector_field) + timeout = self.timeout or timeout + # Perform the search. + res = self.col.search( + data=[embedding], + anns_field=self._vector_field, + param=param, + limit=fetch_k, + expr=expr, + output_fields=output_fields, + timeout=timeout, + **kwargs, + ) + # Organize results. 
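+        # Note: the search above retrieved `fetch_k` candidates. Below, their
+        # stored embeddings are looked up by primary key and re-ranked with
+        # maximal_marginal_relevance, which trades off similarity to the query
+        # against diversity among the selected results (weighted by lambda_mult).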
+ ids = [] + documents = [] + scores = [] + for result in res[0]: + data = {x: result.entity.get(x) for x in output_fields} + doc = self._parse_document(data) + documents.append(doc) + scores.append(result.score) + ids.append(result.id) + + vectors = self.col.query( + expr=f"{self._primary_field} in {ids}", + output_fields=[self._primary_field, self._vector_field], + timeout=timeout, + ) + # Reorganize the results from query to match search order. + vectors = {x[self._primary_field]: x[self._vector_field] for x in vectors} + + ordered_result_embeddings = [vectors[x] for x in ids] + + # Get the new order of results. + new_ordering = maximal_marginal_relevance( + np.array(embedding), ordered_result_embeddings, k=k, lambda_mult=lambda_mult + ) + + # Reorder the values and return. + ret = [] + for x in new_ordering: + # Function can return -1 index + if x == -1: + break + else: + ret.append(documents[x]) + return ret + + def delete( # type: ignore[no-untyped-def] + self, ids: Optional[List[str]] = None, expr: Optional[str] = None, **kwargs: str + ): + """Delete by vector ID or boolean expression. + Refer to [Milvus documentation](https://milvus.io/docs/delete_data.md) + for notes and examples of expressions. + + Args: + ids: List of ids to delete. + expr: Boolean expression that specifies the entities to delete. + kwargs: Other parameters in Milvus delete api. + """ + if isinstance(ids, list) and len(ids) > 0: + if expr is not None: + logger.warning( + "Both ids and expr are provided. " "Ignore expr and delete by ids." + ) + expr = f"{self._primary_field} in {ids}" + else: + assert isinstance( + expr, str + ), "Either ids list or expr string must be provided." + return self.col.delete(expr=expr, **kwargs) # type: ignore[union-attr] + + @classmethod + def from_texts( + cls, + texts: List[str], + embedding: Embeddings, + metadatas: Optional[List[dict]] = None, + collection_name: str = "LangChainCollection", + connection_args: dict[str, Any] = DEFAULT_MILVUS_CONNECTION, + consistency_level: str = "Session", + index_params: Optional[dict] = None, + search_params: Optional[dict] = None, + drop_old: bool = False, + *, + ids: Optional[List[str]] = None, + **kwargs: Any, + ) -> Milvus: + """Create a Milvus collection, indexes it with HNSW, and insert data. + + Args: + texts (List[str]): Text data. + embedding (Embeddings): Embedding function. + metadatas (Optional[List[dict]]): Metadata for each text if it exists. + Defaults to None. + collection_name (str, optional): Collection name to use. Defaults to + "LangChainCollection". + connection_args (dict[str, Any], optional): Connection args to use. Defaults + to DEFAULT_MILVUS_CONNECTION. + consistency_level (str, optional): Which consistency level to use. Defaults + to "Session". + index_params (Optional[dict], optional): Which index_params to use. Defaults + to None. + search_params (Optional[dict], optional): Which search params to use. + Defaults to None. + drop_old (Optional[bool], optional): Whether to drop the collection with + that name if it exists. Defaults to False. + ids (Optional[List[str]]): List of text ids. Defaults to None. 
+
+        Returns:
+            Milvus: Milvus Vector Store
+        """
+        if isinstance(ids, list) and len(ids) > 0:
+            auto_id = False
+        else:
+            auto_id = True
+
+        vector_db = cls(
+            embedding_function=embedding,
+            collection_name=collection_name,
+            connection_args=connection_args,
+            consistency_level=consistency_level,
+            index_params=index_params,
+            search_params=search_params,
+            drop_old=drop_old,
+            auto_id=auto_id,
+            **kwargs,
+        )
+        vector_db.add_texts(texts=texts, metadatas=metadatas, ids=ids)
+        return vector_db
+
+    def _parse_document(self, data: dict) -> Document:
+        return Document(
+            page_content=data.pop(self._text_field),
+            metadata=data.pop(self._metadata_field) if self._metadata_field else data,
+        )
+
+    def get_pks(self, expr: str, **kwargs: Any) -> List[int] | None:
+        """Get primary keys matching an expression.
+
+        Args:
+            expr: Expression, e.g. "id in [1, 2]" or "title LIKE 'Abc%'"
+
+        Returns:
+            List[int]: List of IDs (Primary Keys)
+        """
+
+        from pymilvus import MilvusException
+
+        if self.col is None:
+            logger.debug("No existing collection to get pk.")
+            return None
+
+        try:
+            query_result = self.col.query(
+                expr=expr, output_fields=[self._primary_field]
+            )
+        except MilvusException as exc:
+            logger.error("Failed to get ids: %s error: %s", self.collection_name, exc)
+            raise exc
+        pks = [item.get(self._primary_field) for item in query_result]
+        return pks
+
+    def upsert(
+        self,
+        ids: Optional[List[str]] = None,
+        documents: List[Document] | None = None,
+        **kwargs: Any,
+    ) -> List[str] | None:
+        """Update/insert documents into the vectorstore.
+
+        Args:
+            ids: IDs of entities to update; use get_pks to look up ids matching
+                an expression.
+            documents (List[Document]): Documents to add to the vectorstore.
+
+        Returns:
+            List[str]: IDs of the added texts.
+        """
+
+        from pymilvus import MilvusException
+
+        if documents is None or len(documents) == 0:
+            logger.debug("No documents to upsert.")
+            return None
+
+        if ids is not None and len(ids):
+            try:
+                self.delete(ids=ids)
+            except MilvusException:
+                pass
+        try:
+            return self.add_documents(documents=documents, **kwargs)
+        except MilvusException as exc:
+            logger.error(
+                "Failed to upsert entities: %s error: %s", self.collection_name, exc
+            )
+            raise exc
diff --git a/libs/partners/milvus/langchain_milvus/vectorstores/zilliz.py b/libs/partners/milvus/langchain_milvus/vectorstores/zilliz.py
new file mode 100644
index 0000000000000..02f2ce739ff4c
--- /dev/null
+++ b/libs/partners/milvus/langchain_milvus/vectorstores/zilliz.py
@@ -0,0 +1,196 @@
+from __future__ import annotations
+
+import logging
+from typing import Any, Dict, List, Optional
+
+from langchain_core.embeddings import Embeddings
+
+from langchain_milvus.vectorstores.milvus import Milvus
+
+logger = logging.getLogger(__name__)
+
+
+class Zilliz(Milvus):
+    """`Zilliz` vector store.
+
+    You need to have `pymilvus` installed and a
+    running Zilliz database.
+
+    See the following documentation for how to run a Zilliz instance:
+    https://docs.zilliz.com/docs/create-cluster
+
+
+    IF USING L2/IP metric IT IS HIGHLY SUGGESTED TO NORMALIZE YOUR DATA.
+
+    Args:
+        embedding_function (Embeddings): Function used to embed the text.
+        collection_name (str): Which Zilliz collection to use. Defaults to
+            "LangChainCollection".
+        connection_args (Optional[dict[str, any]]): The connection args used for
+            this class come in the form of a dict.
+        consistency_level (str): The consistency level to use for a collection.
+            Defaults to "Session".
+        index_params (Optional[dict]): Which index params to use. Defaults to
+            HNSW/AUTOINDEX depending on service.
+        search_params (Optional[dict]): Which search params to use. Defaults to
+            default of index.
+        drop_old (Optional[bool]): Whether to drop the current collection. Defaults
+            to False.
+        auto_id (bool): Whether to enable auto id for primary key. Defaults to False.
+            If False, you need to provide text ids (strings shorter than 65535 bytes).
+            If True, Milvus will generate unique integers as primary keys.
+
+    The connection args used for this class come in the form of a dict;
+    here are a few of the options:
+        address (str): The actual address of Zilliz
+            instance. Example address: "localhost:19530"
+        uri (str): The uri of Zilliz instance. Example uri:
+            "https://in03-ba4234asae.api.gcp-us-west1.zillizcloud.com",
+        host (str): The host of Zilliz instance. Default at "localhost",
+            PyMilvus will fill in the default host if only port is provided.
+        port (str/int): The port of Zilliz instance. Default at 19530, PyMilvus
+            will fill in the default port if only host is provided.
+        user (str): Which user to connect to the Zilliz instance as. If user and
+            password are provided, we will add related header in every RPC call.
+        password (str): Required when user is provided. The password
+            corresponding to the user.
+        token (str): API key, for serverless clusters which can be used as
+            replacements for user and password.
+        secure (bool): Default is false. If set to true, tls will be enabled.
+        client_key_path (str): If use tls two-way authentication, need to
+            write the client.key path.
+        client_pem_path (str): If use tls two-way authentication, need to
+            write the client.pem path.
+        ca_pem_path (str): If use tls two-way authentication, need to write
+            the ca.pem path.
+        server_pem_path (str): If use tls one-way authentication, need to
+            write the server.pem path.
+        server_name (str): If use tls, need to write the common name.
+
+    Example:
+        .. code-block:: python
+
+            from langchain_community.vectorstores import Zilliz
+            from langchain_community.embeddings import OpenAIEmbeddings
+
+            embedding = OpenAIEmbeddings()
+            # Connect to a Zilliz instance
+            milvus_store = Zilliz(
+                embedding_function = embedding,
+                collection_name = "LangChainCollection",
+                connection_args = {
+                    "uri": "https://in03-ba4234asae.api.gcp-us-west1.zillizcloud.com",
+                    "user": "temp",
+                    "password": "temp",
+                    "token": "temp",  # API key as replacements for user and password
+                    "secure": True
+                },
+                drop_old=True,
+            )
+
+    Raises:
+        ValueError: If the pymilvus python package is not installed.
+    """
+
+    def _create_index(self) -> None:
+        """Create an index on the collection."""
+        from pymilvus import Collection, MilvusException
+
+        if isinstance(self.col, Collection) and self._get_index() is None:
+            try:
+                # If no index params, use a default AutoIndex based one
+                if self.index_params is None:
+                    self.index_params = {
+                        "metric_type": "L2",
+                        "index_type": "AUTOINDEX",
+                        "params": {},
+                    }
+
+                try:
+                    self.col.create_index(
+                        self._vector_field,
+                        index_params=self.index_params,
+                        using=self.alias,
+                    )
+
+                # If default did not work, most likely Milvus self-hosted
+                except MilvusException:
+                    # Use HNSW based index
+                    self.index_params = {
+                        "metric_type": "L2",
+                        "index_type": "HNSW",
+                        "params": {"M": 8, "efConstruction": 64},
+                    }
+                    self.col.create_index(
+                        self._vector_field,
+                        index_params=self.index_params,
+                        using=self.alias,
+                    )
+                logger.debug(
+                    "Successfully created an index on collection: %s",
+                    self.collection_name,
+                )
+
+            except MilvusException as e:
+                logger.error(
+                    "Failed to create an index on collection: %s", self.collection_name
+                )
+                raise e
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection_name: str = "LangChainCollection",
+        connection_args: Optional[Dict[str, Any]] = None,
+        consistency_level: str = "Session",
+        index_params: Optional[dict] = None,
+        search_params: Optional[dict] = None,
+        drop_old: bool = False,
+        *,
+        ids: Optional[List[str]] = None,
+        auto_id: bool = False,
+        **kwargs: Any,
+    ) -> Zilliz:
+        """Create a Zilliz collection, index it, and insert data.
+
+        Args:
+            texts (List[str]): Text data.
+            embedding (Embeddings): Embedding function.
+            metadatas (Optional[List[dict]]): Metadata for each text if it exists.
+                Defaults to None.
+            collection_name (str, optional): Collection name to use. Defaults to
+                "LangChainCollection".
+            connection_args (dict[str, Any], optional): Connection args to use. Defaults
+                to DEFAULT_MILVUS_CONNECTION.
+            consistency_level (str, optional): Which consistency level to use. Defaults
+                to "Session".
+            index_params (Optional[dict], optional): Which index_params to use.
+                Defaults to None.
+            search_params (Optional[dict], optional): Which search params to use.
+                Defaults to None.
+            drop_old (Optional[bool], optional): Whether to drop the collection with
+                that name if it exists. Defaults to False.
+            ids (Optional[List[str]]): List of text ids.
+            auto_id (bool): Whether to enable auto id for primary key. Defaults to
+                False. If False, you need to provide text ids (strings shorter than
+                65535 bytes). If True, Milvus will generate unique integers as
+                primary keys.
+
+        Returns:
+            Zilliz: Zilliz Vector Store
+        """
+        vector_db = cls(
+            embedding_function=embedding,
+            collection_name=collection_name,
+            connection_args=connection_args or {},
+            consistency_level=consistency_level,
+            index_params=index_params,
+            search_params=search_params,
+            drop_old=drop_old,
+            auto_id=auto_id,
+            **kwargs,
+        )
+        vector_db.add_texts(texts=texts, metadatas=metadatas, ids=ids)
+        return vector_db
diff --git a/libs/partners/milvus/poetry.lock b/libs/partners/milvus/poetry.lock
new file mode 100644
index 0000000000000..059dade2041fa
--- /dev/null
+++ b/libs/partners/milvus/poetry.lock
@@ -0,0 +1,1291 @@
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+ +[[package]] +name = "annotated-types" +version = "0.7.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.8" +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + 
{file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = 
"charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + +[[package]] +name = "codespell" +version = "2.3.0" +description = "Codespell" +optional = false +python-versions = ">=3.8" +files = [ + {file = "codespell-2.3.0-py3-none-any.whl", hash = "sha256:a9c7cef2501c9cfede2110fd6d4e5e62296920efe9abfb84648df866e47f58d1"}, + {file = "codespell-2.3.0.tar.gz", hash = "sha256:360c7d10f75e65f67bad720af7007e1060a5d395670ec11a7ed1fed9dd17471f"}, +] + +[package.extras] +dev = ["Pygments", "build", "chardet", "pre-commit", "pytest", "pytest-cov", "pytest-dependency", "ruff", "tomli", "twine"] +hard-encoding-detection = ["chardet"] +toml = ["tomli"] +types = ["chardet (>=5.1.0)", "mypy", "pytest", "pytest-cov", "pytest-dependency"] + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "environs" +version = "9.5.0" +description = "simplified environment variable parsing" +optional = false +python-versions = ">=3.6" +files = [ + {file = "environs-9.5.0-py2.py3-none-any.whl", hash = "sha256:1e549569a3de49c05f856f40bce86979e7d5ffbbc4398e7f338574c220189124"}, + {file = "environs-9.5.0.tar.gz", hash = "sha256:a76307b36fbe856bdca7ee9161e6c466fd7fcffc297109a118c59b54e27e30c9"}, +] + +[package.dependencies] +marshmallow = ">=3.0.0" +python-dotenv = "*" + +[package.extras] +dev = ["dj-database-url", "dj-email-url", "django-cache-url", "flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)", "pytest", "tox"] +django = ["dj-database-url", "dj-email-url", "django-cache-url"] +lint = ["flake8 (==4.0.1)", "flake8-bugbear (==21.9.2)", "mypy (==0.910)", "pre-commit (>=2.4,<3.0)"] +tests = ["dj-database-url", "dj-email-url", "django-cache-url", "pytest"] + +[[package]] +name = "exceptiongroup" +version = "1.2.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, + {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "freezegun" +version = "1.5.1" +description = "Let your Python tests travel through time" +optional = false +python-versions = ">=3.7" +files = [ + {file = "freezegun-1.5.1-py3-none-any.whl", hash = "sha256:bf111d7138a8abe55ab48a71755673dbaa4ab87f4cff5634a4442dfec34c15f1"}, + {file = "freezegun-1.5.1.tar.gz", hash = "sha256:b29dedfcda6d5e8e083ce71b2b542753ad48cfec44037b3fc79702e2980a89e9"}, +] + +[package.dependencies] +python-dateutil = ">=2.7" + +[[package]] +name = "grpcio" +version = "1.63.0" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.8" +files = [ + {file = "grpcio-1.63.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:2e93aca840c29d4ab5db93f94ed0a0ca899e241f2e8aec6334ab3575dc46125c"}, + {file = "grpcio-1.63.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:91b73d3f1340fefa1e1716c8c1ec9930c676d6b10a3513ab6c26004cb02d8b3f"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:b3afbd9d6827fa6f475a4f91db55e441113f6d3eb9b7ebb8fb806e5bb6d6bd0d"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8f3f6883ce54a7a5f47db43289a0a4c776487912de1a0e2cc83fdaec9685cc9f"}, + {file = "grpcio-1.63.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf8dae9cc0412cb86c8de5a8f3be395c5119a370f3ce2e69c8b7d46bb9872c8d"}, + {file = "grpcio-1.63.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:08e1559fd3b3b4468486b26b0af64a3904a8dbc78d8d936af9c1cf9636eb3e8b"}, + {file = "grpcio-1.63.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5c039ef01516039fa39da8a8a43a95b64e288f79f42a17e6c2904a02a319b357"}, + {file = "grpcio-1.63.0-cp310-cp310-win32.whl", hash = 
"sha256:ad2ac8903b2eae071055a927ef74121ed52d69468e91d9bcbd028bd0e554be6d"}, + {file = "grpcio-1.63.0-cp310-cp310-win_amd64.whl", hash = "sha256:b2e44f59316716532a993ca2966636df6fbe7be4ab6f099de6815570ebe4383a"}, + {file = "grpcio-1.63.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:f28f8b2db7b86c77916829d64ab21ff49a9d8289ea1564a2b2a3a8ed9ffcccd3"}, + {file = "grpcio-1.63.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:65bf975639a1f93bee63ca60d2e4951f1b543f498d581869922910a476ead2f5"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:b5194775fec7dc3dbd6a935102bb156cd2c35efe1685b0a46c67b927c74f0cfb"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4cbb2100ee46d024c45920d16e888ee5d3cf47c66e316210bc236d5bebc42b3"}, + {file = "grpcio-1.63.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ff737cf29b5b801619f10e59b581869e32f400159e8b12d7a97e7e3bdeee6a2"}, + {file = "grpcio-1.63.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cd1e68776262dd44dedd7381b1a0ad09d9930ffb405f737d64f505eb7f77d6c7"}, + {file = "grpcio-1.63.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:93f45f27f516548e23e4ec3fbab21b060416007dbe768a111fc4611464cc773f"}, + {file = "grpcio-1.63.0-cp311-cp311-win32.whl", hash = "sha256:878b1d88d0137df60e6b09b74cdb73db123f9579232c8456f53e9abc4f62eb3c"}, + {file = "grpcio-1.63.0-cp311-cp311-win_amd64.whl", hash = "sha256:756fed02dacd24e8f488f295a913f250b56b98fb793f41d5b2de6c44fb762434"}, + {file = "grpcio-1.63.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:93a46794cc96c3a674cdfb59ef9ce84d46185fe9421baf2268ccb556f8f81f57"}, + {file = "grpcio-1.63.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:a7b19dfc74d0be7032ca1eda0ed545e582ee46cd65c162f9e9fc6b26ef827dc6"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:8064d986d3a64ba21e498b9a376cbc5d6ab2e8ab0e288d39f266f0fca169b90d"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:219bb1848cd2c90348c79ed0a6b0ea51866bc7e72fa6e205e459fedab5770172"}, + {file = "grpcio-1.63.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2d60cd1d58817bc5985fae6168d8b5655c4981d448d0f5b6194bbcc038090d2"}, + {file = "grpcio-1.63.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:9e350cb096e5c67832e9b6e018cf8a0d2a53b2a958f6251615173165269a91b0"}, + {file = "grpcio-1.63.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:56cdf96ff82e3cc90dbe8bac260352993f23e8e256e063c327b6cf9c88daf7a9"}, + {file = "grpcio-1.63.0-cp312-cp312-win32.whl", hash = "sha256:3a6d1f9ea965e750db7b4ee6f9fdef5fdf135abe8a249e75d84b0a3e0c668a1b"}, + {file = "grpcio-1.63.0-cp312-cp312-win_amd64.whl", hash = "sha256:d2497769895bb03efe3187fb1888fc20e98a5f18b3d14b606167dacda5789434"}, + {file = "grpcio-1.63.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:fdf348ae69c6ff484402cfdb14e18c1b0054ac2420079d575c53a60b9b2853ae"}, + {file = "grpcio-1.63.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a3abfe0b0f6798dedd2e9e92e881d9acd0fdb62ae27dcbbfa7654a57e24060c0"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:6ef0ad92873672a2a3767cb827b64741c363ebaa27e7f21659e4e31f4d750280"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b416252ac5588d9dfb8a30a191451adbf534e9ce5f56bb02cd193f12d8845b7f"}, + {file = "grpcio-1.63.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:e3b77eaefc74d7eb861d3ffbdf91b50a1bb1639514ebe764c47773b833fa2d91"}, + {file = "grpcio-1.63.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b005292369d9c1f80bf70c1db1c17c6c342da7576f1c689e8eee4fb0c256af85"}, + {file = "grpcio-1.63.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cdcda1156dcc41e042d1e899ba1f5c2e9f3cd7625b3d6ebfa619806a4c1aadda"}, + {file = "grpcio-1.63.0-cp38-cp38-win32.whl", hash = "sha256:01799e8649f9e94ba7db1aeb3452188048b0019dc37696b0f5ce212c87c560c3"}, + {file = "grpcio-1.63.0-cp38-cp38-win_amd64.whl", hash = "sha256:6a1a3642d76f887aa4009d92f71eb37809abceb3b7b5a1eec9c554a246f20e3a"}, + {file = "grpcio-1.63.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:75f701ff645858a2b16bc8c9fc68af215a8bb2d5a9b647448129de6e85d52bce"}, + {file = "grpcio-1.63.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cacdef0348a08e475a721967f48206a2254a1b26ee7637638d9e081761a5ba86"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:0697563d1d84d6985e40ec5ec596ff41b52abb3fd91ec240e8cb44a63b895094"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6426e1fb92d006e47476d42b8f240c1d916a6d4423c5258ccc5b105e43438f61"}, + {file = "grpcio-1.63.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e48cee31bc5f5a31fb2f3b573764bd563aaa5472342860edcc7039525b53e46a"}, + {file = "grpcio-1.63.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:50344663068041b34a992c19c600236e7abb42d6ec32567916b87b4c8b8833b3"}, + {file = "grpcio-1.63.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:259e11932230d70ef24a21b9fb5bb947eb4703f57865a404054400ee92f42f5d"}, + {file = "grpcio-1.63.0-cp39-cp39-win32.whl", hash = "sha256:a44624aad77bf8ca198c55af811fd28f2b3eaf0a50ec5b57b06c034416ef2d0a"}, + {file = "grpcio-1.63.0-cp39-cp39-win_amd64.whl", hash = "sha256:166e5c460e5d7d4656ff9e63b13e1f6029b122104c1633d5f37eaea348d7356d"}, + {file = "grpcio-1.63.0.tar.gz", hash = "sha256:f3023e14805c61bc439fb40ca545ac3d5740ce66120a678a3c6c2c55b70343d1"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.63.0)"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "iniconfig" +version = "2.0.0" +description = "brain-dead simple config-ini parsing" +optional = false +python-versions = ">=3.7" +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + +[[package]] +name = "jsonpatch" +version = "1.33" +description = "Apply JSON-Patches (RFC 6902)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpatch-1.33-py2.py3-none-any.whl", hash = "sha256:0ae28c0cd062bbd8b8ecc26d7d164fbbea9652a1a3693f3b956c1eae5145dade"}, + {file = "jsonpatch-1.33.tar.gz", hash = "sha256:9fcd4009c41e6d12348b4a0ff2563ba56a2923a7dfee731d004e212e1ee5030c"}, +] + +[package.dependencies] +jsonpointer = ">=1.9" + +[[package]] +name = "jsonpointer" +version = "2.4" +description = "Identify 
specific nodes in a JSON document (RFC 6901)" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*" +files = [ + {file = "jsonpointer-2.4-py2.py3-none-any.whl", hash = "sha256:15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a"}, + {file = "jsonpointer-2.4.tar.gz", hash = "sha256:585cee82b70211fa9e6043b7bb89db6e1aa49524340dde8ad6b63206ea689d88"}, +] + +[[package]] +name = "langchain-core" +version = "0.2.2rc1" +description = "Building applications with LLMs through composability" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [] +develop = true + +[package.dependencies] +jsonpatch = "^1.33" +langsmith = "^0.1.0" +packaging = "^23.2" +pydantic = ">=1,<3" +PyYAML = ">=5.3" +tenacity = "^8.1.0" + +[package.extras] +extended-testing = ["jinja2 (>=3,<4)"] + +[package.source] +type = "directory" +url = "../../core" + +[[package]] +name = "langsmith" +version = "0.1.63" +description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "langsmith-0.1.63-py3-none-any.whl", hash = "sha256:7810afdf5e3f3b472fc581a29371fb96cd843dde2149e048d1b9610325159d1e"}, + {file = "langsmith-0.1.63.tar.gz", hash = "sha256:a609405b52f6f54df442a142cbf19ab38662d54e532f96028b4c546434d4afdf"}, +] + +[package.dependencies] +orjson = ">=3.9.14,<4.0.0" +pydantic = ">=1,<3" +requests = ">=2,<3" + +[[package]] +name = "marshmallow" +version = "3.21.2" +description = "A lightweight library for converting complex datatypes to and from native Python datatypes." +optional = false +python-versions = ">=3.8" +files = [ + {file = "marshmallow-3.21.2-py3-none-any.whl", hash = "sha256:70b54a6282f4704d12c0a41599682c5c5450e843b9ec406308653b47c59648a1"}, + {file = "marshmallow-3.21.2.tar.gz", hash = "sha256:82408deadd8b33d56338d2182d455db632c6313aa2af61916672146bb32edc56"}, +] + +[package.dependencies] +packaging = ">=17.0" + +[package.extras] +dev = ["marshmallow[tests]", "pre-commit (>=3.5,<4.0)", "tox"] +docs = ["alabaster (==0.7.16)", "autodocsumm (==0.2.12)", "sphinx (==7.3.7)", "sphinx-issues (==4.1.0)", "sphinx-version-warning (==1.1.2)"] +tests = ["pytest", "pytz", "simplejson"] + +[[package]] +name = "milvus-lite" +version = "2.4.6" +description = "A lightweight version of Milvus wrapped with Python." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "milvus_lite-2.4.6-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:43ac9f36903b31455e50a8f1d9cb033e18971643029c89eb5c9610f01c1f2e26"}, + {file = "milvus_lite-2.4.6-py3-none-macosx_11_0_arm64.whl", hash = "sha256:95afe2ee019c569713926747bbe18ab5944927797374fed796f00fbe564cccd6"}, + {file = "milvus_lite-2.4.6-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2f9116bfc6a0d95636d3aa144582486b622c492689f3c93c519101bd7158b7db"}, +] + +[[package]] +name = "mypy" +version = "0.991" +description = "Optional static typing for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mypy-0.991-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7d17e0a9707d0772f4a7b878f04b4fd11f6f5bcb9b3813975a9b13c9332153ab"}, + {file = "mypy-0.991-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0714258640194d75677e86c786e80ccf294972cc76885d3ebbb560f11db0003d"}, + {file = "mypy-0.991-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0c8f3be99e8a8bd403caa8c03be619544bc2c77a7093685dcf308c6b109426c6"}, + {file = "mypy-0.991-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc9ec663ed6c8f15f4ae9d3c04c989b744436c16d26580eaa760ae9dd5d662eb"}, + {file = "mypy-0.991-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4307270436fd7694b41f913eb09210faff27ea4979ecbcd849e57d2da2f65305"}, + {file = "mypy-0.991-cp310-cp310-win_amd64.whl", hash = "sha256:901c2c269c616e6cb0998b33d4adbb4a6af0ac4ce5cd078afd7bc95830e62c1c"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d13674f3fb73805ba0c45eb6c0c3053d218aa1f7abead6e446d474529aafc372"}, + {file = "mypy-0.991-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1c8cd4fb70e8584ca1ed5805cbc7c017a3d1a29fb450621089ffed3e99d1857f"}, + {file = "mypy-0.991-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:209ee89fbb0deed518605edddd234af80506aec932ad28d73c08f1400ef80a33"}, + {file = "mypy-0.991-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37bd02ebf9d10e05b00d71302d2c2e6ca333e6c2a8584a98c00e038db8121f05"}, + {file = "mypy-0.991-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:26efb2fcc6b67e4d5a55561f39176821d2adf88f2745ddc72751b7890f3194ad"}, + {file = "mypy-0.991-cp311-cp311-win_amd64.whl", hash = "sha256:3a700330b567114b673cf8ee7388e949f843b356a73b5ab22dd7cff4742a5297"}, + {file = "mypy-0.991-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:1f7d1a520373e2272b10796c3ff721ea1a0712288cafaa95931e66aa15798813"}, + {file = "mypy-0.991-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:641411733b127c3e0dab94c45af15fea99e4468f99ac88b39efb1ad677da5711"}, + {file = "mypy-0.991-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:3d80e36b7d7a9259b740be6d8d906221789b0d836201af4234093cae89ced0cd"}, + {file = "mypy-0.991-cp37-cp37m-win_amd64.whl", hash = "sha256:e62ebaad93be3ad1a828a11e90f0e76f15449371ffeecca4a0a0b9adc99abcef"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b86ce2c1866a748c0f6faca5232059f881cda6dda2a893b9a8373353cfe3715a"}, + {file = "mypy-0.991-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ac6e503823143464538efda0e8e356d871557ef60ccd38f8824a4257acc18d93"}, + {file = "mypy-0.991-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0cca5adf694af539aeaa6ac633a7afe9bbd760df9d31be55ab780b77ab5ae8bf"}, + {file = "mypy-0.991-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:a12c56bf73cdab116df96e4ff39610b92a348cc99a1307e1da3c3768bbb5b135"}, + {file = "mypy-0.991-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:652b651d42f155033a1967739788c436491b577b6a44e4c39fb340d0ee7f0d70"}, + {file = "mypy-0.991-cp38-cp38-win_amd64.whl", hash = "sha256:4175593dc25d9da12f7de8de873a33f9b2b8bdb4e827a7cae952e5b1a342e243"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98e781cd35c0acf33eb0295e8b9c55cdbef64fcb35f6d3aa2186f289bed6e80d"}, + {file = "mypy-0.991-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6d7464bac72a85cb3491c7e92b5b62f3dcccb8af26826257760a552a5e244aa5"}, + {file = "mypy-0.991-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c9166b3f81a10cdf9b49f2d594b21b31adadb3d5e9db9b834866c3258b695be3"}, + {file = "mypy-0.991-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8472f736a5bfb159a5e36740847808f6f5b659960115ff29c7cecec1741c648"}, + {file = "mypy-0.991-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5e80e758243b97b618cdf22004beb09e8a2de1af481382e4d84bc52152d1c476"}, + {file = "mypy-0.991-cp39-cp39-win_amd64.whl", hash = "sha256:74e259b5c19f70d35fcc1ad3d56499065c601dfe94ff67ae48b85596b9ec1461"}, + {file = "mypy-0.991-py3-none-any.whl", hash = "sha256:de32edc9b0a7e67c2775e574cb061a537660e51210fbf6006b0b36ea695ae9bb"}, + {file = "mypy-0.991.tar.gz", hash = "sha256:3c0165ba8f354a6d9881809ef29f1a9318a236a6d81c690094c5df32107bde06"}, +] + +[package.dependencies] +mypy-extensions = ">=0.4.3" +tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""} +typing-extensions = ">=3.10" + +[package.extras] +dmypy = ["psutil (>=4.0)"] +install-types = ["pip"] +python2 = ["typed-ast (>=1.4.0,<2)"] +reports = ["lxml"] + +[[package]] +name = "mypy-extensions" +version = "1.0.0" +description = "Type system extensions for programs checked with the mypy type checker." 
+optional = false +python-versions = ">=3.5" +files = [ + {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, + {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, +] + +[[package]] +name = "numpy" +version = "1.24.4" +description = "Fundamental package for array computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.24.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c0bfb52d2169d58c1cdb8cc1f16989101639b34c7d3ce60ed70b19c63eba0b64"}, + {file = "numpy-1.24.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ed094d4f0c177b1b8e7aa9cba7d6ceed51c0e569a5318ac0ca9a090680a6a1b1"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79fc682a374c4a8ed08b331bef9c5f582585d1048fa6d80bc6c35bc384eee9b4"}, + {file = "numpy-1.24.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ffe43c74893dbf38c2b0a1f5428760a1a9c98285553c89e12d70a96a7f3a4d6"}, + {file = "numpy-1.24.4-cp310-cp310-win32.whl", hash = "sha256:4c21decb6ea94057331e111a5bed9a79d335658c27ce2adb580fb4d54f2ad9bc"}, + {file = "numpy-1.24.4-cp310-cp310-win_amd64.whl", hash = "sha256:b4bea75e47d9586d31e892a7401f76e909712a0fd510f58f5337bea9572c571e"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f136bab9c2cfd8da131132c2cf6cc27331dd6fae65f95f69dcd4ae3c3639c810"}, + {file = "numpy-1.24.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2926dac25b313635e4d6cf4dc4e51c8c0ebfed60b801c799ffc4c32bf3d1254"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:222e40d0e2548690405b0b3c7b21d1169117391c2e82c378467ef9ab4c8f0da7"}, + {file = "numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7215847ce88a85ce39baf9e89070cb860c98fdddacbaa6c0da3ffb31b3350bd5"}, + {file = "numpy-1.24.4-cp311-cp311-win32.whl", hash = "sha256:4979217d7de511a8d57f4b4b5b2b965f707768440c17cb70fbf254c4b225238d"}, + {file = "numpy-1.24.4-cp311-cp311-win_amd64.whl", hash = "sha256:b7b1fc9864d7d39e28f41d089bfd6353cb5f27ecd9905348c24187a768c79694"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1452241c290f3e2a312c137a9999cdbf63f78864d63c79039bda65ee86943f61"}, + {file = "numpy-1.24.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:04640dab83f7c6c85abf9cd729c5b65f1ebd0ccf9de90b270cd61935eef0197f"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5425b114831d1e77e4b5d812b69d11d962e104095a5b9c3b641a218abcc050e"}, + {file = "numpy-1.24.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd80e219fd4c71fc3699fc1dadac5dcf4fd882bfc6f7ec53d30fa197b8ee22dc"}, + {file = "numpy-1.24.4-cp38-cp38-win32.whl", hash = "sha256:4602244f345453db537be5314d3983dbf5834a9701b7723ec28923e2889e0bb2"}, + {file = "numpy-1.24.4-cp38-cp38-win_amd64.whl", hash = "sha256:692f2e0f55794943c5bfff12b3f56f99af76f902fc47487bdfe97856de51a706"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2541312fbf09977f3b3ad449c4e5f4bb55d0dbf79226d7724211acc905049400"}, + {file = "numpy-1.24.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9667575fb6d13c95f1b36aca12c5ee3356bf001b714fc354eb5465ce1609e62f"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:f3a86ed21e4f87050382c7bc96571755193c4c1392490744ac73d660e8f564a9"}, + {file = "numpy-1.24.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d11efb4dbecbdf22508d55e48d9c8384db795e1b7b51ea735289ff96613ff74d"}, + {file = "numpy-1.24.4-cp39-cp39-win32.whl", hash = "sha256:6620c0acd41dbcb368610bb2f4d83145674040025e5536954782467100aa8835"}, + {file = "numpy-1.24.4-cp39-cp39-win_amd64.whl", hash = "sha256:befe2bf740fd8373cf56149a5c23a0f601e82869598d41f8e188a0e9869926f8"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:31f13e25b4e304632a4619d0e0777662c2ffea99fcae2029556b17d8ff958aef"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95f7ac6540e95bc440ad77f56e520da5bf877f87dca58bd095288dce8940532a"}, + {file = "numpy-1.24.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e98f220aa76ca2a977fe435f5b04d7b3470c0a2e6312907b37ba6068f26787f2"}, + {file = "numpy-1.24.4.tar.gz", hash = "sha256:80f5e3a4e498641401868df4208b74581206afbee7cf7b8329daae82676d9463"}, +] + +[[package]] +name = "orjson" +version = "3.10.3" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.8" +files = [ + {file = "orjson-3.10.3-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9fb6c3f9f5490a3eb4ddd46fc1b6eadb0d6fc16fb3f07320149c3286a1409dd8"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:252124b198662eee80428f1af8c63f7ff077c88723fe206a25df8dc57a57b1fa"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9f3e87733823089a338ef9bbf363ef4de45e5c599a9bf50a7a9b82e86d0228da"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8334c0d87103bb9fbbe59b78129f1f40d1d1e8355bbed2ca71853af15fa4ed3"}, + {file = "orjson-3.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1952c03439e4dce23482ac846e7961f9d4ec62086eb98ae76d97bd41d72644d7"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c0403ed9c706dcd2809f1600ed18f4aae50be263bd7112e54b50e2c2bc3ebd6d"}, + {file = "orjson-3.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:382e52aa4270a037d41f325e7d1dfa395b7de0c367800b6f337d8157367bf3a7"}, + {file = "orjson-3.10.3-cp310-none-win32.whl", hash = "sha256:be2aab54313752c04f2cbaab4515291ef5af8c2256ce22abc007f89f42f49109"}, + {file = "orjson-3.10.3-cp310-none-win_amd64.whl", hash = "sha256:416b195f78ae461601893f482287cee1e3059ec49b4f99479aedf22a20b1098b"}, + {file = "orjson-3.10.3-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:73100d9abbbe730331f2242c1fc0bcb46a3ea3b4ae3348847e5a141265479700"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:544a12eee96e3ab828dbfcb4d5a0023aa971b27143a1d35dc214c176fdfb29b3"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520de5e2ef0b4ae546bea25129d6c7c74edb43fc6cf5213f511a927f2b28148b"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccaa0a401fc02e8828a5bedfd80f8cd389d24f65e5ca3954d72c6582495b4bcf"}, + {file = "orjson-3.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9a7bc9e8bc11bac40f905640acd41cbeaa87209e7e1f57ade386da658092dc16"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3582b34b70543a1ed6944aca75e219e1192661a63da4d039d088a09c67543b08"}, + {file = "orjson-3.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1c23dfa91481de880890d17aa7b91d586a4746a4c2aa9a145bebdbaf233768d5"}, + {file = "orjson-3.10.3-cp311-none-win32.whl", hash = "sha256:1770e2a0eae728b050705206d84eda8b074b65ee835e7f85c919f5705b006c9b"}, + {file = "orjson-3.10.3-cp311-none-win_amd64.whl", hash = "sha256:93433b3c1f852660eb5abdc1f4dd0ced2be031ba30900433223b28ee0140cde5"}, + {file = "orjson-3.10.3-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a39aa73e53bec8d410875683bfa3a8edf61e5a1c7bb4014f65f81d36467ea098"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0943a96b3fa09bee1afdfccc2cb236c9c64715afa375b2af296c73d91c23eab2"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e852baafceff8da3c9defae29414cc8513a1586ad93e45f27b89a639c68e8176"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18566beb5acd76f3769c1d1a7ec06cdb81edc4d55d2765fb677e3eaa10fa99e0"}, + {file = "orjson-3.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bd2218d5a3aa43060efe649ec564ebedec8ce6ae0a43654b81376216d5ebd42"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cf20465e74c6e17a104ecf01bf8cd3b7b252565b4ccee4548f18b012ff2f8069"}, + {file = "orjson-3.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ba7f67aa7f983c4345eeda16054a4677289011a478ca947cd69c0a86ea45e534"}, + {file = "orjson-3.10.3-cp312-none-win32.whl", hash = "sha256:17e0713fc159abc261eea0f4feda611d32eabc35708b74bef6ad44f6c78d5ea0"}, + {file = "orjson-3.10.3-cp312-none-win_amd64.whl", hash = "sha256:4c895383b1ec42b017dd2c75ae8a5b862fc489006afde06f14afbdd0309b2af0"}, + {file = "orjson-3.10.3-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:be2719e5041e9fb76c8c2c06b9600fe8e8584e6980061ff88dcbc2691a16d20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0175a5798bdc878956099f5c54b9837cb62cfbf5d0b86ba6d77e43861bcec2"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:978be58a68ade24f1af7758626806e13cff7748a677faf95fbb298359aa1e20d"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16bda83b5c61586f6f788333d3cf3ed19015e3b9019188c56983b5a299210eb5"}, + {file = "orjson-3.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ad1f26bea425041e0a1adad34630c4825a9e3adec49079b1fb6ac8d36f8b754"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:9e253498bee561fe85d6325ba55ff2ff08fb5e7184cd6a4d7754133bd19c9195"}, + {file = "orjson-3.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:0a62f9968bab8a676a164263e485f30a0b748255ee2f4ae49a0224be95f4532b"}, + {file = "orjson-3.10.3-cp38-none-win32.whl", hash = "sha256:8d0b84403d287d4bfa9bf7d1dc298d5c1c5d9f444f3737929a66f2fe4fb8f134"}, + {file = "orjson-3.10.3-cp38-none-win_amd64.whl", hash = "sha256:8bc7a4df90da5d535e18157220d7915780d07198b54f4de0110eca6b6c11e290"}, + {file = 
"orjson-3.10.3-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9059d15c30e675a58fdcd6f95465c1522b8426e092de9fff20edebfdc15e1cb0"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d40c7f7938c9c2b934b297412c067936d0b54e4b8ab916fd1a9eb8f54c02294"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d4a654ec1de8fdaae1d80d55cee65893cb06494e124681ab335218be6a0691e7"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:831c6ef73f9aa53c5f40ae8f949ff7681b38eaddb6904aab89dca4d85099cb78"}, + {file = "orjson-3.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99b880d7e34542db89f48d14ddecbd26f06838b12427d5a25d71baceb5ba119d"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:2e5e176c994ce4bd434d7aafb9ecc893c15f347d3d2bbd8e7ce0b63071c52e25"}, + {file = "orjson-3.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b69a58a37dab856491bf2d3bbf259775fdce262b727f96aafbda359cb1d114d8"}, + {file = "orjson-3.10.3-cp39-none-win32.whl", hash = "sha256:b8d4d1a6868cde356f1402c8faeb50d62cee765a1f7ffcfd6de732ab0581e063"}, + {file = "orjson-3.10.3-cp39-none-win_amd64.whl", hash = "sha256:5102f50c5fc46d94f2033fe00d392588564378260d64377aec702f21a7a22912"}, + {file = "orjson-3.10.3.tar.gz", hash = "sha256:2b166507acae7ba2f7c315dcf185a9111ad5e992ac81f2d507aac39193c2c818"}, +] + +[[package]] +name = "packaging" +version = "23.2" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"}, + {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"}, +] + +[[package]] +name = "pandas" +version = "2.0.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = "pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = 
"pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", "html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] 
+compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] + +[[package]] +name = "pluggy" +version = "1.5.0" +description = "plugin and hook calling mechanisms for python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, + {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, +] + +[package.extras] +dev = ["pre-commit", "tox"] +testing = ["pytest", "pytest-benchmark"] + +[[package]] +name = "protobuf" +version = "5.27.0" +description = "" +optional = false +python-versions = ">=3.8" +files = [ + {file = "protobuf-5.27.0-cp310-abi3-win32.whl", hash = "sha256:2f83bf341d925650d550b8932b71763321d782529ac0eaf278f5242f513cc04e"}, + {file = "protobuf-5.27.0-cp310-abi3-win_amd64.whl", hash = "sha256:b276e3f477ea1eebff3c2e1515136cfcff5ac14519c45f9b4aa2f6a87ea627c4"}, + {file = "protobuf-5.27.0-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:744489f77c29174328d32f8921566fb0f7080a2f064c5137b9d6f4b790f9e0c1"}, + {file = "protobuf-5.27.0-cp38-abi3-manylinux2014_aarch64.whl", hash = "sha256:f51f33d305e18646f03acfdb343aac15b8115235af98bc9f844bf9446573827b"}, + {file = "protobuf-5.27.0-cp38-abi3-manylinux2014_x86_64.whl", hash = "sha256:56937f97ae0dcf4e220ff2abb1456c51a334144c9960b23597f044ce99c29c89"}, + {file = "protobuf-5.27.0-cp38-cp38-win32.whl", hash = "sha256:a17f4d664ea868102feaa30a674542255f9f4bf835d943d588440d1f49a3ed15"}, + {file = "protobuf-5.27.0-cp38-cp38-win_amd64.whl", hash = "sha256:aabbbcf794fbb4c692ff14ce06780a66d04758435717107c387f12fb477bf0d8"}, + {file = "protobuf-5.27.0-cp39-cp39-win32.whl", hash = "sha256:587be23f1212da7a14a6c65fd61995f8ef35779d4aea9e36aad81f5f3b80aec5"}, + {file = "protobuf-5.27.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cb65fc8fba680b27cf7a07678084c6e68ee13cab7cace734954c25a43da6d0f"}, + {file = "protobuf-5.27.0-py3-none-any.whl", hash = "sha256:673ad60f1536b394b4fa0bcd3146a4130fcad85bfe3b60eaa86d6a0ace0fa374"}, + {file = "protobuf-5.27.0.tar.gz", hash = "sha256:07f2b9a15255e3cf3f137d884af7972407b556a7a220912b252f26dc3121e6bf"}, +] + +[[package]] +name = "pydantic" +version = "2.7.1" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"}, + {file = "pydantic-2.7.1.tar.gz", hash = 
"sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.18.2" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.18.2" +description = "Core functionality for Pydantic validation and serialization" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"}, + {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"}, + {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"}, + {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"}, + {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"}, + {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"}, + {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"}, + {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"}, + {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"}, + {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"}, + {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"}, + {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"}, + {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"}, + {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"}, + {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"}, + {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"}, + {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"}, + {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"}, + {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"}, + {file = 
"pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"}, + {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"}, + {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"}, + {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"}, + {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"}, + {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"}, + {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"}, + {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"}, + {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"}, + {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"}, + {file = 
"pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"}, + {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"}, + {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"}, + {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pymilvus" +version = "2.4.3" +description = "Python Sdk for Milvus" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pymilvus-2.4.3-py3-none-any.whl", hash = "sha256:38239e89f8d739f665141d0b80908990b5f59681e889e135c234a4a45669a5c8"}, + {file = "pymilvus-2.4.3.tar.gz", hash = "sha256:703ac29296cdce03d6dc2aaebbe959e57745c141a94150e371dc36c61c226cc1"}, +] + +[package.dependencies] +environs = "<=9.5.0" +grpcio = ">=1.49.1,<=1.63.0" +milvus-lite = ">=2.4.0,<2.5.0" +numpy = {version = "<1.25.0", markers = "python_version <= \"3.8\""} +pandas = ">=1.2.4" +protobuf = ">=3.20.0" +setuptools = ">=67" +ujson = ">=2.0.0" + +[package.extras] +bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "requests"] +dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", 
"pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"] +model = ["milvus-model (>=0.1.0)"] + +[[package]] +name = "pytest" +version = "7.4.4" +description = "pytest: simple powerful testing with Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"}, + {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} +iniconfig = "*" +packaging = "*" +pluggy = ">=0.12,<2.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} + +[package.extras] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] + +[[package]] +name = "pytest-asyncio" +version = "0.21.2" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pytest_asyncio-0.21.2-py3-none-any.whl", hash = "sha256:ab664c88bb7998f711d8039cacd4884da6430886ae8bbd4eded552ed2004f16b"}, + {file = "pytest_asyncio-0.21.2.tar.gz", hash = "sha256:d67738fc232b94b326b9d060750beb16e0074210b98dd8b58a5239fa2a154f45"}, +] + +[package.dependencies] +pytest = ">=7.0.0" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "flaky (>=3.5.0)", "hypothesis (>=5.7.1)", "mypy (>=0.931)", "pytest-trio (>=0.7.0)"] + +[[package]] +name = "pytest-mock" +version = "3.14.0" +description = "Thin-wrapper around the mock package for easier use with pytest" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pytest-mock-3.14.0.tar.gz", hash = "sha256:2719255a1efeceadbc056d6bf3df3d1c5015530fb40cf347c0f9afac88410bd0"}, + {file = "pytest_mock-3.14.0-py3-none-any.whl", hash = "sha256:0b72c38033392a5f4621342fe11e9219ac11ec9d375f8e2a0c164539e0d70f6f"}, +] + +[package.dependencies] +pytest = ">=6.2.5" + +[package.extras] +dev = ["pre-commit", "pytest-asyncio", "tox"] + +[[package]] +name = "pytest-watcher" +version = "0.3.5" +description = "Automatically rerun your tests on file modifications" +optional = false +python-versions = ">=3.7.0,<4.0.0" +files = [ + {file = "pytest_watcher-0.3.5-py3-none-any.whl", hash = "sha256:af00ca52c7be22dc34c0fd3d7ffef99057207a73b05dc5161fe3b2fe91f58130"}, + {file = "pytest_watcher-0.3.5.tar.gz", hash = "sha256:8896152460ba2b1a8200c12117c6611008ec96c8b2d811f0a05ab8a82b043ff8"}, +] + +[package.dependencies] +tomli = {version = ">=2.0.1,<3.0.0", markers = "python_version < \"3.11\""} +watchdog = ">=2.0.0" + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ 
+ {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, + {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, + {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, + {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, + {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, + {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, 
+ {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "requests" +version = "2.32.2" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.2-py3-none-any.whl", hash = "sha256:fc06670dd0ed212426dfeb94fc1b983d917c4f9847c863f313c9dfaaffb7c23c"}, + {file = "requests-2.32.2.tar.gz", hash = "sha256:dd951ff5ecf3e3b3aa26b40703ba77495dab41da839ae72ef3c8e5d8e2433289"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "ruff" +version = "0.1.15" +description = "An extremely fast Python linter and code formatter, written in Rust." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:5fe8d54df166ecc24106db7dd6a68d44852d14eb0729ea4672bb4d96c320b7df"}, + {file = "ruff-0.1.15-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:6f0bfbb53c4b4de117ac4d6ddfd33aa5fc31beeaa21d23c45c6dd249faf9126f"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0d432aec35bfc0d800d4f70eba26e23a352386be3a6cf157083d18f6f5881c8"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9405fa9ac0e97f35aaddf185a1be194a589424b8713e3b97b762336ec79ff807"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c66ec24fe36841636e814b8f90f572a8c0cb0e54d8b5c2d0e300d28a0d7bffec"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:6f8ad828f01e8dd32cc58bc28375150171d198491fc901f6f98d2a39ba8e3ff5"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86811954eec63e9ea162af0ffa9f8d09088bab51b7438e8b6488b9401863c25e"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fd4025ac5e87d9b80e1f300207eb2fd099ff8200fa2320d7dc066a3f4622dc6b"}, + {file = "ruff-0.1.15-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b17b93c02cdb6aeb696effecea1095ac93f3884a49a554a9afa76bb125c114c1"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ddb87643be40f034e97e97f5bc2ef7ce39de20e34608f3f829db727a93fb82c5"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:abf4822129ed3a5ce54383d5f0e964e7fef74a41e48eb1dfad404151efc130a2"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_i686.whl", hash = "sha256:6c629cf64bacfd136c07c78ac10a54578ec9d1bd2a9d395efbee0935868bf852"}, + {file = "ruff-0.1.15-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1bab866aafb53da39c2cadfb8e1c4550ac5340bb40300083eb8967ba25481447"}, + {file = "ruff-0.1.15-py3-none-win32.whl", hash = "sha256:2417e1cb6e2068389b07e6fa74c306b2810fe3ee3476d5b8a96616633f40d14f"}, + {file = "ruff-0.1.15-py3-none-win_amd64.whl", hash = "sha256:3837ac73d869efc4182d9036b1405ef4c73d9b1f88da2413875e34e0d6919587"}, + {file = "ruff-0.1.15-py3-none-win_arm64.whl", hash = "sha256:9a933dfb1c14ec7a33cceb1e49ec4a16b51ce3c20fd42663198746efc0427360"}, + {file = "ruff-0.1.15.tar.gz", hash = "sha256:f6dfa8c1b21c913c326919056c390966648b680966febcb796cc9d1aaab8564e"}, +] + +[[package]] +name = "scipy" +version = "1.9.3" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"}, + {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"}, + {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"}, + {file = 
"scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"}, + {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"}, + {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"}, + {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"}, + {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<1.26.0" + +[package.extras] +dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] +test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "setuptools" +version = "70.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "setuptools-70.0.0-py3-none-any.whl", hash = "sha256:54faa7f2e8d2d11bcd2c07bed282eef1046b5c080d1c32add737d7b5817b1ad4"}, + {file = "setuptools-70.0.0.tar.gz", hash = "sha256:f211a66637b8fa059bb28183da127d4e86396c991a942b028c6650d4319c3fd0"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv] 
(>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "syrupy" +version = "4.6.1" +description = "Pytest Snapshot Test Utility" +optional = false +python-versions = ">=3.8.1,<4" +files = [ + {file = "syrupy-4.6.1-py3-none-any.whl", hash = "sha256:203e52f9cb9fa749cf683f29bd68f02c16c3bc7e7e5fe8f2fc59bdfe488ce133"}, + {file = "syrupy-4.6.1.tar.gz", hash = "sha256:37a835c9ce7857eeef86d62145885e10b3cb9615bc6abeb4ce404b3f18e1bb36"}, +] + +[package.dependencies] +pytest = ">=7.0.0,<9.0.0" + +[[package]] +name = "tenacity" +version = "8.3.0" +description = "Retry code until it succeeds" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tenacity-8.3.0-py3-none-any.whl", hash = "sha256:3649f6443dbc0d9b01b9d8020a9c4ec7a1ff5f6f3c6c8a036ef371f573fe9185"}, + {file = "tenacity-8.3.0.tar.gz", hash = "sha256:953d4e6ad24357bceffbc9707bc74349aca9d245f68eb65419cf0c249a1949a2"}, +] + +[package.extras] +doc = ["reno", "sphinx"] +test = ["pytest", "tornado (>=4.5)", "typeguard"] + +[[package]] +name = "tomli" +version = "2.0.1" +description = "A lil' TOML parser" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "types-requests" +version = "2.32.0.20240523" +description = "Typing stubs for requests" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-requests-2.32.0.20240523.tar.gz", hash = "sha256:26b8a6de32d9f561192b9942b41c0ab2d8010df5677ca8aa146289d11d505f57"}, + {file = "types_requests-2.32.0.20240523-py3-none-any.whl", hash = "sha256:f19ed0e2daa74302069bbbbf9e82902854ffa780bc790742a810a9aaa52f65ec"}, +] + +[package.dependencies] +urllib3 = ">=2" + +[[package]] +name = "typing-extensions" +version = "4.12.0" +description = "Backported and Experimental Type Hints for Python 3.8+" +optional = false +python-versions = ">=3.8" +files = [ + {file = "typing_extensions-4.12.0-py3-none-any.whl", hash = "sha256:b349c66bea9016ac22978d800cfff206d5f9816951f12a7d0ec5578b0a819594"}, + {file = "typing_extensions-4.12.0.tar.gz", hash = "sha256:8cbcdc8606ebcb0d95453ad7dc5065e6237b6aa230a31e81d0f440c30fed5fd8"}, +] + +[[package]] +name = "tzdata" +version = "2024.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file 
= "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + +[[package]] +name = "ujson" +version = "5.10.0" +description = "Ultra fast JSON encoder and decoder for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "ujson-5.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2601aa9ecdbee1118a1c2065323bda35e2c5a2cf0797ef4522d485f9d3ef65bd"}, + {file = "ujson-5.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:348898dd702fc1c4f1051bc3aacbf894caa0927fe2c53e68679c073375f732cf"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22cffecf73391e8abd65ef5f4e4dd523162a3399d5e84faa6aebbf9583df86d6"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26b0e2d2366543c1bb4fbd457446f00b0187a2bddf93148ac2da07a53fe51569"}, + {file = "ujson-5.10.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:caf270c6dba1be7a41125cd1e4fc7ba384bf564650beef0df2dd21a00b7f5770"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a245d59f2ffe750446292b0094244df163c3dc96b3ce152a2c837a44e7cda9d1"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:94a87f6e151c5f483d7d54ceef83b45d3a9cca7a9cb453dbdbb3f5a6f64033f5"}, + {file = "ujson-5.10.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:29b443c4c0a113bcbb792c88bea67b675c7ca3ca80c3474784e08bba01c18d51"}, + {file = "ujson-5.10.0-cp310-cp310-win32.whl", hash = "sha256:c18610b9ccd2874950faf474692deee4223a994251bc0a083c114671b64e6518"}, + {file = "ujson-5.10.0-cp310-cp310-win_amd64.whl", hash = "sha256:924f7318c31874d6bb44d9ee1900167ca32aa9b69389b98ecbde34c1698a250f"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a5b366812c90e69d0f379a53648be10a5db38f9d4ad212b60af00bd4048d0f00"}, + {file = "ujson-5.10.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:502bf475781e8167f0f9d0e41cd32879d120a524b22358e7f205294224c71126"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b91b5d0d9d283e085e821651184a647699430705b15bf274c7896f23fe9c9d8"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:129e39af3a6d85b9c26d5577169c21d53821d8cf68e079060602e861c6e5da1b"}, + {file = "ujson-5.10.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f77b74475c462cb8b88680471193064d3e715c7c6074b1c8c412cb526466efe9"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7ec0ca8c415e81aa4123501fee7f761abf4b7f386aad348501a26940beb1860f"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ab13a2a9e0b2865a6c6db9271f4b46af1c7476bfd51af1f64585e919b7c07fd4"}, + {file = "ujson-5.10.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:57aaf98b92d72fc70886b5a0e1a1ca52c2320377360341715dd3933a18e827b1"}, + {file = "ujson-5.10.0-cp311-cp311-win32.whl", hash = "sha256:2987713a490ceb27edff77fb184ed09acdc565db700ee852823c3dc3cffe455f"}, + {file = "ujson-5.10.0-cp311-cp311-win_amd64.whl", hash = "sha256:f00ea7e00447918ee0eff2422c4add4c5752b1b60e88fcb3c067d4a21049a720"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:98ba15d8cbc481ce55695beee9f063189dce91a4b08bc1d03e7f0152cd4bbdd5"}, + {file = "ujson-5.10.0-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:a9d2edbf1556e4f56e50fab7d8ff993dbad7f54bac68eacdd27a8f55f433578e"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6627029ae4f52d0e1a2451768c2c37c0c814ffc04f796eb36244cf16b8e57043"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8ccb77b3e40b151e20519c6ae6d89bfe3f4c14e8e210d910287f778368bb3d1"}, + {file = "ujson-5.10.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3caf9cd64abfeb11a3b661329085c5e167abbe15256b3b68cb5d914ba7396f3"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6e32abdce572e3a8c3d02c886c704a38a1b015a1fb858004e03d20ca7cecbb21"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a65b6af4d903103ee7b6f4f5b85f1bfd0c90ba4eeac6421aae436c9988aa64a2"}, + {file = "ujson-5.10.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:604a046d966457b6cdcacc5aa2ec5314f0e8c42bae52842c1e6fa02ea4bda42e"}, + {file = "ujson-5.10.0-cp312-cp312-win32.whl", hash = "sha256:6dea1c8b4fc921bf78a8ff00bbd2bfe166345f5536c510671bccececb187c80e"}, + {file = "ujson-5.10.0-cp312-cp312-win_amd64.whl", hash = "sha256:38665e7d8290188b1e0d57d584eb8110951a9591363316dd41cf8686ab1d0abc"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_10_9_x86_64.whl", hash = "sha256:618efd84dc1acbd6bff8eaa736bb6c074bfa8b8a98f55b61c38d4ca2c1f7f287"}, + {file = "ujson-5.10.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38d5d36b4aedfe81dfe251f76c0467399d575d1395a1755de391e58985ab1c2e"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:67079b1f9fb29ed9a2914acf4ef6c02844b3153913eb735d4bf287ee1db6e557"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7d0e0ceeb8fe2468c70ec0c37b439dd554e2aa539a8a56365fd761edb418988"}, + {file = "ujson-5.10.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:59e02cd37bc7c44d587a0ba45347cc815fb7a5fe48de16bf05caa5f7d0d2e816"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a890b706b64e0065f02577bf6d8ca3b66c11a5e81fb75d757233a38c07a1f20"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:621e34b4632c740ecb491efc7f1fcb4f74b48ddb55e65221995e74e2d00bbff0"}, + {file = "ujson-5.10.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b9500e61fce0cfc86168b248104e954fead61f9be213087153d272e817ec7b4f"}, + {file = "ujson-5.10.0-cp313-cp313-win32.whl", hash = "sha256:4c4fc16f11ac1612f05b6f5781b384716719547e142cfd67b65d035bd85af165"}, + {file = "ujson-5.10.0-cp313-cp313-win_amd64.whl", hash = "sha256:4573fd1695932d4f619928fd09d5d03d917274381649ade4328091ceca175539"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a984a3131da7f07563057db1c3020b1350a3e27a8ec46ccbfbf21e5928a43050"}, + {file = "ujson-5.10.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:73814cd1b9db6fc3270e9d8fe3b19f9f89e78ee9d71e8bd6c9a626aeaeaf16bd"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61e1591ed9376e5eddda202ec229eddc56c612b61ac6ad07f96b91460bb6c2fb"}, + {file = "ujson-5.10.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2c75269f8205b2690db4572a4a36fe47cd1338e4368bc73a7a0e48789e2e35a"}, + {file = 
"ujson-5.10.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7223f41e5bf1f919cd8d073e35b229295aa8e0f7b5de07ed1c8fddac63a6bc5d"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d4dc2fd6b3067c0782e7002ac3b38cf48608ee6366ff176bbd02cf969c9c20fe"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:232cc85f8ee3c454c115455195a205074a56ff42608fd6b942aa4c378ac14dd7"}, + {file = "ujson-5.10.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:cc6139531f13148055d691e442e4bc6601f6dba1e6d521b1585d4788ab0bfad4"}, + {file = "ujson-5.10.0-cp38-cp38-win32.whl", hash = "sha256:e7ce306a42b6b93ca47ac4a3b96683ca554f6d35dd8adc5acfcd55096c8dfcb8"}, + {file = "ujson-5.10.0-cp38-cp38-win_amd64.whl", hash = "sha256:e82d4bb2138ab05e18f089a83b6564fee28048771eb63cdecf4b9b549de8a2cc"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dfef2814c6b3291c3c5f10065f745a1307d86019dbd7ea50e83504950136ed5b"}, + {file = "ujson-5.10.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4734ee0745d5928d0ba3a213647f1c4a74a2a28edc6d27b2d6d5bd9fa4319e27"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ebb01bd865fdea43da56254a3930a413f0c5590372a1241514abae8aa7c76"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dee5e97c2496874acbf1d3e37b521dd1f307349ed955e62d1d2f05382bc36dd5"}, + {file = "ujson-5.10.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7490655a2272a2d0b072ef16b0b58ee462f4973a8f6bbe64917ce5e0a256f9c0"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ba17799fcddaddf5c1f75a4ba3fd6441f6a4f1e9173f8a786b42450851bd74f1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:2aff2985cef314f21d0fecc56027505804bc78802c0121343874741650a4d3d1"}, + {file = "ujson-5.10.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ad88ac75c432674d05b61184178635d44901eb749786c8eb08c102330e6e8996"}, + {file = "ujson-5.10.0-cp39-cp39-win32.whl", hash = "sha256:2544912a71da4ff8c4f7ab5606f947d7299971bdd25a45e008e467ca638d13c9"}, + {file = "ujson-5.10.0-cp39-cp39-win_amd64.whl", hash = "sha256:3ff201d62b1b177a46f113bb43ad300b424b7847f9c5d38b1b4ad8f75d4a282a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5b6fee72fa77dc172a28f21693f64d93166534c263adb3f96c413ccc85ef6e64"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:61d0af13a9af01d9f26d2331ce49bb5ac1fb9c814964018ac8df605b5422dcb3"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ecb24f0bdd899d368b715c9e6664166cf694d1e57be73f17759573a6986dd95a"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbd8fd427f57a03cff3ad6574b5e299131585d9727c8c366da4624a9069ed746"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:beeaf1c48e32f07d8820c705ff8e645f8afa690cca1544adba4ebfa067efdc88"}, + {file = "ujson-5.10.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:baed37ea46d756aca2955e99525cc02d9181de67f25515c468856c38d52b5f3b"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7663960f08cd5a2bb152f5ee3992e1af7690a64c0e26d31ba7b3ff5b2ee66337"}, + {file = 
"ujson-5.10.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:d8640fb4072d36b08e95a3a380ba65779d356b2fee8696afeb7794cf0902d0a1"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78778a3aa7aafb11e7ddca4e29f46bc5139131037ad628cc10936764282d6753"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0111b27f2d5c820e7f2dbad7d48e3338c824e7ac4d2a12da3dc6061cc39c8e6"}, + {file = "ujson-5.10.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:c66962ca7565605b355a9ed478292da628b8f18c0f2793021ca4425abf8b01e5"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ba43cc34cce49cf2d4bc76401a754a81202d8aa926d0e2b79f0ee258cb15d3a4"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ac56eb983edce27e7f51d05bc8dd820586c6e6be1c5216a6809b0c668bb312b8"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44bd4b23a0e723bf8b10628288c2c7c335161d6840013d4d5de20e48551773b"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c10f4654e5326ec14a46bcdeb2b685d4ada6911050aa8baaf3501e57024b804"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0de4971a89a762398006e844ae394bd46991f7c385d7a6a3b93ba229e6dac17e"}, + {file = "ujson-5.10.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e1402f0564a97d2a52310ae10a64d25bcef94f8dd643fcf5d310219d915484f7"}, + {file = "ujson-5.10.0.tar.gz", hash = "sha256:b3cd8f3c5d8c7738257f1018880444f7b7d9b66232c64649f562d7ba86ad4bc1"}, +] + +[[package]] +name = "urllib3" +version = "2.2.1" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, + {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] +h2 = ["h2 (>=4,<5)"] +socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] +zstd = ["zstandard (>=0.18.0)"] + +[[package]] +name = "watchdog" +version = "4.0.1" +description = "Filesystem events monitoring" +optional = false +python-versions = ">=3.8" +files = [ + {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da2dfdaa8006eb6a71051795856bedd97e5b03e57da96f98e375682c48850645"}, + {file = "watchdog-4.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e93f451f2dfa433d97765ca2634628b789b49ba8b504fdde5837cdcf25fdb53b"}, + {file = "watchdog-4.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ef0107bbb6a55f5be727cfc2ef945d5676b97bffb8425650dadbb184be9f9a2b"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:17e32f147d8bf9657e0922c0940bcde863b894cd871dbb694beb6704cfbd2fb5"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03e70d2df2258fb6cb0e95bbdbe06c16e608af94a3ffbd2b90c3f1e83eb10767"}, + {file = "watchdog-4.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:123587af84260c991dc5f62a6e7ef3d1c57dfddc99faacee508c71d287248459"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:093b23e6906a8b97051191a4a0c73a77ecc958121d42346274c6af6520dec175"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:611be3904f9843f0529c35a3ff3fd617449463cb4b73b1633950b3d97fa4bfb7"}, + {file = "watchdog-4.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:62c613ad689ddcb11707f030e722fa929f322ef7e4f18f5335d2b73c61a85c28"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:d4925e4bf7b9bddd1c3de13c9b8a2cdb89a468f640e66fbfabaf735bd85b3e35"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:cad0bbd66cd59fc474b4a4376bc5ac3fc698723510cbb64091c2a793b18654db"}, + {file = "watchdog-4.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a3c2c317a8fb53e5b3d25790553796105501a235343f5d2bf23bb8649c2c8709"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c9904904b6564d4ee8a1ed820db76185a3c96e05560c776c79a6ce5ab71888ba"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:667f3c579e813fcbad1b784db7a1aaa96524bed53437e119f6a2f5de4db04235"}, + {file = "watchdog-4.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d10a681c9a1d5a77e75c48a3b8e1a9f2ae2928eda463e8d33660437705659682"}, + {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0144c0ea9997b92615af1d94afc0c217e07ce2c14912c7b1a5731776329fcfc7"}, + {file = "watchdog-4.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:998d2be6976a0ee3a81fb8e2777900c28641fb5bfbd0c84717d89bca0addcdc5"}, + {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e7921319fe4430b11278d924ef66d4daa469fafb1da679a2e48c935fa27af193"}, + {file = "watchdog-4.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:f0de0f284248ab40188f23380b03b59126d1479cd59940f2a34f8852db710625"}, + {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = 
"sha256:bca36be5707e81b9e6ce3208d92d95540d4ca244c006b61511753583c81c70dd"}, + {file = "watchdog-4.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab998f567ebdf6b1da7dc1e5accfaa7c6992244629c0fdaef062f43249bd8dee"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dddba7ca1c807045323b6af4ff80f5ddc4d654c8bce8317dde1bd96b128ed253"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_armv7l.whl", hash = "sha256:4513ec234c68b14d4161440e07f995f231be21a09329051e67a2118a7a612d2d"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_i686.whl", hash = "sha256:4107ac5ab936a63952dea2a46a734a23230aa2f6f9db1291bf171dac3ebd53c6"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64.whl", hash = "sha256:6e8c70d2cd745daec2a08734d9f63092b793ad97612470a0ee4cbb8f5f705c57"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:f27279d060e2ab24c0aa98363ff906d2386aa6c4dc2f1a374655d4e02a6c5e5e"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_s390x.whl", hash = "sha256:f8affdf3c0f0466e69f5b3917cdd042f89c8c63aebdb9f7c078996f607cdb0f5"}, + {file = "watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ac7041b385f04c047fcc2951dc001671dee1b7e0615cde772e84b01fbf68ee84"}, + {file = "watchdog-4.0.1-py3-none-win32.whl", hash = "sha256:206afc3d964f9a233e6ad34618ec60b9837d0582b500b63687e34011e15bb429"}, + {file = "watchdog-4.0.1-py3-none-win_amd64.whl", hash = "sha256:7577b3c43e5909623149f76b099ac49a1a01ca4e167d1785c76eb52fa585745a"}, + {file = "watchdog-4.0.1-py3-none-win_ia64.whl", hash = "sha256:d7b9f5f3299e8dd230880b6c55504a1f69cf1e4316275d1b215ebdd8187ec88d"}, + {file = "watchdog-4.0.1.tar.gz", hash = "sha256:eebaacf674fa25511e8867028d281e602ee6500045b57f43b08778082f7f8b44"}, +] + +[package.extras] +watchmedo = ["PyYAML (>=3.10)"] + +[metadata] +lock-version = "2.0" +python-versions = ">=3.8.1,<4.0" +content-hash = "bdd4f827b6ae022134ab2be9ee987e3247d6be99c1d1cb2e403448b8b0677a4a" diff --git a/libs/partners/milvus/pyproject.toml b/libs/partners/milvus/pyproject.toml new file mode 100644 index 0000000000000..6b55eff00cfb5 --- /dev/null +++ b/libs/partners/milvus/pyproject.toml @@ -0,0 +1,99 @@ +[tool.poetry] +name = "gigachain-milvus" +version = "0.1.1" +description = "An integration package connecting Milvus and Gigachain" +authors = [] +readme = "README.md" +repository = "https://github.com/langchain-ai/langchain" +license = "MIT" + +[tool.poetry.urls] +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/milvus" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +gigachain-core = "^0.2.0" +pymilvus = "^2.4.3" +scipy = "^1.7" + +[tool.poetry.group.test] +optional = true + +[tool.poetry.group.test.dependencies] +pytest = "^7.3.0" +freezegun = "^1.2.2" +pytest-mock = "^3.10.0" +syrupy = "^4.0.2" +pytest-watcher = "^0.3.4" +pytest-asyncio = "^0.21.1" +gigachain-core = { path = "../../core", develop = true } + +[tool.poetry.group.codespell] +optional = true + +[tool.poetry.group.codespell.dependencies] +codespell = "^2.2.0" + +[tool.poetry.group.test_integration] +optional = true + +[tool.poetry.group.test_integration.dependencies] + +[tool.poetry.group.lint] +optional = true + +[tool.poetry.group.lint.dependencies] +ruff = "^0.1.5" + +[tool.poetry.group.typing.dependencies] +mypy = "^0.991" +gigachain-core = { path = "../../core", develop = true } +types-requests = "^2" + +[tool.poetry.group.dev] +optional = true + +[tool.poetry.group.dev.dependencies] +gigachain-core = { path = "../../core", develop = true 
} + +[tool.ruff] +select = [ + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "T201", # print +] + +[tool.mypy] +disallow_untyped_defs = "True" + +[[tool.mypy.overrides]] +module = ["pymilvus"] +ignore_missing_imports = "True" + +[tool.coverage.run] +omit = ["tests/*"] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. +# +# https://github.com/tophat/syrupy +# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. +addopts = "--snapshot-warn-unused --strict-markers --strict-config --durations=5" +# Registering custom markers. +# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers +markers = [ + "requires: mark tests as requiring a specific library", + "asyncio: mark tests as requiring asyncio", + "compile: mark placeholder test used to compile integration tests without running them", +] +asyncio_mode = "auto" diff --git a/libs/partners/milvus/scripts/check_imports.py b/libs/partners/milvus/scripts/check_imports.py new file mode 100644 index 0000000000000..365f5fa118da4 --- /dev/null +++ b/libs/partners/milvus/scripts/check_imports.py @@ -0,0 +1,17 @@ +import sys +import traceback +from importlib.machinery import SourceFileLoader + +if __name__ == "__main__": + files = sys.argv[1:] + has_failure = False + for file in files: + try: + SourceFileLoader("x", file).load_module() + except Exception: + has_failure = True + print(file) # noqa: T201 + traceback.print_exc() + print() # noqa: T201 + + sys.exit(1 if has_failure else 0) diff --git a/libs/partners/milvus/scripts/check_pydantic.sh b/libs/partners/milvus/scripts/check_pydantic.sh new file mode 100755 index 0000000000000..06b5bb81ae236 --- /dev/null +++ b/libs/partners/milvus/scripts/check_pydantic.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# This script searches for lines starting with "import pydantic" or "from pydantic" +# in tracked files within a Git repository. +# +# Usage: ./scripts/check_pydantic.sh /path/to/repository + +# Check if a path argument is provided +if [ $# -ne 1 ]; then + echo "Usage: $0 /path/to/repository" + exit 1 +fi + +repository_path="$1" + +# Search for lines matching the pattern within the specified repository +result=$(git -C "$repository_path" grep -E '^import pydantic|^from pydantic') + +# Check if any matching lines were found +if [ -n "$result" ]; then + echo "ERROR: The following lines need to be updated:" + echo "$result" + echo "Please replace the code with an import from langchain_core.pydantic_v1." + echo "For example, replace 'from pydantic import BaseModel'" + echo "with 'from langchain_core.pydantic_v1 import BaseModel'" + exit 1 +fi diff --git a/libs/partners/milvus/scripts/lint_imports.sh b/libs/partners/milvus/scripts/lint_imports.sh new file mode 100755 index 0000000000000..695613c7ba8fd --- /dev/null +++ b/libs/partners/milvus/scripts/lint_imports.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +set -eu + +# Initialize a variable to keep track of errors +errors=0 + +# make sure not importing from langchain or langchain_experimental +git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) +git --no-pager grep '^from langchain_experimental\.' . 
&& errors=$((errors+1)) + +# Decide on an exit status based on the errors +if [ "$errors" -gt 0 ]; then + exit 1 +else + exit 0 +fi diff --git a/libs/partners/milvus/tests/__init__.py b/libs/partners/milvus/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/integration_tests/__init__.py b/libs/partners/milvus/tests/integration_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/integration_tests/retrievers/__init__.py b/libs/partners/milvus/tests/integration_tests/retrievers/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/integration_tests/test_compile.py b/libs/partners/milvus/tests/integration_tests/test_compile.py new file mode 100644 index 0000000000000..33ecccdfa0fbd --- /dev/null +++ b/libs/partners/milvus/tests/integration_tests/test_compile.py @@ -0,0 +1,7 @@ +import pytest + + +@pytest.mark.compile +def test_placeholder() -> None: + """Used for compiling integration tests without running any real tests.""" + pass diff --git a/libs/partners/milvus/tests/integration_tests/utils.py b/libs/partners/milvus/tests/integration_tests/utils.py new file mode 100644 index 0000000000000..f3ef87d2f2acc --- /dev/null +++ b/libs/partners/milvus/tests/integration_tests/utils.py @@ -0,0 +1,40 @@ +from typing import List + +from langchain_core.documents import Document +from langchain_core.embeddings import Embeddings + +fake_texts = ["foo", "bar", "baz"] + + +class FakeEmbeddings(Embeddings): + """Fake embeddings functionality for testing.""" + + def embed_documents(self, texts: List[str]) -> List[List[float]]: + """Return simple embeddings. + Embeddings encode each text as its index.""" + return [[float(1.0)] * 9 + [float(i)] for i in range(len(texts))] + + async def aembed_documents(self, texts: List[str]) -> List[List[float]]: + return self.embed_documents(texts) + + def embed_query(self, text: str) -> List[float]: + """Return constant query embeddings. + Embeddings are identical to embed_documents(texts)[0]. 
+ Distance to each text will be that text's index, + as it was passed to embed_documents.""" + return [float(1.0)] * 9 + [float(0.0)] + + async def aembed_query(self, text: str) -> List[float]: + return self.embed_query(text) + + +def assert_docs_equal_without_pk( + docs1: List[Document], docs2: List[Document], pk_field: str = "pk" +) -> None: + """Assert two lists of Documents are equal, ignoring the primary key field.""" + assert len(docs1) == len(docs2) + for doc1, doc2 in zip(docs1, docs2): + assert doc1.page_content == doc2.page_content + doc1.metadata.pop(pk_field, None) + doc2.metadata.pop(pk_field, None) + assert doc1.metadata == doc2.metadata diff --git a/libs/partners/milvus/tests/integration_tests/vectorstores/__init__.py b/libs/partners/milvus/tests/integration_tests/vectorstores/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py b/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py new file mode 100644 index 0000000000000..5eaf2abcc904f --- /dev/null +++ b/libs/partners/milvus/tests/integration_tests/vectorstores/test_milvus.py @@ -0,0 +1,184 @@ +"""Test Milvus functionality.""" +from typing import Any, List, Optional + +from langchain_core.documents import Document + +from langchain_milvus.vectorstores import Milvus +from tests.integration_tests.utils import ( + FakeEmbeddings, + assert_docs_equal_without_pk, + fake_texts, +) + +# +# To run this test properly, please start a Milvus server with the following command: +# +# ```shell +# wget https://raw.githubusercontent.com/milvus-io/milvus/master/scripts/standalone_embed.sh +# bash standalone_embed.sh start +# ``` +# +# Here is the reference: +# https://milvus.io/docs/install_standalone-docker.md +# + + +def _milvus_from_texts( + metadatas: Optional[List[dict]] = None, + ids: Optional[List[str]] = None, + drop: bool = True, +) -> Milvus: + return Milvus.from_texts( + fake_texts, + FakeEmbeddings(), + metadatas=metadatas, + ids=ids, + # connection_args={"uri": "http://127.0.0.1:19530"}, + connection_args={"uri": "./milvus_demo.db"}, + drop_old=drop, + ) + + +def _get_pks(expr: str, docsearch: Milvus) -> List[Any]: + return docsearch.get_pks(expr) # type: ignore[return-value] + + +def test_milvus() -> None: + """Test end to end construction and search.""" + docsearch = _milvus_from_texts() + output = docsearch.similarity_search("foo", k=1) + assert_docs_equal_without_pk(output, [Document(page_content="foo")]) + + +def test_milvus_with_metadata() -> None: + """Test with metadata""" + docsearch = _milvus_from_texts(metadatas=[{"label": "test"}] * len(fake_texts)) + output = docsearch.similarity_search("foo", k=1) + assert_docs_equal_without_pk( + output, [Document(page_content="foo", metadata={"label": "test"})] + ) + + +def test_milvus_with_id() -> None: + """Test with ids""" + ids = ["id_" + str(i) for i in range(len(fake_texts))] + docsearch = _milvus_from_texts(ids=ids) + output = docsearch.similarity_search("foo", k=1) + assert_docs_equal_without_pk(output, [Document(page_content="foo")]) + + output = docsearch.delete(ids=ids) + assert output.delete_count == len(fake_texts) # type: ignore[attr-defined] + + try: + ids = ["dup_id" for _ in fake_texts] + _milvus_from_texts(ids=ids) + except Exception as e: + assert isinstance(e, AssertionError) + + +def test_milvus_with_score() -> None: + """Test end to end construction and search with scores and IDs.""" + texts = ["foo", "bar", "baz"] + metadatas = 
[{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + output = docsearch.similarity_search_with_score("foo", k=3) + docs = [o[0] for o in output] + scores = [o[1] for o in output] + assert_docs_equal_without_pk( + docs, + [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="bar", metadata={"page": 1}), + Document(page_content="baz", metadata={"page": 2}), + ], + ) + assert scores[0] < scores[1] < scores[2] + + +def test_milvus_max_marginal_relevance_search() -> None: + """Test end to end construction and MRR search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + output = docsearch.max_marginal_relevance_search("foo", k=2, fetch_k=3) + assert_docs_equal_without_pk( + output, + [ + Document(page_content="foo", metadata={"page": 0}), + Document(page_content="baz", metadata={"page": 2}), + ], + ) + + +def test_milvus_add_extra() -> None: + """Test end to end construction and MRR search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + + docsearch.add_texts(texts, metadatas) + + output = docsearch.similarity_search("foo", k=10) + assert len(output) == 6 + + +def test_milvus_no_drop() -> None: + """Test end to end construction and MRR search.""" + texts = ["foo", "bar", "baz"] + metadatas = [{"page": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + del docsearch + + docsearch = _milvus_from_texts(metadatas=metadatas, drop=False) + + output = docsearch.similarity_search("foo", k=10) + assert len(output) == 6 + + +def test_milvus_get_pks() -> None: + """Test end to end construction and get pks with expr""" + texts = ["foo", "bar", "baz"] + metadatas = [{"id": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + expr = "id in [1,2]" + output = _get_pks(expr, docsearch) + assert len(output) == 2 + + +def test_milvus_delete_entities() -> None: + """Test end to end construction and delete entities""" + texts = ["foo", "bar", "baz"] + metadatas = [{"id": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + expr = "id in [1,2]" + pks = _get_pks(expr, docsearch) + result = docsearch.delete(pks) + assert result.delete_count == 2 # type: ignore[attr-defined] + + +def test_milvus_upsert_entities() -> None: + """Test end to end construction and upsert entities""" + texts = ["foo", "bar", "baz"] + metadatas = [{"id": i} for i in range(len(texts))] + docsearch = _milvus_from_texts(metadatas=metadatas) + expr = "id in [1,2]" + pks = _get_pks(expr, docsearch) + documents = [ + Document(page_content="test_1", metadata={"id": 1}), + Document(page_content="test_2", metadata={"id": 3}), + ] + ids = docsearch.upsert(pks, documents) + assert len(ids) == 2 # type: ignore[arg-type] + + +# if __name__ == "__main__": +# test_milvus() +# test_milvus_with_metadata() +# test_milvus_with_id() +# test_milvus_with_score() +# test_milvus_max_marginal_relevance_search() +# test_milvus_add_extra() +# test_milvus_no_drop() +# test_milvus_get_pks() +# test_milvus_delete_entities() +# test_milvus_upsert_entities() diff --git a/libs/partners/milvus/tests/unit_tests/__init__.py b/libs/partners/milvus/tests/unit_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/unit_tests/retrievers/__init__.py 
b/libs/partners/milvus/tests/unit_tests/retrievers/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/unit_tests/test_imports.py b/libs/partners/milvus/tests/unit_tests/test_imports.py new file mode 100644 index 0000000000000..8be170e36f272 --- /dev/null +++ b/libs/partners/milvus/tests/unit_tests/test_imports.py @@ -0,0 +1,12 @@ +from langchain_milvus import __all__ + +EXPECTED_ALL = [ + "Milvus", + "MilvusCollectionHybridSearchRetriever", + "Zilliz", + "ZillizCloudPipelineRetriever", +] + + +def test_all_imports() -> None: + assert sorted(EXPECTED_ALL) == sorted(__all__) diff --git a/libs/partners/milvus/tests/unit_tests/vectorstores/__init__.py b/libs/partners/milvus/tests/unit_tests/vectorstores/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/partners/milvus/tests/unit_tests/vectorstores/test_milvus.py b/libs/partners/milvus/tests/unit_tests/vectorstores/test_milvus.py new file mode 100644 index 0000000000000..1ef2314e79590 --- /dev/null +++ b/libs/partners/milvus/tests/unit_tests/vectorstores/test_milvus.py @@ -0,0 +1,17 @@ +import os +from tempfile import TemporaryDirectory +from unittest.mock import Mock + +from langchain_milvus.vectorstores import Milvus + + +def test_initialization() -> None: + """Test integration milvus initialization.""" + embedding = Mock() + with TemporaryDirectory() as tmp_dir: + Milvus( + embedding_function=embedding, + connection_args={ + "uri": os.path.join(tmp_dir, "milvus.db"), + }, + ) diff --git a/libs/partners/mistralai/pyproject.toml b/libs/partners/mistralai/pyproject.toml index ff8667bb25722..1eedeb38c4f3e 100644 --- a/libs/partners/mistralai/pyproject.toml +++ b/libs/partners/mistralai/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "gigachain-mistralai" -version = "0.1.7" -description = "An integration package connecting Mistral and LangChain" +version = "0.1.8" +description = "An integration package connecting Mistral and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.46,<0.3" +gigachain-core = ">=0.2.2,<0.3" tokenizers = ">=0.15.1,<1" httpx = ">=0.25.2,<1" httpx-sse = ">=0.3.1,<1" diff --git a/libs/partners/mongodb/langchain_mongodb/index.py b/libs/partners/mongodb/langchain_mongodb/index.py new file mode 100644 index 0000000000000..a2c0e10ed667a --- /dev/null +++ b/libs/partners/mongodb/langchain_mongodb/index.py @@ -0,0 +1,105 @@ +import logging +from typing import Any, Dict, List, Optional + +from pymongo.collection import Collection +from pymongo.operations import SearchIndexModel + +logger = logging.getLogger(__file__) + + +def _vector_search_index_definition( + dimensions: int, + path: str, + similarity: str, + filters: Optional[List[Dict[str, str]]], +) -> Dict[str, Any]: + return { + "fields": [ + { + "numDimensions": dimensions, + "path": path, + "similarity": similarity, + "type": "vector", + }, + *(filters or []), + ] + } + + +def create_vector_search_index( + collection: Collection, + index_name: str, + dimensions: int, + path: str, + similarity: str, + filters: List[Dict[str, str]], +) -> None: + """Experimental Utility function to create a vector search index + + Args: + collection (Collection): MongoDB Collection + index_name (str): Name of Index + dimensions (int): Number of dimensions in embedding + path (str): field with vector embedding + similarity (str): The 
similarity score used for the index + filters (List[Dict[str, str]]): additional filters for index definition. + """ + logger.info("Creating Search Index %s on %s", index_name, collection.name) + result = collection.create_search_index( + SearchIndexModel( + definition=_vector_search_index_definition( + dimensions=dimensions, path=path, similarity=similarity, filters=filters + ), + name=index_name, + type="vectorSearch", + ) + ) + logger.info(result) + + +def drop_vector_search_index(collection: Collection, index_name: str) -> None: + """Drop a created vector search index + + Args: + collection (Collection): MongoDB Collection with index to be dropped + index_name (str): Name of the MongoDB index + """ + logger.info( + "Dropping Search Index %s from Collection: %s", index_name, collection.name + ) + collection.drop_search_index(index_name) + logger.info("Vector Search index %s.%s dropped", collection.name, index_name) + + +def update_vector_search_index( + collection: Collection, + index_name: str, + dimensions: int, + path: str, + similarity: str, + filters: List[Dict[str, str]], +) -> None: + """Leverages the updateSearchIndex call + + Args: + collection (Collection): MongoDB Collection + index_name (str): Name of Index + dimensions (int): Number of dimensions in embedding. + path (str): field with vector embedding. + similarity (str): The similarity score used for the index. + filters (List[Dict[str, str]]): additional filters for index definition. + """ + + logger.info( + "Updating Search Index %s from Collection: %s", index_name, collection.name + ) + collection.update_search_index( + name=index_name, + definition=_vector_search_index_definition( + dimensions=dimensions, + path=path, + similarity=similarity, + filters=filters, + ), + ) + logger.info("Update succeeded") diff --git a/libs/partners/mongodb/pyproject.toml b/libs/partners/mongodb/pyproject.toml index 5173db7937967..a84ff09bb1010 100644 --- a/libs/partners/mongodb/pyproject.toml +++ b/libs/partners/mongodb/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "gigachain-mongodb" -version = "0.1.5" -description = "An integration package connecting MongoDB and LangChain" +version = "0.1.6" +description = "An integration package connecting MongoDB and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -14,7 +14,11 @@ license = "MIT" python = ">=3.8.1,<4.0" pymongo = ">=4.6.1,<5.0" gigachain-core = ">=0.1.46,<0.3" -numpy = "^1" +# Support Python 3.8 and 3.12+. 
+numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] [tool.poetry.group.test] optional = true diff --git a/libs/partners/nomic/pyproject.toml b/libs/partners/nomic/pyproject.toml index 6f29ab4f48334..837035a5b567b 100644 --- a/libs/partners/nomic/pyproject.toml +++ b/libs/partners/nomic/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-nomic" -version = "0.1.0" -description = "An integration package connecting Nomic and LangChain" +name = "gigachain-nomic" +version = "0.1.2" +description = "An integration package connecting Nomic and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -13,7 +13,8 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" gigachain-core = ">=0.1.46,<0.3" -nomic = "^3.0.12" +nomic = "^3.0.29" +pillow = "^10.3.0" [tool.poetry.group.test] optional = true diff --git a/libs/partners/openai/pyproject.toml b/libs/partners/openai/pyproject.toml index 458a45aecf161..0ce249533f8c4 100644 --- a/libs/partners/openai/pyproject.toml +++ b/libs/partners/openai/pyproject.toml @@ -1,22 +1,19 @@ [tool.poetry] name = "gigachain-openai" -version = "0.1.7" -description = "An integration package connecting OpenAI and LangChain" +version = "0.1.10" +description = "An integration package connecting OpenAI and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" license = "MIT" -packages = [ - {include = "langchain_openai"} -] [tool.poetry.urls] "Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/openai" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.46,<0.3" -openai = "^1.24.0" +gigachain-core = ">=0.2.2,<0.3" +openai = "^1.26.0" tiktoken = ">=0.7,<1" [tool.poetry.group.test] @@ -32,7 +29,11 @@ pytest-asyncio = "^0.21.1" gigachain-core = { path = "../../core", develop = true } pytest-cov = "^4.1.0" gigachain-standard-tests = { path = "../../standard-tests", develop = true } -numpy = "^1.24" +# Support Python 3.8 and 3.12+. +numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] [tool.poetry.group.codespell] optional = true @@ -61,7 +62,13 @@ gigachain-core = { path = "../../core", develop = true } optional = true [tool.poetry.group.test_integration.dependencies] -numpy = "^1" +# Support Python 3.8 and 3.12+. 
+numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] +httpx = "^0.27.0" +pillow = "^10.3.0" [tool.ruff.lint] select = [ @@ -71,6 +78,10 @@ select = [ "T201", # print ] +[tool.ruff.format] +docstring-code-format = true +skip-magic-trailing-comma = true + [tool.mypy] disallow_untyped_defs = "True" diff --git a/libs/partners/pinecone/pyproject.toml b/libs/partners/pinecone/pyproject.toml index 3b90035491456..35a5fb2d75a3f 100644 --- a/libs/partners/pinecone/pyproject.toml +++ b/libs/partners/pinecone/pyproject.toml @@ -1,22 +1,25 @@ - [tool.poetry] -name = "langchain-pinecone" +name = "gigachain-pinecone" version = "0.1.1" -description = "An integration package connecting Pinecone and LangChain" +description = "An integration package connecting Pinecone and Gigachain" authors = [] readme = "README.md" -repository = "https://github.com/gigachain-ai/gigachain" +repository = "https://github.com/langchain-ai/langchain" license = "MIT" [tool.poetry.urls] -"Source Code" = "https://github.com/gigachain-ai/gigachain/tree/master/libs/partners/pinecone" +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/pinecone" [tool.poetry.dependencies] # <3.13 is due to restriction in pinecone-client package python = ">=3.8.1,<3.13" -langchain-core = ">=0.1.52,<0.3" -pinecone-client = "^3.2.2" -numpy = "^1" +gigachain-core = ">=0.1.52,<0.3" +pinecone-client = ">=3.2.2,<5" +# Support Python 3.8 and 3.12+. +numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] [tool.poetry.group.test] optional = true diff --git a/libs/partners/prompty/pyproject.toml b/libs/partners/prompty/pyproject.toml index a8fe552cd2550..f7b1755770942 100644 --- a/libs/partners/prompty/pyproject.toml +++ b/libs/partners/prompty/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-prompty" +name = "gigachain-prompty" version = "0.0.2" -description = "An integration package connecting Prompty and LangChain" +description = "An integration package connecting Prompty and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.52,<0.3" +gigachain-core = ">=0.1.52,<0.3" pyyaml = "^6.0.1" types-pyyaml = "^6.0.12.20240311" @@ -26,9 +26,9 @@ pytest-mock = "^3.10.0" syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" -langchain-core = { path = "../../core", develop = true } -langchain = { path = "../../langchain", develop = true } -langchain-text-splitters = { path = "../../text-splitters", develop = true } +gigachain-core = { path = "../../core", develop = true } +gigachain = { path = "../../langchain", develop = true } +gigachain-text-splitters = { path = "../../text-splitters", develop = true } [tool.poetry.group.codespell] optional = true @@ -49,13 +49,13 @@ ruff = "^0.1.5" [tool.poetry.group.typing.dependencies] mypy = "^0.991" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } types-pyyaml = "^6.0.12.20240311" [tool.ruff] diff --git a/libs/partners/qdrant/pyproject.toml b/libs/partners/qdrant/pyproject.toml index 0871b867ee13f..29ab4b720c86f 100644 --- a/libs/partners/qdrant/pyproject.toml +++ 
b/libs/partners/qdrant/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-qdrant" -version = "0.1.0" -description = "An integration package connecting Qdrant and LangChain" +name = "gigachain-qdrant" +version = "0.1.1" +description = "An integration package connecting Qdrant and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,7 +12,7 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.52,<0.3" +gigachain-core = ">=0.1.52,<0.3" qdrant-client = "^1.9.0" [tool.poetry.group.test] @@ -25,7 +25,7 @@ pytest-mock = "^3.10.0" syrupy = "^4.0.2" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } requests = "^2.31.0" [tool.poetry.group.codespell] @@ -47,13 +47,13 @@ ruff = "^0.1.5" [tool.poetry.group.typing.dependencies] mypy = "^0.991" -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } [tool.poetry.group.dev] optional = true [tool.poetry.group.dev.dependencies] -langchain-core = { path = "../../core", develop = true } +gigachain-core = { path = "../../core", develop = true } [tool.ruff] select = [ diff --git a/libs/partners/robocorp/pyproject.toml b/libs/partners/robocorp/pyproject.toml index 8aa0204fb98e2..44d2e7bfb69fa 100644 --- a/libs/partners/robocorp/pyproject.toml +++ b/libs/partners/robocorp/pyproject.toml @@ -1,19 +1,14 @@ - [tool.poetry] name = "gigachain-robocorp" -version = "0.0.7" -description = "An integration package connecting Robocorp Action Server and LangChain" +version = "0.0.9.post1" +description = "An integration package connecting Robocorp Action Server and Gigachain" authors = [] readme = "README.md" -repository = "https://github.com/gigachain-ai/gigachain" +repository = "https://github.com/langchain-ai/langchain" license = "MIT" -packages = [ - {include = "langchain_robocorp"} -] - [tool.poetry.urls] -"Source Code" = "https://github.com/gigachain-ai/gigachain/tree/master/libs/partners/robocorp" +"Source Code" = "https://github.com/langchain-ai/langchain/tree/master/libs/partners/robocorp" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" @@ -97,4 +92,3 @@ markers = [ "compile: mark placeholder test used to compile integration tests without running them", ] asyncio_mode = "auto" - diff --git a/libs/partners/robocorp/tests/unit_tests/_openapi3.fixture.json b/libs/partners/robocorp/tests/unit_tests/_openapi3.fixture.json new file mode 100644 index 0000000000000..97f07b218769e --- /dev/null +++ b/libs/partners/robocorp/tests/unit_tests/_openapi3.fixture.json @@ -0,0 +1,1891 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "Sema4.ai Action Server", + "version": "0.11.0" + }, + "servers": [ + { + "url": "http://localhost:8806" + } + ], + "paths": { + "/api/actions/google-calendar/create-event/run": { + "post": { + "summary": "Create Event", + "description": "Creates a new event in the specified calendar.", + "operationId": "create_event", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": { + "event": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the event." + }, + "summary": { + "type": "string", + "title": "Summary", + "description": "A short summary of the event's purpose." 
+ }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Location", + "description": "The physical location of the event." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A more detailed description of the event." + }, + "start": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (inclusive) start time of the event." + }, + "end": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (exclusive) end time of the event." + }, + "recurrence": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Recurrence", + "description": "A list of RRULE, EXRULE, RDATE, and EXDATE lines for a recurring event." + }, + "attendees": { + "anyOf": [ + { + "items": { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + }, + "optional": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Optional", + "description": "Whether this is an optional attendee." + }, + "responseStatus": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Responsestatus", + "description": "The response status of the attendee." + }, + "organizer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Organizer", + "description": "Whether the attendee is the organizer of the event." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Attendee" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attendees", + "description": "A list of attendees." + }, + "reminders": { + "anyOf": [ + { + "properties": { + "useDefault": { + "type": "boolean", + "title": "Usedefault", + "description": "Indicates whether to use the default reminders." 
+ }, + "overrides": { + "anyOf": [ + { + "items": { + "properties": { + "method": { + "type": "string", + "title": "Method", + "description": "The method of the reminder (email or popup)." + }, + "minutes": { + "type": "integer", + "title": "Minutes", + "description": "The number of minutes before the event when the reminder should occur." + } + }, + "type": "object", + "required": [ + "method", + "minutes" + ], + "title": "ReminderOverride" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Overrides", + "description": "A list of overrides for the reminders." + } + }, + "type": "object", + "required": [ + "useDefault" + ], + "title": "Reminder" + }, + { + "type": "null" + } + ], + "description": "Reminders settings for the event." + }, + "organizer": { + "allOf": [ + { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Identity" + } + ], + "description": "The organizer of the event." + } + }, + "type": "object", + "required": [ + "id", + "summary", + "start", + "end", + "organizer" + ], + "title": "Event", + "description": "JSON representation of the Google Calendar V3 event." + }, + "calendar_id": { + "type": "string", + "title": "Calendar Id", + "description": "Calendar identifier which can be found by listing all calendars action.\nDefault value is \"primary\" which indicates the calendar where the user is currently logged in.", + "default": "primary" + } + }, + "type": "object", + "required": [ + "event" + ] + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the event." + }, + "summary": { + "type": "string", + "title": "Summary", + "description": "A short summary of the event's purpose." + }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Location", + "description": "The physical location of the event." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A more detailed description of the event." + }, + "start": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (inclusive) start time of the event." + }, + "end": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." 
+ }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (exclusive) end time of the event." + }, + "recurrence": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Recurrence", + "description": "A list of RRULE, EXRULE, RDATE, and EXDATE lines for a recurring event." + }, + "attendees": { + "anyOf": [ + { + "items": { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + }, + "optional": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Optional", + "description": "Whether this is an optional attendee." + }, + "responseStatus": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Responsestatus", + "description": "The response status of the attendee." + }, + "organizer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Organizer", + "description": "Whether the attendee is the organizer of the event." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Attendee" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attendees", + "description": "A list of attendees." + }, + "reminders": { + "anyOf": [ + { + "properties": { + "useDefault": { + "type": "boolean", + "title": "Usedefault", + "description": "Indicates whether to use the default reminders." + }, + "overrides": { + "anyOf": [ + { + "items": { + "properties": { + "method": { + "type": "string", + "title": "Method", + "description": "The method of the reminder (email or popup)." + }, + "minutes": { + "type": "integer", + "title": "Minutes", + "description": "The number of minutes before the event when the reminder should occur." + } + }, + "type": "object", + "required": [ + "method", + "minutes" + ], + "title": "ReminderOverride" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Overrides", + "description": "A list of overrides for the reminders." + } + }, + "type": "object", + "required": [ + "useDefault" + ], + "title": "Reminder" + }, + { + "type": "null" + } + ], + "description": "Reminders settings for the event." + }, + "organizer": { + "allOf": [ + { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Identity" + } + ], + "description": "The organizer of the event." + } + }, + "type": "object", + "required": [ + "id", + "summary", + "start", + "end", + "organizer" + ], + "title": "Response for Create Event", + "description": "The newly created event." 
+ } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "x-openai-isConsequential": true + } + }, + "/api/actions/google-calendar/list-events/run": { + "post": { + "summary": "List Events", + "description": "List all events in the user's primary calendar between the given dates.\nTo aggregate all events across calendars, call this method for each calendar returned by list_calendars endpoint.", + "operationId": "list_events", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": { + "calendar_id": { + "type": "string", + "title": "Calendar Id", + "description": "Calendar identifier which can be found by listing all calendars action.\nDefault value is \"primary\" which indicates the calendar where the user is currently logged in.", + "default": "primary" + }, + "query": { + "type": "string", + "title": "Query", + "description": "Free text search terms to find events that match these terms in summary, description, location,\nattendee's name / email or working location information.", + "default": "" + }, + "start_date": { + "type": "string", + "title": "Start Date", + "description": "Upper bound (exclusive) for an event's start time to filter by.\nMust be an RFC3339 timestamp with mandatory time zone offset.", + "default": "" + }, + "end_date": { + "type": "string", + "title": "End Date", + "description": "Lower bound (exclusive) for an event's end time to filter by.\nMust be an RFC3339 timestamp with mandatory time zone offset.", + "default": "" + } + }, + "type": "object" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "properties": { + "result": { + "anyOf": [ + { + "properties": { + "events": { + "items": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the event." + }, + "summary": { + "type": "string", + "title": "Summary", + "description": "A short summary of the event's purpose." + }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Location", + "description": "The physical location of the event." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A more detailed description of the event." + }, + "start": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (inclusive) start time of the event." + }, + "end": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." 
+ }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (exclusive) end time of the event." + }, + "recurrence": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Recurrence", + "description": "A list of RRULE, EXRULE, RDATE, and EXDATE lines for a recurring event." + }, + "attendees": { + "anyOf": [ + { + "items": { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + }, + "optional": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Optional", + "description": "Whether this is an optional attendee." + }, + "responseStatus": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Responsestatus", + "description": "The response status of the attendee." + }, + "organizer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Organizer", + "description": "Whether the attendee is the organizer of the event." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Attendee" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attendees", + "description": "A list of attendees." + }, + "reminders": { + "anyOf": [ + { + "properties": { + "useDefault": { + "type": "boolean", + "title": "Usedefault", + "description": "Indicates whether to use the default reminders." + }, + "overrides": { + "anyOf": [ + { + "items": { + "properties": { + "method": { + "type": "string", + "title": "Method", + "description": "The method of the reminder (email or popup)." + }, + "minutes": { + "type": "integer", + "title": "Minutes", + "description": "The number of minutes before the event when the reminder should occur." + } + }, + "type": "object", + "required": [ + "method", + "minutes" + ], + "title": "ReminderOverride" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Overrides", + "description": "A list of overrides for the reminders." + } + }, + "type": "object", + "required": [ + "useDefault" + ], + "title": "Reminder" + }, + { + "type": "null" + } + ], + "description": "Reminders settings for the event." + }, + "organizer": { + "allOf": [ + { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Identity" + } + ], + "description": "The organizer of the event." 
+ } + }, + "type": "object", + "required": [ + "id", + "summary", + "start", + "end", + "organizer" + ], + "title": "Event" + }, + "type": "array", + "title": "Events" + } + }, + "type": "object", + "required": [ + "events" + ], + "title": "EventList" + }, + { + "type": "null" + } + ], + "description": "The result for the action if it ran successfully" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error", + "description": "The error message if the action failed for some reason" + } + }, + "type": "object", + "title": "Response for List Events", + "description": "A list of calendar events that match the query, if defined." + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "x-openai-isConsequential": false + } + }, + "/api/actions/google-calendar/list-calendars/run": { + "post": { + "summary": "List Calendars", + "description": "List all calendars that the user is subscribed to.", + "operationId": "list_calendars", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": {}, + "type": "object" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "properties": { + "result": { + "anyOf": [ + { + "properties": { + "calendars": { + "items": { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the calendar." + }, + "summary": { + "type": "string", + "title": "Summary", + "description": "The name or summary of the calendar." + }, + "timeZone": { + "type": "string", + "title": "Timezone", + "description": "The timezone the calendar is set to, such as 'Europe/Bucharest'." + }, + "selected": { + "type": "boolean", + "title": "Selected", + "description": "A boolean indicating if the calendar is selected by the user in their UI." + }, + "accessRole": { + "type": "string", + "title": "Accessrole", + "description": "The access role of the user with respect to the calendar, e.g., 'owner'." + } + }, + "type": "object", + "required": [ + "id", + "summary", + "timeZone", + "selected", + "accessRole" + ], + "title": "Calendar" + }, + "type": "array", + "title": "Calendars" + } + }, + "type": "object", + "required": [ + "calendars" + ], + "title": "CalendarList" + }, + { + "type": "null" + } + ], + "description": "The result for the action if it ran successfully" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error", + "description": "The error message if the action failed for some reason" + } + }, + "type": "object", + "title": "Response for List Calendars", + "description": "A list of calendars." + } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "x-openai-isConsequential": false + } + }, + "/api/actions/google-calendar/update-event/run": { + "post": { + "summary": "Update Event", + "description": "Update an existing Google Calendar event with dynamic arguments.", + "operationId": "update_event", + "requestBody": { + "content": { + "application/json": { + "schema": { + "properties": { + "event_id": { + "type": "string", + "title": "Event Id", + "description": "Identifier of the event to update. Can be found by listing events in all calendars." 
+ }, + "updates": { + "properties": { + "summary": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Summary", + "description": "A short summary of the event's purpose." + }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Location", + "description": "The physical location of the event." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A more detailed description of the event." + }, + "start": { + "anyOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + }, + { + "type": "null" + } + ], + "description": "The (inclusive) start time of the event." + }, + "end": { + "anyOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + }, + { + "type": "null" + } + ], + "description": "The (exclusive) end time of the event." + }, + "attendees": { + "anyOf": [ + { + "items": { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + }, + "optional": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Optional", + "description": "Whether this is an optional attendee." + }, + "responseStatus": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Responsestatus", + "description": "The response status of the attendee." + }, + "organizer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Organizer", + "description": "Whether the attendee is the organizer of the event." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Attendee" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attendees", + "description": "A list of attendees consisting in email and whether they are mandatory to participate or not." 
+ } + }, + "type": "object", + "title": "Updates", + "description": "A dictionary containing the event attributes to update.\nPossible keys include 'summary', 'description', 'start', 'end', and 'attendees'." + }, + "calendar_id": { + "type": "string", + "title": "Calendar Id", + "description": "Identifier of the calendar where the event is.\nDefault value is \"primary\" which indicates the calendar where the user is currently logged in.", + "default": "primary" + } + }, + "type": "object", + "required": [ + "event_id", + "updates" + ] + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "Successful Response", + "content": { + "application/json": { + "schema": { + "properties": { + "result": { + "anyOf": [ + { + "properties": { + "id": { + "type": "string", + "title": "Id", + "description": "The id of the event." + }, + "summary": { + "type": "string", + "title": "Summary", + "description": "A short summary of the event's purpose." + }, + "location": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Location", + "description": "The physical location of the event." + }, + "description": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description", + "description": "A more detailed description of the event." + }, + "start": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (inclusive) start time of the event." + }, + "end": { + "allOf": [ + { + "properties": { + "date": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Date", + "description": "The date, in the format 'yyyy-mm-dd', if this is an all-day event." + }, + "dateTime": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Datetime", + "description": "The start or end time of the event." + }, + "timeZone": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Timezone", + "description": "The time zone in which the time is specified, formatted as IANA Time Zone Database. For single events this field is optional." + } + }, + "type": "object", + "title": "EventDateTime" + } + ], + "description": "The (exclusive) end time of the event." + }, + "recurrence": { + "anyOf": [ + { + "items": { + "type": "string" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Recurrence", + "description": "A list of RRULE, EXRULE, RDATE, and EXDATE lines for a recurring event." + }, + "attendees": { + "anyOf": [ + { + "items": { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." 
+ }, + "optional": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Optional", + "description": "Whether this is an optional attendee." + }, + "responseStatus": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Responsestatus", + "description": "The response status of the attendee." + }, + "organizer": { + "anyOf": [ + { + "type": "boolean" + }, + { + "type": "null" + } + ], + "title": "Organizer", + "description": "Whether the attendee is the organizer of the event." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Attendee" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Attendees", + "description": "A list of attendees." + }, + "reminders": { + "anyOf": [ + { + "properties": { + "useDefault": { + "type": "boolean", + "title": "Usedefault", + "description": "Indicates whether to use the default reminders." + }, + "overrides": { + "anyOf": [ + { + "items": { + "properties": { + "method": { + "type": "string", + "title": "Method", + "description": "The method of the reminder (email or popup)." + }, + "minutes": { + "type": "integer", + "title": "Minutes", + "description": "The number of minutes before the event when the reminder should occur." + } + }, + "type": "object", + "required": [ + "method", + "minutes" + ], + "title": "ReminderOverride" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "title": "Overrides", + "description": "A list of overrides for the reminders." + } + }, + "type": "object", + "required": [ + "useDefault" + ], + "title": "Reminder" + }, + { + "type": "null" + } + ], + "description": "Reminders settings for the event." + }, + "organizer": { + "allOf": [ + { + "properties": { + "email": { + "type": "string", + "title": "Email", + "description": "The email address of the identity." + }, + "displayName": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Displayname", + "description": "The display name of the identity." + } + }, + "type": "object", + "required": [ + "email" + ], + "title": "Identity" + } + ], + "description": "The organizer of the event." + } + }, + "type": "object", + "required": [ + "id", + "summary", + "start", + "end", + "organizer" + ], + "title": "Event" + }, + { + "type": "null" + } + ], + "description": "The result for the action if it ran successfully" + }, + "error": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Error", + "description": "The error message if the action failed for some reason" + } + }, + "type": "object", + "title": "Response for Update Event", + "description": "Updated event details." 
+ } + } + } + }, + "422": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HTTPValidationError" + } + } + } + } + }, + "x-openai-isConsequential": true + } + } + }, + "components": { + "schemas": { + "HTTPValidationError": { + "properties": { + "errors": { + "items": { + "$ref": "#/components/schemas/ValidationError" + }, + "type": "array", + "title": "Errors" + } + }, + "type": "object", + "title": "HTTPValidationError" + }, + "ValidationError": { + "properties": { + "loc": { + "items": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ] + }, + "type": "array", + "title": "Location" + }, + "msg": { + "type": "string", + "title": "Message" + }, + "type": { + "type": "string", + "title": "Error Type" + } + }, + "type": "object", + "required": [ + "loc", + "msg", + "type" + ], + "title": "ValidationError" + } + } + } +} diff --git a/libs/partners/together/pyproject.toml b/libs/partners/together/pyproject.toml index fcf657c8fd310..355b776f0d277 100644 --- a/libs/partners/together/pyproject.toml +++ b/libs/partners/together/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-together" -version = "0.1.2" -description = "An integration package connecting Together AI and LangChain" +name = "gigachain-together" +version = "0.1.3" +description = "An integration package connecting Together AI and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -12,8 +12,8 @@ license = "MIT" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = ">=0.1.52,<0.3" -gigachain-openai = "^0.1.3" +gigachain-core = ">=0.2.2,<0.3" +gigachain-openai = "^0.1.8" requests = "^2" aiohttp = "^3.9.1" @@ -30,7 +30,6 @@ pytest-asyncio = "^0.21.1" gigachain-openai = { path = "../openai", develop = true } gigachain-core = { path = "../../core", develop = true } docarray = "^0.32.1" -pydantic = "^1.10.9" gigachain-standard-tests = { path = "../../standard-tests", develop = true } [tool.poetry.group.codespell] @@ -43,6 +42,12 @@ codespell = "^2.2.0" optional = true [tool.poetry.group.test_integration.dependencies] +# Support Python 3.8 and 3.12+. 
+numpy = [ + {version = "^1", python = "<3.12"}, + {version = "^1.26.0", python = ">=3.12"} +] + [tool.poetry.group.lint] optional = true @@ -64,13 +69,20 @@ optional = true [tool.poetry.group.dev.dependencies] gigachain-core = { path = "../../core", develop = true } -[tool.ruff] +[tool.ruff.lint] select = [ "E", # pycodestyle "F", # pyflakes "I", # isort + "D", # pydocstyle ] +[tool.ruff.lint.pydocstyle] +convention = "google" + +[tool.ruff.lint.per-file-ignores] +"tests/**" = ["D"] # ignore docstring checks for tests + [tool.mypy] disallow_untyped_defs = "True" diff --git a/libs/partners/upstage/pyproject.toml b/libs/partners/upstage/pyproject.toml index c51c640852d1d..baef74fb7ce34 100644 --- a/libs/partners/upstage/pyproject.toml +++ b/libs/partners/upstage/pyproject.toml @@ -2,7 +2,7 @@ [tool.poetry] name = "gigachain-upstage" version = "0.1.5" -description = "An integration package connecting Upstage and LangChain" +description = "An integration package connecting Upstage and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -16,8 +16,8 @@ packages = [ [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.52,<0.3" -langchain-openai = "^0.1.3" +gigachain-core = ">=0.1.52,<0.3" +gigachain-openai = "^0.1.3" pymupdf = "^1.24.1" requests = "^2.31.0" diff --git a/libs/partners/voyageai/pyproject.toml b/libs/partners/voyageai/pyproject.toml index 29038ee01e40e..06a63edc497b9 100644 --- a/libs/partners/voyageai/pyproject.toml +++ b/libs/partners/voyageai/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] -name = "langchain-voyageai" +name = "gigachain-voyageai" version = "0.1.1" -description = "An integration package connecting VoyageAI and LangChain" +description = "An integration package connecting VoyageAI and Gigachain" authors = [] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/base_store.py b/libs/standard-tests/langchain_standard_tests/integration_tests/base_store.py new file mode 100644 index 0000000000000..8f74d066a45d6 --- /dev/null +++ b/libs/standard-tests/langchain_standard_tests/integration_tests/base_store.py @@ -0,0 +1,276 @@ +from abc import ABC, abstractmethod +from typing import AsyncGenerator, Generator, Generic, Tuple, TypeVar + +import pytest +from langchain_core.stores import BaseStore + +V = TypeVar("V") + + +class BaseStoreSyncTests(ABC, Generic[V]): + """Test suite for checking the key-value API of a BaseStore. + + This test suite verifies the basic key-value API of a BaseStore. + + The test suite is designed for synchronous key-value stores. + + Implementers should subclass this test suite and provide a fixture + that returns an empty key-value store for each test. + """ + + @abstractmethod + @pytest.fixture + def kv_store(self) -> BaseStore[str, V]: + """Get the key-value store class to test. + + The returned key-value store should be EMPTY. 
+        """
+
+    @abstractmethod
+    @pytest.fixture()
+    def three_values(self) -> Tuple[V, V, V]:
+        """Three example values that will be used in the tests."""
+        pass
+
+    def test_three_values(self, three_values: Tuple[V, V, V]) -> None:
+        """Test that the fixture provides three values."""
+        assert isinstance(three_values, tuple)
+        assert len(three_values) == 3
+
+    def test_kv_store_is_empty(self, kv_store: BaseStore[str, V]) -> None:
+        """Test that the key-value store is empty."""
+        keys = ["foo", "bar", "buzz"]
+        assert kv_store.mget(keys) == [None, None, None]
+
+    def test_set_and_get_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test setting and getting values in the key-value store."""
+        foo = three_values[0]
+        bar = three_values[1]
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+        assert kv_store.mget(["foo", "bar"]) == [foo, bar]
+
+    def test_store_still_empty(self, kv_store: BaseStore[str, V]) -> None:
+        """This test should follow a test that sets values.
+
+        This just verifies that the fixture is set up properly to be empty
+        after each test.
+        """
+        keys = ["foo"]
+        assert kv_store.mget(keys) == [None]
+
+    def test_delete_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test deleting values from the key-value store."""
+        foo = three_values[0]
+        bar = three_values[1]
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+        kv_store.mdelete(["foo"])
+        assert kv_store.mget(["foo", "bar"]) == [None, bar]
+
+    def test_delete_bulk_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can delete several values at once."""
+        foo, bar, buz = three_values
+        key_values = [("foo", foo), ("bar", bar), ("buz", buz)]
+        kv_store.mset(key_values)
+        kv_store.mdelete(["foo", "buz"])
+        assert kv_store.mget(["foo", "bar", "buz"]) == [None, bar, None]
+
+    def test_delete_missing_keys(self, kv_store: BaseStore[str, V]) -> None:
+        """Deleting missing keys should not raise an exception."""
+        kv_store.mdelete(["foo"])
+        kv_store.mdelete(["foo", "bar", "baz"])
+
+    def test_set_values_is_idempotent(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Setting values by key should be idempotent."""
+        foo, bar, _ = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+        kv_store.mset(key_value_pairs)
+        assert kv_store.mget(["foo", "bar"]) == [foo, bar]
+        assert sorted(kv_store.yield_keys()) == ["bar", "foo"]
+
+    def test_get_can_get_same_value(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that the same value can be retrieved multiple times."""
+        foo, bar, _ = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+        # This test assumes kv_store does not handle duplicates by default
+        assert kv_store.mget(["foo", "bar", "foo", "bar"]) == [foo, bar, foo, bar]
+
+    def test_overwrite_values_by_key(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can overwrite values by key using mset."""
+        foo, bar, buzz = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+
+        # Now overwrite value of key "foo"
+        new_key_value_pairs = [("foo", buzz)]
+        kv_store.mset(new_key_value_pairs)
+
+        # Check that the value has been updated
+        assert kv_store.mget(["foo", "bar"]) == [buzz, bar]
+
+    def test_yield_keys(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can yield keys from the store."""
+        foo, bar, buzz = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        kv_store.mset(key_value_pairs)
+
+        generator = kv_store.yield_keys()
+        assert isinstance(generator, Generator)
+
+        assert sorted(kv_store.yield_keys()) == ["bar", "foo"]
+        assert sorted(kv_store.yield_keys(prefix="foo")) == ["foo"]
+
+
+class BaseStoreAsyncTests(ABC):
+    """Test suite for checking the key-value API of a BaseStore.
+
+    This test suite verifies the basic key-value API of a BaseStore.
+
+    The test suite is designed for asynchronous key-value stores.
+
+    Implementers should subclass this test suite and provide a fixture
+    that returns an empty key-value store for each test.
+    """
+
+    @abstractmethod
+    @pytest.fixture
+    async def kv_store(self) -> BaseStore[str, V]:
+        """Get the key-value store class to test.
+
+        The returned key-value store should be EMPTY.
+        """
+
+    @abstractmethod
+    @pytest.fixture()
+    def three_values(self) -> Tuple[V, V, V]:
+        """Three example values that will be used in the tests."""
+        pass
+
+    async def test_three_values(self, three_values: Tuple[V, V, V]) -> None:
+        """Test that the fixture provides three values."""
+        assert isinstance(three_values, tuple)
+        assert len(three_values) == 3
+
+    async def test_kv_store_is_empty(self, kv_store: BaseStore[str, V]) -> None:
+        """Test that the key-value store is empty."""
+        keys = ["foo", "bar", "buzz"]
+        assert await kv_store.amget(keys) == [None, None, None]
+
+    async def test_set_and_get_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test setting and getting values in the key-value store."""
+        foo = three_values[0]
+        bar = three_values[1]
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+        assert await kv_store.amget(["foo", "bar"]) == [foo, bar]
+
+    async def test_store_still_empty(self, kv_store: BaseStore[str, V]) -> None:
+        """This test should follow a test that sets values.
+
+        This just verifies that the fixture is set up properly to be empty
+        after each test.
+        """
+        keys = ["foo"]
+        assert await kv_store.amget(keys) == [None]
+
+    async def test_delete_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test deleting values from the key-value store."""
+        foo = three_values[0]
+        bar = three_values[1]
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+        await kv_store.amdelete(["foo"])
+        assert await kv_store.amget(["foo", "bar"]) == [None, bar]
+
+    async def test_delete_bulk_values(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can delete several values at once."""
+        foo, bar, buz = three_values
+        key_values = [("foo", foo), ("bar", bar), ("buz", buz)]
+        await kv_store.amset(key_values)
+        await kv_store.amdelete(["foo", "buz"])
+        assert await kv_store.amget(["foo", "bar", "buz"]) == [None, bar, None]
+
+    async def test_delete_missing_keys(self, kv_store: BaseStore[str, V]) -> None:
+        """Deleting missing keys should not raise an exception."""
+        await kv_store.amdelete(["foo"])
+        await kv_store.amdelete(["foo", "bar", "baz"])
+
+    async def test_set_values_is_idempotent(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Setting values by key should be idempotent."""
+        foo, bar, _ = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+        await kv_store.amset(key_value_pairs)
+        assert await kv_store.amget(["foo", "bar"]) == [foo, bar]
+        assert sorted([key async for key in kv_store.ayield_keys()]) == ["bar", "foo"]
+
+    async def test_get_can_get_same_value(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that the same value can be retrieved multiple times."""
+        foo, bar, _ = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+        # This test assumes kv_store does not handle duplicates by default
+        assert await kv_store.amget(["foo", "bar", "foo", "bar"]) == [
+            foo,
+            bar,
+            foo,
+            bar,
+        ]
+
+    async def test_overwrite_values_by_key(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can overwrite values by key using mset."""
+        foo, bar, buzz = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+
+        # Now overwrite value of key "foo"
+        new_key_value_pairs = [("foo", buzz)]
+        await kv_store.amset(new_key_value_pairs)
+
+        # Check that the value has been updated
+        assert await kv_store.amget(["foo", "bar"]) == [buzz, bar]
+
+    async def test_yield_keys(
+        self, kv_store: BaseStore[str, V], three_values: Tuple[V, V, V]
+    ) -> None:
+        """Test that we can yield keys from the store."""
+        foo, bar, buzz = three_values
+        key_value_pairs = [("foo", foo), ("bar", bar)]
+        await kv_store.amset(key_value_pairs)
+
+        generator = kv_store.ayield_keys()
+        assert isinstance(generator, AsyncGenerator)
+
+        assert sorted([key async for key in kv_store.ayield_keys()]) == ["bar", "foo"]
+        assert sorted([key async for key in kv_store.ayield_keys(prefix="foo")]) == [
+            "foo"
+        ]
diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/cache.py b/libs/standard-tests/langchain_standard_tests/integration_tests/cache.py
new file mode 100644
index 0000000000000..fe84d8450cf47
--- /dev/null
+++ b/libs/standard-tests/langchain_standard_tests/integration_tests/cache.py
@@ -0,0 +1,192 @@
+from abc import ABC, abstractmethod
+
+import pytest
+from langchain_core.caches import BaseCache
+from langchain_core.outputs import Generation
+
+
+class SyncCacheTestSuite(ABC):
+    """Test suite for checking the BaseCache API of a caching layer for LLMs.
+
+    This test suite verifies the basic caching API of a caching layer for LLMs.
+
+    The test suite is designed for synchronous caching layers.
+
+    Implementers should subclass this test suite and provide a fixture
+    that returns an empty cache for each test.
+    """
+
+    @abstractmethod
+    @pytest.fixture
+    def cache(self) -> BaseCache:
+        """Get the cache class to test.
+
+        The returned cache should be EMPTY.
+        """
+
+    def get_sample_prompt(self) -> str:
+        """Return a sample prompt for testing."""
+        return "Sample prompt for testing."
+
+    def get_sample_llm_string(self) -> str:
+        """Return a sample LLM string for testing."""
+        return "Sample LLM string configuration."
+
+    def get_sample_generation(self) -> Generation:
+        """Return a sample Generation object for testing."""
+        return Generation(
+            text="Sample generated text.", generation_info={"reason": "test"}
+        )
+
+    def test_cache_is_empty(self, cache: BaseCache) -> None:
+        """Test that the cache is empty."""
+        assert (
+            cache.lookup(self.get_sample_prompt(), self.get_sample_llm_string()) is None
+        )
+
+    def test_update_cache(self, cache: BaseCache) -> None:
+        """Test updating the cache."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        cache.update(prompt, llm_string, [generation])
+        assert cache.lookup(prompt, llm_string) == [generation]
+
+    def test_cache_still_empty(self, cache: BaseCache) -> None:
+        """This test should follow a test that updates the cache.
+
+        This just verifies that the fixture is set up properly to be empty
+        after each test.
+        """
+        assert (
+            cache.lookup(self.get_sample_prompt(), self.get_sample_llm_string()) is None
+        )
+
+    def test_clear_cache(self, cache: BaseCache) -> None:
+        """Test clearing the cache."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        cache.update(prompt, llm_string, [generation])
+        cache.clear()
+        assert cache.lookup(prompt, llm_string) is None
+
+    def test_cache_miss(self, cache: BaseCache) -> None:
+        """Test cache miss."""
+        assert cache.lookup("Nonexistent prompt", self.get_sample_llm_string()) is None
+
+    def test_cache_hit(self, cache: BaseCache) -> None:
+        """Test cache hit."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        cache.update(prompt, llm_string, [generation])
+        assert cache.lookup(prompt, llm_string) == [generation]
+
+    def test_update_cache_with_multiple_generations(self, cache: BaseCache) -> None:
+        """Test updating the cache with multiple Generation objects."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generations = [
+            self.get_sample_generation(),
+            Generation(text="Another generated text."),
+        ]
+        cache.update(prompt, llm_string, generations)
+        assert cache.lookup(prompt, llm_string) == generations
+
+
+class AsyncCacheTestSuite(ABC):
+    """Test suite for checking the BaseCache API of a caching layer for LLMs.
+
+    This test suite verifies the basic caching API of a caching layer for LLMs.
+
+    The test suite is designed for asynchronous caching layers.
+
+    Implementers should subclass this test suite and provide a fixture
+    that returns an empty cache for each test.
+    """
+
+    @abstractmethod
+    @pytest.fixture
+    async def cache(self) -> BaseCache:
+        """Get the cache class to test.
+
+        The returned cache should be EMPTY.
+        """
+
+    def get_sample_prompt(self) -> str:
+        """Return a sample prompt for testing."""
+        return "Sample prompt for testing."
+
+    def get_sample_llm_string(self) -> str:
+        """Return a sample LLM string for testing."""
+        return "Sample LLM string configuration."
+
+    def get_sample_generation(self) -> Generation:
+        """Return a sample Generation object for testing."""
+        return Generation(
+            text="Sample generated text.", generation_info={"reason": "test"}
+        )
+
+    async def test_cache_is_empty(self, cache: BaseCache) -> None:
+        """Test that the cache is empty."""
+        assert (
+            await cache.alookup(self.get_sample_prompt(), self.get_sample_llm_string())
+            is None
+        )
+
+    async def test_update_cache(self, cache: BaseCache) -> None:
+        """Test updating the cache."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        await cache.aupdate(prompt, llm_string, [generation])
+        assert await cache.alookup(prompt, llm_string) == [generation]
+
+    async def test_cache_still_empty(self, cache: BaseCache) -> None:
+        """This test should follow a test that updates the cache.
+
+        This just verifies that the fixture is set up properly to be empty
+        after each test.
+        """
+        assert (
+            await cache.alookup(self.get_sample_prompt(), self.get_sample_llm_string())
+            is None
+        )
+
+    async def test_clear_cache(self, cache: BaseCache) -> None:
+        """Test clearing the cache."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        await cache.aupdate(prompt, llm_string, [generation])
+        await cache.aclear()
+        assert await cache.alookup(prompt, llm_string) is None
+
+    async def test_cache_miss(self, cache: BaseCache) -> None:
+        """Test cache miss."""
+        assert (
+            await cache.alookup("Nonexistent prompt", self.get_sample_llm_string())
+            is None
+        )
+
+    async def test_cache_hit(self, cache: BaseCache) -> None:
+        """Test cache hit."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generation = self.get_sample_generation()
+        await cache.aupdate(prompt, llm_string, [generation])
+        assert await cache.alookup(prompt, llm_string) == [generation]
+
+    async def test_update_cache_with_multiple_generations(
+        self, cache: BaseCache
+    ) -> None:
+        """Test updating the cache with multiple Generation objects."""
+        prompt = self.get_sample_prompt()
+        llm_string = self.get_sample_llm_string()
+        generations = [
+            self.get_sample_generation(),
+            Generation(text="Another generated text."),
+        ]
+        await cache.aupdate(prompt, llm_string, generations)
+        assert await cache.alookup(prompt, llm_string) == generations
diff --git a/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py
new file mode 100644
index 0000000000000..d65eb12934947
--- /dev/null
+++ b/libs/standard-tests/langchain_standard_tests/integration_tests/vectorstores.py
@@ -0,0 +1,301 @@
+"""Test suite to test vectorstores."""
+from abc import ABC, abstractmethod
+
+import pytest
+from langchain_core.documents import Document
+from langchain_core.embeddings.fake import DeterministicFakeEmbedding, Embeddings
+from langchain_core.vectorstores import VectorStore
+
+# Arbitrarily chosen.
Using a small embedding size +# so tests are faster and easier to debug. +EMBEDDING_SIZE = 6 + + +class ReadWriteTestSuite(ABC): + """Test suite for checking the read-write API of a vectorstore. + + This test suite verifies the basic read-write API of a vectorstore. + + The test suite is designed for synchronous vectorstores. + + Implementers should subclass this test suite and provide a fixture + that returns an empty vectorstore for each test. + + The fixture should use the `get_embeddings` method to get a pre-defined + embeddings model that should be used for this test suite. + """ + + @abstractmethod + @pytest.fixture + def vectorstore(self) -> VectorStore: + """Get the vectorstore class to test. + + The returned vectorstore should be EMPTY. + """ + + @staticmethod + def get_embeddings() -> Embeddings: + """A pre-defined embeddings model that should be used for this test.""" + return DeterministicFakeEmbedding( + size=EMBEDDING_SIZE, + ) + + def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None: + """Test that the vectorstore is empty.""" + assert vectorstore.similarity_search("foo", k=1) == [] + + def test_add_documents(self, vectorstore: VectorStore) -> None: + """Test adding documents into the vectorstore.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + vectorstore.add_documents(documents) + documents = vectorstore.similarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="foo", metadata={"id": 1}), + ] + + def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None: + """This test should follow a test that adds documents. + + This just verifies that the fixture is set up properly to be empty + after each test. 
+ """ + assert vectorstore.similarity_search("foo", k=1) == [] + + def test_deleting_documents(self, vectorstore: VectorStore) -> None: + """Test deleting documents from the vectorstore.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + vectorstore.add_documents(documents, ids=["1", "2"]) + vectorstore.delete(["1"]) + documents = vectorstore.similarity_search("foo", k=1) + assert documents == [Document(page_content="bar", metadata={"id": 2})] + + def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None: + """Test that we can delete several documents at once.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="baz", metadata={"id": 3}), + ] + + vectorstore.add_documents(documents, ids=["1", "2", "3"]) + vectorstore.delete(["1", "2"]) + documents = vectorstore.similarity_search("foo", k=1) + assert documents == [Document(page_content="baz", metadata={"id": 3})] + + def test_delete_missing_content(self, vectorstore: VectorStore) -> None: + """Deleting missing content should not raise an exception.""" + vectorstore.delete(["1"]) + vectorstore.delete(["1", "2", "3"]) + + def test_add_documents_with_ids_is_idempotent( + self, vectorstore: VectorStore + ) -> None: + """Adding by ID should be idempotent.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + vectorstore.add_documents(documents, ids=["1", "2"]) + vectorstore.add_documents(documents, ids=["1", "2"]) + documents = vectorstore.similarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="foo", metadata={"id": 1}), + ] + + def test_add_documents_without_ids_gets_duplicated( + self, vectorstore: VectorStore + ) -> None: + """Adding documents without specifying IDs should duplicate content.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + + vectorstore.add_documents(documents) + vectorstore.add_documents(documents) + documents = vectorstore.similarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="bar", metadata={"id": 2}), + ] + + def test_add_documents_by_id_with_mutation(self, vectorstore: VectorStore) -> None: + """Test that we can overwrite by ID using add_documents.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + + vectorstore.add_documents(documents=documents, ids=["1", "2"]) + + # Now over-write content of ID 1 + new_documents = [ + Document( + page_content="new foo", metadata={"id": 1, "some_other_field": "foo"} + ), + ] + + vectorstore.add_documents(documents=new_documents, ids=["1"]) + + # Check that the content has been updated + documents = vectorstore.similarity_search("new foo", k=2) + assert documents == [ + Document( + page_content="new foo", metadata={"id": 1, "some_other_field": "foo"} + ), + Document(page_content="bar", metadata={"id": 2}), + ] + + +class AsyncReadWriteTestSuite(ABC): + """Test suite for checking the **async** read-write API of a vectorstore. + + This test suite verifies the basic read-write API of a vectorstore. + + The test suite is designed for asynchronous vectorstores. 
+ + Implementers should subclass this test suite and provide a fixture + that returns an empty vectorstore for each test. + + The fixture should use the `get_embeddings` method to get a pre-defined + embeddings model that should be used for this test suite. + """ + + @abstractmethod + @pytest.fixture + async def vectorstore(self) -> VectorStore: + """Get the vectorstore class to test. + + The returned vectorstore should be EMPTY. + """ + + @staticmethod + def get_embeddings() -> Embeddings: + """A pre-defined embeddings model that should be used for this test.""" + return DeterministicFakeEmbedding( + size=EMBEDDING_SIZE, + ) + + async def test_vectorstore_is_empty(self, vectorstore: VectorStore) -> None: + """Test that the vectorstore is empty.""" + assert await vectorstore.asimilarity_search("foo", k=1) == [] + + async def test_add_documents(self, vectorstore: VectorStore) -> None: + """Test adding documents into the vectorstore.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + await vectorstore.aadd_documents(documents) + documents = await vectorstore.asimilarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="foo", metadata={"id": 1}), + ] + + async def test_vectorstore_still_empty(self, vectorstore: VectorStore) -> None: + """This test should follow a test that adds documents. + + This just verifies that the fixture is set up properly to be empty + after each test. + """ + assert await vectorstore.asimilarity_search("foo", k=1) == [] + + async def test_deleting_documents(self, vectorstore: VectorStore) -> None: + """Test deleting documents from the vectorstore.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + await vectorstore.aadd_documents(documents, ids=["1", "2"]) + await vectorstore.adelete(["1"]) + documents = await vectorstore.asimilarity_search("foo", k=1) + assert documents == [Document(page_content="bar", metadata={"id": 2})] + + async def test_deleting_bulk_documents(self, vectorstore: VectorStore) -> None: + """Test that we can delete several documents at once.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="baz", metadata={"id": 3}), + ] + + await vectorstore.aadd_documents(documents, ids=["1", "2", "3"]) + await vectorstore.adelete(["1", "2"]) + documents = await vectorstore.asimilarity_search("foo", k=1) + assert documents == [Document(page_content="baz", metadata={"id": 3})] + + async def test_delete_missing_content(self, vectorstore: VectorStore) -> None: + """Deleting missing content should not raise an exception.""" + await vectorstore.adelete(["1"]) + await vectorstore.adelete(["1", "2", "3"]) + + async def test_add_documents_with_ids_is_idempotent( + self, vectorstore: VectorStore + ) -> None: + """Adding by ID should be idempotent.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + await vectorstore.aadd_documents(documents, ids=["1", "2"]) + await vectorstore.aadd_documents(documents, ids=["1", "2"]) + documents = await vectorstore.asimilarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="foo", metadata={"id": 1}), + ] + + async def test_add_documents_without_ids_gets_duplicated( + self, 
vectorstore: VectorStore + ) -> None: + """Adding documents without specifying IDs should duplicate content.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + + await vectorstore.aadd_documents(documents) + await vectorstore.aadd_documents(documents) + documents = await vectorstore.asimilarity_search("bar", k=2) + assert documents == [ + Document(page_content="bar", metadata={"id": 2}), + Document(page_content="bar", metadata={"id": 2}), + ] + + async def test_add_documents_by_id_with_mutation( + self, vectorstore: VectorStore + ) -> None: + """Test that we can overwrite by ID using add_documents.""" + documents = [ + Document(page_content="foo", metadata={"id": 1}), + Document(page_content="bar", metadata={"id": 2}), + ] + + await vectorstore.aadd_documents(documents=documents, ids=["1", "2"]) + + # Now over-write content of ID 1 + new_documents = [ + Document( + page_content="new foo", metadata={"id": 1, "some_other_field": "foo"} + ), + ] + + await vectorstore.aadd_documents(documents=new_documents, ids=["1"]) + + # Check that the content has been updated + documents = await vectorstore.asimilarity_search("new foo", k=2) + assert documents == [ + Document( + page_content="new foo", metadata={"id": 1, "some_other_field": "foo"} + ), + Document(page_content="bar", metadata={"id": 2}), + ] diff --git a/libs/standard-tests/pyproject.toml b/libs/standard-tests/pyproject.toml index 3b926090b7407..1a8dcbc7560b3 100644 --- a/libs/standard-tests/pyproject.toml +++ b/libs/standard-tests/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "gigachain-standard-tests" -version = "0.1.0" -description = "Standard tests for gigachain implementations" +version = "0.1.1" +description = "Standard tests for Gigachain implementations" authors = ["Erick Friis "] readme = "README.md" repository = "https://github.com/langchain-ai/langchain" @@ -14,12 +14,14 @@ license = "MIT" python = ">=3.8.1,<4.0" gigachain-core = ">=0.1.40,<0.3" pytest = ">=7,<9" +httpx = "^0.27.0" [tool.poetry.group.test] optional = true [tool.poetry.group.test.dependencies] gigachain-core = { path = "../core", develop = true } +pytest-asyncio = "^0.23.7" [tool.poetry.group.test_integration] optional = true @@ -59,3 +61,24 @@ omit = ["tests/*"] [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +# --strict-markers will raise errors on unknown marks. +# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks +# +# https://docs.pytest.org/en/7.1.x/reference/reference.html +# --strict-config any warnings encountered while parsing the `pytest` +# section of the configuration file raise errors. +# +# https://github.com/tophat/syrupy +# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite. +addopts = "--strict-markers --strict-config --durations=5 -vv" +# Registering custom markers. 
+# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers +markers = [ + "requires: mark tests as requiring a specific library", + "scheduled: mark tests to run in scheduled testing", + "compile: mark placeholder test used to compile integration tests without running them", +] +asyncio_mode = "auto" + diff --git a/libs/standard-tests/tests/unit_tests/__init__.py b/libs/standard-tests/tests/unit_tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/libs/standard-tests/tests/unit_tests/test_in_memory_base_store.py b/libs/standard-tests/tests/unit_tests/test_in_memory_base_store.py new file mode 100644 index 0000000000000..245b096554eae --- /dev/null +++ b/libs/standard-tests/tests/unit_tests/test_in_memory_base_store.py @@ -0,0 +1,30 @@ +"""Tests for the InMemoryStore class.""" +from typing import Tuple + +import pytest +from langchain_core.stores import InMemoryStore + +from langchain_standard_tests.integration_tests.base_store import ( + BaseStoreAsyncTests, + BaseStoreSyncTests, +) + + +class TestInMemoryStore(BaseStoreSyncTests): + @pytest.fixture + def three_values(self) -> Tuple[str, str, str]: + return "foo", "bar", "buzz" + + @pytest.fixture + def kv_store(self) -> InMemoryStore: + return InMemoryStore() + + +class TestInMemoryStoreAsync(BaseStoreAsyncTests): + @pytest.fixture + def three_values(self) -> Tuple[str, str, str]: # type: ignore + return "foo", "bar", "buzz" + + @pytest.fixture + async def kv_store(self) -> InMemoryStore: + return InMemoryStore() diff --git a/libs/standard-tests/tests/unit_tests/test_in_memory_cache.py b/libs/standard-tests/tests/unit_tests/test_in_memory_cache.py new file mode 100644 index 0000000000000..4f67a876490d0 --- /dev/null +++ b/libs/standard-tests/tests/unit_tests/test_in_memory_cache.py @@ -0,0 +1,19 @@ +import pytest +from langchain_core.caches import InMemoryCache + +from langchain_standard_tests.integration_tests.cache import ( + AsyncCacheTestSuite, + SyncCacheTestSuite, +) + + +class TestInMemoryCache(SyncCacheTestSuite): + @pytest.fixture + def cache(self) -> InMemoryCache: + return InMemoryCache() + + +class TestInMemoryCacheAsync(AsyncCacheTestSuite): + @pytest.fixture + async def cache(self) -> InMemoryCache: + return InMemoryCache() diff --git a/libs/standard-tests/tests/unit_tests/test_in_memory_vectorstore.py b/libs/standard-tests/tests/unit_tests/test_in_memory_vectorstore.py new file mode 100644 index 0000000000000..d34bf25c3881c --- /dev/null +++ b/libs/standard-tests/tests/unit_tests/test_in_memory_vectorstore.py @@ -0,0 +1,28 @@ +import pytest +from langchain_core.vectorstores import VectorStore + +from langchain_standard_tests.integration_tests.vectorstores import ( + AsyncReadWriteTestSuite, + ReadWriteTestSuite, +) + +# We'll need to move this dependency to core +pytest.importorskip("langchain_community") + +from langchain_community.vectorstores.inmemory import ( # type: ignore # noqa + InMemoryVectorStore, +) + + +class TestInMemoryVectorStore(ReadWriteTestSuite): + @pytest.fixture + def vectorstore(self) -> VectorStore: + embeddings = self.get_embeddings() + return InMemoryVectorStore(embedding=embeddings) + + +class TestAysncInMemoryVectorStore(AsyncReadWriteTestSuite): + @pytest.fixture + async def vectorstore(self) -> VectorStore: + embeddings = self.get_embeddings() + return InMemoryVectorStore(embedding=embeddings) diff --git a/libs/text-splitters/extended_testing_deps.txt b/libs/text-splitters/extended_testing_deps.txt new file mode 100644 index 
0000000000000..8d45ad3ea8b0b --- /dev/null +++ b/libs/text-splitters/extended_testing_deps.txt @@ -0,0 +1,2 @@ +lxml>=4.9.3,<6.0 +beautifulsoup4>=4.12.3,<5 diff --git a/libs/text-splitters/pyproject.toml b/libs/text-splitters/pyproject.toml index c29aba1d483ae..86877168bc8df 100644 --- a/libs/text-splitters/pyproject.toml +++ b/libs/text-splitters/pyproject.toml @@ -1,29 +1,23 @@ - [tool.poetry] name = "gigachain-text-splitters" -version = "0.2.0" -description = "GigaChain text splitting utilities" +version = "0.2.2" +description = "Gigachain text splitting utilities" authors = [] license = "MIT" readme = "README.md" -repository = "https://github.com/ai-forever/gigachain" -packages = [ - {include = "langchain_text_splitters"} -] +repository = "https://github.com/langchain-ai/langchain" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain-core = "^0.2.0" -lxml = {version = ">=4.9.3,<6.0", optional = true} -beautifulsoup4 = {version = "^4.12.3", optional = true} +gigachain-core = "^0.2.10" [tool.poetry.group.lint] optional = true [tool.poetry.group.lint.dependencies] ruff = "^0.1.5" -gigachain-core = {path = "../core", develop = true} +gigachain-core = { path = "../core", develop = true } [tool.poetry.group.typing] optional = true @@ -40,7 +34,7 @@ optional = true [tool.poetry.group.dev.dependencies] jupyter = "^1.0.0" -gigachain-core = {path = "../core", develop = true} +gigachain-core = { path = "../core", develop = true } [tool.poetry.group.test] optional = true @@ -51,26 +45,21 @@ optional = true # Any dependencies that do not meet that criteria will be removed. pytest = "^7.3.0" freezegun = "^1.2.2" -pytest-mock = "^3.10.0" +pytest-mock = "^3.10.0" pytest-watcher = "^0.3.4" pytest-asyncio = "^0.21.1" pytest-profiling = "^1.7.0" -gigachain-core = {path = "../core", develop = true} +gigachain-core = { path = "../core", develop = true } + [tool.poetry.group.test_integration] -optional = true dependencies = {} -[tool.poetry.extras] -extended_testing = [ - "lxml", "beautifulsoup4" -] - [tool.ruff.lint] select = [ - "E", # pycodestyle - "F", # pyflakes - "I", # isort + "E", # pycodestyle + "F", # pyflakes + "I", # isort "T201", # print ] @@ -78,11 +67,18 @@ select = [ disallow_untyped_defs = "True" [[tool.mypy.overrides]] -module = ["transformers", "sentence_transformers", "nltk.tokenize", "konlpy.tag", "bs4"] +module = [ + "transformers", + "sentence_transformers", + "nltk.tokenize", + "konlpy.tag", + "bs4", + "pytest", +] ignore_missing_imports = "True" [tool.coverage.run] -omit = ["tests/*", ] +omit = ["tests/*"] [build-system] requires = ["poetry-core>=1.0.0"] diff --git a/libs/text-splitters/tests/test_data/test_splitter.xslt b/libs/text-splitters/tests/test_data/test_splitter.xslt new file mode 100644 index 0000000000000..cbb5828bf1242 --- /dev/null +++ b/libs/text-splitters/tests/test_data/test_splitter.xslt @@ -0,0 +1,9 @@ + + + + + + + + \ No newline at end of file diff --git a/templates/anthropic-iterative-search/pyproject.toml b/templates/anthropic-iterative-search/pyproject.toml index 99b3f3e60d1bb..7422e112bd371 100644 --- a/templates/anthropic-iterative-search/pyproject.toml +++ b/templates/anthropic-iterative-search/pyproject.toml @@ -1,4 +1,3 @@ - [tool.poetry] name = "anthropic-iterative-search" version = "0.0.1" @@ -15,6 +14,10 @@ langchain-anthropic = "^0.1.4" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "anthropic_iterative_search" +export_attr = "chain" + [tool.templates-hub] use-case = "research" author = 
"LangChain" diff --git a/templates/basic-critique-revise/pyproject.toml b/templates/basic-critique-revise/pyproject.toml index 01cfd40116205..1ef1aa9bf3d62 100644 --- a/templates/basic-critique-revise/pyproject.toml +++ b/templates/basic-critique-revise/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" openai = "<2" [tool.poetry.group.dev.dependencies] @@ -15,7 +15,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "basic_critique_revise" export_attr = "chain" diff --git a/templates/bedrock-jcvd/pyproject.toml b/templates/bedrock-jcvd/pyproject.toml index 045fdfc3136c3..fea760fe593ff 100644 --- a/templates/bedrock-jcvd/pyproject.toml +++ b/templates/bedrock-jcvd/pyproject.toml @@ -6,14 +6,14 @@ authors = ["JGalego "] readme = "README.md" [tool.poetry.dependencies] -python = ">=3.10, <4.0" +python = "^3.11" uvicorn = "^0.23.2" -langserve = {extras = ["server"], version = ">=0.0.30"} +gigaserve = {extras = ["server"], version = ">=0.0.30"} pydantic = "<2" boto3 = "^1.33.10" gigachain = "^0.1" -[tool.langserve] +[tool.gigaserve] export_module = "bedrock_jcvd.chain" export_attr = "chain" @@ -25,3 +25,7 @@ tags = ["conversation"] [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/cassandra-entomology-rag/pyproject.toml b/templates/cassandra-entomology-rag/pyproject.toml index 48f1fc4668a01..fda48cab4b2fa 100644 --- a/templates/cassandra-entomology-rag/pyproject.toml +++ b/templates/cassandra-entomology-rag/pyproject.toml @@ -1,4 +1,3 @@ - [tool.poetry] name = "cassandra-entomology-rag" version = "0.0.1" @@ -10,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = "^0.5.1" cassio = "^0.1.3" @@ -18,6 +17,10 @@ cassio = "^0.1.3" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "cassandra_entomology_rag" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "DataStax" diff --git a/templates/cassandra-synonym-caching/pyproject.toml b/templates/cassandra-synonym-caching/pyproject.toml index 5414859cb4ea8..a3a5f95635044 100644 --- a/templates/cassandra-synonym-caching/pyproject.toml +++ b/templates/cassandra-synonym-caching/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = "^0.5.1" cassio = "^0.1.3" @@ -17,6 +17,10 @@ cassio = "^0.1.3" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "cassandra_synonym_caching" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "DataStax" diff --git a/templates/chain-of-note-wiki/pyproject.toml b/templates/chain-of-note-wiki/pyproject.toml index 360214cd570c6..05f53f0b32407 100644 --- a/templates/chain-of-note-wiki/pyproject.toml +++ b/templates/chain-of-note-wiki/pyproject.toml @@ -1,4 +1,3 @@ - [tool.poetry] name = "chain-of-note-wiki" version = "0.0.1" @@ -18,7 +17,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "chain_of_note_wiki" export_attr = "chain" diff --git a/templates/chat-bot-feedback/pyproject.toml 
b/templates/chat-bot-feedback/pyproject.toml index 484966760663e..889cbb4a467bf 100644 --- a/templates/chat-bot-feedback/pyproject.toml +++ b/templates/chat-bot-feedback/pyproject.toml @@ -1,4 +1,3 @@ - [tool.poetry] name = "chat-bot-feedback" version = "0.0.1" @@ -8,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.329" +gigachain = "^0.1" openai = "<2" langsmith = ">=0.0.54" langchainhub = ">=0.1.13" @@ -18,7 +17,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "chat_bot_feedback.chain" export_attr = "chain" diff --git a/templates/cohere-librarian/pyproject.toml b/templates/cohere-librarian/pyproject.toml index 6bdadb71dffde..ed7531d4f4983 100644 --- a/templates/cohere-librarian/pyproject.toml +++ b/templates/cohere-librarian/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" cohere = "^4.37" chromadb = "^0.4.18" @@ -16,7 +16,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "cohere_librarian" export_attr = "chain" diff --git a/templates/csv-agent/pyproject.toml b/templates/csv-agent/pyproject.toml index b5b27b46be04d..1ba02bb2bc83a 100644 --- a/templates/csv-agent/pyproject.toml +++ b/templates/csv-agent/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<3.13" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = "^0.5.1" faiss-cpu = "^1.7.4" @@ -20,6 +20,10 @@ gigachain-experimental = ">=0.0.54" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "csv_agent" +export_attr = "agent_executor" + [tool.templates-hub] use-case = "question-answering" author = "LangChain" diff --git a/templates/docs/CONTRIBUTING.md b/templates/docs/CONTRIBUTING.md new file mode 100644 index 0000000000000..3888df1d44ec9 --- /dev/null +++ b/templates/docs/CONTRIBUTING.md @@ -0,0 +1,43 @@ +# Contributing + +Thanks for taking the time to contribute a new template! +We've tried to make this process as simple and painless as possible. +If you need any help at all, please reach out! + +To contribute a new template, first fork this repository. +Then clone that fork and pull it down locally. +Set up an appropriate dev environment, and make sure you are in this `templates` directory. + +Make sure you have `langchain-cli` installed. + +```shell +pip install -U langchain-cli +``` + +You can then run the following command to create a new skeleton of a package. +By convention, package names should use `-` delimiters (not `_`). + +```shell +langchain template new $PROJECT_NAME +``` + +You can then edit the contents of the package as you desire. +Note that by default we expect the main chain to be exposed as `chain` in the `__init__.py` file of the package. +You can change this (either the name or the location), but if you do so it is important to update the `tool.langserve` +part of `pyproject.toml`. +For example, if you update the main chain exposed to be called `agent_executor`, then that section should look like: + +```text +[tool.langserve] +export_module = "..." +export_attr = "agent_executor" +``` + +Make sure to add any requirements of the package to `pyproject.toml` (and to remove any that are not used).
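+
+To make the export convention above concrete, here is a rough two-file sketch (the `my_template` package name and the toy prompt are hypothetical, not taken from any real template in this repository):
+
+```python
+# my_template/chain.py -- build the runnable that the template exposes.
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_openai import ChatOpenAI
+
+prompt = ChatPromptTemplate.from_template("Answer briefly: {question}")
+chain = prompt | ChatOpenAI() | StrOutputParser()
+```
+
+```python
+# my_template/__init__.py -- re-export the runnable under the name that
+# export_attr points at, so LangServe can find it.
+from my_template.chain import chain
+
+__all__ = ["chain"]
+```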
+ +Please update the `README.md` file to give some background on your package and how to set it up. + +If you want to change the license of your template for whatever reason, you may! Note that by default it is MIT licensed. + +If you want to test out your package at any point in time, you can spin up a LangServe instance directly from the package. +See instructions [here](LAUNCHING_PACKAGE.md) on how to best do that. diff --git a/templates/docs/INDEX.md b/templates/docs/INDEX.md new file mode 100644 index 0000000000000..2a5294d74cc67 --- /dev/null +++ b/templates/docs/INDEX.md @@ -0,0 +1,80 @@ +# Templates + +Highlighting a few different categories of templates + +## ⭐ Popular + +These are some of the more popular templates to get started with. + +- [Retrieval Augmented Generation Chatbot](../rag-conversation): Build a chatbot over your data. Defaults to OpenAI and PineconeVectorStore. +- [Extraction with OpenAI Functions](../extraction-openai-functions): Do extraction of structured data from unstructured data. Uses OpenAI function calling. +- [Local Retrieval Augmented Generation](../rag-chroma-private): Build a chatbot over your data. Uses only local tooling: Ollama, GPT4all, Chroma. +- [OpenAI Functions Agent](../openai-functions-agent): Build a chatbot that can take actions. Uses OpenAI function calling and Tavily. +- [XML Agent](../xml-agent): Build a chatbot that can take actions. Uses Anthropic and You.com. + + +## 📥 Advanced Retrieval + +These templates cover advanced retrieval techniques, which can be used for chat and QA over databases or documents. + +- [Reranking](../rag-pinecone-rerank): This retrieval technique uses Cohere's reranking endpoint to rerank documents from an initial retrieval step. +- [Anthropic Iterative Search](../anthropic-iterative-search): This retrieval technique uses iterative prompting to determine what to retrieve and whether the retrieved documents are good enough. +- **Parent Document Retrieval** using [Neo4j](../neo4j-parent) or [MongoDB](../mongo-parent-document-retrieval): This retrieval technique stores embeddings for smaller chunks, but then returns larger chunks to pass to the model for generation. +- [Semi-Structured RAG](../rag-semi-structured): The template shows how to do retrieval over semi-structured data (e.g. data that involves both text and tables). +- [Temporal RAG](../rag-timescale-hybrid-search-time): The template shows how to do hybrid search over data with a time-based component using [Timescale Vector](https://www.timescale.com/ai?utm_campaign=vectorlaunch&utm_source=langchain&utm_medium=referral). + +## 🔍 Advanced Retrieval - Query Transformation + +A selection of advanced retrieval methods that involve transforming the original user query, which can improve retrieval quality. + +- [Hypothetical Document Embeddings](../hyde): A retrieval technique that generates a hypothetical document for a given query, and then uses the embedding of that document to do semantic search. [Paper](https://arxiv.org/abs/2212.10496). +- [Rewrite-Retrieve-Read](../rewrite-retrieve-read): A retrieval technique that rewrites a given query before passing it to a search engine. [Paper](https://arxiv.org/abs/2305.14283). +- [Step-back QA Prompting](../stepback-qa-prompting): A retrieval technique that generates a "step-back" question and then retrieves documents relevant to both that question and the original question. [Paper](https://arxiv.org/abs//2310.06117).
+- [RAG-Fusion](../rag-fusion): A retrieval technique that generates multiple queries and then reranks the retrieved documents using reciprocal rank fusion (a short illustrative sketch appears at the end of this page). [Article](https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1). +- [Multi-Query Retriever](../rag-pinecone-multi-query): This retrieval technique uses an LLM to generate multiple queries and then fetches documents for all queries. + + +## 🧠 Advanced Retrieval - Query Construction + +A selection of advanced retrieval methods that involve constructing a query in a separate DSL from natural language, which enable natural language chat over various structured databases. + +- [Elastic Query Generator](../elastic-query-generator): Generate Elasticsearch queries from natural language. +- [Neo4j Cypher Generation](../neo4j-cypher): Generate Cypher statements from natural language. Available with a ["full text" option](../neo4j-cypher-ft) as well. +- [Supabase Self Query](../self-query-supabase): Parse a natural language query into a semantic query as well as a metadata filter for Supabase. + +## 🦙 OSS Models + +These templates use OSS models, which enable privacy for sensitive data. + +- [Local Retrieval Augmented Generation](../rag-chroma-private): Build a chatbot over your data. Uses only local tooling: Ollama, GPT4all, Chroma. +- [SQL Question Answering (Replicate)](../sql-llama2): Question answering over a SQL database, using Llama2 hosted on [Replicate](https://replicate.com/). +- [SQL Question Answering (LlamaCpp)](../sql-llamacpp): Question answering over a SQL database, using Llama2 through [LlamaCpp](https://github.com/ggerganov/llama.cpp). +- [SQL Question Answering (Ollama)](../sql-ollama): Question answering over a SQL database, using Llama2 through [Ollama](https://github.com/jmorganca/ollama). + +## ⛏️ Extraction + +These templates extract data in a structured format based upon a user-specified schema. + +- [Extraction Using OpenAI Functions](../extraction-openai-functions): Extract information from text using OpenAI Function Calling. +- [Extraction Using Anthropic Functions](../extraction-anthropic-functions): Extract information from text using a LangChain wrapper around the Anthropic endpoints intended to simulate function calling. +- [Extract BioTech Plate Data](../plate-chain): Extract microplate data from messy Excel spreadsheets into a more normalized format. + +## ⛏️ Summarization and tagging + +These templates summarize or categorize documents and text. + +- [Summarization using Anthropic](../summarize-anthropic): Uses Anthropic's Claude2 to summarize long documents. + +## 🤖 Agents + +These templates build chatbots that can take actions, helping to automate tasks. + +- [OpenAI Functions Agent](../openai-functions-agent): Build a chatbot that can take actions. Uses OpenAI function calling and Tavily. +- [XML Agent](../xml-agent): Build a chatbot that can take actions. Uses Anthropic and You.com. + +## :rotating_light: Safety and evaluation + +These templates enable moderation or evaluation of LLM outputs. + +- [Guardrails Output Parser](../guardrails-output-parser): Use guardrails-ai to validate LLM output. +- [Chatbot Feedback](../chat-bot-feedback): Use LangSmith to evaluate chatbot responses.
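+
+As a rough illustration of the reciprocal rank fusion step mentioned in the RAG-Fusion entry above (an illustrative sketch, not the rag-fusion template's actual code; the document ids are made up):
+
+```python
+# Reciprocal rank fusion: a document's score is the sum of 1 / (k + rank) over
+# every ranking it appears in, so documents ranked highly by several generated
+# queries rise to the top. k = 60 is the conventional damping constant.
+def reciprocal_rank_fusion(ranked_lists, k=60):
+    scores = {}
+    for ranking in ranked_lists:
+        for rank, doc_id in enumerate(ranking, start=1):
+            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank)
+    return sorted(scores, key=scores.get, reverse=True)
+
+# Fusing the rankings returned for two hypothetical generated queries:
+print(reciprocal_rank_fusion([["a", "b", "c"], ["b", "d"]]))  # ['b', 'a', 'd', 'c']
+```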
diff --git a/templates/docs/LAUNCHING_PACKAGE.md b/templates/docs/LAUNCHING_PACKAGE.md new file mode 100644 index 0000000000000..439a072052283 --- /dev/null +++ b/templates/docs/LAUNCHING_PACKAGE.md @@ -0,0 +1,41 @@ +# Launching LangServe from a Package + +You can also launch LangServe directly from a package, without having to pull it into a project. +This can be useful when you are developing a package and want to test it quickly. +The downside of this is that it gives you a little less control over how the LangServe APIs are configured, +which is why for proper projects we recommend creating a full project. + +In order to do this, first change your working directory to the package itself. +For example, if you are currently in this `templates` directory, you can go into the `pirate-speak` package with: + +```shell +cd pirate-speak +``` + +Inside this package there is a `pyproject.toml` file. +This file contains a `tool.langserve` section with information on how this package should be used. +For example, in `pirate-speak` we see: + +```text +[tool.langserve] +export_module = "pirate_speak.chain" +export_attr = "chain" +``` + +This information can be used to launch a LangServe instance automatically. +In order to do this, first make sure the CLI is installed: + +```shell +pip install -U langchain-cli +``` + +You can then run: + +```shell +langchain template serve +``` + +This will spin up endpoints, documentation, and playground for this chain. +For example, you can access the playground at [http://127.0.0.1:8000/playground/](http://127.0.0.1:8000/playground/). + +![Screenshot of the LangServe Playground web interface with input and output fields.](playground.png "LangServe Playground Interface") diff --git a/templates/docs/docs.png b/templates/docs/docs.png new file mode 100644 index 0000000000000..3ad2fc8a6d12e Binary files /dev/null and b/templates/docs/docs.png differ diff --git a/templates/docs/playground.png b/templates/docs/playground.png new file mode 100644 index 0000000000000..6ecc38a40b88b Binary files /dev/null and b/templates/docs/playground.png differ diff --git a/templates/elastic-query-generator/pyproject.toml b/templates/elastic-query-generator/pyproject.toml index e13058bedcb23..adebb3a7b8f64 100644 --- a/templates/elastic-query-generator/pyproject.toml +++ b/templates/elastic-query-generator/pyproject.toml @@ -7,13 +7,17 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" elasticsearch = "^8.10.1" openai = "<2" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "elastic_query_generator" +export_attr = "chain" + [tool.templates-hub] use-case = "query" author = "LangChain" diff --git a/templates/extraction-anthropic-functions/pyproject.toml b/templates/extraction-anthropic-functions/pyproject.toml index 23b4548b1105e..59f9ad0b6b33f 100644 --- a/templates/extraction-anthropic-functions/pyproject.toml +++ b/templates/extraction-anthropic-functions/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" anthropic = ">=0.5.0" langchainhub = ">=0.1.13" gigachain-experimental = ">=0.0.54" @@ -17,6 +17,10 @@ gigachain-experimental = ">=0.0.54" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "extraction_anthropic_functions" +export_attr = "chain" + [tool.templates-hub] use-case = "extraction" author = "LangChain" diff
--git a/templates/extraction-openai-functions/pyproject.toml b/templates/extraction-openai-functions/pyproject.toml index 914df5cc07ca8..71d2c8a5b2345 100644 --- a/templates/extraction-openai-functions/pyproject.toml +++ b/templates/extraction-openai-functions/pyproject.toml @@ -9,12 +9,16 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "extraction_openai_functions" +export_attr = "chain" + [tool.templates-hub] use-case = "extraction" author = "LangChain" diff --git a/templates/gemini-functions-agent/pyproject.toml b/templates/gemini-functions-agent/pyproject.toml index 3e9e44db506ad..4d2bd2c546318 100644 --- a/templates/gemini-functions-agent/pyproject.toml +++ b/templates/gemini-functions-agent/pyproject.toml @@ -7,14 +7,14 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<4.0" -langchain = "^0.1" +gigachain = "^0.1" tavily-python = "^0.1.9" -langchain-google-genai = ">=0.0.8,<0.1" +gigachain-google-genai = ">=0.0.8,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "gemini_functions_agent" export_attr = "agent_executor" diff --git a/templates/guardrails-output-parser/pyproject.toml b/templates/guardrails-output-parser/pyproject.toml index b78a93338692e..86f0f8dcfc113 100644 --- a/templates/guardrails-output-parser/pyproject.toml +++ b/templates/guardrails-output-parser/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<3.13" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" guardrails-ai = "^0.2.4" alt-profanity-check = "^1.3.1" @@ -17,7 +17,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "guardrails_output_parser.chain" export_attr = "chain" diff --git a/templates/hybrid-search-weaviate/pyproject.toml b/templates/hybrid-search-weaviate/pyproject.toml index b6e93364e60dd..8fcc78eb38928 100644 --- a/templates/hybrid-search-weaviate/pyproject.toml +++ b/templates/hybrid-search-weaviate/pyproject.toml @@ -1,5 +1,3 @@ - - [tool.poetry] name = "hybrid-search-weaviate" version = "0.1.0" @@ -9,13 +7,18 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = "^0.5.1" weaviate-client = ">=3.24.2" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.poetry.group.dev.dependencies.python-dotenv] +extras = [ + "cli", +] +version = "^1.0.0" [tool.gigaserve] export_module = "hybrid_search_weaviate" diff --git a/templates/hyde/pyproject.toml b/templates/hyde/pyproject.toml index 72486e6514493..597ef3cce4759 100644 --- a/templates/hyde/pyproject.toml +++ b/templates/hyde/pyproject.toml @@ -7,11 +7,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" chromadb = "^0.4.15" tiktoken = "^0.5.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] poethepoet = "^0.24.1" @@ -30,7 +30,7 @@ integrations = ["OpenAI", "ChromaDB"] tags = ["paper"] [tool.poe.tasks.start] -cmd = "uvicorn gigachain_cli.dev_scripts:create_demo_server --reload --port $port --host $host" +cmd = "uvicorn langchain_cli.dev_scripts:create_demo_server --reload --port $port --host 
$host" args = [ { name = "port", help = "port to run on", default = "8000" }, { name = "host", help = "host to run on", default = "127.0.0.1" }, diff --git a/templates/intel-rag-xeon/pyproject.toml b/templates/intel-rag-xeon/pyproject.toml index b5ebb5b98132a..d96e00e90dd6a 100644 --- a/templates/intel-rag-xeon/pyproject.toml +++ b/templates/intel-rag-xeon/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<3.13" -langchain = "^0.1" +gigachain = "^0.1" fastapi = "^0.104.0" sse-starlette = "^1.6.5" sentence-transformers = "2.2.2" @@ -27,7 +27,7 @@ extras = [ poethepoet = "^0.24.1" gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "intel_rag_xeon.chain" export_attr = "chain" diff --git a/templates/llama2-functions/pyproject.toml b/templates/llama2-functions/pyproject.toml index abd9537421722..74b936ccb96b0 100644 --- a/templates/llama2-functions/pyproject.toml +++ b/templates/llama2-functions/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" replicate = ">=0.15.4" [tool.poetry.group.dev.dependencies] diff --git a/templates/mongo-parent-document-retrieval/pyproject.toml b/templates/mongo-parent-document-retrieval/pyproject.toml index ec8e03e457274..639a607713a06 100644 --- a/templates/mongo-parent-document-retrieval/pyproject.toml +++ b/templates/mongo-parent-document-retrieval/pyproject.toml @@ -7,12 +7,12 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" openai = "<2" pymongo = "^4.6.0" pypdf = "^3.17.0" tiktoken = "^0.5.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-advanced-rag/pyproject.toml b/templates/neo4j-advanced-rag/pyproject.toml index 0ac8fb76e9b7c..2c796a1271081 100644 --- a/templates/neo4j-advanced-rag/pyproject.toml +++ b/templates/neo4j-advanced-rag/pyproject.toml @@ -9,17 +9,17 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" tiktoken = "^0.5.1" openai = "<2" neo4j = "^5.14.0" -langchain-text-splitters = ">=0.0.1,<0.1" -langchain-openai = "^0.1.1" +gigachain-text-splitters = ">=0.0.1,<0.1" +gigachain-openai = "^0.1.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "neo4j_advanced_rag" export_attr = "chain" diff --git a/templates/neo4j-cypher-ft/pyproject.toml b/templates/neo4j-cypher-ft/pyproject.toml index f5d6d1fcc331f..d43f819009df5 100644 --- a/templates/neo4j-cypher-ft/pyproject.toml +++ b/templates/neo4j-cypher-ft/pyproject.toml @@ -9,11 +9,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" neo4j = ">5.12" openai = "<2" -langchain-community = "^0.0.33" -langchain-openai = "^0.1.3" +gigachain-community = "^0.0.33" +gigachain-openai = "^0.1.3" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-cypher-memory/pyproject.toml b/templates/neo4j-cypher-memory/pyproject.toml index 6f4463a205e87..45741cba7780d 100644 --- a/templates/neo4j-cypher-memory/pyproject.toml +++ b/templates/neo4j-cypher-memory/pyproject.toml @@ -9,11 +9,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" neo4j 
= ">5.12" openai = "<2" -langchain-community = "^0.0.33" -langchain-openai = "^0.1.3" +gigachain-community = "^0.0.33" +gigachain-openai = "^0.1.3" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-cypher/pyproject.toml b/templates/neo4j-cypher/pyproject.toml index 51869133e1064..0f4d6856a91d8 100644 --- a/templates/neo4j-cypher/pyproject.toml +++ b/templates/neo4j-cypher/pyproject.toml @@ -9,11 +9,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" neo4j = ">5.12" openai = "<2" -langchain-openai = "^0.1.3" -langchain-community = "^0.0.33" +gigachain-openai = "^0.1.3" +gigachain-community = "^0.0.33" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-generation/pyproject.toml b/templates/neo4j-generation/pyproject.toml index 1cfc7ef242bc1..107330cb721f1 100644 --- a/templates/neo4j-generation/pyproject.toml +++ b/templates/neo4j-generation/pyproject.toml @@ -9,12 +9,12 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" neo4j = "^5.12.0" gigachain-openai = "^0.0.8" -langchain-community = "^0.0.28" -langchain-experimental = "^0.0.54" +gigachain-community = "^0.0.28" +gigachain-experimental = "^0.0.54" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-parent/pyproject.toml b/templates/neo4j-parent/pyproject.toml index efd7d51fff705..c862ddfca5097 100644 --- a/templates/neo4j-parent/pyproject.toml +++ b/templates/neo4j-parent/pyproject.toml @@ -9,12 +9,12 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" tiktoken = "^0.5.1" openai = "<2" neo4j = "^5.14.0" -langchain-text-splitters = ">=0.0.1,<0.1" -langchain-openai = "^0.1.1" +gigachain-text-splitters = ">=0.0.1,<0.1" +gigachain-openai = "^0.1.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/neo4j-semantic-layer/pyproject.toml b/templates/neo4j-semantic-layer/pyproject.toml index 6a9768a8ba952..b99456442c1df 100644 --- a/templates/neo4j-semantic-layer/pyproject.toml +++ b/templates/neo4j-semantic-layer/pyproject.toml @@ -16,7 +16,7 @@ neo4j = "^5.14.0" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "neo4j_semantic_layer" export_attr = "agent_executor" diff --git a/templates/neo4j-semantic-ollama/pyproject.toml b/templates/neo4j-semantic-ollama/pyproject.toml index 64e5c585f5db7..d800f7220ec08 100644 --- a/templates/neo4j-semantic-ollama/pyproject.toml +++ b/templates/neo4j-semantic-ollama/pyproject.toml @@ -9,14 +9,14 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain = "^0.1" +gigachain = "^0.1" openai = "<2" neo4j = "^5.14.0" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "neo4j_semantic_ollama" export_attr = "agent_executor" diff --git a/templates/neo4j-vector-memory/pyproject.toml b/templates/neo4j-vector-memory/pyproject.toml index 8ce44caa70e70..4209f05d5a90d 100644 --- a/templates/neo4j-vector-memory/pyproject.toml +++ b/templates/neo4j-vector-memory/pyproject.toml @@ -13,8 +13,8 @@ gigachain = "^0.1" tiktoken = "^0.5.1" openai = "<2" neo4j = "^5.14.0" -langchain-text-splitters = ">=0.0.1,<0.1" -langchain-openai = "^0.1.1" +gigachain-text-splitters = ">=0.0.1,<0.1" 
+gigachain-openai = "^0.1.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/nvidia-rag-canonical/pyproject.toml b/templates/nvidia-rag-canonical/pyproject.toml index c81b8444658a3..29f4413ffc607 100644 --- a/templates/nvidia-rag-canonical/pyproject.toml +++ b/templates/nvidia-rag-canonical/pyproject.toml @@ -9,14 +9,14 @@ readme = "README.md" python = ">=3.8.1,<4.0" gigachain = "^0.1" pymilvus = ">=2.3.0" -langchain-nvidia-aiplay = "^0.0.2" +gigachain-nvidia-aiplay = "^0.0.2" pypdf = ">=3.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "nvidia_rag_canonical" export_attr = "chain" diff --git a/templates/openai-functions-agent-gmail/pyproject.toml b/templates/openai-functions-agent-gmail/pyproject.toml index f45ec10f1da8c..e27c1341a7ee7 100644 --- a/templates/openai-functions-agent-gmail/pyproject.toml +++ b/templates/openai-functions-agent-gmail/pyproject.toml @@ -21,7 +21,7 @@ bs4 = "^0.0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "openai_functions_agent" export_attr = "agent_executor" diff --git a/templates/propositional-retrieval/pyproject.toml b/templates/propositional-retrieval/pyproject.toml index 38a4d21aa577c..522fc40b6acd6 100644 --- a/templates/propositional-retrieval/pyproject.toml +++ b/templates/propositional-retrieval/pyproject.toml @@ -14,12 +14,12 @@ openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" bs4 = "^0.0.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_chroma_multi_modal_multi_vector" export_attr = "chain" diff --git a/templates/pyproject.toml b/templates/pyproject.toml index f27dcb9b81d11..9be07beac1f7f 100644 --- a/templates/pyproject.toml +++ b/templates/pyproject.toml @@ -53,7 +53,7 @@ watch = "poetry run ptw" lint = ["_lint", "_check_formatting"] format = ["_lint_fix", "_format"] -_check_formatting = "poetry run ruff format . --check" +_check_formatting = "poetry run ruff format . --diff" _lint = "poetry run ruff ." _format = "poetry run ruff format ." _lint_fix = "poetry run ruff . 
--fix" diff --git a/templates/python-lint/pyproject.toml b/templates/python-lint/pyproject.toml index f4e896780b146..425a6ecd24ae5 100644 --- a/templates/python-lint/pyproject.toml +++ b/templates/python-lint/pyproject.toml @@ -18,7 +18,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "python_lint" export_attr = "agent_executor" diff --git a/templates/rag-aws-bedrock/pyproject.toml b/templates/rag-aws-bedrock/pyproject.toml index 4bec541988454..9c853e7e1a0a9 100644 --- a/templates/rag-aws-bedrock/pyproject.toml +++ b/templates/rag-aws-bedrock/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" tiktoken = ">=0.5.1" faiss-cpu = ">=1.7.4" boto3 = ">=1.28.57" diff --git a/templates/rag-aws-kendra/pyproject.toml b/templates/rag-aws-kendra/pyproject.toml index cf6bc4426f49d..d15007b766c23 100644 --- a/templates/rag-aws-kendra/pyproject.toml +++ b/templates/rag-aws-kendra/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" tiktoken = ">=0.5.1" boto3 = ">=1.28.57" awscli = ">=1.29.57" diff --git a/templates/rag-azure-search/pyproject.toml b/templates/rag-azure-search/pyproject.toml index 2de133d3eedff..82328b02d48a2 100644 --- a/templates/rag-azure-search/pyproject.toml +++ b/templates/rag-azure-search/pyproject.toml @@ -7,8 +7,8 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain-core = ">=0.1.5" -langchain-openai = ">=0.0.1" +gigachain-core = ">=0.1.5" +gigachain-openai = ">=0.0.1" azure-search-documents = ">=11.4.0" [tool.poetry.group.dev.dependencies] @@ -16,7 +16,7 @@ gigachain-cli = ">=0.0.4" fastapi = "^0.104.0" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "rag_azure_search" export_attr = "chain" diff --git a/templates/rag-chroma-multi-modal-multi-vector/pyproject.toml b/templates/rag-chroma-multi-modal-multi-vector/pyproject.toml index eb119168c575c..f26025812a041 100644 --- a/templates/rag-chroma-multi-modal-multi-vector/pyproject.toml +++ b/templates/rag-chroma-multi-modal-multi-vector/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = "^0.1" +gigachain = ">=0.0.353,<0.2" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" @@ -21,7 +21,7 @@ pillow = ">=10.1.0" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_chroma_multi_modal_multi_vector" export_attr = "chain" diff --git a/templates/rag-chroma-multi-modal/pyproject.toml b/templates/rag-chroma-multi-modal/pyproject.toml index 69bacb4941000..7c6f144cc53fc 100644 --- a/templates/rag-chroma-multi-modal/pyproject.toml +++ b/templates/rag-chroma-multi-modal/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = ">=0.0.353,<0.2" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" @@ -21,7 +21,7 @@ gigachain-experimental = ">=0.0.43" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_chroma_multi_modal" export_attr = "chain" diff --git a/templates/rag-chroma-private/pyproject.toml b/templates/rag-chroma-private/pyproject.toml index 15c3bf92584a8..c8dbe02aeee84 100644 --- 
a/templates/rag-chroma-private/pyproject.toml +++ b/templates/rag-chroma-private/pyproject.toml @@ -9,12 +9,12 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" gpt4all = ">=1.0.8" beautifulsoup4 = ">=4.12.2" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-chroma/pyproject.toml b/templates/rag-chroma/pyproject.toml index c74d10772e884..805ecab656a97 100644 --- a/templates/rag-chroma/pyproject.toml +++ b/templates/rag-chroma/pyproject.toml @@ -9,11 +9,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-codellama-fireworks/pyproject.toml b/templates/rag-codellama-fireworks/pyproject.toml index 4949c8c0dcfa1..457e77c1ab8cd 100644 --- a/templates/rag-codellama-fireworks/pyproject.toml +++ b/templates/rag-codellama-fireworks/pyproject.toml @@ -9,12 +9,12 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.9,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" gpt4all = ">=1.0.8" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" fireworks-ai = ">=0.6.0" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-conversation-zep/pyproject.toml b/templates/rag-conversation-zep/pyproject.toml index 4561824a504f5..65f298094271a 100644 --- a/templates/rag-conversation-zep/pyproject.toml +++ b/templates/rag-conversation-zep/pyproject.toml @@ -7,13 +7,13 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" openai = "<2" zep-python = "^1.4.0" tiktoken = "^0.5.1" beautifulsoup4 = "^4.12.2" bs4 = "^0.0.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-conversation/pyproject.toml b/templates/rag-conversation/pyproject.toml index d3ec19f977514..06a0659631ec6 100644 --- a/templates/rag-conversation/pyproject.toml +++ b/templates/rag-conversation/pyproject.toml @@ -14,7 +14,7 @@ openai = "<2" tiktoken = ">=0.5.1" pinecone-client = ">=2.2.4" beautifulsoup4 = "^4.12.2" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-fusion/pyproject.toml b/templates/rag-fusion/pyproject.toml index 3ea8f5248ca04..1fcfeb3b3d635 100644 --- a/templates/rag-fusion/pyproject.toml +++ b/templates/rag-fusion/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" pinecone-client = "^2.2.4" langchainhub = "^0.1.13" diff --git a/templates/rag-gemini-multi-modal/pyproject.toml b/templates/rag-gemini-multi-modal/pyproject.toml index d30ae39055b41..953cb1b0b93a5 100644 --- a/templates/rag-gemini-multi-modal/pyproject.toml +++ b/templates/rag-gemini-multi-modal/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = 
">=3.9,<4.0" -gigachain = ">=0.0.350" +gigachain = ">=0.0.353,<0.2" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" @@ -17,12 +17,12 @@ open-clip-torch = ">=2.23.0" torch = ">=2.1.0" pypdfium2 = ">=4.20.0" gigachain-experimental = ">=0.0.43" -langchain-google-genai = ">=0.0.1" +gigachain-google-genai = ">=0.0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_gemini_multi_modal" export_attr = "chain" diff --git a/templates/rag-google-cloud-sensitive-data-protection/pyproject.toml b/templates/rag-google-cloud-sensitive-data-protection/pyproject.toml index 83d514bd338ae..528f5f1e6703f 100644 --- a/templates/rag-google-cloud-sensitive-data-protection/pyproject.toml +++ b/templates/rag-google-cloud-sensitive-data-protection/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.333" +gigachain = "^0.1" google-cloud-aiplatform = ">=1.35.0" google-cloud-dlp = "^3.13.0" @@ -17,7 +17,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "rag_google_cloud_sensitive_data_protection" export_attr = "chain" diff --git a/templates/rag-google-cloud-vertexai-search/pyproject.toml b/templates/rag-google-cloud-vertexai-search/pyproject.toml index 73e2ba223723b..f1b2a96c9b149 100644 --- a/templates/rag-google-cloud-vertexai-search/pyproject.toml +++ b/templates/rag-google-cloud-vertexai-search/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.333" +gigachain = "^0.1" google-cloud-aiplatform = ">=1.35.0" diff --git a/templates/rag-gpt-crawler/pyproject.toml b/templates/rag-gpt-crawler/pyproject.toml index 86e846e5d64f7..29663fa05d3f6 100644 --- a/templates/rag-gpt-crawler/pyproject.toml +++ b/templates/rag-gpt-crawler/pyproject.toml @@ -9,16 +9,16 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_gpt_crawler" export_attr = "chain" diff --git a/templates/rag-jaguardb/pyproject.toml b/templates/rag-jaguardb/pyproject.toml index 095b41d44b99b..1f957fdde6c7d 100644 --- a/templates/rag-jaguardb/pyproject.toml +++ b/templates/rag-jaguardb/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain = "^0.1" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" jaguar = ">=3.4" @@ -17,7 +17,7 @@ jaguar = ">=3.4" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.15" -[tool.langserve] +[tool.gigaserve] export_module = "rag_jaguardb" export_attr = "chain" diff --git a/templates/rag-lancedb/pyproject.toml b/templates/rag-lancedb/pyproject.toml index 889c9bd789d72..aa78f631694d9 100644 --- a/templates/rag-lancedb/pyproject.toml +++ b/templates/rag-lancedb/pyproject.toml @@ -19,7 +19,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "rag_lancedb" export_attr = "chain" diff --git a/templates/rag-lantern/pyproject.toml b/templates/rag-lantern/pyproject.toml index 4c2a8757bbae7..ff92d33c278db 100644 --- a/templates/rag-lantern/pyproject.toml +++ 
b/templates/rag-lantern/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -langchain = "^0.1" +gigachain = "^0.1" openai = "<2" tiktoken = "^0.5.1" rag-lantern = {path = "packages/rag-lantern", develop = true} @@ -22,7 +22,7 @@ extras = [ ] version = "^1.0.0" -[tool.langserve] +[tool.gigaserve] export_module = "rag_lantern.chain" export_attr = "chain" diff --git a/templates/rag-matching-engine/pyproject.toml b/templates/rag-matching-engine/pyproject.toml index bff39de0eac74..7c23a7e907912 100644 --- a/templates/rag-matching-engine/pyproject.toml +++ b/templates/rag-matching-engine/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" google-cloud-aiplatform = "^1.35.0" [tool.poetry.group.dev.dependencies] diff --git a/templates/rag-milvus/.gitignore b/templates/rag-milvus/.gitignore new file mode 100644 index 0000000000000..bee8a64b79a99 --- /dev/null +++ b/templates/rag-milvus/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/templates/rag-milvus/LICENSE b/templates/rag-milvus/LICENSE new file mode 100644 index 0000000000000..fc0602feecdd6 --- /dev/null +++ b/templates/rag-milvus/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LangChain, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/templates/rag-milvus/README.md b/templates/rag-milvus/README.md new file mode 100644 index 0000000000000..c5c289817304a --- /dev/null +++ b/templates/rag-milvus/README.md @@ -0,0 +1,68 @@ +# rag-milvus + +This template performs RAG using Milvus and OpenAI. + +## Environment Setup + +Start the milvus server instance, and get the host ip and port. + +Set the `OPENAI_API_KEY` environment variable to access the OpenAI models. + +## Usage + +To use this package, you should first have the LangChain CLI installed: + +```shell +pip install -U langchain-cli +``` + +To create a new LangChain project and install this as the only package, you can do: + +```shell +langchain app new my-app --package rag-milvus +``` + +If you want to add this to an existing project, you can just run: + +```shell +langchain app add rag-milvus +``` + +And add the following code to your `server.py` file: +```python +from rag_milvus import chain as rag_milvus_chain + +add_routes(app, rag_milvus_chain, path="/rag-milvus") +``` + +(Optional) Let's now configure LangSmith. +LangSmith will help us trace, monitor and debug LangChain applications. 
+You can sign up for LangSmith [here](https://smith.langchain.com/). +If you don't have access, you can skip this section. + + +```shell +export LANGCHAIN_TRACING_V2=true +export LANGCHAIN_API_KEY= +export LANGCHAIN_PROJECT= # if not specified, defaults to "default" +``` + +If you are inside this directory, then you can spin up a LangServe instance directly by: + +```shell +langchain serve +``` + +This will start the FastAPI app with a server running locally at +[http://localhost:8000](http://localhost:8000). + +We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs). +We can access the playground at [http://127.0.0.1:8000/rag-milvus/playground](http://127.0.0.1:8000/rag-milvus/playground). + +We can access the template from code with: + +```python +from langserve.client import RemoteRunnable + +runnable = RemoteRunnable("http://localhost:8000/rag-milvus") +``` diff --git a/templates/rag-milvus/pyproject.toml b/templates/rag-milvus/pyproject.toml new file mode 100644 index 0000000000000..f13a7135c6b7f --- /dev/null +++ b/templates/rag-milvus/pyproject.toml @@ -0,0 +1,34 @@ +[tool.poetry] +name = "rag-milvus" +version = "0.1.1" +description = "RAG using Milvus" +authors = [] +readme = "README.md" + +[tool.poetry.dependencies] +python = ">=3.8.1,<4.0" +gigachain = "^0.1" +gigachain-core = "^0.1" +gigachain-openai = "^0.1" +gigachain-community = "^0.0.30" +pymilvus = "^2.4.3" +scipy = "^1.9" + +[tool.poetry.group.dev.dependencies] +gigachain-cli = ">=0.0.4" +fastapi = "^0.104.0" +sse-starlette = "^1.6.5" + +[tool.gigaserve] +export_module = "rag_milvus" +export_attr = "chain" + +[tool.templates-hub] +use-case = "rag" +author = "LangChain" +integrations = ["OpenAI", "Milvus"] +tags = ["vectordbs"] + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/templates/rag-milvus/rag_milvus/__init__.py b/templates/rag-milvus/rag_milvus/__init__.py new file mode 100644 index 0000000000000..cf9e1eac2677f --- /dev/null +++ b/templates/rag-milvus/rag_milvus/__init__.py @@ -0,0 +1,3 @@ +from rag_milvus.chain import chain + +__all__ = ["chain"] diff --git a/templates/rag-milvus/rag_milvus/chain.py b/templates/rag-milvus/rag_milvus/chain.py new file mode 100644 index 0000000000000..57c5300694520 --- /dev/null +++ b/templates/rag-milvus/rag_milvus/chain.py @@ -0,0 +1,79 @@ +from langchain_core.output_parsers import StrOutputParser +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.pydantic_v1 import BaseModel +from langchain_core.runnables import RunnableParallel, RunnablePassthrough +from langchain_milvus.vectorstores import Milvus +from langchain_openai import ChatOpenAI, OpenAIEmbeddings + +# Example for document loading (from URL), splitting, and creating vectorstore + +# Setting the URI as a local file, e.g.`./milvus.db`, is the most convenient method, +# as it automatically utilizes Milvus Lite to store all data in this file. +# +# If you have a large amount of data, such as more than a million docs, +# we recommend setting up a more performant Milvus server on Docker or Kubernetes. +# (https://milvus.io/docs/quickstart.md) +# When using this setup, please use the server URI, +# e.g.`http://localhost:19530`, as your URI.
+ +URI = "./milvus.db" + +""" +# Load +from langchain_community.document_loaders import WebBaseLoader + +loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/") +data = loader.load() + +# Split +from langchain_text_splitters import RecursiveCharacterTextSplitter + +text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0) +all_splits = text_splitter.split_documents(data) + +# Add to vectorDB +vectorstore = Milvus.from_documents(documents=all_splits, + collection_name="rag_milvus", + embedding=OpenAIEmbeddings(), + drop_old=True, + connection_args={"uri": URI}, + ) +retriever = vectorstore.as_retriever() +""" + +# Embed a single document as a test +vectorstore = Milvus.from_texts( + ["harrison worked at kensho"], + collection_name="rag_milvus", + embedding=OpenAIEmbeddings(), + drop_old=True, + connection_args={"uri": URI}, +) +retriever = vectorstore.as_retriever() + +# RAG prompt +template = """Answer the question based only on the following context: +{context} + +Question: {question} +""" +prompt = ChatPromptTemplate.from_template(template) + +# LLM +model = ChatOpenAI() + +# RAG chain +chain = ( + RunnableParallel({"context": retriever, "question": RunnablePassthrough()}) + | prompt + | model + | StrOutputParser() +) + + +# Add typing for input +class Question(BaseModel): + __root__: str + + +chain = chain.with_types(input_type=Question) diff --git a/templates/rag-milvus/tests/__init__.py b/templates/rag-milvus/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/templates/rag-momento-vector-index/pyproject.toml b/templates/rag-momento-vector-index/pyproject.toml index 27d3bc77f768d..2202c15c5728b 100644 --- a/templates/rag-momento-vector-index/pyproject.toml +++ b/templates/rag-momento-vector-index/pyproject.toml @@ -7,11 +7,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" momento = "^1.12.0" openai = "<2" tiktoken = "^0.5.1" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-mongo/pyproject.toml b/templates/rag-mongo/pyproject.toml index b7bdad776d1b5..89989ffc9d611 100644 --- a/templates/rag-mongo/pyproject.toml +++ b/templates/rag-mongo/pyproject.toml @@ -9,11 +9,11 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" pymongo = ">=4.5.0" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" diff --git a/templates/rag-multi-index-fusion/pyproject.toml b/templates/rag-multi-index-fusion/pyproject.toml index 542cfb99d8982..ab2fe58ee1126 100644 --- a/templates/rag-multi-index-fusion/pyproject.toml +++ b/templates/rag-multi-index-fusion/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" openai = "<2" xmltodict = "^0.13.0" kay = "^0.1.2" diff --git a/templates/rag-multi-index-router/pyproject.toml b/templates/rag-multi-index-router/pyproject.toml index 4b60f0f6ace5a..1eaf7009e2afa 100644 --- a/templates/rag-multi-index-router/pyproject.toml +++ b/templates/rag-multi-index-router/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" 
+gigachain = "^0.1" openai = "<2" xmltodict = "^0.13.0" kay = "^0.1.2" diff --git a/templates/rag-multi-modal-local/pyproject.toml b/templates/rag-multi-modal-local/pyproject.toml index 5558558f926ef..adb02af5ee602 100644 --- a/templates/rag-multi-modal-local/pyproject.toml +++ b/templates/rag-multi-modal-local/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.351" +gigachain = ">=0.0.353,<0.2" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" @@ -21,7 +21,7 @@ gigachain-community = ">=0.0.4" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_multi_modal_local" export_attr = "chain" diff --git a/templates/rag-multi-modal-mv-local/pyproject.toml b/templates/rag-multi-modal-mv-local/pyproject.toml index 9706948d5469f..d71375b66266e 100644 --- a/templates/rag-multi-modal-mv-local/pyproject.toml +++ b/templates/rag-multi-modal-mv-local/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.351" +gigachain = ">=0.0.353,<0.2" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" @@ -21,7 +21,7 @@ gigachain-community = ">=0.0.4" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_multi_modal_mv_local" export_attr = "chain" diff --git a/templates/rag-ollama-multi-query/pyproject.toml b/templates/rag-ollama-multi-query/pyproject.toml index 5b18ef76933d3..6880a371bb2ba 100644 --- a/templates/rag-ollama-multi-query/pyproject.toml +++ b/templates/rag-ollama-multi-query/pyproject.toml @@ -9,16 +9,16 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" chromadb = ">=0.4.14" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" -[tool.langserve] +[tool.gigaserve] export_module = "rag_ollama_multi_query" export_attr = "chain" diff --git a/templates/rag-opensearch/pyproject.toml b/templates/rag-opensearch/pyproject.toml index 7f47605397439..4fceb0f885b8e 100644 --- a/templates/rag-opensearch/pyproject.toml +++ b/templates/rag-opensearch/pyproject.toml @@ -7,7 +7,7 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.313, <0.1" +gigachain = "^0.1" openai = "^0.28.1" opensearch-py = "^2.0.0" tiktoken = "^0.5.1" @@ -18,7 +18,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "rag_opensearch" export_attr = "chain" diff --git a/templates/rag-pinecone-multi-query/pyproject.toml b/templates/rag-pinecone-multi-query/pyproject.toml index 39f20ef8cc192..00e401319ccd8 100644 --- a/templates/rag-pinecone-multi-query/pyproject.toml +++ b/templates/rag-pinecone-multi-query/pyproject.toml @@ -9,15 +9,19 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" pinecone-client = ">=2.2.4" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "rag_pinecone_multi_query" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "LangChain" diff --git 
a/templates/rag-pinecone-rerank/pyproject.toml b/templates/rag-pinecone-rerank/pyproject.toml index 7471bbdc68537..8c193961f8ada 100644 --- a/templates/rag-pinecone-rerank/pyproject.toml +++ b/templates/rag-pinecone-rerank/pyproject.toml @@ -9,16 +9,20 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" pinecone-client = ">=2.2.4" cohere = ">=4.32" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "rag_pinecone_rerank" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "LangChain" diff --git a/templates/rag-pinecone/pyproject.toml b/templates/rag-pinecone/pyproject.toml index 14bc54cb4a93d..1cbccd34a8c06 100644 --- a/templates/rag-pinecone/pyproject.toml +++ b/templates/rag-pinecone/pyproject.toml @@ -9,15 +9,19 @@ readme = "README.md" [tool.poetry.dependencies] python = ">=3.8.1,<4.0" -gigachain = ">=0.0.325" +gigachain = "^0.1" openai = "<2" tiktoken = ">=0.5.1" pinecone-client = ">=2.2.4" -langchain-text-splitters = ">=0.0.1,<0.1" +gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "rag_pinecone" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "LangChain" diff --git a/templates/rag-redis-multi-modal-multi-vector/pyproject.toml b/templates/rag-redis-multi-modal-multi-vector/pyproject.toml index 0c2ed8bf4313c..d952ec1bd111a 100644 --- a/templates/rag-redis-multi-modal-multi-vector/pyproject.toml +++ b/templates/rag-redis-multi-modal-multi-vector/pyproject.toml @@ -20,7 +20,7 @@ gigachain-cli = ">=0.0.21" fastapi = ">=0.104.0,<1" sse-starlette = "^1.6.5" -[tool.langserve] +[tool.gigaserve] export_module = "rag_redis_multi_modal_multi_vector" export_attr = "chain" diff --git a/templates/rag-redis/pyproject.toml b/templates/rag-redis/pyproject.toml index a360cb143a315..c65b48365ce76 100644 --- a/templates/rag-redis/pyproject.toml +++ b/templates/rag-redis/pyproject.toml @@ -28,6 +28,10 @@ extras = ["pdf"] poethepoet = "^0.24.1" gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "rag_redis.chain" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "Redis" @@ -35,7 +39,7 @@ integrations = ["OpenAI", "Redis", "HuggingFace"] tags = ["vectordbs"] [tool.poe.tasks.start] -cmd = "uvicorn gigachain_cli.dev_scripts:create_demo_server --reload --port $port --host $host" +cmd = "uvicorn langchain_cli.dev_scripts:create_demo_server --reload --port $port --host $host" args = [ { name = "port", help = "port to run on", default = "8000" }, { name = "host", help = "host to run on", default = "127.0.0.1" }, diff --git a/templates/rag-self-query/pyproject.toml b/templates/rag-self-query/pyproject.toml index c48b1ad433d9a..140231fb35281 100644 --- a/templates/rag-self-query/pyproject.toml +++ b/templates/rag-self-query/pyproject.toml @@ -20,6 +20,10 @@ gigachain-text-splitters = ">=0.0.1,<0.1" [tool.poetry.group.dev.dependencies] gigachain-cli = ">=0.0.21" +[tool.gigaserve] +export_module = "rag_self_query" +export_attr = "chain" + [tool.templates-hub] use-case = "rag" author = "LangChain" diff --git a/templates/rag-singlestoredb/pyproject.toml b/templates/rag-singlestoredb/pyproject.toml index 9a57f4dde5061..38d94195107a9 100644 --- a/templates/rag-singlestoredb/pyproject.toml +++ 
b/templates/rag-singlestoredb/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
singlestoredb = ">=0.8.1"
tiktoken = "^0.5.1"
@@ -18,7 +18,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "rag_singlestoredb"
export_attr = "chain"
diff --git a/templates/rag-supabase/pyproject.toml b/templates/rag-supabase/pyproject.toml
index 68214cde45800..99b65707fddef 100644
--- a/templates/rag-supabase/pyproject.toml
+++ b/templates/rag-supabase/pyproject.toml
@@ -9,13 +9,18 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
openai = "<2"
tiktoken = "^0.5.1"
supabase = "^1.2.0"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.poetry.group.dev.dependencies.python-dotenv]
+extras = [
+ "cli",
+]
+version = "^1.0.0"
[tool.gigaserve]
export_module = "rag_supabase.chain"
diff --git a/templates/rag-timescale-conversation/pyproject.toml b/templates/rag-timescale-conversation/pyproject.toml
index a8f5955b4d9eb..56a6b419a3eb5 100644
--- a/templates/rag-timescale-conversation/pyproject.toml
+++ b/templates/rag-timescale-conversation/pyproject.toml
@@ -9,18 +9,22 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.335"
+gigachain = "^0.1"
openai = "<2"
tiktoken = ">=0.5.1"
pinecone-client = ">=2.2.4"
beautifulsoup4 = "^4.12.2"
python-dotenv = "^1.0.0"
timescale-vector = "^0.0.3"
-langchain-text-splitters = ">=0.0.1,<0.1"
+gigachain-text-splitters = ">=0.0.1,<0.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "rag_timescale_conversation"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "rag"
author = "Timescale"
diff --git a/templates/rag-timescale-hybrid-search-time/pyproject.toml b/templates/rag-timescale-hybrid-search-time/pyproject.toml
index 47636d9ff795d..e945bdcd0be40 100644
--- a/templates/rag-timescale-hybrid-search-time/pyproject.toml
+++ b/templates/rag-timescale-hybrid-search-time/pyproject.toml
@@ -7,18 +7,22 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
timescale-vector = "^0.0.3"
lark = "^1.1.8"
tiktoken = "^0.5.1"
-langchain-text-splitters = ">=0.0.1,<0.1"
+gigachain-text-splitters = ">=0.0.1,<0.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "rag_timescale_hybrid_search_time.chain"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "rag"
author = "Timescale"
diff --git a/templates/rag-vectara-multiquery/pyproject.toml b/templates/rag-vectara-multiquery/pyproject.toml
index 5a904e32a3ee2..afaf4b17e3256 100644
--- a/templates/rag-vectara-multiquery/pyproject.toml
+++ b/templates/rag-vectara-multiquery/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rag-vectara-multiquery"
-version = "0.1.0"
+version = "0.2.0"
description = "RAG using vectara with multiquery retriever"
authors = [
"Ofer Mendelevitch ",
@@ -9,12 +9,16 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
openai = "<2"
tiktoken = "^0.5.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.poetry.group.dev.dependencies.python-dotenv]
+extras = [
+ "cli",
+]
version = "^1.0.0"
[tool.gigaserve]
diff --git a/templates/rag-vectara/pyproject.toml b/templates/rag-vectara/pyproject.toml
index 4540f7dc196cf..f085f7c93a3a8 100644
--- a/templates/rag-vectara/pyproject.toml
+++ b/templates/rag-vectara/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rag-vectara"
-version = "0.1.0"
+version = "0.2.0"
description = "RAG using vectara retriever"
authors = [
"Ofer Mendelevitch ",
@@ -9,12 +9,17 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
openai = "<2"
tiktoken = "^0.5.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.poetry.group.dev.dependencies.python-dotenv]
+extras = [
+ "cli",
+]
+version = "^1.0.0"
[tool.gigaserve]
export_module = "rag_vectara"
diff --git a/templates/rag-weaviate/pyproject.toml b/templates/rag-weaviate/pyproject.toml
index ab14d55d10a7a..6a1c55d8de640 100644
--- a/templates/rag-weaviate/pyproject.toml
+++ b/templates/rag-weaviate/pyproject.toml
@@ -9,14 +9,19 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
openai = "<2"
tiktoken = "^0.5.1"
weaviate-client = ">=3.24.2"
-langchain-text-splitters = ">=0.0.1,<0.1"
+gigachain-text-splitters = ">=0.0.1,<0.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.poetry.group.dev.dependencies.python-dotenv]
+extras = [
+ "cli",
+]
+version = "^1.0.0"
[tool.gigaserve]
export_module = "rag_weaviate"
diff --git a/templates/research-assistant/pyproject.toml b/templates/research-assistant/pyproject.toml
index bdf89c38c048a..f2ad4c0926f59 100644
--- a/templates/research-assistant/pyproject.toml
+++ b/templates/research-assistant/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
beautifulsoup4 = "^4.12.2"
duckduckgo-search = "^3.9.5"
@@ -18,7 +18,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "research_assistant"
export_attr = "chain"
diff --git a/templates/retrieval-agent-fireworks/pyproject.toml b/templates/retrieval-agent-fireworks/pyproject.toml
index cdab12b7a6239..06f909b7e8749 100644
--- a/templates/retrieval-agent-fireworks/pyproject.toml
+++ b/templates/retrieval-agent-fireworks/pyproject.toml
@@ -7,9 +7,9 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-langchain = "^0.1"
+gigachain = "^0.1"
arxiv = "^2.0.0"
-langchain-community = ">=0.0.17,<0.2"
+gigachain-community = ">=0.0.17,<0.2"
langchainhub = "^0.1.14"
fireworks-ai = "^0.11.2"
@@ -19,7 +19,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "retrieval_agent_fireworks"
export_attr = "agent_executor"
diff --git a/templates/retrieval-agent/pyproject.toml b/templates/retrieval-agent/pyproject.toml
index 38588eed0462b..c78f802b3ff0f 100644
--- a/templates/retrieval-agent/pyproject.toml
+++ b/templates/retrieval-agent/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
arxiv = "^2.0.0"
gigachain-openai = "^0.0.2.post1"
@@ -17,7 +17,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "retrieval_agent"
export_attr = "agent_executor"
diff --git a/templates/rewrite-retrieve-read/pyproject.toml b/templates/rewrite-retrieve-read/pyproject.toml
index f822c7f2d07f8..4111df6143948 100644
--- a/templates/rewrite-retrieve-read/pyproject.toml
+++ b/templates/rewrite-retrieve-read/pyproject.toml
@@ -7,13 +7,17 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
duckduckgo-search = "^3.9.3"
openai = "<2"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "rewrite_retrieve_read.chain"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "rag"
author = "LangChain"
diff --git a/templates/robocorp-action-server/pyproject.toml b/templates/robocorp-action-server/pyproject.toml
index 4b79aef8d608a..3908a1bcc5613 100644
--- a/templates/robocorp-action-server/pyproject.toml
+++ b/templates/robocorp-action-server/pyproject.toml
@@ -9,14 +9,14 @@ readme = "README.md"
python = ">=3.8.1,<4.0"
gigachain = "^0.1"
gigachain-openai = ">=0.0.2,<0.2"
-langchain-robocorp = ">=0.0.3,<0.2"
+gigachain-robocorp = ">=0.0.3,<0.2"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "robocorp_action_server"
export_attr = "agent_executor"
diff --git a/templates/self-query-supabase/pyproject.toml b/templates/self-query-supabase/pyproject.toml
index 5ef8d6e378840..8a69159aad32c 100644
--- a/templates/self-query-supabase/pyproject.toml
+++ b/templates/self-query-supabase/pyproject.toml
@@ -9,7 +9,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
openai = "<2"
tiktoken = "^0.5.1"
supabase = "^1.2.0"
@@ -17,6 +17,11 @@ lark = "^1.1.8"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.poetry.group.dev.dependencies.python-dotenv]
+extras = [
+ "cli",
+]
+version = "^1.0.0"
[tool.gigaserve]
export_module = "self_query_supabase.chain"
diff --git a/templates/shopping-assistant/pyproject.toml b/templates/shopping-assistant/pyproject.toml
index ad7f74fcfcc05..139b2cb441067 100644
--- a/templates/shopping-assistant/pyproject.toml
+++ b/templates/shopping-assistant/pyproject.toml
@@ -7,16 +7,16 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.12,<4.0"
-langchain = "^0.1"
+gigachain = "^0.1"
openai = "<2"
-ionic-langchain = "^0.2.2"
+ionic-gigachain = "^0.2.2"
gigachain-openai = "^0.0.5"
langchainhub = "^0.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
-[tool.langserve]
+[tool.gigaserve]
export_module = "shopping_assistant.agent"
export_attr = "agent_executor"
diff --git a/templates/skeleton-of-thought/pyproject.toml b/templates/skeleton-of-thought/pyproject.toml
index 7ed284719dbfe..f90da7f6ebe05 100644
--- a/templates/skeleton-of-thought/pyproject.toml
+++ b/templates/skeleton-of-thought/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "^0.28.1"
[tool.poetry.group.dev.dependencies]
@@ -15,7 +15,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "skeleton_of_thought"
export_attr = "chain"
diff --git a/templates/solo-performance-prompting-agent/pyproject.toml b/templates/solo-performance-prompting-agent/pyproject.toml
index 0f9c7cf1fab33..018679f515954 100644
--- a/templates/solo-performance-prompting-agent/pyproject.toml
+++ b/templates/solo-performance-prompting-agent/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
duckduckgo-search = "^3.9.3"
@@ -16,7 +16,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "solo_performance_prompting_agent.agent"
export_attr = "agent_executor"
diff --git a/templates/sql-llama2/pyproject.toml b/templates/sql-llama2/pyproject.toml
index a67a70aac974c..afd4e6a8b881a 100644
--- a/templates/sql-llama2/pyproject.toml
+++ b/templates/sql-llama2/pyproject.toml
@@ -9,12 +9,16 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
replicate = ">=0.15.4"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "sql_llama2"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "sql"
author = "LangChain"
diff --git a/templates/sql-llamacpp/pyproject.toml b/templates/sql-llamacpp/pyproject.toml
index 7f1a0a8f13de3..8e33180e021a4 100644
--- a/templates/sql-llamacpp/pyproject.toml
+++ b/templates/sql-llamacpp/pyproject.toml
@@ -9,12 +9,16 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
llama-cpp-python = ">=0.1.79"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "sql_llamacpp"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "sql"
author = "LangChain"
diff --git a/templates/sql-ollama/pyproject.toml b/templates/sql-ollama/pyproject.toml
index 2598c7fa4df65..b0997b69f3b2f 100644
--- a/templates/sql-ollama/pyproject.toml
+++ b/templates/sql-ollama/pyproject.toml
@@ -9,11 +9,15 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "sql_ollama"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "sql"
author = "LangChain"
diff --git a/templates/sql-pgvector/pyproject.toml b/templates/sql-pgvector/pyproject.toml
index b564603cf26f5..b1d9c048494cf 100644
--- a/templates/sql-pgvector/pyproject.toml
+++ b/templates/sql-pgvector/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "<2"
psycopg2 = "^2.9.9"
tiktoken = "^0.5.1"
@@ -17,7 +17,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "sql_pgvector"
export_attr = "chain"
diff --git a/templates/sql-research-assistant/pyproject.toml b/templates/sql-research-assistant/pyproject.toml
index 02516560e5e16..3658dfcfc117a 100644
--- a/templates/sql-research-assistant/pyproject.toml
+++ b/templates/sql-research-assistant/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.313, <0.1"
+gigachain = "^0.1"
openai = "^0.28.1"
bs4 = "^0.0.1"
duckduckgo-search = "^4.1.0"
@@ -17,7 +17,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "sql_research_assistant"
export_attr = "chain"
diff --git a/templates/stepback-qa-prompting/pyproject.toml b/templates/stepback-qa-prompting/pyproject.toml
index 47bdabe03c6c8..d714d837aaf00 100644
--- a/templates/stepback-qa-prompting/pyproject.toml
+++ b/templates/stepback-qa-prompting/pyproject.toml
@@ -7,13 +7,17 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-gigachain = ">=0.0.325"
+gigachain = "^0.1"
duckduckgo-search = "^3.9.3"
openai = "<2"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "stepback_qa_prompting.chain"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "rag"
author = "LangChain"
diff --git a/templates/summarize-anthropic/pyproject.toml b/templates/summarize-anthropic/pyproject.toml
index 9cb4db77a9ed5..663386d68e8ec 100644
--- a/templates/summarize-anthropic/pyproject.toml
+++ b/templates/summarize-anthropic/pyproject.toml
@@ -1,4 +1,3 @@
-
[tool.poetry]
name = "summarize-anthropic"
version = "0.1.0"
@@ -10,11 +9,15 @@ readme = "README.md"
python = ">=3.8.1,<4.0"
gigachain = "^0.1"
langchainhub = ">=0.1.13"
-gigachain-anthropic = "^0.1.4"
+langchain-anthropic = "^0.1.4"
[tool.poetry.group.dev.dependencies]
gigachain-cli = ">=0.0.21"
+[tool.gigaserve]
+export_module = "summarize_anthropic"
+export_attr = "chain"
+
[tool.templates-hub]
use-case = "summarization"
author = "LangChain"
@@ -24,4 +27,3 @@ tags = ["summarization"]
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
-
diff --git a/templates/vertexai-chuck-norris/pyproject.toml b/templates/vertexai-chuck-norris/pyproject.toml
index ae495f823aadc..7930a0a726849 100644
--- a/templates/vertexai-chuck-norris/pyproject.toml
+++ b/templates/vertexai-chuck-norris/pyproject.toml
@@ -7,7 +7,7 @@ readme = "README.md"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
-langchain = "^0.1"
+gigachain = "^0.1"
google-cloud-aiplatform = "^1.36.4"
[tool.poetry.group.dev.dependencies]
@@ -15,7 +15,7 @@ gigachain-cli = ">=0.0.21"
fastapi = ">=0.104.0,<1"
sse-starlette = "^1.6.5"
-[tool.langserve]
+[tool.gigaserve]
export_module = "vertexai_chuck_norris.chain"
export_attr = "chain"
diff --git a/templates/xml-agent/pyproject.toml b/templates/xml-agent/pyproject.toml
index d9f8030fe8487..0eeff517f749a 100644
--- a/templates/xml-agent/pyproject.toml
+++ b/templates/xml-agent/pyproject.toml
@@ -1,4 +1,3 @@
-
[tool.poetry]
name = "xml-agent"
version = "0.1.0"