From 46f1945a48309a6704788ac040e6daf97804be31 Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 22 Oct 2023 18:51:02 +0300 Subject: [PATCH 01/26] [cli] Added config file for `resin start` The current solution won't work with multiple workers. But people aren't supposed to work with multiple workers in UVicorn anyway, but rather use Gunicorn instead . I removed the `workers` paramter. We will add gunicorn as part of the Dockerfile, when we'll have one --- src/resin_cli/app.py | 66 ++++++++++++++++++++---- src/resin_cli/cli.py | 10 ++-- src/resin_cli/data_loader/__init__.py | 1 - src/resin_cli/data_loader/data_loader.py | 7 --- src/resin_cli/errors.py | 13 +++++ 5 files changed, 74 insertions(+), 23 deletions(-) create mode 100644 src/resin_cli/errors.py diff --git a/src/resin_cli/app.py b/src/resin_cli/app.py index 9e8c64eb..3d1ae9bb 100644 --- a/src/resin_cli/app.py +++ b/src/resin_cli/app.py @@ -6,6 +6,8 @@ import openai from multiprocessing import current_process + +import yaml from dotenv import load_dotenv from resin.llm import BaseLLM @@ -28,6 +30,7 @@ ContextUpsertRequest, HealthStatus, ContextDeleteRequest from resin.llm.openai import OpenAILLM +from resin_cli.errors import ConfigError load_dotenv() # load env vars before import of openai openai.api_key = os.getenv("OPENAI_API_KEY") @@ -198,24 +201,65 @@ def _init_logging(): def _init_engines(): - global kb, context_engine, chat_engine, llm - Tokenizer.initialize(OpenAITokenizer, model_name='gpt-3.5-turbo-0613') + global kb, context_engine, chat_engine, llm, logger - INDEX_NAME = os.getenv("INDEX_NAME") - if not INDEX_NAME: + index_name = os.getenv("INDEX_NAME") + if not index_name: raise ValueError("INDEX_NAME environment variable must be set") - kb = KnowledgeBase(index_name=INDEX_NAME) - context_engine = ContextEngine(knowledge_base=kb) - llm = OpenAILLM() - chat_engine = ChatEngine(context_engine=context_engine, llm=llm) + config_file = os.getenv("RESIN_CONFIG_FILE") + if config_file: + _load_config(config_file) + + else: + print("Initializing default engines") + Tokenizer.initialize() + kb = KnowledgeBase(index_name=index_name) + context_engine = ContextEngine(knowledge_base=kb) + llm = OpenAILLM() + chat_engine = ChatEngine(context_engine=context_engine, llm=llm) kb.connect() -def start(host="0.0.0.0", port=8000, reload=False, workers=1): - uvicorn.run("resin_cli.app:app", - host=host, port=port, reload=reload, workers=workers) +def _load_config(config_file): + global chat_engine, llm, context_engine, kb + try: + with open(config_file, "r") as f: + config = yaml.safe_load(f) + except Exception as e: + logger.exception(f"Failed to load config file {config_file}") + raise ConfigError( + f"Failed to load config file {config_file}. Error: {str(e)}" + ) + print(config) + tokenizer_config = config.get("tokenizer", {}) + Tokenizer.initialize_from_config(tokenizer_config) + if "chat_engine" not in config: + raise ConfigError( + f"Config file {config_file} must contain a 'chat_engine' section" + ) + chat_engine_config = config["chat_engine"] + try: + chat_engine = ChatEngine.from_config(chat_engine_config) + except Exception as e: + logger.exception( + f"Failed to initialize chat engine from config file {config_file}" + ) + raise ConfigError( + f"Failed to initialize chat engine from config file {config_file}." + f" Error: {str(e)}" + ) + llm = chat_engine.llm + context_engine = chat_engine.context_engine + kb = context_engine.knowledge_base + + +def start(host="0.0.0.0", port=8000, reload=False, config_file=None): + if config_file: + os.environ["RESIN_CONFIG_FILE"] = config_file + + uvicorn.run("resin_cli.app:app", host=host, port=port, reload=reload) if __name__ == "__main__": diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index b1635a9a..9bc42048 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -17,9 +17,9 @@ from resin.tokenizer import Tokenizer from resin_cli.data_loader import ( load_from_path, - CLIError, IDsNotUniqueError, DocumentsValidationError) +from resin_cli.errors import CLIError from resin import __version__ @@ -391,10 +391,12 @@ def chat(chat_service_url, compare, debug, stream): help="TCP port to bind the server to. Defaults to 8000") @click.option("--reload/--no-reload", default=False, help="Set the server to reload on code changes. Defaults to False") -@click.option("--workers", default=1, help="Number of worker processes. Defaults to 1") -def start(host, port, reload, workers): +@click.option("--config", "-c", default=None, + help="Path to a resin config file. Optional, otherwise configuration " + "defaults will be used.") +def start(host, port, reload, config): click.echo(f"Starting Resin service on {host}:{port}") - start_service(host, port=port, reload=reload, workers=workers) + start_service(host, port=port, reload=reload, config_file=config) @cli.command( diff --git a/src/resin_cli/data_loader/__init__.py b/src/resin_cli/data_loader/__init__.py index 85464408..8a298030 100644 --- a/src/resin_cli/data_loader/__init__.py +++ b/src/resin_cli/data_loader/__init__.py @@ -1,6 +1,5 @@ from .data_loader import ( load_from_path, - CLIError, IDsNotUniqueError, DocumentsValidationError ) diff --git a/src/resin_cli/data_loader/data_loader.py b/src/resin_cli/data_loader/data_loader.py index b84434b6..ee2d921a 100644 --- a/src/resin_cli/data_loader/data_loader.py +++ b/src/resin_cli/data_loader/data_loader.py @@ -5,10 +5,8 @@ from typing import List from textwrap import dedent -import click import numpy as np import pandas as pd -from click import ClickException from pydantic import ValidationError @@ -27,11 +25,6 @@ def format_multiline(msg): return dedent(msg).strip() -class CLIError(ClickException): - def format_message(self) -> str: - return click.style(format_multiline(self.message), fg='red') - - def _process_metadata(value): if pd.isna(value): return {} diff --git a/src/resin_cli/errors.py b/src/resin_cli/errors.py new file mode 100644 index 00000000..1f54ed7e --- /dev/null +++ b/src/resin_cli/errors.py @@ -0,0 +1,13 @@ +import click +from click import ClickException + +from resin_cli.data_loader.data_loader import format_multiline + + +class CLIError(ClickException): + def format_message(self) -> str: + return click.style(format_multiline(self.message), fg='red') + + +class ConfigError(RuntimeError): + pass From 361a8d7753d346c05b966ff52ce78a82fd3f569d Mon Sep 17 00:00:00 2001 From: ilai Date: Sun, 22 Oct 2023 19:33:04 +0300 Subject: [PATCH 02/26] [cli] In `resin start` - always run with zero workers That ensures that if the startup process failed, it will break the execution. Also added big red note that the user should use Gunicorn for production --- pyproject.toml | 1 + src/resin_cli/app.py | 6 +++--- src/resin_cli/cli.py | 11 +++++++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 529c3e87..f5260140 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ fastapi = "^0.92.0" uvicorn = "^0.20.0" tenacity = "^8.2.1" sse-starlette = "^1.6.5" +gunicorn = "^21.2.0" [tool.poetry.group.dev.dependencies] diff --git a/src/resin_cli/app.py b/src/resin_cli/app.py index 3d1ae9bb..4a1407f4 100644 --- a/src/resin_cli/app.py +++ b/src/resin_cli/app.py @@ -212,7 +212,7 @@ def _init_engines(): _load_config(config_file) else: - print("Initializing default engines") + logger.info("Did not find config file. Initializing engines with default config") Tokenizer.initialize() kb = KnowledgeBase(index_name=index_name) context_engine = ContextEngine(knowledge_base=kb) @@ -224,6 +224,7 @@ def _init_engines(): def _load_config(config_file): global chat_engine, llm, context_engine, kb + logger.info(f"Initializing engines with config file {config_file}") try: with open(config_file, "r") as f: config = yaml.safe_load(f) @@ -232,7 +233,6 @@ def _load_config(config_file): raise ConfigError( f"Failed to load config file {config_file}. Error: {str(e)}" ) - print(config) tokenizer_config = config.get("tokenizer", {}) Tokenizer.initialize_from_config(tokenizer_config) if "chat_engine" not in config: @@ -259,7 +259,7 @@ def start(host="0.0.0.0", port=8000, reload=False, config_file=None): if config_file: os.environ["RESIN_CONFIG_FILE"] = config_file - uvicorn.run("resin_cli.app:app", host=host, port=port, reload=reload) + uvicorn.run("resin_cli.app:app", host=host, port=port, reload=reload, workers=0) if __name__ == "__main__": diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index 9bc42048..63294a3b 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -395,6 +395,17 @@ def chat(chat_service_url, compare, debug, stream): help="Path to a resin config file. Optional, otherwise configuration " "defaults will be used.") def start(host, port, reload, config): + note_msg = ( + "🚨 Note 🚨\n" + "For debugging only. To run the Resin service in production, run the command:\n" + "gunicorn resin_cli.app:app --worker-class uvicorn.workers.UvicornWorker " + f"--bind {host}:{port} --workers " + ) + for c in note_msg: + click.echo(click.style(c, fg="red"), nl=False) + time.sleep(0.01) + click.echo() + click.echo(f"Starting Resin service on {host}:{port}") start_service(host, port=port, reload=reload, config_file=config) From 650fbe41830d215cf7c8535e41281b5bbd01aaf8 Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 23 Oct 2023 11:03:07 +0300 Subject: [PATCH 03/26] [cli] `resin stop` handle case where started with gunicorn Ugly hack - simply send a `pkill` signal --- src/resin_cli/cli.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index 63294a3b..c835d983 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -1,4 +1,6 @@ import os +import signal +import subprocess import click import time @@ -422,6 +424,28 @@ def start(host, port, reload, config): @click.option("url", "--url", default="http://0.0.0.0:8000", help="URL of the Resin service to use. Defaults to http://0.0.0.0:8000") def stop(url): + # Check if the service was started using Gunicorn + res = subprocess.run(["pgrep", "-f", "gunicorn resin_cli.app:app"], + capture_output=True) + output = res.stdout.decode("utf-8").split() + + # If Gunicorn was used, kill all Gunicorn processes + if output: + msg = ("It seems that Resin service was launched using Gunicorn.\n" + "Do you want to kill all Gunicorn processes?") + click.confirm(click.style(msg, fg="red"), abort=True) + try: + subprocess.run(["pkill", "-f", "gunicorn resin_cli.app:app"], check=True) + except subprocess.CalledProcessError: + try: + [os.kill(int(pid), signal.SIGINT) for pid in output] + except OSError: + msg = ( + "Could not kill Gunicorn processes. Please kill them manually." + f"Found process ids: {output}" + ) + raise CLIError(msg) + try: res = requests.get(urljoin(url, "/shutdown")) res.raise_for_status() From 0b16a1b5bd91be6ada26bb2727ad508fbbca950e Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 23 Oct 2023 13:47:00 +0300 Subject: [PATCH 04/26] [cli] Load KB config for `new` and `upsert` I made the config file mandatory (defaults to the built-in config file that comes with the repo). This makes reduces the chance of configuration difference between two different commands --- src/resin_cli/cli.py | 56 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index 30992d36..232d66a1 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -1,11 +1,13 @@ import os import signal import subprocess +from typing import Dict, Any import click import time import requests +import yaml from dotenv import load_dotenv from tenacity import retry, stop_after_attempt, wait_fixed from tqdm import tqdm @@ -97,6 +99,31 @@ def _initialize_tokenizer(): raise CLIError(msg) +def _load_kb_config(config_file: str) -> Dict[str, Any]: + try: + with open(os.path.join("config", config_file), 'r') as f: + config = yaml.safe_load(f) + except Exception as e: + msg = f"Failed to load config file {config_file}. Reason:\n{e}" + raise CLIError(msg) + + if "knowledge_base" in config: + kb_config = config.get("knowledge_base", None) + elif "chat_engine" in config: + kb_config = config["chat_engine"]\ + .get("context_engine", {})\ + .get("knowledge_base", None) + else: + kb_config = None + + if kb_config is None: + msg = (f"Did not find a `knowledge_base` configuration in {config_file}", + "Would you like to use the default configuration?") + click.confirm(click.style(msg, fg="red"), abort=True) + kb_config = {} + return kb_config + + @click.group(invoke_without_command=True, context_settings=CONTEXT_SETTINGS) @click.version_option(__version__, "-v", "--version", prog_name="Resin") @click.pass_context @@ -135,9 +162,13 @@ def health(url): ) ) @click.argument("index-name", nargs=1, envvar="INDEX_NAME", type=str, required=True) -def new(index_name): +@click.option("--config", "-c", default="config.yaml", + help="Name of the resin config file. The file needs to be in the" + "`config/` directory. Defaults to `config.yaml`") +def new(index_name, config): _initialize_tokenizer() - kb = KnowledgeBase(index_name=index_name) + kb_config = _load_kb_config(config) + kb = KnowledgeBase.from_config(kb_config, index_name=index_name) click.echo("Resin is going to create a new index: ", nl=False) click.echo(click.style(f"{kb.index_name}", fg="green")) click.confirm(click.style("Do you want to continue?", fg="red"), abort=True) @@ -174,7 +205,14 @@ def new(index_name): help="Number of documents to upload in each batch. Defaults to 10.") @click.option("--stop-on-error/--no-stop-on-error", default=True, help="Whether to stop when the first error arises. Defaults to True.") -def upsert(index_name: str, data_path: str, batch_size: int, stop_on_error: bool): +@click.option("--config", "-c", default="config.yaml", + help="Name of the resin config file. The file needs to be in the" + "`config/` directory. Defaults to `config.yaml`") +def upsert(index_name: str, + data_path: str, + batch_size: int, + stop_on_error: bool, + config: str): if index_name is None: msg = ( "No index name provided. Please set --index-name or INDEX_NAME environment " @@ -184,7 +222,8 @@ def upsert(index_name: str, data_path: str, batch_size: int, stop_on_error: bool _initialize_tokenizer() - kb = KnowledgeBase(index_name=index_name) + kb_config = _load_kb_config(config) + kb = KnowledgeBase.from_config(kb_config, index_name=index_name) try: kb.connect() except RuntimeError as e: @@ -418,9 +457,9 @@ def chat(chat_service_url, baseline, debug, stream): help="TCP port to bind the server to. Defaults to 8000") @click.option("--reload/--no-reload", default=False, help="Set the server to reload on code changes. Defaults to False") -@click.option("--config", "-c", default=None, - help="Path to a resin config file. Optional, otherwise configuration " - "defaults will be used.") +@click.option("--config", "-c", default="config.yaml", + help="Name of the resin config file. The file needs to be in the" + "`config/` directory. Defaults to `config.yaml`") def start(host, port, reload, config): note_msg = ( "🚨 Note 🚨\n" @@ -434,7 +473,8 @@ def start(host, port, reload, config): click.echo() click.echo(f"Starting Resin service on {host}:{port}") - start_service(host, port=port, reload=reload, config_file=config) + config_file = os.path.join("config", config) + start_service(host, port=port, reload=reload, config_file=config_file) @cli.command( From 1c48cc71f5998e528a61e132c95fd53b39f3703a Mon Sep 17 00:00:00 2001 From: ilai Date: Mon, 23 Oct 2023 13:59:32 +0300 Subject: [PATCH 05/26] [cli] Load dotenv from root dir No reason to assume the env file is in the cli dir --- src/resin_cli/cli.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index 232d66a1..f124c34b 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -33,8 +33,7 @@ from .api_models import ChatDebugInfo -dotenv_path = os.path.join(os.path.dirname(__file__), ".env") -load_dotenv(dotenv_path) +load_dotenv() if os.getenv("OPENAI_API_KEY"): openai.api_key = os.getenv("OPENAI_API_KEY") From bdccd2e053758fa938cd82b003232fba0444d21c Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 15:36:50 +0300 Subject: [PATCH 06/26] initial workflow --- .github/workflows/pre-release-CI.yml | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 .github/workflows/pre-release-CI.yml diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml new file mode 100644 index 00000000..e33bd2c1 --- /dev/null +++ b/.github/workflows/pre-release-CI.yml @@ -0,0 +1,47 @@ +name: Pre Release CI + +on: + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + build-and-test: + name: Build & Test on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: 3.9 + + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + version: 1.3.2 + + - name: Build wheel + run: | + poetry build + + - name: Install the wheel + run: | + pip install dist/*.whl + + - name: Run tests + run: pytest --html=report.html --self-contained-html tests/unit + + - name: Upload pytest reports + if: always() + uses: actions/upload-artifact@v3 + with: + name: pytest-report-${{ matrix.os }}-py${{ matrix.python-version }} + path: .pytest_cache From a84c90edd5376cd8931ced5f757eea286ef108f4 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 15:42:28 +0300 Subject: [PATCH 07/26] run workflow on current branch for testing --- .github/workflows/pre-release-CI.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index e33bd2c1..e74b2889 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -1,7 +1,9 @@ name: Pre Release CI on: - workflow_dispatch: + push: + branches: + - "pre-release-CI" concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 824e729e4cb21deb4a18af8a4d13d25b16a4ca46 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 15:49:54 +0300 Subject: [PATCH 08/26] install pytest --- .github/workflows/pre-release-CI.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index e74b2889..c53d97e3 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -38,6 +38,10 @@ jobs: run: | pip install dist/*.whl + - name: Install pytest + run: | + poetry install pytest + - name: Run tests run: pytest --html=report.html --self-contained-html tests/unit From 064de2f272358a579eb29f3457e4208d1b9d5a4c Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 15:55:01 +0300 Subject: [PATCH 09/26] Install tests dependencies --- .github/workflows/pre-release-CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index c53d97e3..6b69b2a4 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -38,9 +38,9 @@ jobs: run: | pip install dist/*.whl - - name: Install pytest + - name: Install tests dependencies run: | - poetry install pytest + pip install pytest pytest-html asyncio pytest-asyncio pytest-xdist - name: Run tests run: pytest --html=report.html --self-contained-html tests/unit From 891d90a7f495a9f282cf57b1bc56c9c4c34088da Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 15:58:52 +0300 Subject: [PATCH 10/26] temp disable in progress cancelling --- .github/workflows/pre-release-CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 6b69b2a4..25adf702 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -7,7 +7,7 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: true + cancel-in-progress: false jobs: build-and-test: @@ -40,7 +40,7 @@ jobs: - name: Install tests dependencies run: | - pip install pytest pytest-html asyncio pytest-asyncio pytest-xdist + pip install pytest pytest-html asyncio pytest-asyncio pytest-xdist pytest-mock - name: Run tests run: pytest --html=report.html --self-contained-html tests/unit From 39722d175c53babffc16caa1321aa3e355428619 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:02:18 +0300 Subject: [PATCH 11/26] temp remove windows --- .github/workflows/pre-release-CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 25adf702..79207ea1 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -15,7 +15,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, macos-latest] steps: - uses: actions/checkout@v3 From 6eca830622d80d27529181388e7901152935a8d9 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:06:31 +0300 Subject: [PATCH 12/26] fix windows --- .github/workflows/pre-release-CI.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 79207ea1..c0b48598 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -15,7 +15,10 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest, windows-latest, macos-latest] + defaults: + run: + shell: bash steps: - uses: actions/checkout@v3 From fe4ff0084234a9aaaa52e7d9d420ebf67b357f31 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:20:16 +0300 Subject: [PATCH 13/26] revert dispatch --- .github/workflows/pre-release-CI.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index c0b48598..77faedbb 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -1,9 +1,7 @@ name: Pre Release CI on: - push: - branches: - - "pre-release-CI" + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 92ae5d2daaa8cdff8a39e0811018767ecf4f1f3d Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:21:38 +0300 Subject: [PATCH 14/26] enable cancel in progress --- .github/workflows/pre-release-CI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 77faedbb..7ef338da 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -5,7 +5,7 @@ on: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} - cancel-in-progress: false + cancel-in-progress: true jobs: build-and-test: From a886785c71a37e7592bb8b617f86d5e87c836096 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:45:08 +0300 Subject: [PATCH 15/26] fix mypy issue --- src/resin_cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index bbe77ea7..e8d4bc5a 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -51,7 +51,7 @@ def check_service_health(url: str): raise CLIError(msg) except requests.exceptions.HTTPError as e: - error = e.response.json().get("detail", None) or e.response.text + error = e.response.json().get("detail", "") or e.response.text msg = ( f"Resin service on {url} is not healthy, failed with error: {error}" ) From 40a3efdb325c5debac4a5ccb0468fc647efca3c6 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 16:54:59 +0300 Subject: [PATCH 16/26] resolve mypy --- src/resin_cli/cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index e8d4bc5a..aba635b7 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -51,7 +51,10 @@ def check_service_health(url: str): raise CLIError(msg) except requests.exceptions.HTTPError as e: - error = e.response.json().get("detail", "") or e.response.text + if e.response is not None: + error = e.response.json().get("detail", "") or e.response.text + else: + error = str(e) msg = ( f"Resin service on {url} is not healthy, failed with error: {error}" ) From ffa46664b658b403dc077f594405d07c062b7cb6 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Mon, 23 Oct 2023 18:20:09 +0300 Subject: [PATCH 17/26] run on several python versions --- .github/workflows/pre-release-CI.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 7ef338da..d272b94a 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -14,6 +14,7 @@ jobs: strategy: matrix: os: [ubuntu-latest, windows-latest, macos-latest] + python-version: [3.9, '3.10', 3.11] defaults: run: shell: bash @@ -24,7 +25,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: ${{ matrix.python-version }} - name: Install Poetry uses: snok/install-poetry@v1 @@ -52,3 +53,4 @@ jobs: with: name: pytest-report-${{ matrix.os }}-py${{ matrix.python-version }} path: .pytest_cache + From f354bc2ee8784cb8db9dd1cc8e0a04952d1a1681 Mon Sep 17 00:00:00 2001 From: ilai Date: Tue, 24 Oct 2023 09:50:44 +0300 Subject: [PATCH 18/26] [cli] Make config optional Don't assume anything about the presence of a config file. The default of all commands would be config=None, which would load the hard-coded defaults --- src/resin_cli/cli.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index f124c34b..6856c57f 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -1,7 +1,7 @@ import os import signal import subprocess -from typing import Dict, Any +from typing import Dict, Any, Optional import click import time @@ -98,7 +98,10 @@ def _initialize_tokenizer(): raise CLIError(msg) -def _load_kb_config(config_file: str) -> Dict[str, Any]: +def _load_kb_config(config_file: Optional[str]) -> Dict[str, Any]: + if config_file is None: + return {} + try: with open(os.path.join("config", config_file), 'r') as f: config = yaml.safe_load(f) @@ -161,10 +164,10 @@ def health(url): ) ) @click.argument("index-name", nargs=1, envvar="INDEX_NAME", type=str, required=True) -@click.option("--config", "-c", default="config.yaml", - help="Name of the resin config file. The file needs to be in the" - "`config/` directory. Defaults to `config.yaml`") -def new(index_name, config): +@click.option("--config", "-c", default=None, + help="Path to a resin config file. Optional, otherwise configuration " + "defaults will be used.") +def new(index_name: str, config: Optional[str]): _initialize_tokenizer() kb_config = _load_kb_config(config) kb = KnowledgeBase.from_config(kb_config, index_name=index_name) @@ -204,14 +207,14 @@ def new(index_name, config): help="Number of documents to upload in each batch. Defaults to 10.") @click.option("--stop-on-error/--no-stop-on-error", default=True, help="Whether to stop when the first error arises. Defaults to True.") -@click.option("--config", "-c", default="config.yaml", - help="Name of the resin config file. The file needs to be in the" - "`config/` directory. Defaults to `config.yaml`") +@click.option("--config", "-c", default=None, + help="Path to a resin config file. Optional, otherwise configuration " + "defaults will be used.") def upsert(index_name: str, data_path: str, batch_size: int, stop_on_error: bool, - config: str): + config: Optional[str]): if index_name is None: msg = ( "No index name provided. Please set --index-name or INDEX_NAME environment " @@ -456,10 +459,10 @@ def chat(chat_service_url, baseline, debug, stream): help="TCP port to bind the server to. Defaults to 8000") @click.option("--reload/--no-reload", default=False, help="Set the server to reload on code changes. Defaults to False") -@click.option("--config", "-c", default="config.yaml", - help="Name of the resin config file. The file needs to be in the" - "`config/` directory. Defaults to `config.yaml`") -def start(host, port, reload, config): +@click.option("--config", "-c", default=None, + help="Path to a resin config file. Optional, otherwise configuration " + "defaults will be used.") +def start(host: str, port: str, reload: bool, config: Optional[str]): note_msg = ( "🚨 Note 🚨\n" "For debugging only. To run the Resin service in production, run the command:\n" @@ -472,8 +475,7 @@ def start(host, port, reload, config): click.echo() click.echo(f"Starting Resin service on {host}:{port}") - config_file = os.path.join("config", config) - start_service(host, port=port, reload=reload, config_file=config_file) + start_service(host, port=port, reload=reload, config_file=config) @cli.command( From 14ab3078bdd035151fe9ce58a0ce6969bb101ec5 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 11:57:36 +0300 Subject: [PATCH 19/26] specify wheel and use poetry install for dev dependencies --- .github/workflows/pre-release-CI.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index d272b94a..bc3c680e 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -1,7 +1,9 @@ name: Pre Release CI on: - workflow_dispatch: + push: + branches: + - pre-release-CI concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} @@ -38,11 +40,11 @@ jobs: - name: Install the wheel run: | - pip install dist/*.whl + pip install dist/pinecone_resin*.whl - name: Install tests dependencies run: | - pip install pytest pytest-html asyncio pytest-asyncio pytest-xdist pytest-mock + poetry install --no-root --only dev - name: Run tests run: pytest --html=report.html --self-contained-html tests/unit From 9d20f6668c3a2834fb2f6708ee057ee898cd14c1 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 12:08:58 +0300 Subject: [PATCH 20/26] use pip to install dev req --- .github/workflows/pre-release-CI.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index bc3c680e..a406e3fc 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -42,9 +42,13 @@ jobs: run: | pip install dist/pinecone_resin*.whl - - name: Install tests dependencies + - name: Create dev requirements file run: | - poetry install --no-root --only dev + poetry export -f requirements.txt --without-hashes --only dev -o only-dev.txt + + - name: Install dev requirements + run: | + pip install -r only-dev.txt - name: Run tests run: pytest --html=report.html --self-contained-html tests/unit From c085a8552ca1799b9707f21b81206e8a94d90b63 Mon Sep 17 00:00:00 2001 From: ilai Date: Tue, 24 Oct 2023 12:10:59 +0300 Subject: [PATCH 21/26] make linter happy --- src/resin_cli/app.py | 7 ++++--- src/resin_cli/cli.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/resin_cli/app.py b/src/resin_cli/app.py index 33485e25..765e3070 100644 --- a/src/resin_cli/app.py +++ b/src/resin_cli/app.py @@ -12,7 +12,7 @@ from resin.llm import BaseLLM from resin.llm.models import UserMessage -from resin.tokenizer import OpenAITokenizer, Tokenizer +from resin.tokenizer import Tokenizer from resin.knowledge_base import KnowledgeBase from resin.context_engine import ContextEngine from resin.chat_engine import ChatEngine @@ -212,7 +212,8 @@ def _init_engines(): _load_config(config_file) else: - logger.info("Did not find config file. Initializing engines with default config") + logger.info("Did not find config file. Initializing engines with default " + "configuration") Tokenizer.initialize() kb = KnowledgeBase(index_name=index_name) context_engine = ContextEngine(knowledge_base=kb) @@ -223,7 +224,7 @@ def _init_engines(): def _load_config(config_file): - global chat_engine, llm, context_engine, kb + global chat_engine, llm, context_engine, kb, logger logger.info(f"Initializing engines with config file {config_file}") try: with open(config_file, "r") as f: diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index f3fad340..bf23a846 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -119,7 +119,7 @@ def _load_kb_config(config_file: Optional[str]) -> Dict[str, Any]: kb_config = None if kb_config is None: - msg = (f"Did not find a `knowledge_base` configuration in {config_file}", + msg = (f"Did not find a `knowledge_base` configuration in {config_file}, " "Would you like to use the default configuration?") click.confirm(click.style(msg, fg="red"), abort=True) kb_config = {} From e991fc7437a1dc99a41bdfb2ab5083c62c8296a8 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 12:18:25 +0300 Subject: [PATCH 22/26] dispatch manually --- .github/workflows/pre-release-CI.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index a406e3fc..9a09d046 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -1,9 +1,7 @@ name: Pre Release CI on: - push: - branches: - - pre-release-CI + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} From 621dc0302905cb17f690fa89798a7b0a74636a5e Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 12:21:19 +0300 Subject: [PATCH 23/26] add py version to run name --- .github/workflows/pre-release-CI.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-release-CI.yml b/.github/workflows/pre-release-CI.yml index 9a09d046..50df3692 100644 --- a/.github/workflows/pre-release-CI.yml +++ b/.github/workflows/pre-release-CI.yml @@ -7,9 +7,9 @@ concurrency: group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: true -jobs: +jobs:git build-and-test: - name: Build & Test on ${{ matrix.os }} + name: Build & Test on ${{ matrix.os }}-py${{ matrix.python-version }} runs-on: ${{ matrix.os }} strategy: matrix: From ffa126009e173a433eb6e851c20a13949abb3889 Mon Sep 17 00:00:00 2001 From: Amnon Catav Date: Tue, 24 Oct 2023 12:34:28 +0300 Subject: [PATCH 24/26] Update src/resin_cli/cli.py Co-authored-by: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> --- src/resin_cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index aba635b7..7a87f78a 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -52,7 +52,7 @@ def check_service_health(url: str): except requests.exceptions.HTTPError as e: if e.response is not None: - error = e.response.json().get("detail", "") or e.response.text + error = e.response.json().get("detail", None) or e.response.text else: error = str(e) msg = ( From 5ab02af6e8c21fcfb9dac8e4b439054cf70d8c2a Mon Sep 17 00:00:00 2001 From: ilai Date: Tue, 24 Oct 2023 12:55:27 +0300 Subject: [PATCH 25/26] [pyproj] Added Yaml-types So that mypy will be happy... --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 5139002f..069889a5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ tenacity = "^8.2.1" sse-starlette = "^1.6.5" types-tqdm = "^4.61.0" gunicorn = "^21.2.0" +types-pyyaml = "^6.0.12.12" [tool.poetry.group.dev.dependencies] From 31600d374cd55cecea8c9ab26d4d467205e41d4c Mon Sep 17 00:00:00 2001 From: ilai Date: Tue, 24 Oct 2023 14:55:10 +0300 Subject: [PATCH 26/26] [cli] Bug fix - don't send the entire data in every batch Totally stpupid bug due to wrong conflict resolving... --- src/resin_cli/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/resin_cli/cli.py b/src/resin_cli/cli.py index 5a8b208b..01f75599 100644 --- a/src/resin_cli/cli.py +++ b/src/resin_cli/cli.py @@ -239,7 +239,7 @@ def upsert(index_name: str, data_path: str, batch_size: int, allow_failures: boo for i in range(0, len(data), batch_size): batch = data[i:i + batch_size] try: - kb.upsert(data) + kb.upsert(batch) except Exception as e: if allow_failures and len(failed_docs) < len(data) // 10: failed_docs.extend([_.id for _ in batch])