diff --git a/.github/workflows/lightsail-mgmt.yml b/.github/workflows/lightsail-mgmt.yml index 012d51f..a6d2d93 100644 --- a/.github/workflows/lightsail-mgmt.yml +++ b/.github/workflows/lightsail-mgmt.yml @@ -46,7 +46,7 @@ on: - small - medium - large - # - xlarge + - xlarge permissions: id-token: write # This is required for requesting the JWT from GitHub's OIDC provider for AWS authentication diff --git a/05-assistive-chatbot/.dockerignore b/05-assistive-chatbot/.dockerignore index 1d30eca..ed3080e 100644 --- a/05-assistive-chatbot/.dockerignore +++ b/05-assistive-chatbot/.dockerignore @@ -1,9 +1,11 @@ __pycache__ -chroma_db/ *cache/ *.log log/ +# Allow the database to be copied to save time building the Docker image locally +# chroma_db/ + *.DS_STORE .git diff --git a/05-assistive-chatbot/.env-DEV b/05-assistive-chatbot/.env-DEV index 1d059ea..68fc309 100644 --- a/05-assistive-chatbot/.env-DEV +++ b/05-assistive-chatbot/.env-DEV @@ -1,3 +1,6 @@ +# These are the default environment variable values for local development for all developers. +# To override, create a personal .env file. 
+ CHATBOT_LOG_LEVEL='WARN' ENABLE_CHATBOT_API=False diff --git a/05-assistive-chatbot/.env-PROD b/05-assistive-chatbot/.env-PROD index 1344fbf..0bf37ce 100644 --- a/05-assistive-chatbot/.env-PROD +++ b/05-assistive-chatbot/.env-PROD @@ -1,7 +1,14 @@ +# Change to INFO once deployments are consistently successful CHATBOT_LOG_LEVEL='DEBUG' +# Needed for healthcheck and to dynamically set environment variables ENABLE_CHATBOT_API=True +# Only load the modules that are needed for faster startup and lower resource usage +ENGINE_MODULES="v2_household_engine" +LLM_MODULES="openai_client" + +# Default Chainlit settings for user testing CHAT_ENGINE='Summaries' LLM_MODEL_NAME='openai :: gpt-3.5-turbo-instruct' RETRIEVE_K=4 diff --git a/05-assistive-chatbot/Dockerfile b/05-assistive-chatbot/Dockerfile index db9a5b5..9bd9889 100644 --- a/05-assistive-chatbot/Dockerfile +++ b/05-assistive-chatbot/Dockerfile @@ -32,7 +32,9 @@ ENV PYTHONUNBUFFERED=1 # To prevent files from being copied into the image, update .dockerignore COPY --chown=tron . . -RUN ./ingest-guru-cards.py +# Only run the ingest script when the chroma_db directory does not exist. +# This saves time when building the image locally. 
+RUN [ -d "chroma_db" ] || ./ingest-guru-cards.py EXPOSE 8000 HEALTHCHECK CMD curl http://localhost:8000 || exit 1 diff --git a/05-assistive-chatbot/chatbot-chainlit.py b/05-assistive-chatbot/chatbot-chainlit.py index 2f1545d..8cbfc52 100755 --- a/05-assistive-chatbot/chatbot-chainlit.py +++ b/05-assistive-chatbot/chatbot-chainlit.py @@ -30,6 +30,7 @@ @cl.on_chat_start async def init_chat(): + logger.debug("init_chat") git_sha = os.environ.get("GIT_SHA", "") build_date = os.environ.get("BUILD_DATE", "unknown") metadata = { diff --git a/05-assistive-chatbot/chatbot/__init__.py b/05-assistive-chatbot/chatbot/__init__.py index 983a8d8..fceff2e 100644 --- a/05-assistive-chatbot/chatbot/__init__.py +++ b/05-assistive-chatbot/chatbot/__init__.py @@ -47,6 +47,12 @@ def configure_logging(): logger = logging.getLogger(__name__) logger.info("Build date: %s", os.environ.get("BUILD_DATE")) +if env == "PROD": + # https://www.uvicorn.org/settings/#production + # https://sentry.io/answers/number-of-uvicorn-workers-needed-in-production/ + # Too many workers will use more resources, which slows down all operations + os.environ.setdefault("WEB_CONCURRENCY", "2") + ## Initialize settings diff --git a/05-assistive-chatbot/chatbot/engines/__init__.py b/05-assistive-chatbot/chatbot/engines/__init__.py index ecfb25b..db3aa8e 100644 --- a/05-assistive-chatbot/chatbot/engines/__init__.py +++ b/05-assistive-chatbot/chatbot/engines/__init__.py @@ -1,4 +1,6 @@ +import importlib import logging +import os from types import ModuleType from typing import Dict @@ -19,9 +21,13 @@ def _discover_chat_engines(force=False): if force: _engines.clear() if not _engines: + ENGINE_MODULES = os.environ.get("ENGINE_MODULES", "").split(",") + engine_modules = {name: importlib.import_module(f"chatbot.engines.{name}") for name in ENGINE_MODULES if name} + if not engine_modules: + engine_modules = utils.scan_modules(__package__) + settings = chatbot.initial_settings - found_llm_modules = 
utils.scan_modules(__package__) - for module_name, module in found_llm_modules.items(): + for module_name, module in engine_modules.items(): if not hasattr(module, "ENGINE_NAME"): logger.debug("Skipping module without an ENGINE_NAME: %s", module_name) continue diff --git a/05-assistive-chatbot/chatbot/llms/__init__.py b/05-assistive-chatbot/chatbot/llms/__init__.py index a0e4013..58ee646 100644 --- a/05-assistive-chatbot/chatbot/llms/__init__.py +++ b/05-assistive-chatbot/chatbot/llms/__init__.py @@ -1,4 +1,6 @@ +import importlib import logging +import os from types import ModuleType from typing import Dict, Tuple @@ -19,9 +21,13 @@ def _discover_llms(force=False): if force: _llms.clear() if not _llms: + LLM_MODULES = os.environ.get("LLM_MODULES", "").split(",") + llm_modules = {name: importlib.import_module(f"chatbot.llms.{name}") for name in LLM_MODULES if name} + if not llm_modules: + llm_modules = utils.scan_modules(__package__) + settings = chatbot.initial_settings - found_modules = utils.scan_modules(__package__) - for module_name, module in found_modules.items(): + for module_name, module in llm_modules.items(): if not module or ignore(module_name): logger.debug("Skipping module: %s", module_name) continue diff --git a/05-assistive-chatbot/chatbot_api.py b/05-assistive-chatbot/chatbot_api.py index 9d2be64..43e01d2 100755 --- a/05-assistive-chatbot/chatbot_api.py +++ b/05-assistive-chatbot/chatbot_api.py @@ -25,12 +25,12 @@ app = FastAPI() else: # Otherwise use Chainlit's app + # See https://docs.chainlit.io/deploy/api#how-it-works from chainlit.server import app logger = logging.getLogger(f"chatbot.{__name__}") -# TODO Ensure this is thread safe when run by via chalint. Check if the chainlit command might handle threading/multiple requests for us. 
class ApiState: @cached_property def chat_engine(self): @@ -45,7 +45,7 @@ def chat_engine(self): app_state = ApiState() -# See https://docs.chainlit.io/deploy/api#how-it-works +# This function cannot be async because it uses a single non-thread-safe app_state @app.post("/query") def query(message: str | Dict): response = app_state.chat_engine().gen_response(message)