perf: Improve initial loading #38

Merged: 4 commits, Jun 8, 2024

Changes from all commits
.github/workflows/lightsail-mgmt.yml (1 addition, 1 deletion)

@@ -46,7 +46,7 @@ on:
           - small
           - medium
           - large
-          # - xlarge
+          - xlarge

 permissions:
   id-token: write # This is required for requesting the JWT from GitHub's OIDC provider for AWS authentication
05-assistive-chatbot/.dockerignore (3 additions, 1 deletion)

@@ -1,9 +1,11 @@
 __pycache__
-chroma_db/
 *cache/
 *.log
 log/

+# Allow the database to be copied to save time building the Docker image locally
+# chroma_db/
+
 *.DS_STORE

 .git
05-assistive-chatbot/.env-DEV (3 additions, 0 deletions)

@@ -1,3 +1,6 @@
+# These are the default environment variable values for local development for all developers.
+# To override, create a personal .env file.
+
 CHATBOT_LOG_LEVEL='WARN'

 ENABLE_CHATBOT_API=False
05-assistive-chatbot/.env-PROD (7 additions, 0 deletions)

@@ -1,7 +1,14 @@
+# Change to INFO once deployments are consistently successful
 CHATBOT_LOG_LEVEL='DEBUG'

+# Needed for healthcheck and to dynamically set environment variables
 ENABLE_CHATBOT_API=True

+# Only load the modules that are needed, for faster startup and lower resource usage
+ENGINE_MODULES="v2_household_engine"
+LLM_MODULES="openai_client"
+
+# Default Chainlit settings for user testing
 CHAT_ENGINE='Summaries'
 LLM_MODEL_NAME='openai :: gpt-3.5-turbo-instruct'
 RETRIEVE_K=4
05-assistive-chatbot/Dockerfile (3 additions, 1 deletion)

@@ -32,7 +32,9 @@ ENV PYTHONUNBUFFERED=1
 # To prevent files from being copied into the image, update .dockerignore
 COPY --chown=tron . .

-RUN ./ingest-guru-cards.py
+# Only run the ingest script when the chroma_db directory does not exist.
+# This saves time when building the image locally.
+RUN [ -d "chroma_db" ] || ./ingest-guru-cards.py

 EXPOSE 8000
 HEALTHCHECK CMD curl http://localhost:8000 || exit 1
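Together with the .dockerignore change above, this lets a locally pre-built chroma_db/ be copied into the build context, so the image build skips ingestion entirely. The same guard could equivalently live inside the ingest script itself; a minimal sketch in Python (this is not the actual contents of ingest-guru-cards.py):

    import os
    import sys

    # Skip re-ingesting if a Chroma database already exists (e.g., copied into the image).
    if os.path.isdir("chroma_db"):
        print("chroma_db/ already exists; skipping ingestion")
        sys.exit(0)

    # ... the expensive ingestion work would follow here ...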
05-assistive-chatbot/chatbot-chainlit.py (1 addition, 0 deletions)

@@ -30,6 +30,7 @@

 @cl.on_chat_start
 async def init_chat():
+    logger.debug("init_chat")
     git_sha = os.environ.get("GIT_SHA", "")
     build_date = os.environ.get("BUILD_DATE", "unknown")
     metadata = {
05-assistive-chatbot/chatbot/__init__.py (6 additions, 0 deletions)

@@ -47,6 +47,12 @@ def configure_logging():
 logger = logging.getLogger(__name__)
 logger.info("Build date: %s", os.environ.get("BUILD_DATE"))

+if env == "PROD":
+    # https://www.uvicorn.org/settings/#production
+    # https://sentry.io/answers/number-of-uvicorn-workers-needed-in-production/
+    # Too many workers will use more resources, which slows down all operations
+    os.environ.setdefault("WEB_CONCURRENCY", "2")
+

 ## Initialize settings
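Since os.environ.setdefault writes only when the variable is unset, an operator-supplied WEB_CONCURRENCY still wins, and uvicorn reads that variable to size its worker pool. A quick illustration of the override semantics:

    import os

    os.environ["WEB_CONCURRENCY"] = "4"            # set by the operator at deploy time
    os.environ.setdefault("WEB_CONCURRENCY", "2")  # no-op: the value is already set
    print(os.environ["WEB_CONCURRENCY"])           # prints 4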
05-assistive-chatbot/chatbot/engines/__init__.py (8 additions, 2 deletions)

@@ -1,4 +1,6 @@
+import importlib
 import logging
+import os
 from types import ModuleType
 from typing import Dict

@@ -19,9 +21,13 @@ def _discover_chat_engines(force=False):
     if force:
         _engines.clear()
     if not _engines:
+        ENGINE_MODULES = os.environ.get("ENGINE_MODULES", "").split(",")
+        engine_modules = {name: importlib.import_module(f"chatbot.engines.{name}") for name in ENGINE_MODULES if name}
+        if not engine_modules:
+            engine_modules = utils.scan_modules(__package__)
+
         settings = chatbot.initial_settings
-        found_llm_modules = utils.scan_modules(__package__)
-        for module_name, module in found_llm_modules.items():
+        for module_name, module in engine_modules.items():
             if not hasattr(module, "ENGINE_NAME"):
                 logger.debug("Skipping module without an ENGINE_NAME: %s", module_name)
                 continue
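The pattern here (mirrored in chatbot/llms/__init__.py below): read a comma-separated allowlist from the environment, import only those modules, and fall back to scanning the whole package when the variable is unset, so development setups still discover everything. A self-contained sketch of the idea, with hypothetical names:

    import importlib
    import os
    from types import ModuleType
    from typing import Dict

    def load_allowed_modules(package: str, env_var: str) -> Dict[str, ModuleType]:
        """Import only the modules named in env_var (comma-separated)."""
        names = [n for n in os.environ.get(env_var, "").split(",") if n]
        return {name: importlib.import_module(f"{package}.{name}") for name in names}

    # PROD: ENGINE_MODULES="v2_household_engine" imports a single module at startup.
    # DEV: with ENGINE_MODULES unset this returns {}, and the caller falls back to
    # scanning the entire package (utils.scan_modules in this repo).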
05-assistive-chatbot/chatbot/llms/__init__.py (8 additions, 2 deletions)

@@ -1,4 +1,6 @@
+import importlib
 import logging
+import os
 from types import ModuleType
 from typing import Dict, Tuple

@@ -19,9 +21,13 @@ def _discover_llms(force=False):
     if force:
         _llms.clear()
     if not _llms:
+        LLM_MODULES = os.environ.get("LLM_MODULES", "").split(",")
+        llm_modules = {name: importlib.import_module(f"chatbot.llms.{name}") for name in LLM_MODULES if name}
+        if not llm_modules:
+            llm_modules = utils.scan_modules(__package__)
+
         settings = chatbot.initial_settings
-        found_modules = utils.scan_modules(__package__)
-        for module_name, module in found_modules.items():
+        for module_name, module in llm_modules.items():
             if not module or ignore(module_name):
                 logger.debug("Skipping module: %s", module_name)
                 continue
05-assistive-chatbot/chatbot_api.py (2 additions, 2 deletions)

@@ -25,12 +25,12 @@
     app = FastAPI()
 else:
     # Otherwise use Chainlit's app
+    # See https://docs.chainlit.io/deploy/api#how-it-works
     from chainlit.server import app

 logger = logging.getLogger(f"chatbot.{__name__}")


-# TODO Ensure this is thread safe when run via chainlit. Check if the chainlit command might handle threading/multiple requests for us.
 class ApiState:
     @cached_property
     def chat_engine(self):

@@ -45,7 +45,7 @@ def chat_engine(self):
 app_state = ApiState()


-# See https://docs.chainlit.io/deploy/api#how-it-works
+# This function cannot be async because it uses a single non-thread-safe app_state
 @app.post("/query")
 def query(message: str | Dict):
     response = app_state.chat_engine().gen_response(message)
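The ApiState/cached_property pairing is what moves expensive engine construction out of import time: nothing is built until the first /query arrives, and later requests reuse the cached instance. A minimal sketch of the lazy-initialization pattern (create_engine is a hypothetical stand-in for this repo's factory):

    from functools import cached_property

    def create_engine():
        print("building engine (slow, runs once)")
        return object()

    class ApiState:
        @cached_property
        def chat_engine(self):
            return create_engine()  # evaluated on first access, then cached

    state = ApiState()
    state.chat_engine  # builds the engine
    state.chat_engine  # cached; no rebuild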