perf: Improve initial loading #38

Merged: 4 commits, Jun 8, 2024

Changes from all commits
.github/workflows/lightsail-mgmt.yml (1 addition, 1 deletion)

@@ -46,7 +46,7 @@ on:
           - small
           - medium
           - large
-          # - xlarge
+          - xlarge

 permissions:
   id-token: write # This is required for requesting the JWT from GitHub's OIDC provider for AWS authentication
05-assistive-chatbot/.dockerignore (3 additions, 1 deletion)

@@ -1,9 +1,11 @@
 __pycache__
-chroma_db/
 *cache/
 *.log
 log/

+# Allow the database to be copied to save time building the Docker image locally
+# chroma_db/
+
 *.DS_STORE

 .git
05-assistive-chatbot/.env-DEV (3 additions, 0 deletions)

@@ -1,3 +1,6 @@
+# These are the default environment variable values for local development for all developers.
+# To override, create a personal .env file.
+
 CHATBOT_LOG_LEVEL='WARN'

 ENABLE_CHATBOT_API=False
05-assistive-chatbot/.env-PROD (7 additions, 0 deletions)

@@ -1,7 +1,14 @@
+# Change to INFO once deployments are consistently successful
 CHATBOT_LOG_LEVEL='DEBUG'

+# Needed for healthcheck and to dynamically set environment variables
 ENABLE_CHATBOT_API=True

+# Only load the modules that are needed, for faster startup and lower resource usage
+ENGINE_MODULES="v2_household_engine"
+LLM_MODULES="openai_client"
+
+# Default Chainlit settings for user testing
 CHAT_ENGINE='Summaries'
 LLM_MODEL_NAME='openai :: gpt-3.5-turbo-instruct'
 RETRIEVE_K=4
05-assistive-chatbot/Dockerfile (3 additions, 1 deletion)

@@ -32,7 +32,9 @@ ENV PYTHONUNBUFFERED=1
 # To prevent files from being copied into the image, update .dockerignore
 COPY --chown=tron . .

-RUN ./ingest-guru-cards.py
+# Only run the ingest script when the chroma_db directory does not exist.
+# This saves time when building the image locally.
+RUN [ -d "chroma_db" ] || ./ingest-guru-cards.py

 EXPOSE 8000
 HEALTHCHECK CMD curl http://localhost:8000 || exit 1
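Together with the .dockerignore change above, this lets a locally pre-built chroma_db/ be copied into the build context, so the image build skips ingestion entirely. The same guard could equivalently live inside the ingest script itself; a minimal sketch in Python (this is not the actual contents of ingest-guru-cards.py):

    import os
    import sys

    # Skip re-ingesting if a Chroma database already exists (e.g., copied into the image).
    if os.path.isdir("chroma_db"):
        print("chroma_db/ already exists; skipping ingestion")
        sys.exit(0)

    # ... the expensive ingestion work would follow here ...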
05-assistive-chatbot/chatbot-chainlit.py (1 addition, 0 deletions)

@@ -30,6 +30,7 @@

 @cl.on_chat_start
 async def init_chat():
+    logger.debug("init_chat")
     git_sha = os.environ.get("GIT_SHA", "")
     build_date = os.environ.get("BUILD_DATE", "unknown")
     metadata = {
05-assistive-chatbot/chatbot/__init__.py (6 additions, 0 deletions)

@@ -47,6 +47,12 @@ def configure_logging():
 logger = logging.getLogger(__name__)
 logger.info("Build date: %s", os.environ.get("BUILD_DATE"))

+if env == "PROD":
+    # https://www.uvicorn.org/settings/#production
+    # https://sentry.io/answers/number-of-uvicorn-workers-needed-in-production/
+    # Too many workers will use more resources, which slows down all operations
+    os.environ.setdefault("WEB_CONCURRENCY", "2")
+

 ## Initialize settings
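Since os.environ.setdefault writes only when the variable is unset, an operator-supplied WEB_CONCURRENCY still wins, and uvicorn reads that variable to size its worker pool. A quick illustration of the override semantics:

    import os

    os.environ["WEB_CONCURRENCY"] = "4"            # set by the operator at deploy time
    os.environ.setdefault("WEB_CONCURRENCY", "2")  # no-op: the value is already set
    print(os.environ["WEB_CONCURRENCY"])           # prints 4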
05-assistive-chatbot/chatbot/engines/__init__.py (8 additions, 2 deletions)

@@ -1,4 +1,6 @@
+import importlib
 import logging
+import os
 from types import ModuleType
 from typing import Dict

@@ -19,9 +21,13 @@ def _discover_chat_engines(force=False):
     if force:
         _engines.clear()
     if not _engines:
+        ENGINE_MODULES = os.environ.get("ENGINE_MODULES", "").split(",")
+        engine_modules = {name: importlib.import_module(f"chatbot.engines.{name}") for name in ENGINE_MODULES if name}
+        if not engine_modules:
+            engine_modules = utils.scan_modules(__package__)
+
         settings = chatbot.initial_settings
-        found_llm_modules = utils.scan_modules(__package__)
-        for module_name, module in found_llm_modules.items():
+        for module_name, module in engine_modules.items():
             if not hasattr(module, "ENGINE_NAME"):
                 logger.debug("Skipping module without an ENGINE_NAME: %s", module_name)
                 continue
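The pattern here (mirrored in chatbot/llms/__init__.py below): read a comma-separated allowlist from the environment, import only those modules, and fall back to scanning the whole package when the variable is unset, so development setups still discover everything. A self-contained sketch of the idea, with hypothetical names:

    import importlib
    import os
    from types import ModuleType
    from typing import Dict

    def load_allowed_modules(package: str, env_var: str) -> Dict[str, ModuleType]:
        """Import only the modules named in env_var (comma-separated)."""
        names = [n for n in os.environ.get(env_var, "").split(",") if n]
        return {name: importlib.import_module(f"{package}.{name}") for name in names}

    # PROD: ENGINE_MODULES="v2_household_engine" imports a single module at startup.
    # DEV: with ENGINE_MODULES unset this returns {}, and the caller falls back to
    # scanning the entire package (utils.scan_modules in this repo).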
05-assistive-chatbot/chatbot/llms/__init__.py (8 additions, 2 deletions)

@@ -1,4 +1,6 @@
+import importlib
 import logging
+import os
 from types import ModuleType
 from typing import Dict, Tuple

@@ -19,9 +21,13 @@ def _discover_llms(force=False):
     if force:
         _llms.clear()
     if not _llms:
+        LLM_MODULES = os.environ.get("LLM_MODULES", "").split(",")
+        llm_modules = {name: importlib.import_module(f"chatbot.llms.{name}") for name in LLM_MODULES if name}
+        if not llm_modules:
+            llm_modules = utils.scan_modules(__package__)
+
         settings = chatbot.initial_settings
-        found_modules = utils.scan_modules(__package__)
-        for module_name, module in found_modules.items():
+        for module_name, module in llm_modules.items():
             if not module or ignore(module_name):
                 logger.debug("Skipping module: %s", module_name)
                 continue
05-assistive-chatbot/chatbot_api.py (2 additions, 2 deletions)

@@ -25,12 +25,12 @@
     app = FastAPI()
 else:
     # Otherwise use Chainlit's app
+    # See https://docs.chainlit.io/deploy/api#how-it-works
     from chainlit.server import app

 logger = logging.getLogger(f"chatbot.{__name__}")


-# TODO Ensure this is thread safe when run via chainlit. Check if the chainlit command might handle threading/multiple requests for us.
 class ApiState:
     @cached_property
     def chat_engine(self):

@@ -45,7 +45,7 @@ def chat_engine(self):
 app_state = ApiState()


-# See https://docs.chainlit.io/deploy/api#how-it-works
+# This function cannot be async because it uses a single non-thread-safe app_state
 @app.post("/query")
 def query(message: str | Dict):
     response = app_state.chat_engine().gen_response(message)
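The ApiState/cached_property pairing is what moves expensive engine construction out of import time: nothing is built until the first /query arrives, and later requests reuse the cached instance. A minimal sketch of the lazy-initialization pattern (create_engine is a hypothetical stand-in for this repo's factory):

    from functools import cached_property

    def create_engine():
        print("building engine (slow, runs once)")
        return object()

    class ApiState:
        @cached_property
        def chat_engine(self):
            return create_engine()  # evaluated on first access, then cached

    state = ApiState()
    state.chat_engine  # builds the engine
    state.chat_engine  # cached; no rebuild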