Skip to content

Commit

Permalink
Build models and all downloads directly into docker containers
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 committed Apr 17, 2024
1 parent f5fcc15 commit df30014
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
6 changes: 6 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ RUN apt-get remove -y --allow-remove-essential perl-base xserver-common xvfb cma
rm -rf /var/lib/apt/lists/* && \
rm /usr/local/lib/python3.11/site-packages/tornado/test/test.key

# Pre-download the NLTK data packages at build time for setups with limited
# egress, so they do not have to be fetched when the container runs.
# raise_on_error=True makes a failed download raise an exception (failing the
# build) instead of only returning False, which this one-liner would discard.
RUN python -c "import nltk; \
nltk.download('stopwords', quiet=True, raise_on_error=True); \
nltk.download('wordnet', quiet=True, raise_on_error=True); \
nltk.download('punkt', quiet=True, raise_on_error=True);"

# Application files: make /app the working directory, then copy the danswer
# sources into it (the relative destination resolves to /app/danswer).
WORKDIR /app
COPY danswer ./danswer
Expand Down
10 changes: 10 additions & 0 deletions backend/Dockerfile.model_server
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ RUN pip install --no-cache-dir --upgrade -r /tmp/requirements.txt
# Remove perl-base (--allow-remove-essential lets apt remove a package it
# treats as essential), then drop packages that were only installed as
# dependencies and are no longer needed.
# NOTE(review): the motivation for removing perl-base is not visible here — confirm.
RUN apt-get remove --yes --allow-remove-essential perl-base \
    && apt-get autoremove --yes

# Pre-download the tokenizers and full repository snapshots of the models at
# build time for setups with limited egress, so they do not have to be fetched
# from the Hugging Face Hub when the container runs.
# Only the names the one-liner actually uses are imported.
RUN python -c "from transformers import AutoTokenizer; \
from huggingface_hub import snapshot_download; \
AutoTokenizer.from_pretrained('danswer/intent-model'); \
AutoTokenizer.from_pretrained('intfloat/e5-base-v2'); \
AutoTokenizer.from_pretrained('mixedbread-ai/mxbai-rerank-xsmall-v1'); \
snapshot_download('danswer/intent-model'); \
snapshot_download('intfloat/e5-base-v2'); \
snapshot_download('mixedbread-ai/mxbai-rerank-xsmall-v1')"

# Make /app the working directory: relative paths in the instructions that
# follow (COPY destinations, RUN commands) resolve against it.
WORKDIR /app

# Utils used by model server
Expand Down

0 comments on commit df30014

Please sign in to comment.