Skip to content

Commit

Permalink
install postgresql in the docker container (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
kyteinsky authored Oct 31, 2024
2 parents 4a2e505 + 024589d commit 3b8d459
Show file tree
Hide file tree
Showing 14 changed files with 184 additions and 43 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ jobs:
fail-fast: false
matrix:
php-versions: [ '8.1' ]
databases: [ 'sqlite' ]
databases: [ 'pgsql' ]
server-versions: [ 'master' ]

name: Integration test on ${{ matrix.server-versions }} php@${{ matrix.php-versions }}

env:
MYSQL_PORT: 4444
PGSQL_PORT: 4445
# use the same db for ccb and nextcloud
CCB_DB_URL: postgresql+psycopg://root:rootpassword@localhost:4445/nextcloud

services:
mysql:
Expand All @@ -59,7 +61,7 @@ jobs:
MYSQL_ROOT_PASSWORD: rootpassword
options: --health-cmd="mysqladmin ping" --health-interval 5s --health-timeout 2s --health-retries 5
postgres:
image: postgres
image: pgvector/pgvector:pg17
ports:
- 4445:5432/tcp
env:
Expand Down
41 changes: 25 additions & 16 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,19 +1,33 @@
FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive
ARG CCB_DB_NAME=ccb
ARG CCB_DB_USER=ccbuser
ARG CCB_DB_PASS=ccbpass

RUN apt-get update
RUN apt-get install -y software-properties-common
RUN add-apt-repository -y ppa:deadsnakes/ppa
RUN apt-get update
RUN apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git pciutils
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
RUN apt-get -y clean
RUN rm -rf /var/lib/apt/lists/*
# Runtime environment for the container.
# The CCB_DB_* values come from the build args of the same name and are read
# by dockerfile_scripts/pgsql/setup.sh when it creates the database and user.
# NOTE(review): CCB_DB_PASS travels through ARG/ENV and is therefore stored in
# the image metadata; override it at run time for anything but local use.
ENV CCB_DB_NAME=${CCB_DB_NAME}
ENV CCB_DB_USER=${CCB_DB_USER}
ENV CCB_DB_PASS=${CCB_DB_PASS}
# Keep apt non-interactive while packages are installed during the build.
ENV DEBIAN_FRONTEND=noninteractive
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute
ENV AA_DOCKER_ENV=1

# Set working directory
WORKDIR /app

# Install dependencies
# COPY is used instead of ADD: these are plain local files/directories, so
# none of ADD's extra behaviour (archive extraction, URL fetch) is wanted.
COPY dockerfile_scripts/install_deps.sh dockerfile_scripts/install_deps.sh
RUN ./dockerfile_scripts/install_deps.sh
COPY dockerfile_scripts/install_py11.sh dockerfile_scripts/install_py11.sh
RUN ./dockerfile_scripts/install_py11.sh
COPY dockerfile_scripts/pgsql dockerfile_scripts/pgsql
RUN ./dockerfile_scripts/pgsql/install.sh
RUN apt-get autoclean
COPY dockerfile_scripts/entrypoint.sh dockerfile_scripts/entrypoint.sh

# Restore interactivity
ENV DEBIAN_FRONTEND=dialog

# Copy requirements files
COPY requirements.txt .

Expand All @@ -23,15 +37,10 @@ RUN python3 -m pip install --no-cache-dir https://github.com/abetlen/llama-cpp-p
RUN sed -i '/llama_cpp_python/d' requirements.txt
RUN python3 -m pip install --no-cache-dir --no-deps -r requirements.txt

ENV NVIDIA_VISIBLE_DEVICES all
ENV NVIDIA_DRIVER_CAPABILITIES compute
ENV DEBIAN_FRONTEND dialog
ENV AA_DOCKER_ENV 1

# Copy application files
COPY context_chat_backend context_chat_backend
COPY main.py .
COPY config.?pu.yaml .
COPY hwdetect.sh .

ENTRYPOINT ["python3", "main.py"]
ENTRYPOINT [ "./dockerfile_scripts/entrypoint.sh" ]
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,11 @@ Install the given apps for Context Chat to work as desired **in the given order*
5. Copy example.env to .env and fill in the variables
6. Ensure the config file at `persistent_storage/config.yaml` points to the correct config file (cpu vs gpu). If you're unsure, delete it. It will be recreated upon launching the application. The default is to point to the gpu config.
7. Configure `persistent_storage/config.yaml` for the model name, model type and its parameters (which also includes model file's path and model id as per requirements, see example config)
8. `./main.py`
9. [Follow the below steps to register the app in the app ecosystem](#register-as-an-ex-app)
8. Set up PostgreSQL externally or use `dockerfile_scripts/pgsql/install.sh` to install it on a Debian-family system.
9. Set the env var `EXTERNAL_DB` or the `connection` key in the `pgvector` config to the postgresql connection string if you're using an external database.
10. Start the database (see `dockerfile_scripts/pgsql/setup.sh` for an example)
11. `./main.py`
12. [Follow the below steps to register the app in the app ecosystem](#register-as-an-ex-app)

## Complex Install (with docker)

Expand Down
6 changes: 3 additions & 3 deletions config.cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ model_download_uri: https://download.nextcloud.com/server/apps/context_chat_back


vectordb:
pgvector:
# 'connection' overrides the env var 'CCB_DB_URL'

chroma:
is_persistent: true
# chroma_server_host:
# chroma_server_http_port:
# chroma_server_ssl_enabled:
# chroma_server_api_default_path:

pgvector:
connection: postgresql+psycopg://ccbuser:ccbpass@localhost:5432/ccb

weaviate:
# auth_client_secret:
# url: http://localhost:8080
Expand Down
6 changes: 3 additions & 3 deletions config.gpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@ model_download_uri: https://download.nextcloud.com/server/apps/context_chat_back


vectordb:
pgvector:
# 'connection' overrides the env var 'CCB_DB_URL'

chroma:
is_persistent: true
# chroma_server_host:
# chroma_server_http_port:
# chroma_server_ssl_enabled:
# chroma_server_api_default_path:

pgvector:
connection: postgresql+psycopg://ccbuser:ccbpass@localhost:5432/ccb

weaviate:
# auth_client_secret:
# url: http://localhost:8080
Expand Down
11 changes: 8 additions & 3 deletions context_chat_backend/vectordb/pgvector.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from logging import error as log_error
from typing import Any

Expand All @@ -23,11 +24,15 @@ class VectorDB(BaseVectorDB):
def __init__(self, embedding: Embeddings | None = None, **kwargs):
    """
    Store the embedding model and the client options for pgvector.

    The connection string is taken from the 'connection' keyword argument
    when it is given and not None, otherwise from the env var 'CCB_DB_URL'.

    Args
    ----
    embedding: Embeddings | None
        Embedding model, required.
    **kwargs
        Client options; 'connection' (optional) is the connection string.

    Raises
    ------
    DbException
        If no embedding model is given, or if neither 'connection' nor
        the env var 'CCB_DB_URL' provides a connection string.
    """
    if not embedding:
        raise DbException('Error: embedding model not provided for pgvector')

    # Resolve lazily: the env var is only consulted when the config does not
    # supply a connection, so a missing CCB_DB_URL cannot raise KeyError when
    # 'connection' is configured.
    connection = kwargs.get('connection')
    if connection is None:
        connection = os.getenv('CCB_DB_URL')
    if connection is None:
        raise DbException(
            'Error: Either env var CCB_DB_URL or connection string in the config is required for pgvector'
        )

    self.embedding = embedding
    self.client_kwargs = kwargs
    self.client_kwargs['connection'] = str(connection)

def get_users(self) -> list[str]:
engine = sa.create_engine(self.client_kwargs['connection'])
Expand Down
9 changes: 9 additions & 0 deletions dockerfile_scripts/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
#
# Container entrypoint: runs pgsql/setup.sh (next to this script) and then
# starts main.py from the directory above this script.

set -e

# Resolve everything relative to this script; quoted so paths containing
# spaces do not get word-split.
SCRIPT_DIR="$(dirname "$(realpath "$0")")"

# Load variables persisted in /etc/environment before the setup runs ...
source /etc/environment
"${SCRIPT_DIR}/pgsql/setup.sh"
# ... and again afterwards to pick up the exports setup.sh appended.
source /etc/environment

# exec replaces this shell with python3 so the backend receives signals
# (e.g. SIGTERM on container stop) directly instead of via a wrapper shell.
exec python3 -u "$(dirname "${SCRIPT_DIR}")/main.py"
4 changes: 4 additions & 0 deletions dockerfile_scripts/install_deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
#
# Installs the base system tools: vim, git and pciutils.

# Stop at the first failing command so a failed `apt-get update` is not
# silently followed by an install against stale or missing package lists.
set -e

apt-get update
apt-get install -y --no-install-recommends vim git pciutils
8 changes: 8 additions & 0 deletions dockerfile_scripts/install_py11.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
#
# Installs Python 3.11 from the deadsnakes PPA and registers it as an
# alternative for /usr/bin/python3.

# Stop at the first failing command; without this only the exit status of the
# final update-alternatives call would decide whether the script "succeeded".
set -e

apt-get update
# Provides add-apt-repository
apt-get install -y software-properties-common
add-apt-repository -y ppa:deadsnakes/ppa
apt-get update
apt-get install -y --no-install-recommends python3.11 python3.11-venv python3-pip vim git pciutils
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
7 changes: 7 additions & 0 deletions dockerfile_scripts/pgsql/env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Shared PostgreSQL settings, sourced by the scripts in this directory.

# PostgreSQL major version and the location of its binaries
PG_VERSION=17
PG_BIN="/usr/lib/postgresql/${PG_VERSION}/bin"
PG_SQL="${PG_BIN}/psql"

# Data directory, rooted at APP_PERSISTENT_STORAGE (default: persistent_storage)
BASE_DIR="${APP_PERSISTENT_STORAGE:-persistent_storage}/vector_db_data"
DATA_DIR="${BASE_DIR}/pgsql"
24 changes: 24 additions & 0 deletions dockerfile_scripts/pgsql/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
#
# Installs PostgreSQL (version PG_VERSION from ./env) together with its
# pgvector package from the PostgreSQL APT repository. Nothing is installed
# from that repository when the PG_BIN directory already exists.

# Stolen from https://github.com/cloud-py-api/flow

# pipefail: a failed key download must fail the script instead of being
# masked by the exit status of gpg at the end of the pipe.
set -eo pipefail

# Environment variables
source "$(dirname "$(realpath "$0")")/env"

apt-get update
# gnupg provides the gpg binary used below; ca-certificates is needed for the
# HTTPS key download.
apt-get install -y ca-certificates curl gnupg sudo

# Check if PostgreSQL is installed by checking for the existence of binary files
if [ -d "$PG_BIN" ]; then
    echo "PostgreSQL binaries found."
else
    echo "PostgreSQL binaries not found."
    echo "Adding the PostgreSQL APT repository..."
    VERSION="$(awk -F'=' '/^VERSION_CODENAME=/{ print $NF }' /etc/os-release)"
    echo "deb http://apt.postgresql.org/pub/repos/apt ${VERSION}-pgdg main" >/etc/apt/sources.list.d/pgdg.list
    # -f makes curl fail on HTTP errors; --batch --yes lets gpg overwrite a key
    # file left behind by an earlier, interrupted run without prompting.
    curl -fsSL https://www.postgresql.org/media/keys/ACCC4CF8.asc \
        | gpg --batch --yes --dearmor --output /etc/apt/trusted.gpg.d/postgresql.gpg
    echo "Installing PostgreSQL..."
    apt-get update
    apt-get install -y "postgresql-${PG_VERSION}" "postgresql-${PG_VERSION}-pgvector"
fi
64 changes: 64 additions & 0 deletions dockerfile_scripts/pgsql/setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/bash
#
# Prepares the database for the backend and persists its URL as CCB_DB_URL in
# /etc/environment.
#
# - EXTERNAL_DB set: validate the URL, persist it and exit without starting a
#   local server.
# - otherwise: initialise (first run only) and start PostgreSQL in DATA_DIR,
#   then create the CCB_DB_USER role, the CCB_DB_NAME database and the vector
#   extension, unless CCB_DB_URL is already set.

# Stolen from https://github.com/cloud-py-api/flow

set -e

# Environment variables
source "$(dirname "$(realpath "$0")")/env"

# Check if EXTERNAL_DB is set
if [ -n "${EXTERNAL_DB}" ]; then
    CCB_DB_URL="${EXTERNAL_DB}"
    echo "Using EXTERNAL_DB, CCB_DB_URL is set to: $CCB_DB_URL"

    if [[ "$CCB_DB_URL" != "postgresql+psycopg://"* ]]; then
        echo "CCB_DB_URL must be a PostgreSQL URL and start with 'postgresql+psycopg://'"
        exit 1
    fi

    if ! grep -q "^export EXTERNAL_DB=" /etc/environment; then
        echo "export EXTERNAL_DB=\"$EXTERNAL_DB\"" >> /etc/environment
    fi
    # Persist the URL as CCB_DB_URL too: the assignment above is local to this
    # script, and CCB_DB_URL is the variable the pgvector client reads.
    # A previously persisted value is dropped first so the external DB wins.
    sed -i '/^export CCB_DB_URL=/d' /etc/environment
    echo "export CCB_DB_URL=\"$CCB_DB_URL\"" >> /etc/environment
    exit 0
fi

# Ensure the directory exists and has the correct permissions
mkdir -p "$DATA_DIR"
chown -R postgres:postgres "$DATA_DIR"

if [ ! -d "$DATA_DIR/base" ]; then
    echo "Initializing the PostgreSQL database..."
    sudo -u postgres "${PG_BIN}/initdb" -D "$DATA_DIR" -E UTF8
fi

echo "Starting PostgreSQL..."
sudo -u postgres "${PG_BIN}/pg_ctl" -D "$DATA_DIR" -l "${DATA_DIR}/logfile" start

echo "Waiting for PostgreSQL to start..."
until sudo -u postgres "${PG_SQL}" -c "SELECT 1" > /dev/null 2>&1; do
    sleep 1
    echo -n "."
done
echo "PostgreSQL is up and running."

if [ -n "${CCB_DB_URL}" ]; then
    echo "CCB_DB_URL is already set. Skipping database setup."
    exit 0
fi

# Fail with a clear message instead of sending malformed SQL to the server.
: "${CCB_DB_USER:?CCB_DB_USER must be set}"
: "${CCB_DB_PASS:?CCB_DB_PASS must be set}"
: "${CCB_DB_NAME:?CCB_DB_NAME must be set}"

# Check if the user exists and create if not.
# -tA prints the bare value, so the match is on the returned row rather than
# on psql's "(1 row)" footer. Plain if-statements are used instead of an
# `a || b && c` chain so that a failing CREATE aborts the script under set -e.
if ! sudo -u postgres "$PG_SQL" -tAc "SELECT 1 FROM pg_user WHERE usename = '$CCB_DB_USER'" | grep -qx 1; then
    sudo -u postgres "$PG_SQL" -c "CREATE USER $CCB_DB_USER WITH PASSWORD '$CCB_DB_PASS';"
fi
sudo -u postgres "$PG_SQL" -c "ALTER USER $CCB_DB_USER WITH SUPERUSER;"

# Check if the database exists and create if not
if ! sudo -u postgres "$PG_SQL" -tAc "SELECT 1 FROM pg_database WHERE datname = '$CCB_DB_NAME'" | grep -qx 1; then
    sudo -u postgres "$PG_SQL" -c "CREATE DATABASE $CCB_DB_NAME OWNER $CCB_DB_USER;"
fi

# Check or create the vector extension.
# Extensions are installed per database, so connect to the application
# database rather than the default one.
sudo -u postgres "$PG_SQL" -d "$CCB_DB_NAME" -c "CREATE EXTENSION IF NOT EXISTS vector"

if ! grep -q "^export CCB_DB_URL=" /etc/environment; then
    echo "export CCB_DB_URL=\"postgresql+psycopg://$CCB_DB_USER:$CCB_DB_PASS@localhost:5432/$CCB_DB_NAME\"" >> /etc/environment
fi
1 change: 1 addition & 0 deletions requirements.in.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ httpx
InstructorEmbedding
langchain
langchain-community
langchain-postgres
llama_cpp_python
msg-parser
odfdo
Expand Down
Loading

0 comments on commit 3b8d459

Please sign in to comment.