Update base
robballantyne committed Feb 11, 2024
1 parent 5ec4d5a commit 573bbd4
Showing 18 changed files with 154 additions and 151 deletions.
68 changes: 38 additions & 30 deletions .github/workflows/docker-build.yml
@@ -9,12 +9,12 @@ env:
UBUNTU_VERSION: 22.04
BUILDX_NO_DEFAULT_ATTESTATIONS: 1
# Conservative defaults for cloud providers
LATEST_CUDA: "pytorch-2.1.1-py3.10-cuda-11.8.0-base-22.04"
LATEST_CUDA_JUPYTER: "jupyter-pytorch-2.1.1-py3.10-cuda-11.8.0-base-22.04"
LATEST_ROCM: "pytorch-2.1.1-py3.10-rocm-5.6-runtime-22.04"
LATEST_ROCM_JUPYTER: "jupyter-pytorch-2.1.1-py3.10-rocm-5.6-runtime-22.04"
LATEST_CPU: "pytorch-2.1.1-py3.10-cpu-22.04"
LATEST_CPU_JUPYTER: "jupyter-pytorch-2.1.1-py3.10-cpu-22.04"
LATEST_CUDA: "pytorch-2.2.0-py3.10-cuda-11.8.0-runtime-22.04"
LATEST_CUDA_JUPYTER: "jupyter-pytorch-2.2.0-py3.10-cuda-11.8.0-runtime-22.04"
LATEST_ROCM: "pytorch-2.2.0-py3.10-rocm-5.7-runtime-22.04"
LATEST_ROCM_JUPYTER: "jupyter-pytorch-2.2.0-py3.10-rocm-5.7-runtime-22.04"
LATEST_CPU: "pytorch-2.2.0-py3.10-cpu-22.04"
LATEST_CPU_JUPYTER: "jupyter-pytorch-2.2.0-py3.10-cpu-22.04"

jobs:
cpu-base:
@@ -28,17 +28,20 @@ jobs:
python:
- "3.10"
pytorch:
- "2.0.1"
- "2.1.1"
- "2.1.2"
- "2.2.0"
steps:
-
name: Free Space
run: |
df -h
rm -rf /usr/share/dotnet
rm -rf /opt/ghc
rm -rf /usr/local/share/boost
rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/lib/android
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h
-
name: Env Setter
@@ -91,25 +94,25 @@ jobs:
python:
- "3.10"
pytorch:
- "2.0.1"
- "2.1.1"
- "2.1.2"
- "2.2.0"
cuda:
- "11.8.0"
- "12.1.0"
level:
- "base"
exclude:
- cuda: "12.1.0"
pytorch: "2.0.1"
- "runtime"
steps:
-
name: Free Space
run: |
df -h
rm -rf /usr/share/dotnet
rm -rf /opt/ghc
rm -rf /usr/local/share/boost
rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/lib/android
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h
-
name: Env Setter
@@ -162,28 +165,33 @@ jobs:
python:
- "3.10"
pytorch:
- "2.0.1"
- "2.1.1"
- "2.1.2"
- "2.2.0"
rocm:
- "5.4.2"
- "5.6"
- "5.7"
- "5.7"
level:
- "runtime"
exclude:
- rocm: "5.4.2"
pytorch: "2.1.1"
- rocm: "5.6"
pytorch: "2.0.1"
pytorch: "2.2.0"
- rocm: "5.7"
pytorch: "2.1.2"
- rocm: "5.7"
pytorch: "2.1.1"

steps:
-
name: Free Space
run: |
df -h
rm -rf /usr/share/dotnet
rm -rf /opt/ghc
rm -rf /usr/local/share/boost
rm -rf "$AGENT_TOOLSDIRECTORY"
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/.ghcup
sudo rm -rf /usr/local/share/boost
sudo rm -rf /usr/local/lib/android
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
df -h
-
name: Env Setter
84 changes: 34 additions & 50 deletions README.md
@@ -6,7 +6,7 @@ Run [ComfyUI](https://github.com/comfyanonymous/ComfyUI) in a docker container l

These container images are tested extensively at [Vast.ai](https://link.ai-dock.org/template-vast-comfyui-jupyter) & [Runpod.io](https://link.ai-dock.org/template-runpod-comfyui-jupyter) but compatibility with other GPU cloud services is expected.

>[!NOTE]
>[!NOTE]
>These images do not bundle models or third-party configurations. You should use a [provisioning script](#provisioning-script) to automatically configure your container. You can find examples in `config/provisioning`.
## Quick Start
@@ -64,31 +64,31 @@ Tags follow these patterns:
##### _CUDA_
- `:pytorch-[pytorch-version]-py[python-version]-cuda-[x.x.x]-base-[ubuntu-version]`

- `:latest-cuda` → `:pytorch-2.1.1-py3.10-cuda-11.8.0-base-22.04`
- `:latest-cuda` → `:pytorch-2.2.0-py3.10-cuda-11.8.0-base-22.04`

- `:latest-cuda-jupyter` → `:jupyter-pytorch-2.1.1-py3.10-cuda-11.8.0-base-22.04`
- `:latest-cuda-jupyter` → `:jupyter-pytorch-2.2.0-py3.10-cuda-11.8.0-base-22.04`

##### _ROCm_
- `:pytorch-[pytorch-version]-py[python-version]-rocm-[x.x.x]-runtime-[ubuntu-version]`

- `:latest-rocm` → `:pytorch-2.1.1-py3.10-rocm-5.6-runtime-22.04`
- `:latest-rocm` → `:pytorch-2.2.0-py3.10-rocm-5.7-runtime-22.04`

- `:latest-rocm-jupyter` → `:jupyter-pytorch-2.1.1-py3.10-rocm-5.6-runtime-22.04`
- `:latest-rocm-jupyter` → `:jupyter-pytorch-2.2.0-py3.10-rocm-5.7-runtime-22.04`

##### _CPU_
- `:pytorch-[pytorch-version]-py[python-version]-cpu-[ubuntu-version]`

- `:latest-cpu` → `:pytorch-2.1.1-py3.10-cpu-22.04`
- `:latest-cpu` → `:pytorch-2.2.0-py3.10-cpu-22.04`

- `:latest-cpu-jupyter` → `:jupyter-pytorch-2.1.1-py3.10-cpu-22.04`
- `:latest-cpu-jupyter` → `:jupyter-pytorch-2.2.0-py3.10-cpu-22.04`

Browse [here](https://github.com/ai-dock/comfyui/pkgs/container/comfyui) for an image suitable for your target environment.

You can also [build from source](#building-images) by editing `.env` and running `docker compose build`.

Supported Python versions: `3.11`, `3.10`

Supported Pytorch versions: `2.1.1`, `2.0.1`
Supported Pytorch versions: `2.2.0`, `2.1.2`, `2.1.1`

Supported Platforms: `NVIDIA CUDA`, `AMD ROCm`, `CPU`

@@ -177,7 +177,7 @@ You can use the included `cloudflared` service to make secure connections withou
| Variable | Description |
| ------------------------ | ----------- |
| `CF_TUNNEL_TOKEN` | Cloudflare zero trust tunnel token - See [documentation](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/). |
| `CF_QUICK_TUNNELS` | Create ephemeral Cloudflare tunnels for web services (default `false`) |
| `CF_QUICK_TUNNELS` | Create ephemeral Cloudflare tunnels for web services (default `true`) |
| `COMFYUI_BRANCH` | ComfyUI branch/commit hash. Defaults to `master` |
| `COMFYUI_FLAGS` | Startup flags. eg. `--gpu-only --highvram` |
| `COMFYUI_PORT` | ComfyUI interface port (default `8188`) |
@@ -188,12 +188,13 @@ You can use the included `cloudflared` service to make secure connections withou
| `RCLONE_*` | Rclone configuration - See [rclone documentation](https://rclone.org/docs/#config-file) |
| `SKIP_ACL` | Set `true` to skip modifying workspace ACL |
| `SSH_PORT_LOCAL` | Set a non-standard port for SSH (default `22`) |
| `SSH_PUBKEY` | Your public key for SSH |
| `USER_NAME` | System account username (default `user`)|
| `USER_PASSWORD` | System account password (default `password`)|
| `WEB_ENABLE_AUTH` | Enable password protection for web services (default `true`) |
| `WEB_USER` | Username for web services (default `user`) |
| `WEB_PASSWORD` | Password for web services (default `password`) |
| `WEB_PASSWORD` | Password for web services (default `auto generated`) |
| `WORKSPACE` | A volume path. Defaults to `/workspace/` |
| `WORKSPACE_SYNC` | Move mamba environments and services to workspace if mounted (default `true`) |
| `WORKSPACE_SYNC` | Move mamba environments and services to workspace if mounted (default `false`) |

Environment variables can be specified by using any of the standard methods (`docker-compose.yaml`, `docker run -e...`). Additionally, environment variables can also be passed as parameters of `init.sh`.
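
For example, a minimal `docker run` sketch combining a few of the variables from the table above (the image tag, port mapping, and values are illustrative rather than project defaults):

```bash
# A sketch only: pick an image tag from the list above and substitute
# values appropriate to your deployment.
docker run -d \
  --gpus all \
  -p 8188:8188 \
  -e WEB_USER="user" \
  -e WEB_PASSWORD="changeme" \
  -e COMFYUI_FLAGS="--highvram" \
  ghcr.io/ai-dock/comfyui:latest-cuda
```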

@@ -203,19 +204,32 @@ Example usage: `docker run -e STANDARD_VAR1="this value" -e STANDARD_VAR2="that

## Security

By default, all exposed web services other than the port redirect page are protected by HTTP basic authentication.
All ai-dock containers are interactive and will not drop root privileges. You should ensure that your docker daemon runs as an unprivileged user.

The default username is `user` and the password is `password`.
### System

You can set your credentials by passing environment variables as shown above.
A system user will be created at startup. The UID will be either 1000 or will match the UID of the `$WORKSPACE` bind mount.

The user will share the root user's ssh public key.

Some processes may start in the user context for convenience only.

### Web Services

By default, all exposed web services are protected by a single login form at `:1111/login`.

The password is stored as a bcrypt hash. If you prefer not to pass a plain text password to the container you can pre-hash and use the variable `WEB_PASSWORD_HASH`.
The default username is `user` and the password is auto generated unless you have passed a value in the environment variable `WEB_PASSWORD`. To find the auto-generated password and related tokens you should type `env | grep WEB_` from inside the container.

You can set your credentials by passing environment variables as shown above.

If you are running the image locally on a trusted network, you may disable authentication by setting the environment variable `WEB_ENABLE_AUTH=false`.

>[!NOTE]
>You can use `set-web-credentials.sh <username> <password>` change the username and password in a running container.
If you need to connect programmatically to the web services you can authenticate using either `Bearer $WEB_TOKEN` or `Basic $WEB_PASSWORD_B64`.

The security measures included aim to be as secure as basic authentication, i.e. not secure without HTTPS. Please use the provided cloudflare connections wherever possible.
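
As a rough sketch of programmatic access under those assumptions (the port and variable values are placeholders; take the real values from `env | grep WEB_` inside the container):

```bash
# Bearer-token authentication against a proxied service
# (replace the token and port with values from your container).
curl -H "Authorization: Bearer ${WEB_TOKEN}" "http://localhost:8188/"

# Or HTTP basic auth using the pre-encoded password variable.
curl -H "Authorization: Basic ${WEB_PASSWORD_B64}" "http://localhost:8188/"
```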

>[!NOTE]
>You can use `set-web-credentials.sh <username> <password>` to change the username and password in a running container.
## Provisioning script

@@ -228,8 +242,7 @@ The URL must point to a plain text file - GitHub Gists/Pastebin (raw) are suitab
If you are running locally you may instead opt to mount a script at `/opt/ai-dock/bin/provisioning.sh`.
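
For a local run, that mount might look like the following sketch (the local filename is arbitrary):

```bash
# Mount a local script over the path the container expects;
# it is picked up during provisioning at startup.
docker run -d \
  -v "$(pwd)/provisioning.sh:/opt/ai-dock/bin/provisioning.sh" \
  ghcr.io/ai-dock/comfyui:latest-cuda
```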

>[!NOTE]
>If configured, `sshd`, `caddy`, `cloudflared`, `rclone`, `serviceportal`, `storagemonitor` & `logtail` will be launched before provisioning; Any other processes will launch after.
>If configured, `sshd`, `caddy`, `cloudflared`, `serviceportal`, `storagemonitor` & `logtail` will be launched before provisioning; Any other processes will launch after.
>[!WARNING]
>Only use scripts that you trust and which cannot be changed without your consent.
@@ -283,8 +296,6 @@ As docker containers generally run as the root user, new files created in /works

To ensure that the files remain accessible to the local user that owns the directory, the docker entrypoint will set a default ACL on the directory by executing the command `setfacl -d -m u:${WORKSPACE_UID}:rwx /workspace`.
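
A quick way to check the result from inside the container (assuming the default workspace path and a workspace owner UID of 1000):

```bash
# Inspect the default ACL applied by the entrypoint; expect an entry
# similar to "default:user:1000:rwx" for the workspace owner.
getfacl /workspace
```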

If you do not want this, you can set the environment variable `SKIP_ACL=true`.

## Running Services

This image will spawn multiple processes upon starting a container because some of our remote environments do not support more than one container per instance.
@@ -309,7 +320,7 @@ To manage this service you can use `supervisorctl [start|stop|restart] comfyui`.

This service is available on port `8188` and is used to test the [RunPod serverless](https://link.ai-dock.org/runpod-serverless) API.

You can access the API directly at `/rp-api/runsync` or you can use the Swagger/OpenAPI playground at `/rp-api/docs`.
You can access the API directly at `/rp-api/runsync` or you can use the Swagger/OpenAPI playground at `/rp-api`.

There are several [example payloads](https://github.com/ai-dock/comfyui/tree/main/build/COPY_ROOT/opt/serverless/docs/example_payloads) included in this repository.
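
A hedged sketch of posting one of those payloads to the synchronous endpoint (the filename, port, and auth header are assumptions based on the defaults described above):

```bash
# "payload.json" stands in for one of the example payloads in the repository.
curl -X POST "http://localhost:8188/rp-api/runsync" \
  -H "Authorization: Bearer ${WEB_TOKEN}" \
  -H "Content-Type: application/json" \
  -d @payload.json
```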

@@ -411,33 +422,6 @@ See [this guide](https://link.ai-dock.org/guide-sshd-do) by DigitalOcean for an
>[!NOTE]
>_SSHD is included because the end-user should be able to know the version prior to deployment. Using a provider's add-on, if available, does not guarantee this._
### Rclone mount

Rclone allows you to access your cloud storage from within the container by configuring one or more remotes. If you are unfamiliar with the project you can find out more at the [Rclone website](https://rclone.org/).

Any Rclone remotes that you have specified, either through mounting the config directory or via setting environment variables, will be mounted at `/workspace/remote/[remote name]`. For this service to start, the following conditions must be met:

- Fuse3 installed in the host operating system
- Kernel module `fuse` loaded in the host
- Host `/etc/passwd` mounted in the container
- Host `/etc/group` mounted in the container
- Host device `/dev/fuse` made available to the container
- Container must run with `cap-add SYS_ADMIN`
- Container must run with `security-opt apparmor:unconfined`
- At least one remote must be configured

The provided docker-compose.yaml includes a working configuration (add your own remotes).
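
If you launch with `docker run` instead of the supplied compose file, a sketch of the equivalent flags implied by the list above:

```bash
# Mirrors the mount prerequisites listed above; remotes must still be
# supplied via a mounted rclone config or RCLONE_* variables.
docker run -d \
  --cap-add SYS_ADMIN \
  --device /dev/fuse \
  --security-opt apparmor:unconfined \
  -v /etc/passwd:/etc/passwd:ro \
  -v /etc/group:/etc/group:ro \
  ghcr.io/ai-dock/comfyui:latest-cuda
```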

In the event that the conditions listed cannot be met, `rclone` will still be available to use via the CLI - only mounts will be unavailable.

If you intend to use the `rclone create` command to interactively generate remote configurations you should ensure port `53682` is accessible. See https://rclone.org/remote_setup/ for further details.

>[!NOTE]
>_Rclone is included to give the end-user an opportunity to easily transfer files between the instance and their cloud storage provider._
>[!WARNING]
>You should only provide auth tokens in secure cloud environments.
### Logtail

This script follows and prints the log files for each of the above services to stdout. This allows you to follow the progress of all running services through docker's own logging system.
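
In practice this means the combined service logs can be followed with standard Docker tooling (the container name is a placeholder):

```bash
docker logs -f comfyui
```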
@@ -1,4 +1,6 @@
[program:comfyui]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=/opt/ai-dock/bin/supervisor-comfyui.sh
process_name=%(program_name)s
numprocs=1
@@ -18,4 +20,4 @@ stdout_logfile_backups=1
stderr_logfile=/dev/null
stderr_logfile_maxbytes=0
stderr_logfile_backups=0
environment=PROC_NAME="%(program_name)s"
redirect_stderr=true
@@ -1,8 +1,10 @@
[program:comfyui_rp_api]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=/opt/ai-dock/bin/supervisor-comfyui-rp-api.sh
process_name=%(program_name)s
numprocs=1
directory=/opt/serverless/providers/runpod
directory=/home/$USER_NAME
priority=100
autostart=true
startsecs=5
@@ -18,4 +20,4 @@ stdout_logfile_backups=1
stderr_logfile=/dev/null
stderr_logfile_maxbytes=0
stderr_logfile_backups=0
environment=PROC_NAME="%(program_name)s"
redirect_stderr=true
@@ -1,4 +1,6 @@
[program:serverless]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=/opt/ai-dock/bin/supervisor-serverless.sh
process_name=%(program_name)s
numprocs=1
5 changes: 5 additions & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/clean.sh
@@ -3,3 +3,8 @@
# Tidy up and keep image small
apt-get clean -y
micromamba clean -ay

fix-permissions.sh -o container

rm /etc/ld.so.cache
ldconfig
14 changes: 10 additions & 4 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/common.sh
@@ -11,19 +11,25 @@ build_common_main() {

build_common_create_env() {
apt-get update
$APT_INSTALL libgl1 \
libgoogle-perftools4
$APT_INSTALL \
libgl1-mesa-glx \
libtcmalloc-minimal4
#libgoogle-perftools4

ln -sf $(ldconfig -p | grep -Po "libtcmalloc.so.\d" | head -n 1) \
ln -sf $(ldconfig -p | grep -Po "libtcmalloc_minimal.so.\d" | head -n 1) \
/lib/x86_64-linux-gnu/libtcmalloc.so


#$MAMBA_INSTALL -n ${MAMBA_DEFAULT_ENV} pocl

# A new pytorch env costs ~ 300Mb
exported_env=/tmp/${MAMBA_DEFAULT_ENV}.yaml
micromamba env export -n ${MAMBA_DEFAULT_ENV} > "${exported_env}"
$MAMBA_CREATE -n comfyui --file "${exported_env}"
printf "/opt/micromamba/envs/comfyui/lib\n" >> /etc/ld.so.conf.d/x86_64-linux-gnu.micromamba.10-comfyui.conf

# RunPod serverless support
$MAMBA_CREATE -n serverless python=3.10
printf "/opt/micromamba/envs/serverless/lib\n" >> /etc/ld.so.conf.d/x86_64-linux-gnu.micromamba.20-serverless.conf
$MAMBA_INSTALL -n serverless \
python-magic
micromamba run -n serverless $PIP_INSTALL \
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/init.sh
@@ -2,6 +2,7 @@

# Must exit and fail to build if any command fails
set -eo pipefail
umask 002

source /opt/ai-dock/bin/build/layer0/common.sh
