Skip to content

Commit

Permalink
fully user-aware
Browse files Browse the repository at this point in the history
  • Loading branch information
robballantyne committed Jan 13, 2024
1 parent 1dc2a49 commit 067bdc6
Show file tree
Hide file tree
Showing 29 changed files with 95 additions and 106 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
[program:cf_tunnel]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=supervisor-cloudflared.sh
process_name=%(program_name)s
numprocs=%(ENV_SUPERVISOR_START_CLOUDFLARED)s
directory=/root
directory=/home/$USER_NAME
priority=100
autostart=true
startsecs=5
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
; Tail all the logs to stdout/err for the 'docker logs' command
; Use this process for capture & events
[program:logtail]
user=user
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=supervisor-logtail.sh
process_name=%(program_name)s
numprocs=1
Expand All @@ -18,4 +20,4 @@ stdout_logfile=/dev/fd/1
stdout_logfile_maxbytes=0
stdout_capture_maxbytes=1MB
redirect_stderr=true
environment=PROC_NAME="%(program_name)s"

Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
[program:cf_quicktunnel]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",PROC_NUM=%(process_num)d,USER=$USER_NAME,HOME=/home/$USER_NAME
command=supervisor-quicktunnel.sh
process_name=="%(program_name)s_%(process_num)s"
numprocs=%(ENV_CF_QUICK_TUNNELS_COUNT)s
directory=%(ENV_WORKSPACE)s
directory=/home/$USER_NAME
priority=1000
autostart=true
startsecs=5
Expand All @@ -16,4 +18,3 @@ stdout_logfile=/var/log/supervisor/quicktunnel-%(process_num)s.log
stdout_logfile_maxbytes=20MB
stdout_logfile_backups=1
redirect_stderr=true
environment=PROC_NAME="%(program_name)s",PROC_NUM=%(process_num)d
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
[program:serviceportal]
user=user
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=supervisor-serviceportal.sh
process_name=%(program_name)s
numprocs=1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
[program:storagemonitor]
user=$USER_NAME
environment=PROC_NAME="%(program_name)s",USER=$USER_NAME,HOME=/home/$USER_NAME
command=supervisor-storagemonitor.sh
process_name=%(program_name)s
numprocs=1
directory=/root
directory=/home/$USER_NAME
priority=50
autostart=true
startsecs=5
Expand All @@ -16,4 +18,4 @@ stdout_logfile=/var/log/supervisor/storagemonitor.log
stdout_logfile_maxbytes=10MB
stdout_logfile_backups=1
redirect_stderr=true
environment=PROC_NAME="%(program_name)s"

6 changes: 3 additions & 3 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/amd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@ if [[ -z $ROCM_VERSION ]]; then
fi

export ROCM_VERSION="$ROCM_VERSION"
env-store ROCM_VERSION
export ROCM_LEVEL="$ROCM_LEVEL"
env-store ROCM_LEVEL
export PATH=/opt/rocm/bin:$PATH
printf "export ROCM_VERSION=\"%s\"\n" "${ROCM_VERSION}" >> /opt/ai-dock/etc/environment.sh
printf "export ROCM_LEVEL=\"%s\"\n" "${ROCM_LEVEL}" >> /opt/ai-dock/etc/environment.sh
printf "export PATH=\"%s\"\n" "${PATH}" >> /etc/bash.bashrc
env-store PATH

curl -Ss https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null

Expand Down
11 changes: 7 additions & 4 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/common.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#!/bin/false

export MAMBA_CREATE="micromamba create --always-softlink -y -c conda-forge"
env-store MAMBA_CREATE
export MAMBA_INSTALL="micromamba install --always-softlink -y -c conda-forge"
printf "export MAMBA_CREATE=\"%s\"\n" "${MAMBA_CREATE}" >> /opt/ai-dock/etc/environment.sh
printf "export MAMBA_INSTALL=\"%s\"\n" "${MAMBA_INSTALL}" >> /opt/ai-dock/etc/environment.sh
printf "git config --global --add safe.directory \"*\"\n" >> /opt/ai-dock/etc/environment.sh
env-store MAMBA_INSTALL

groupadd -g 1111 ai-dock

dpkg --add-architecture i386
apt-get update
Expand Down Expand Up @@ -41,11 +42,13 @@ $APT_INSTALL \
locales \
lsb-release \
lsof \
man \
mlocate \
net-tools \
nano \
openssh-server \
pkg-config \
psmisc \
python3-pip \
rar \
rclone \
Expand Down Expand Up @@ -120,5 +123,5 @@ touch /etc/rclone/rclone.conf
printf "source /opt/ai-dock/etc/environment.sh\n" >> /etc/profile.d/02-ai-dock.sh
printf "source /opt/ai-dock/etc/environment.sh\n" >> /etc/bash.bashrc

# Give our runtime user full access (added to users group)
# Give our runtime user full access (added to ai-dock group)
/opt/ai-dock/bin/fix-permissions.sh -o container
2 changes: 2 additions & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,6 @@ else
exit 1
fi

fix-permissions.sh -o container

source /opt/ai-dock/bin/build/layer0/clean.sh
12 changes: 6 additions & 6 deletions build/COPY_ROOT/opt/ai-dock/bin/build/layer0/nvidia.sh
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/bin/false
# NVIDIA build-layer environment setup.
#
# NOTE(review): the /bin/false interpreter line means running this file
# directly does nothing useful; presumably it is meant to be sourced by the
# layer0 build script - confirm against the caller.
# NOTE(review): this text was captured from a rendered diff whose hunk reports
# 6 additions and 6 deletions, so the env-store calls and the printf appends
# to /opt/ai-dock/etc/environment.sh are presumably the new and old versions
# of the same persistence step rather than both being live - confirm.

# Re-export each CUDA-related value under its own name and hand the variable
# name to env-store (a helper defined outside this view).
export CUDA_VERSION=${CUDA_VERSION}
env-store CUDA_VERSION
export CUDNN_VERSION=${CUDNN_VERSION}
env-store CUDNN_VERSION
export CUDA_LEVEL=${CUDA_LEVEL}
env-store CUDA_LEVEL
# Put the CUDA library directory ahead of any existing search path.
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
env-store LD_LIBRARY_PATH
# micromamba command prefixes that add the nvidia channel before conda-forge.
export MAMBA_CREATE="micromamba create --always-softlink -y -c nvidia -c conda-forge"
env-store MAMBA_CREATE
export MAMBA_INSTALL="micromamba install --always-softlink -y -c nvidia -c conda-forge"
# Append one `export NAME="value"` line per variable to the shared
# environment file.
printf "export CUDA_VERSION=\"%s\"\n" "${CUDA_VERSION}" >> /opt/ai-dock/etc/environment.sh
printf "export CUDNN_VERSION=\"%s\"\n" "${CUDNN_VERSION}" >> /opt/ai-dock/etc/environment.sh
printf "export CUDA_LEVEL=\"%s\"\n" "${CUDA_LEVEL}" >> /opt/ai-dock/etc/environment.sh
printf "export LD_LIBRARY_PATH=\"%s\"\n" "${LD_LIBRARY_PATH}" >> /opt/ai-dock/etc/environment.sh
printf "export MAMBA_CREATE=\"%s\"\n" "${MAMBA_CREATE}" >> /opt/ai-dock/etc/environment.sh
printf "export MAMBA_INSTALL=\"%s\"\n" "${MAMBA_INSTALL}" >> /opt/ai-dock/etc/environment.sh
env-store MAMBA_INSTALL
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/cfnt-url
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/cfqt-url
38 changes: 0 additions & 38 deletions build/COPY_ROOT/opt/ai-dock/bin/disabled.supervisor-logviewer.sh

This file was deleted.

1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/fix-permissions
19 changes: 5 additions & 14 deletions build/COPY_ROOT/opt/ai-dock/bin/fix-permissions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,11 @@ function main() {
}

# Adjust ownership, mode bits and default ACLs under /opt so that members of
# a shared group can read and write the container's files.
#
# Globals: OPT_SYNC (read) - colon-separated list of directory names under /opt.
#
# NOTE(review): this text was captured from a rendered diff whose hunk reports
# 5 additions and 14 deletions. The printf plus the per-item loop (group
# "users") are presumably the removed version and the five commands acting on
# /opt as a whole (group "ai-dock") the replacement - confirm before treating
# both halves as live code.
function fix_container() {
printf "Fixing container permissions...\n"
# Always include micromamba, then whatever OPT_SYNC names.
items=micromamba:$OPT_SYNC
# Split on ':' into path_array; the NUL terminator lets read consume the
# whole string in one go.
IFS=: read -r -d '' -a path_array < <(printf '%s:\0' "$items")
for item in "${path_array[@]}"; do
# Skip empty fields (e.g. when OPT_SYNC is empty or has a trailing colon).
if [[ -n $item ]]; then
opt_dir="/opt/${item}"
chown -R root.users "$opt_dir"
# setgid so new entries inherit the directory's group.
chmod -R g+s "$opt_dir"
chmod -R ug+rw "$opt_dir"
# Default ACLs: group rwx on newly created entries, with a mask that
# does not restrict it.
setfacl -R -d -m g:users:rwx "$opt_dir"
setfacl -R -d -m m:rwx "$opt_dir"
fi
done

# Same treatment applied to all of /opt, using the ai-dock group.
chown -R root.ai-dock /opt
chmod -R g+s /opt
# Capital X: add execute only to directories and to files that already
# have an execute bit.
chmod -R ug+rwX /opt
setfacl -R -d -m g:ai-dock:rwx /opt
setfacl -R -d -m m:rwx /opt
}

function fix_workspace() {
Expand Down
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/hash-password
37 changes: 16 additions & 21 deletions build/COPY_ROOT/opt/ai-dock/bin/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ function init_main() {
init_set_cf_tunnel_wanted
touch /run/container_config
touch /run/workspace_sync
# Opportunity to process & manipulate config before supervisor
init_source_config_script
init_write_environment
# Allow autostart processes to run early
supervisord -c /etc/supervisor/supervisord.conf &
Expand All @@ -38,7 +36,7 @@ function init_main() {
init_source_preflight_script > /var/log/preflight.log 2>&1
init_debug_print > /var/log/debug.log 2>&1
init_get_provisioning_script > /var/log/provisioning.log 2>&1
init_source_provisioning_script >> /var/log/provisioning.log 2>&1
init_run_provisioning_script >> /var/log/provisioning.log 2>&1
# Removal of this file will trigger fastapi shutdown and service start
rm /run/container_config
printf "Init complete: %s\n" "$(date +"%x %T.%3N")" >> /var/log/timing_data
Expand All @@ -53,12 +51,12 @@ init_serverless() {
export CF_QUICK_TUNNELS_COUNT=0
export SUPERVISOR_START_CLOUDFLARED=0
init_set_workspace
init_create_user
init_count_gpus
init_create_directories
init_create_logfiles
touch /run/container_config
touch /run/workspace_sync
init_source_config_script
init_write_environment
init_sync_mamba_envs > /var/log/sync.log 2>&1
init_sync_opt >> /var/log/sync.log 2>&1
Expand Down Expand Up @@ -195,9 +193,11 @@ function init_create_user() {
groupadd -g $WORKSPACE_GID $USER_NAME
useradd -ms /bin/bash $USER_NAME -d $home_dir -u $WORKSPACE_UID -g $WORKSPACE_GID
usermod -a -G $USER_GROUPS $USER_NAME
# May not exist
# May not exist - todo check device ownership
usermod -a -G render $USER_NAME
usermod -a -G sgx $USER_NAME
ln -s $home_dir /home/${USER_NAME}
# See the README (in)security notice
echo "${USER_NAME} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers
if [[ ! -e ${home_dir}/.bashrc ]]; then
cp -f /root/.bashrc ${home_dir}
Expand All @@ -209,6 +209,8 @@ function init_create_user() {
cp /root/.ssh/authorized_keys ${home_dir}/.ssh
chmod 600 ${home_dir}/.ssh/authorized_keys
fi
# Set username in startup scripts
sed -i "s/\$USER_NAME/$USER_NAME/g" /etc/supervisor/supervisord/conf.d/*
}

function init_sync_mamba_envs() {
Expand Down Expand Up @@ -359,7 +361,8 @@ function init_direct_address() {
}

# Create the runtime directories used later during init:
# /run/http_ports and /opt/caddy/etc.
#
# NOTE(review): this text was captured from a rendered diff, so the plain
# `mkdir -p /run/http_ports` is presumably the removed line and the
# `mkdir -m 2770` + `chown` pair its replacement. Read literally in this order
# the first mkdir would create the directory and the -m 2770 on the second
# would likely not be applied to the already existing directory - confirm.
function init_create_directories() {
mkdir -p /run/http_ports
# 2770: setgid plus rwx for owner and group, no access for others.
mkdir -m 2770 -p /run/http_ports
# Hand group ownership to ai-dock (old-style "owner.group" separator).
chown root.ai-dock /run/http_ports
mkdir -p /opt/caddy/etc
}

Expand All @@ -368,16 +371,6 @@ function init_create_logfiles() {
touch /var/log/{logtail.log,config.log,debug.log,preflight.log,provisioning.log,sync.log}
}

# Source the optional config hook at /opt/ai-dock/bin/config.sh into the
# current shell. Child images may ship this file; when it is absent the
# function only reports that and carries on.
# Outputs: progress messages on stdout.
init_source_config_script() {
    printf "Looking for config.sh...\n"
    if [[ -f /opt/ai-dock/bin/config.sh ]]; then
        # Hook present: run it in this shell so it can alter the environment.
        source /opt/ai-dock/bin/config.sh
    else
        printf "Not found\n"
    fi
}

function init_source_preflight_script() {
# Child images can provide in their PATH
printf "Looking for preflight.sh...\n"
Expand All @@ -395,7 +388,7 @@ function init_write_environment() {
)
while IFS='=' read -r -d '' key val; do
if [[ ! ${skip_keys[@]} =~ "$key" ]]; then
printf "export %s=\"%s\"\n" "$key" "$val" >> /opt/ai-dock/etc/environment.sh
env-store "$key"
fi
done < <(env -0)

Expand All @@ -422,14 +415,16 @@ function init_get_provisioning_script() {
fi
}

function init_source_provisioning_script() {
function init_run_provisioning_script() {
if [[ ! -e "$WORKSPACE"/.update_lock ]]; then
# Child images can provide in their PATH
file="/opt/ai-dock/bin/provisioning.sh"
printf "Looking for provisioning.sh...\n"
if [[ ! -f /opt/ai-dock/bin/provisioning.sh ]]; then
if [[ ! -f ${file} ]]; then
printf "Not found\n"
else
source /opt/ai-dock/bin/provisioning.sh
chown ${USER_NAME}:ai-dock ${file}
chmod 0755 ${file}
su ${USER_NAME} -c ${file}
fi
else
printf "Refusing to provision container with %s.update_lock present\n" "$WORKSPACE"
Expand Down
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/link-mamba-envs
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/logtail
1 change: 1 addition & 0 deletions build/COPY_ROOT/opt/ai-dock/bin/set-web-credentials
7 changes: 5 additions & 2 deletions build/COPY_ROOT/opt/ai-dock/bin/supervisor-caddy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,12 @@ function cleanup() {
}

function start() {
source /opt/ai-dock/etc/environment.sh

if [[ ${SERVERLESS,,} = "true" ]]; then
printf "Refusing to start Caddy service in serverless mode\n"
exec sleep 10
sleep 5
exit 0
fi

# Give processes time to register their ports
Expand Down Expand Up @@ -42,7 +45,7 @@ function start() {
done

caddy fmt --overwrite /opt/caddy/etc/Caddyfile
exec caddy run --config /opt/caddy/etc/Caddyfile
caddy run --config /opt/caddy/etc/Caddyfile
}

start 2>&1
5 changes: 3 additions & 2 deletions build/COPY_ROOT/opt/ai-dock/bin/supervisor-cloudflared.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,16 @@ function cleanup() {
}

# Start the Cloudflare tunnel daemon for the supervisor-managed process, or
# idle briefly and finish when no tunnel token is configured.
#
# Globals: CF_TUNNEL_TOKEN (read) - token passed to `cloudflared ... run`.
#
# NOTE(review): this text was captured from a rendered diff. The two
# cloudflared lines at the end are presumably the old (exec) and new
# (non-exec) versions of the same command; read literally, exec replaces the
# shell so the second line would never run - confirm which one is live.
function start() {
# Load the persisted environment written during init.
source /opt/ai-dock/etc/environment.sh

if [[ -z $CF_TUNNEL_TOKEN ]]; then
printf "Skipping Cloudflare daemon: No token\n"
# No error - Supervisor will not attempt restart
# NOTE(review): presumably 10s is chosen to outlast the startsecs=5 set in
# the cf_tunnel supervisor program - confirm.
exec sleep 10
fi

printf "Starting Cloudflare daemon...\n"

# Metrics endpoint is bound to localhost:2999.
exec cloudflared tunnel --metrics localhost:2999 run --token "${CF_TUNNEL_TOKEN}"
cloudflared tunnel --metrics localhost:2999 run --token "${CF_TUNNEL_TOKEN}"
}

start 2>&1
Loading

0 comments on commit 067bdc6

Please sign in to comment.