diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 9bdd5817..c96740ea 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -59,32 +59,54 @@ jobs: ./hack/test-smoke.sh multi-node: - name: "Multi node (emulated using LXD)" + name: "Multi node (emulated using Lima)" runs-on: ubuntu-24.04 timeout-minutes: 30 strategy: fail-fast: false matrix: include: - - lxc-image: ubuntu:24.04 + - lima-template: template://ubuntu-24.04 engine: docker -# LXD is now banned from pulling images:fedora from https://images.linuxcontainers.org/ -# TODO: switch away from LXD to Incus: https://github.com/rootless-containers/usernetes/pull/332 -# - lxc-image: images:fedora/39/cloud -# engine: podman + - lima-template: template://ubuntu-24.04 + engine: podman + # FIXME: Flannel seems to fail with Fedora + Podman: + # https://github.com/rootless-containers/usernetes/pull/339#issuecomment-2511139718 + # > plugin type=\\\"flannel\\\" failed (add): loadFlannelSubnetEnv failed: + # > open /run/flannel/subnet.env: no such file or directory\"" + # > pod="kube-system/coredns-6f6b679f8f-p9spl" env: - LXC_IMAGE: "${{ matrix.lxc-image }}" + LIMA_TEMPLATE: "${{ matrix.lima-template }}" CONTAINER_ENGINE: "${{ matrix.engine }}" steps: - - run: sudo modprobe vxlan - uses: actions/checkout@v3 - - uses: canonical/setup-lxd@v0.1.1 + - name: "Install QEMU" + run: | + set -eux + sudo apt-get update + sudo apt-get install -y --no-install-recommends ovmf qemu-system-x86 qemu-utils + sudo modprobe kvm + # `sudo usermod -aG kvm $(whoami)` does not take effect on GHA + sudo chown $(whoami) /dev/kvm + + - name: "Install Lima" + run: | + set -eux + LIMA_VERSION=$(curl -fsSL https://api.github.com/repos/lima-vm/lima/releases/latest | jq -r .tag_name) + curl -fsSL https://github.com/lima-vm/lima/releases/download/${LIMA_VERSION}/lima-${LIMA_VERSION:1}-Linux-x86_64.tar.gz | sudo tar Cxzvf /usr/local - && echo "LIMA_VERSION=${LIMA_VERSION}" >>$GITHUB_ENV + + - name: "Cache ~/.cache/lima" + uses: actions/cache@v4 + with: + 
path: ~/.cache/lima + key: lima-${{ env.LIMA_VERSION }} + - name: "Relax disk pressure limit" run: | set -x sudo snap install yq yq -i 'select(.kind=="KubeletConfiguration").evictionHard."imagefs.available"="3Gi"' kubeadm-config.yaml - - run: ./hack/create-cluster-lxd.sh + - run: ./hack/create-cluster-lima.sh - run: kubectl taint nodes --all node-role.kubernetes.io/control-plane- || true - run: ./hack/test-smoke.sh - if: failure() @@ -93,16 +115,21 @@ jobs: set -x kubectl get nodes -o wide kubectl get nodes -o yaml - ssh -F ${HOME}/.u7s-ci-hosts/ssh_config host0 -- df -h - ssh -F ${HOME}/.u7s-ci-hosts/ssh_config host1 -- df -h + limactl shell host0 df -h + limactl shell host1 df -h - if: failure() name: "kubectl get pods" run: | set -x kubectl get pods -A -o yaml - ssh -F ${HOME}/.u7s-ci-hosts/ssh_config host0 -- journalctl --user --no-pager --since "10 min ago" + limactl shell host0 journalctl --user --no-pager --since "10 min ago" - name: "Test data persistency after restarting the node" run: | - lxc restart host0 host1 + limactl stop host0 + limactl stop host1 + limactl start host0 + limactl start host1 + # The plain mode of Lima disables automatic port forwarding + ssh -q -f -N -L 6443:127.0.0.1:6443 -F ~/.lima/host0/ssh.config lima-host0 sleep 30 ./hack/test-smoke.sh diff --git a/hack/create-cluster-lima.sh b/hack/create-cluster-lima.sh new file mode 100755 index 00000000..008c3920 --- /dev/null +++ b/hack/create-cluster-lima.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -eux -o pipefail + +: "${LIMACTL:=limactl --tty=false}" +: "${LIMA_TEMPLATE:=template://default}" +: "${CONTAINER_ENGINE:=docker}" +: "${LOCKDOWN_SUDO:=1}" + +guest_home="/home/${USER}.linux" + +# Create Rootless Docker hosts +for host in host0 host1; do + # Set --plain to minimize Limaism + ${LIMACTL} start --plain --network lima:user-v2 --name="${host}" "${LIMA_TEMPLATE}" + ${LIMACTL} copy -r "$(pwd)" "${host}:${guest_home}/usernetes" + ${LIMACTL} shell "${host}" sudo 
CONTAINER_ENGINE="${CONTAINER_ENGINE}" "${guest_home}/usernetes/init-host/init-host.root.sh" + # Terminate the current session so that the cgroup delegation takes effect. This command exits with status 255 as SSH terminates. + ${LIMACTL} shell "${host}" sudo loginctl terminate-user "${USER}" || true + ${LIMACTL} shell "${host}" sudo loginctl enable-linger "${USER}" + if [ "${LOCKDOWN_SUDO}" = "1" ]; then + # Lockdown sudo to ensure rootless-ness + ${LIMACTL} shell "${host}" sudo sh -euxc 'rm -rf /etc/sudoers.d/*-cloud-init-users' + fi + ${LIMACTL} shell "${host}" CONTAINER_ENGINE="${CONTAINER_ENGINE}" "${guest_home}/usernetes/init-host/init-host.rootless.sh" +done + +# Launch a Kubernetes node inside a Rootless Docker host +for host in host0 host1; do + ${LIMACTL} shell "${host}" CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" up +done + +# Bootstrap a cluster with host0 +${LIMACTL} shell host0 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-init install-flannel kubeconfig join-command + +# Let host1 join the cluster +${LIMACTL} copy host0:~/usernetes/join-command host1:~/usernetes/join-command +${LIMACTL} shell host1 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" kubeadm-join +${LIMACTL} shell host0 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C "${guest_home}/usernetes" sync-external-ip + +# Enable kubectl +ssh -q -f -N -L 6443:127.0.0.1:6443 -F ~/.lima/host0/ssh.config lima-host0 +${LIMACTL} copy host0:${guest_home}/usernetes/kubeconfig ./kubeconfig +KUBECONFIG="$(pwd)/kubeconfig" +export KUBECONFIG +kubectl get nodes -o wide +kubectl get pods -A diff --git a/hack/create-cluster-lxd.sh b/hack/create-cluster-lxd.sh deleted file mode 100755 index e4a17b78..00000000 --- a/hack/create-cluster-lxd.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -set -eux -o pipefail - -: "${CONTAINER_ENGINE:=docker}" - -# Create Rootless Docker hosts -./hack/create-hosts-lxd.sh 
"${HOME}/.u7s-ci-hosts" host0 host1 -SCP="scp -F ${HOME}/.u7s-ci-hosts/ssh_config" -SSH="ssh -F ${HOME}/.u7s-ci-hosts/ssh_config" -for host in host0 host1; do - $SCP -r "$(pwd)" "${host}:~/usernetes" - $SSH "${USER}-sudo@${host}" sudo CONTAINER_ENGINE="${CONTAINER_ENGINE}" "~${USER}/usernetes/init-host/init-host.root.sh" - $SSH "${USER}-sudo@${host}" sudo loginctl enable-linger "${USER}" - $SSH "${host}" CONTAINER_ENGINE="${CONTAINER_ENGINE}" ~/usernetes/init-host/init-host.rootless.sh -done - -# Launch a Kubernetes node inside a Rootless Docker host -for host in host0 host1; do - $SSH "${host}" CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C ~/usernetes up -done - -# Bootstrap a cluster with host0 -$SSH host0 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C ~/usernetes kubeadm-init install-flannel kubeconfig join-command - -# Let host1 join the cluster -$SCP host0:~/usernetes/join-command host1:~/usernetes/join-command -$SSH host1 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C ~/usernetes kubeadm-join -$SSH host0 CONTAINER_ENGINE="${CONTAINER_ENGINE}" make -C ~/usernetes sync-external-ip - -# Enable kubectl -$SCP host0:~/usernetes/kubeconfig ./kubeconfig -sed -i -e "s/127.0.0.1/$($SSH host0 ip --json route get 1 | jq -r .[0].prefsrc)/g" ./kubeconfig -KUBECONFIG="$(pwd)/kubeconfig" -export KUBECONFIG -kubectl get nodes -o wide -kubectl get pods -A diff --git a/hack/create-hosts-lxd.sh b/hack/create-hosts-lxd.sh deleted file mode 100755 index cbab9847..00000000 --- a/hack/create-hosts-lxd.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -set -eux -o pipefail -if [ "$#" -lt 2 ]; then - echo "Usage: $0 DIR INST..." - exit 1 -fi -dir=$1 -shift -names=$* - -: "${LXC_IMAGE:="ubuntu:24.04"}" -LXC="sudo lxc" - -echo "USER=${USER}" -ssh_config="${dir}/ssh_config" -echo "SSH_CONFIG=${ssh_config}" - -# ssh-copy-id wants ~/.ssh to exist -mkdir -p "${HOME}/.ssh" -mkdir -p "${dir}" -prvkey="${dir}/ssh_key" -pubkey="${prvkey}.pub" -if [ ! 
-e "${pubkey}" ]; then - ssh-keygen -f "${prvkey}" -q -N "" -fi -echo "IdentityFile ${prvkey}" >"${ssh_config}" - -userdata="${dir}/user-data" -if [ ! -e "${userdata}" ]; then - cat <"${userdata}" -#cloud-config -users: - - name: "${USER}" - shell: /bin/bash - ssh-authorized-keys: - - $(cat "${pubkey}") - - name: "${USER}-sudo" - shell: /bin/bash - ssh-authorized-keys: - - $(cat "${pubkey}") - sudo: ALL=(ALL) NOPASSWD:ALL -EOF -fi - -for name in ${names}; do - ${LXC} init "${LXC_IMAGE}" "${name}" -c security.privileged=true -c security.nesting=true - ${LXC} config device add "${name}" bind-boot disk source=/boot path=/boot readonly=true - ${LXC} config set "${name}" user.user-data - <"${userdata}" - ${LXC} start "${name}" - - # Apply fixes for AppArmor (apparantely only needed on LXD) - # `slirp4netns --enable-sandbox` inside LXD (Ubuntu 24.04) requires amending the AppArmor rule. - # https://github.com/rootless-containers/slirp4netns/issues/348#issuecomment-2288124206 - ${LXC} shell "${name}" -- bash -c 'echo "pivot_root," >>/etc/apparmor.d/local/slirp4netns' - # runc requires pivot_root: - # > runc run failed: unable to start container process: error during container init: error jailing process inside rootfs: pivot_root .: permission denied - ${LXC} shell "${name}" -- bash -c 'echo "pivot_root," >>/etc/apparmor.d/local/runc' - # Propagate the profile for /usr/sbin/runc (Canonical's package) to /usr/bin/runc (Docker's package) - ${LXC} shell "${name}" -- bash -c 'sed -e s@/usr/sbin/runc@/usr/bin/runc@g /etc/apparmor.d/runc > /etc/apparmor.d/usr.bin.runc' - ${LXC} shell "${name}" -- bash -c 'systemctl restart apparmor' - - sleep 10 - ip="$(${LXC} exec "${name}" -- ip --json route get 1 | jq -r .[0].prefsrc)" - echo "Host ${name}" >>"${ssh_config}" - echo " Hostname ${ip}" >>"${ssh_config}" - echo " # For a test env, the host key can be just ignored" - echo " StrictHostKeyChecking=no" - echo " UserKnownHostsFile=/dev/null" - ssh-copy-id -F "${ssh_config}" -i 
"${prvkey}" -o StrictHostKeyChecking=no "${USER}@${name}" -done diff --git a/init-host/init-host.root.d/install-podman.sh b/init-host/init-host.root.d/install-podman.sh index 79604ab8..afa632a5 100755 --- a/init-host/init-host.root.d/install-podman.sh +++ b/init-host/init-host.root.d/install-podman.sh @@ -1,6 +1,5 @@ #!/bin/bash -# This script installs the latest release of Podman. -# Repository information is from https://podman.io/docs/installation#linux-distributions +# This script installs Podman. set -eux -o pipefail if [ "$(id -u)" != "0" ]; then echo "Must run as the root" @@ -10,16 +9,6 @@ fi if command -v dnf >/dev/null 2>&1; then dnf install -y podman podman-compose else - mkdir -p /etc/apt/keyrings - curl -fsSL "https://download.opensuse.org/repositories/devel:kubic:libcontainers:unstable/xUbuntu_$(lsb_release -rs)/Release.key" | - gpg --dearmor | - tee /etc/apt/keyrings/devel_kubic_libcontainers_unstable.gpg >/dev/null - echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/devel_kubic_libcontainers_unstable.gpg]\ - https://download.opensuse.org/repositories/devel:kubic:libcontainers:unstable/xUbuntu_$(lsb_release -rs)/ /" | - tee /etc/apt/sources.list.d/devel:kubic:libcontainers:unstable.list >/dev/null apt-get update -qq - apt-get -qq -y install podman - # No dpkg for podman-compose ? 
- pip3 install podman-compose + apt-get -qq -y install podman podman-compose fi diff --git a/init-host/init-host.root.sh b/init-host/init-host.root.sh index 5151eb17..b5e3f625 100755 --- a/init-host/init-host.root.sh +++ b/init-host/init-host.root.sh @@ -19,11 +19,30 @@ EOF fi cat >/etc/modules-load.d/usernetes.conf </etc/sysctl.d/99-usernetes.conf </etc/sysctl.d/99-usernetes.conf </dev/null 2>&1; then dnf install -y git shadow-utils make jq - # Workaround: SUID bit on newuidmap is dropped on LXC images:fedora/38/cloud, - # so it has to be reinstalled - dnf reinstall -y shadow-utils else apt-get update apt-get install -y git uidmap make jq @@ -58,7 +73,7 @@ case "${CONTAINER_ENGINE}" in systemctl disable --now docker ;; "podman") - if ! command -v podman >/dev/null 2>&1; then + if ! command -v podman-compose >/dev/null 2>&1; then "${script_dir}"/init-host.root.d/install-podman.sh fi ;;