Skip to content

Commit

Permalink
Merge pull request #210 from AkihiroSuda/crio-cgroup2
Browse files Browse the repository at this point in the history
crio: support resource limitation with cgroup2
  • Loading branch information
AkihiroSuda authored Feb 1, 2021
2 parents 172b3bd + f56021d commit 3dd61fc
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 11 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,5 +54,5 @@ jobs:
vagrant destroy -f
vagrant up
vagrant ssh-config > ~/vagrant-ssh-config
- name: "Smoke test (CRI-O, w/o cgroups)"
- name: "Smoke test (CRI-O, w/ systemd-delegated cgroupfs)"
run: ssh -F ~/vagrant-ssh-config default /vagrant/hack/smoketest-binaries.sh --cri=crio
5 changes: 5 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ RUN git clone -q https://github.com/cri-o/cri-o.git /go/src/github.com/cri-o/cri
WORKDIR /go/src/github.com/cri-o/cri-o
ARG CRIO_COMMIT
RUN git pull && git checkout ${CRIO_COMMIT}
COPY ./src/patches/crio /patches
# `git am` requires user info to be set
RUN git config user.email "nobody@example.com" && \
git config user.name "Usernetes Build Script" && \
git am /patches/* && git show --summary
RUN EXTRA_LDFLAGS='-linkmode external -extldflags "-static"' make binaries && \
mkdir /out && cp bin/crio bin/crio-status bin/pinns /out

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,7 @@ Just run `install.sh`:
$ ./install.sh
```

Note: CRI-O (`--cri=crio`) does not support rootless cgroup yet.
Both containerd (`--cri=containerd`) and CRI-O (`--cri=crio`) are supported.

### Expose netns ports to the host

Expand Down
4 changes: 3 additions & 1 deletion boot/crio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ source $U7S_BASE_DIR/common/common.inc.sh

export _CRIO_ROOTLESS=1

log::warning "Running without cgroup (rootless cgroup is not supported yet by CRI-O)"
if [[ "$U7S_CGROUP_ENABLED" != "1" ]]; then
log::warning "Running without cgroup"
fi

mkdir -p $XDG_CONFIG_HOME/usernetes/crio $XDG_CONFIG_HOME/usernetes/containers/oci/hooks.d

Expand Down
9 changes: 6 additions & 3 deletions boot/rootlesskit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ if [[ $_U7S_CHILD == 0 ]]; then
--net=slirp4netns --mtu=65520 --disable-host-loopback --slirp4netns-sandbox=true --slirp4netns-seccomp=true \
--port-driver=builtin \
--copy-up=/etc --copy-up=/run --copy-up=/var/lib --copy-up=/opt \
--copy-up=/etc --copy-up=/run --copy-up=/var/lib --copy-up=/opt \
--cgroupns \
--pidns \
--ipcns \
Expand Down Expand Up @@ -70,10 +69,14 @@ else
mkdir -p /opt/cni/bin
mount --bind $U7S_BASE_DIR/bin/cni /opt/cni/bin

# These bind-mounts are needed at the moment because the paths are hard-coded in Kube.
binds=(/var/lib/kubelet /var/lib/cni /var/log)
# These bind-mounts are needed at the moment because the paths are hard-coded in Kube and CRI-O.
binds=(/var/lib/kubelet /var/lib/cni /var/log /var/lib/containers)
for f in ${binds[@]}; do
src=$XDG_DATA_HOME/usernetes/$(echo $f | sed -e s@/@_@g)
if [[ -L $f ]]; then
# Remove link created by `rootlesskit --copy-up` if any
rm -rf $f
fi
mkdir -p $src $f
mount --bind $src $f
done
Expand Down
8 changes: 3 additions & 5 deletions install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,7 @@ fi

# check cgroup config
U7S_CGROUP_ENABLED=
if [[ "$cri" != "containerd" ]]; then
WARNING "Disabling Rootless cgroup: Rootless cgroup is not supported by CRI-O yet"
elif [[ ! -f /sys/fs/cgroup/cgroup.controllers ]]; then
if [[ ! -f /sys/fs/cgroup/cgroup.controllers ]]; then
WARNING "Disabling Rootless cgroup: the system is using cgroup v1, you need to reboot the system with systemd.unified_cgroup_hierarchy=1"
else
f="/sys/fs/cgroup/user.slice/user-$(id -u).slice/user@$(id -u).service/cgroup.controllers"
Expand Down Expand Up @@ -261,7 +259,7 @@ EOF

### RootlessKit
if [ -n "$cri" ]; then
cat <<EOF | x u7s-rootlesskit.service
cat <<EOF | x u7s-rootlesskit.service
[Unit]
Description=Usernetes RootlessKit service ($cri)
PartOf=u7s.target
Expand All @@ -272,7 +270,7 @@ Delegate=yes
${service_common}
EOF
else
cat <<EOF | x u7s-rootlesskit.service
cat <<EOF | x u7s-rootlesskit.service
[Unit]
Description=Usernetes RootlessKit service
PartOf=u7s.target
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
From 365a74c6ab043af21a14a6db8bcdc8af14a624fe Mon Sep 17 00:00:00 2001
From: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Date: Mon, 1 Feb 2021 18:52:30 +0900
Subject: [PATCH] rootless: enable resource limit when cgroup v2 controllers
are delegated

Requires systemd-delegated cgroupfs driver.
See Usernetes PR #210 for the actual usage.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
---
server/rootless.go | 64 +++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/server/rootless.go b/server/rootless.go
index 39c65a90b..424669966 100644
--- a/server/rootless.go
+++ b/server/rootless.go
@@ -1,10 +1,14 @@
package server

import (
+ "io/ioutil"
+ "path/filepath"
"strings"

+ "github.com/opencontainers/runc/libcontainer/cgroups"
rspec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/runtime-tools/generate"
+ "github.com/sirupsen/logrus"
)

func hasNetworkNamespace(config *rspec.Spec) bool {
@@ -17,7 +21,41 @@ func hasNetworkNamespace(config *rspec.Spec) bool {
}

func makeOCIConfigurationRootless(g *generate.Generator) {
- g.Config.Linux.Resources = nil
+	// Resource limits require cgroup v2 delegation (https://rootlesscontaine.rs/getting-started/common/cgroup2/); a limit whose controller is not delegated is dropped with a warning.
+	if g.Config.Linux.Resources != nil {
+		// The device eBPF program cannot be controlled in rootless mode, so device rules are always dropped.
+		g.Config.Linux.Resources.Devices = nil
+		v2Controllers := getAvailableV2Controllers() // nil when the controllers cannot be determined; every lookup below then misses
+		if _, ok := v2Controllers["memory"]; !ok && g.Config.Linux.Resources.Memory != nil {
+			logrus.Warn("rootless: cgroup v2 memory controller is not delegated. Discarding memory limit.")
+			g.Config.Linux.Resources.Memory = nil
+		}
+		if _, ok := v2Controllers["cpu"]; !ok && g.Config.Linux.Resources.CPU != nil {
+			logrus.Warn("rootless: cgroup v2 cpu controller is not delegated. Discarding cpu limit.")
+			g.Config.Linux.Resources.CPU = nil
+		}
+		if _, ok := v2Controllers["cpuset"]; !ok && g.Config.Linux.Resources.CPU != nil && (g.Config.Linux.Resources.CPU.Cpus != "" || g.Config.Linux.Resources.CPU.Mems != "") {
+			logrus.Warn("rootless: cgroup v2 cpuset controller is not delegated. Discarding cpuset limit.")
+			g.Config.Linux.Resources.CPU.Cpus = ""
+			g.Config.Linux.Resources.CPU.Mems = ""
+		}
+		if _, ok := v2Controllers["pids"]; !ok && g.Config.Linux.Resources.Pids != nil {
+			logrus.Warn("rootless: cgroup v2 pids controller is not delegated. Discarding pids limit.")
+			g.Config.Linux.Resources.Pids = nil
+		}
+		if _, ok := v2Controllers["io"]; !ok && g.Config.Linux.Resources.BlockIO != nil {
+			logrus.Warn("rootless: cgroup v2 io controller is not delegated. Discarding block I/O limit.")
+			g.Config.Linux.Resources.BlockIO = nil
+		}
+		if _, ok := v2Controllers["rdma"]; !ok && g.Config.Linux.Resources.Rdma != nil {
+			logrus.Warn("rootless: cgroup v2 rdma controller is not delegated. Discarding RDMA limit.")
+			g.Config.Linux.Resources.Rdma = nil
+		}
+		if _, ok := v2Controllers["hugetlb"]; !ok && g.Config.Linux.Resources.HugepageLimits != nil {
+			logrus.Warn("rootless: cgroup v2 hugetlb controller is not delegated. Discarding hugetlb limit.")
+			g.Config.Linux.Resources.HugepageLimits = nil
+		}
+	}
g.Config.Process.OOMScoreAdj = nil
g.Config.Process.ApparmorProfile = ""

@@ -45,3 +83,27 @@ func makeOCIConfigurationRootless(g *generate.Generator) {

g.SetLinuxCgroupsPath("")
}
+
+// getAvailableV2Controllers collects the names listed in /sys/fs/cgroup/<SELF>/cgroup.controllers as a set; nil means they could not be determined.
+func getAvailableV2Controllers() map[string]struct{} {
+	selfCgroups, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		logrus.WithError(err).Error("failed to parse /proc/self/cgroup")
+		return nil
+	}
+	unified, found := selfCgroups[""]
+	if !found || unified == "" {
+		return nil
+	}
+	listFile := filepath.Join("/sys/fs/cgroup", unified, "cgroup.controllers")
+	raw, err := ioutil.ReadFile(listFile)
+	if err != nil {
+		logrus.WithError(err).Errorf("failed to read %s", listFile)
+		return nil
+	}
+	available := make(map[string]struct{})
+	for _, name := range strings.Fields(string(raw)) {
+		available[name] = struct{}{}
+	}
+	return available
+}
--
2.27.0

0 comments on commit 3dd61fc

Please sign in to comment.