From 8207cb0cfe4fda412cffc16170aa29c4fac5d0ed Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Wed, 7 Jul 2021 19:33:49 +0900 Subject: [PATCH] remove most of the patches + deprecate "none" cgroup driver The k/k PR was merged: https://github.com/kubernetes/kubernetes/commit/ebbe63f116500b5f3c4fcd1dacbc8d7eb1c7e7a2 We no longer need any patch for cgroup v2, but we still need the `[Not for Upstream] kubelet: new cgroup driver: "none"` patch for cgroup v1. The "none" cgroup driver is deprecated and will be removed in a future release of Usernetes. Signed-off-by: Akihiro Suda --- Dockerfile | 8 +-- README.md | 9 +-- boot/containerd.sh | 2 +- boot/crio.sh | 2 +- boot/kube-proxy.sh | 3 - boot/kubelet.sh | 2 +- install.sh | 4 ++ src/patches/README.md | 18 ------ ...-feature-gate-KubeletInUserNamespace.patch | 46 ------------- ...ream-kubelet-new-cgroup-driver-none.patch} | 8 +-- ...e-sysctl-error-when-running-in-usern.patch | 47 -------------- ...3-kube-proxy-allow-running-in-userns.patch | 50 --------------- ...ev-kmsg-error-when-running-in-userns.patch | 64 ------------------- 13 files changed, 20 insertions(+), 243 deletions(-) delete mode 100644 src/patches/README.md delete mode 100644 src/patches/kubernetes/0001-New-feature-gate-KubeletInUserNamespace.patch rename src/patches/kubernetes/{0005-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch => 0001-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch} (97%) delete mode 100644 src/patches/kubernetes/0002-kubelet-cm-ignore-sysctl-error-when-running-in-usern.patch delete mode 100644 src/patches/kubernetes/0003-kube-proxy-allow-running-in-userns.patch delete mode 100644 src/patches/kubernetes/0004-kubelet-ignore-dev-kmsg-error-when-running-in-userns.patch diff --git a/Dockerfile b/Dockerfile index e173b16..493ef43 100644 --- a/Dockerfile +++ b/Dockerfile @@ -8,10 +8,10 @@ ARG ROOTLESSKIT_COMMIT=2eaa8b0825324d162649786dac7fac279938f71f # 2021-07-02T21:54:58Z ARG CONTAINERD_COMMIT=7eceeb950b84396c165d972efb35116a113966fd -# 2021-07-05T18:00:12Z -ARG CRIO_COMMIT=c8b82d6c26015c760a03a0e7b3972f98d2fecca7 -# 2021-07-07T05:11:48Z -ARG KUBE_NODE_COMMIT=656d00e8949f64cd663ca532d12e00982a6664c4 +# 2021-07-07T13:25:10Z +ARG CRIO_COMMIT=3e5a1e48158316b1f378cd2dceb67b6ab670bc36 +# 2021-07-08T01:09:29Z +ARG KUBE_NODE_COMMIT=f915aa39e80260e23fdc2453566f4942d2ad96d1 # Version definitions (cont.) ARG SLIRP4NETNS_RELEASE=v1.1.11 diff --git a/README.md b/README.md index 51a263a..7fc6e91 100644 --- a/README.md +++ b/README.md @@ -53,8 +53,6 @@ Usernetes aims to provide a reference distribution of Kubernetes that can be ins * Flannel (VXLAN) * CoreDNS -Currently, Usernetes uses our patched version of `kubelet` and `kube-proxy`. We are proposing our patches to the Kubernetes upstream. See [#42](https://github.com/rootless-containers/usernetes/issues/42) for the current status. - Installer scripts are in POC status. See [Adoption](#adoption) for Usernetes-based Kubernetes distributions. @@ -69,7 +67,7 @@ See [Adoption](#adoption) for Usernetes-based Kubernetes distributions. ## Adoption -We encourage other Kubernetes distributions to adopt Usernetes patches and tools. +We encourage other Kubernetes distributions to adopt Usernetes. Currently, the following distributions adopt Usernetes: * [k3s](https://github.com/k3s-io/k3s/blob/master/k3s-rootless.service) @@ -88,13 +86,16 @@ No SETUID/SETCAP binary is needed, except [`newuidmap(1)`](http://man7.org/linux * Usermode networking called [slirp4netns](https://github.com/rootless-containers/slirp4netns) is used instead of kernel-mode [vEth](http://man7.org/linux/man-pages/man4/veth.4.html) pairs. * [fuse-overlayfs](https://github.com/containers/fuse-overlayfs) is used instead of kernel-mode overlayfs. * Node ports are network-namespaced -* No support for cgroup v1. Resource limitations are ignored on cgroup v1 hosts. To enable support for cgroup (v2 only), see [Enabling cgroups](#enabling-cgroups). * Apparmor is unsupported ## Requirements * Kernel >= 4.18. +* cgroup v2. See [Enabling cgroups](#enabling-cgroups). + On cgroup v1, the cgroup driver is set to "none" and the resource limitation configurations are ignored. + The "none" cgroup driver is deprecated and will be removed in a future release of Usernetes. + * Recent version of systemd. Known to work with systemd >= 242. * `mount.fuse3` binary. Provided by `fuse3` package on most distros. diff --git a/boot/containerd.sh b/boot/containerd.sh index 6ece17b..7c238aa 100755 --- a/boot/containerd.sh +++ b/boot/containerd.sh @@ -7,7 +7,7 @@ disable_cgroup="true" if [[ "$U7S_CGROUP_ENABLED" = "1" ]]; then disable_cgroup="false" else - log::warning "Running without cgroup" + log::warning "Running without cgroup. This mode is deprecated and will be removed in a future release of Usernetes." fi mkdir -p $XDG_RUNTIME_DIR/usernetes diff --git a/boot/crio.sh b/boot/crio.sh index dd5b796..c215295 100755 --- a/boot/crio.sh +++ b/boot/crio.sh @@ -6,7 +6,7 @@ source $U7S_BASE_DIR/common/common.inc.sh export _CRIO_ROOTLESS=1 if [[ "$U7S_CGROUP_ENABLED" != "1" ]]; then - log::warning "Running without cgroup" + log::warning "Running without cgroup. This mode is deprecated and will be removed in a future release of Usernetes." fi mkdir -p $XDG_CONFIG_HOME/usernetes/crio $XDG_CONFIG_HOME/usernetes/containers/oci/hooks.d diff --git a/boot/kube-proxy.sh b/boot/kube-proxy.sh index 92225f2..74750bd 100755 --- a/boot/kube-proxy.sh +++ b/boot/kube-proxy.sh @@ -9,9 +9,6 @@ kind: KubeProxyConfiguration mode: "userspace" clientConnection: kubeconfig: "$XDG_CONFIG_HOME/usernetes/node/kube-proxy.kubeconfig" -featureGates: -# EndpointSliceProxying seems to break ClusterIP: https://github.com/rootless-containers/usernetes/pull/179 - EndpointSliceProxying: false conntrack: # Skip setting sysctl value "net.netfilter.nf_conntrack_max" maxPerCore: 0 diff --git a/boot/kubelet.sh b/boot/kubelet.sh index 1866837..f8716aa 100755 --- a/boot/kubelet.sh +++ b/boot/kubelet.sh @@ -8,7 +8,7 @@ if [[ "$U7S_CGROUP_ENABLED" = "1" ]]; then cgroup_driver="cgroupfs" cgroups_per_qos="true" else - log::warning "Running without cgroup" + log::warning "Running without cgroup. This mode is deprecated and will be removed in a future release of Usernetes." fi mkdir -p $XDG_RUNTIME_DIR/usernetes diff --git a/install.sh b/install.sh index 2f46809..c39c449 100755 --- a/install.sh +++ b/install.sh @@ -165,6 +165,10 @@ else fi fi +if [[ -z "$U7S_CGROUP_ENABLED" ]]; then + WARNING "Cgroup is disabled. In future version of Usernetes, cgroup (v2) will be an essential requirement." +fi + # Delay for debugging if [[ -n "$delay" ]]; then INFO "Delay: $delay seconds..." diff --git a/src/patches/README.md b/src/patches/README.md deleted file mode 100644 index 32adf44..0000000 --- a/src/patches/README.md +++ /dev/null @@ -1,18 +0,0 @@ -This directory contains Usernetes patch set for Kubernetes and its dependencies. -We will propose our patch set to the Kubernetes upstream later. - -## Contributing - -Please feel free to replace/add/remove `*.patch` files in this directory! - -Steps (e.g. for Kubernetes): -* Clone the upstream Kubernetes (`git clone https://github.com/kubernetes/kubernetes.git`) -* Checkout `KUBERNETES_COMMIT` specified in [`../../Dockerfile`](../../Dockerfile) -* Apply patches in this directory (`git am *.patch`) -* Commit your own change with `Signed-off-by` line (`git commit -a -s`) -* Consider melding your change into existing commits if your change is trivial (`git rebase -i ...`) -* Run `git format-patch upstream/master` and put the new patch set into this directory. -* Open a PR to the Usernetes repo. For changes to the Kubernetes patch set, please sign [the Kubernetes CLA](https://github.com/kubernetes/community/blob/master/CLA.md). - [_Your Github email address must match the same address you use when signing the CLA._](https://github.com/kubernetes/community/blob/master/CLA.md#4-ensure-your-github-e-mail-address-matches-address-used-to-sign-cla). When you contribute to the Usernetes repo first time, please include "[X] I signed the Kubernetes CLA" in your PR description text. - -Note: We may squash your commit to another commit but we will keep your `Signed-off-by` line. diff --git a/src/patches/kubernetes/0001-New-feature-gate-KubeletInUserNamespace.patch b/src/patches/kubernetes/0001-New-feature-gate-KubeletInUserNamespace.patch deleted file mode 100644 index 577aa46..0000000 --- a/src/patches/kubernetes/0001-New-feature-gate-KubeletInUserNamespace.patch +++ /dev/null @@ -1,46 +0,0 @@ -From b16323e37ce8b30b1767eb9ab56db7836ba83aa8 Mon Sep 17 00:00:00 2001 -From: Akihiro Suda -Date: Mon, 24 May 2021 23:18:02 +0900 -Subject: [PATCH 1/5] New feature gate: KubeletInUserNamespace - -Enables support for running kubelet in a user namespace. -The user namespace has to be created before running kubelet. -All the node components such as CRI need to be running in the same user namespace. - -See kubernetes/enhancements PR 1371 (merged) and issue 2033. - -Signed-off-by: Akihiro Suda ---- - pkg/features/kube_features.go | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go -index 7f32ad1db5a..ced805c7f0e 100644 ---- a/pkg/features/kube_features.go -+++ b/pkg/features/kube_features.go -@@ -761,6 +761,14 @@ const ( - // - // Allows clients to request a duration for certificates issued via the Kubernetes CSR API. - CSRDuration featuregate.Feature = "CSRDuration" -+ -+ // owner: @AkihiroSuda -+ // alpha: v1.22 -+ // -+ // Enables support for running kubelet in a user namespace. -+ // The user namespace has to be created before running kubelet. -+ // All the node components such as CRI need to be running in the same user namespace. -+ KubeletInUserNamespace featuregate.Feature = "KubeletInUserNamespace" - ) - - func init() { -@@ -875,6 +883,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS - ReadWriteOncePod: {Default: false, PreRelease: featuregate.Alpha}, - CSRDuration: {Default: true, PreRelease: featuregate.Beta}, - DelegateFSGroupToCSIDriver: {Default: false, PreRelease: featuregate.Alpha}, -+ KubeletInUserNamespace: {Default: false, PreRelease: featuregate.Alpha}, - - // inherited features from generic apiserver, relisted here to get a conflict if it is changed - // unintentionally on either side: --- -2.30.2 - diff --git a/src/patches/kubernetes/0005-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch b/src/patches/kubernetes/0001-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch similarity index 97% rename from src/patches/kubernetes/0005-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch rename to src/patches/kubernetes/0001-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch index cdaa1c7..7f0bec9 100644 --- a/src/patches/kubernetes/0005-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch +++ b/src/patches/kubernetes/0001-Not-for-Upstream-kubelet-new-cgroup-driver-none.patch @@ -1,7 +1,7 @@ -From c54269424463dd6c28f94234517c424787e73b54 Mon Sep 17 00:00:00 2001 +From c298bb88e5c7c48d8732ddadcfcda66ed5c5de97 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Sun, 2 Jun 2019 18:39:05 +0900 -Subject: [PATCH 5/5] [Not for Upstream] kubelet: new cgroup driver: "none" +Subject: [PATCH] [Not for Upstream] kubelet: new cgroup driver: "none" The "none" driver is used for running "rootless" mode on a host that does not support cgroup v2. @@ -17,10 +17,10 @@ Signed-off-by: Akihiro Suda 4 files changed, 86 insertions(+), 20 deletions(-) diff --git a/cmd/kubelet/app/options/options.go b/cmd/kubelet/app/options/options.go -index 98da4b7b387..56724a39026 100644 +index 5f5fa99a94e..af431fe7057 100644 --- a/cmd/kubelet/app/options/options.go +++ b/cmd/kubelet/app/options/options.go -@@ -488,7 +488,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig +@@ -489,7 +489,7 @@ func AddKubeletConfigFlags(mainfs *pflag.FlagSet, c *kubeletconfig.KubeletConfig fs.StringVar(&c.ProviderID, "provider-id", c.ProviderID, "Unique identifier for identifying the node in a machine database, i.e cloudprovider") fs.BoolVar(&c.CgroupsPerQOS, "cgroups-per-qos", c.CgroupsPerQOS, "Enable creation of QoS cgroup hierarchy, if true top level QoS and pod cgroups are created.") diff --git a/src/patches/kubernetes/0002-kubelet-cm-ignore-sysctl-error-when-running-in-usern.patch b/src/patches/kubernetes/0002-kubelet-cm-ignore-sysctl-error-when-running-in-usern.patch deleted file mode 100644 index 990b3fc..0000000 --- a/src/patches/kubernetes/0002-kubelet-cm-ignore-sysctl-error-when-running-in-usern.patch +++ /dev/null @@ -1,47 +0,0 @@ -From dbe015513971b69b2f69c02de1bb5960fe230b51 Mon Sep 17 00:00:00 2001 -From: Akihiro Suda -Date: Tue, 21 Aug 2018 16:45:04 +0900 -Subject: [PATCH 2/5] kubelet/cm: ignore sysctl error when running in userns - -Errors during setting the following sysctl values are ignored: -- vm.overcommit_memory -- vm.panic_on_oom -- kernel.panic -- kernel.panic_on_oops -- kernel.keys.root_maxkeys -- kernel.keys.root_maxbytes - -Signed-off-by: Akihiro Suda ---- - pkg/kubelet/cm/container_manager_linux.go | 8 ++++++++ - 1 file changed, 8 insertions(+) - -diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go -index d98162ec8a8..03f5aa36503 100644 ---- a/pkg/kubelet/cm/container_manager_linux.go -+++ b/pkg/kubelet/cm/container_manager_linux.go -@@ -39,6 +39,7 @@ import ( - utilpath "k8s.io/utils/path" - - libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices" -+ libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" - utilerrors "k8s.io/apimachinery/pkg/util/errors" -@@ -455,6 +456,13 @@ func setupKernelTunables(option KernelTunableBehavior) error { - klog.V(2).InfoS("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val) - err = sysctl.SetSysctl(flag, expectedValue) - if err != nil { -+ if libcontaineruserns.RunningInUserNS() { -+ if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) { -+ klog.V(2).InfoS("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err) -+ continue -+ } -+ klog.ErrorS(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag) -+ } - errList = append(errList, err) - } - } --- -2.30.2 - diff --git a/src/patches/kubernetes/0003-kube-proxy-allow-running-in-userns.patch b/src/patches/kubernetes/0003-kube-proxy-allow-running-in-userns.patch deleted file mode 100644 index 142fbaa..0000000 --- a/src/patches/kubernetes/0003-kube-proxy-allow-running-in-userns.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 192790c52fed05a865ef5e5e35f57236cd23e9c1 Mon Sep 17 00:00:00 2001 -From: Akihiro Suda -Date: Thu, 23 Aug 2018 14:14:44 +0900 -Subject: [PATCH 3/5] kube-proxy: allow running in userns - -Ignore an error during setting RLIMIT_NOFILE. - -Signed-off-by: Akihiro Suda ---- - pkg/proxy/userspace/proxier.go | 9 ++++++++- - 1 file changed, 8 insertions(+), 1 deletion(-) - -diff --git a/pkg/proxy/userspace/proxier.go b/pkg/proxy/userspace/proxier.go -index b9c14d28a4e..381fa842a2f 100644 ---- a/pkg/proxy/userspace/proxier.go -+++ b/pkg/proxy/userspace/proxier.go -@@ -26,14 +26,17 @@ import ( - "sync/atomic" - "time" - -+ libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" - v1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/types" - utilerrors "k8s.io/apimachinery/pkg/util/errors" - utilnet "k8s.io/apimachinery/pkg/util/net" - "k8s.io/apimachinery/pkg/util/runtime" - "k8s.io/apimachinery/pkg/util/sets" -+ utilfeature "k8s.io/apiserver/pkg/util/feature" - servicehelper "k8s.io/cloud-provider/service/helpers" - "k8s.io/klog/v2" -+ kubefeatures "k8s.io/kubernetes/pkg/features" - "k8s.io/kubernetes/pkg/proxy" - "k8s.io/kubernetes/pkg/proxy/config" - utilproxy "k8s.io/kubernetes/pkg/proxy/util" -@@ -231,7 +234,11 @@ func NewCustomProxier(loadBalancer LoadBalancer, listenIP net.IP, iptables iptab - - err = setRLimit(64 * 1000) - if err != nil { -- return nil, fmt.Errorf("failed to set open file handler limit: %v", err) -+ if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) && libcontaineruserns.RunningInUserNS() { -+ klog.V(2).InfoS("Failed to set open file handler limit to 64000 (running in UserNS, ignoring)", "err", err) -+ } else { -+ return nil, fmt.Errorf("failed to set open file handler limit to 64000: %w", err) -+ } - } - - proxyPorts := newPortAllocator(pr) --- -2.30.2 - diff --git a/src/patches/kubernetes/0004-kubelet-ignore-dev-kmsg-error-when-running-in-userns.patch b/src/patches/kubernetes/0004-kubelet-ignore-dev-kmsg-error-when-running-in-userns.patch deleted file mode 100644 index 2acd112..0000000 --- a/src/patches/kubernetes/0004-kubelet-ignore-dev-kmsg-error-when-running-in-userns.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 26e83ac4d4398ed94ed5391e4faed54824ed9a4d Mon Sep 17 00:00:00 2001 -From: Akihiro Suda -Date: Mon, 24 May 2021 23:35:22 +0900 -Subject: [PATCH 4/5] kubelet: ignore /dev/kmsg error when running in userns - -oomwatcher.NewWatcher returns "open /dev/kmsg: operation not permitted" error, -when running with sysctl value `kernel.dmesg_restrict=1`. - -The error is negligible for KubeletInUserNamespace. - -Signed-off-by: Akihiro Suda ---- - pkg/kubelet/kubelet.go | 21 ++++++++++++++++++--- - 1 file changed, 18 insertions(+), 3 deletions(-) - -diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go -index 43cad89dba0..4cf6fe201ab 100644 ---- a/pkg/kubelet/kubelet.go -+++ b/pkg/kubelet/kubelet.go -@@ -34,6 +34,7 @@ import ( - "k8s.io/client-go/informers" - - cadvisorapi "github.com/google/cadvisor/info/v1" -+ libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" - "k8s.io/mount-utils" - "k8s.io/utils/integer" - -@@ -481,7 +482,19 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, - - oomWatcher, err := oomwatcher.NewWatcher(kubeDeps.Recorder) - if err != nil { -- return nil, err -+ if libcontaineruserns.RunningInUserNS() { -+ if utilfeature.DefaultFeatureGate.Enabled(features.KubeletInUserNamespace) { -+ // oomwatcher.NewWatcher returns "open /dev/kmsg: operation not permitted" error, -+ // when running in a user namespace with sysctl value `kernel.dmesg_restrict=1`. -+ klog.V(2).InfoS("Failed to create an oomWatcher (running in UserNS, ignoring)", "err", err) -+ oomWatcher = nil -+ } else { -+ klog.ErrorS(err, "Failed to create an oomWatcher (running in UserNS, Hint: enable KubeletInUserNamespace feature flag to ignore the error)") -+ return nil, err -+ } -+ } else { -+ return nil, err -+ } - } - - clusterDNS := make([]net.IP, 0, len(kubeCfg.ClusterDNS)) -@@ -1360,8 +1373,10 @@ func (kl *Kubelet) initializeModules() error { - } - - // Start out of memory watcher. -- if err := kl.oomWatcher.Start(kl.nodeRef); err != nil { -- return fmt.Errorf("failed to start OOM watcher %v", err) -+ if kl.oomWatcher != nil { -+ if err := kl.oomWatcher.Start(kl.nodeRef); err != nil { -+ return fmt.Errorf("failed to start OOM watcher: %w", err) -+ } - } - - // Start resource analyzer --- -2.30.2 -