From 4aa0f2826edaceaaf70a3aebad0d663da4b1d44e Mon Sep 17 00:00:00 2001 From: Devin Leamy Date: Fri, 19 Apr 2024 12:06:15 -0400 Subject: [PATCH 1/5] [cgroups2] Introduce build files for the MemoryControllerProcess. --- src/CMakeLists.txt | 3 ++- src/Makefile.am | 4 +++- .../isolators/cgroups2/controllers/memory.cpp | 17 ++++++++++++++++ .../isolators/cgroups2/controllers/memory.hpp | 20 +++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp create mode 100644 src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 84f423fa583..963d4201afe 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -355,7 +355,8 @@ if (ENABLE_CGROUPS_v2) linux/ebpf.cpp slave/containerizer/mesos/isolators/cgroups2/controller.cpp slave/containerizer/mesos/isolators/cgroups2/controllers/core.cpp - slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.cpp) + slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.cpp + slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp) endif () diff --git a/src/Makefile.am b/src/Makefile.am index 3677df5076e..779b893fca3 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1503,7 +1503,9 @@ MESOS_LINUX_FILES += \ slave/containerizer/mesos/isolators/cgroups2/controllers/core.cpp \ slave/containerizer/mesos/isolators/cgroups2/controllers/core.hpp \ slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.cpp \ - slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.hpp + slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.hpp \ + slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp \ + slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp endif if ENABLE_SECCOMP_ISOLATOR diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp new file mode 100644 index 00000000000..45e1fdef0bf --- /dev/null +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp @@ -0,0 +1,17 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp" diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp new file mode 100644 index 00000000000..2f005747d4a --- /dev/null +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp @@ -0,0 +1,20 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef __MEMORY_HPP__ +#define __MEMORY_HPP__ + +#endif // __MEMORY_HPP__ From 0a676afbd0596bf1a4fd336064913f3b6f2c73a1 Mon Sep 17 00:00:00 2001 From: Devin Leamy Date: Fri, 26 Apr 2024 12:14:00 -0400 Subject: [PATCH 2/5] [cgroups2] Introduces the MemoryControllerProcess. Introduces the `MemoryControllerProcess`, the cgroups v2 memory isolator, which will be used by the `Cgroups2IsolatorProcess`. Unlike the `MemorySubsystemProcess`, the cgroups v1 memory isolator, we: - Don't allow limits on swap memory to be set. - Don't report memory pressure levels (this facility is no longer part of the cgroups memory controller's API) Future work may include: - Adding support for swap memory, and - Reporting the (now available) memory pressure stall information --- .../mesos/isolators/cgroups2/cgroups2.cpp | 4 +- .../mesos/isolators/cgroups2/constants.hpp | 4 + .../isolators/cgroups2/controllers/memory.cpp | 186 ++++++++++++++++++ .../isolators/cgroups2/controllers/memory.hpp | 65 ++++++ 4 files changed, 258 insertions(+), 1 deletion(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp index a53e65cfbf3..ae6e5aa7854 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/cgroups2.cpp @@ -20,6 +20,7 @@ #include "slave/containerizer/mesos/isolators/cgroups2/cgroups2.hpp" #include "slave/containerizer/mesos/isolators/cgroups2/controllers/core.hpp" #include "slave/containerizer/mesos/isolators/cgroups2/controllers/cpu.hpp" +#include "slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp" #include #include @@ -74,7 +75,8 @@ Try Cgroups2IsolatorProcess::create(const Flags& flags) { hashmap>(*)(const Flags&)> creators = { {"core", &CoreControllerProcess::create}, - {"cpu", &CpuControllerProcess::create} + {"cpu", &CpuControllerProcess::create}, + {"mem", &MemoryControllerProcess::create} }; hashmap> controllers; diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp index dafc7f92fff..9498a4779fd 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/constants.hpp @@ -32,8 +32,12 @@ const uint64_t MIN_CPU_SHARES = 2; // Linux constant. const Duration CPU_CFS_PERIOD = Milliseconds(100); // Linux default. const Duration MIN_CPU_CFS_QUOTA = Milliseconds(1); +// Memory controller constants. +const Bytes MIN_MEMORY = Megabytes(32); + const std::string CGROUPS_V2_CONTROLLER_CORE_NAME = "core"; const std::string CGROUPS_V2_CONTROLLER_CPU_NAME = "cpu"; +const std::string CGROUPS_V2_CONTROLLER_MEMORY_NAME = "memory"; } // namespace slave { } // namespace internal { diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp index 45e1fdef0bf..7908540de3d 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp @@ -14,4 +14,190 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + +#include +#include +#include + +#include + +#include "common/protobuf_utils.hpp" + +#include "linux/cgroups2.hpp" + +#include "slave/containerizer/mesos/isolators/cgroups2/constants.hpp" #include "slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp" + +using process::Failure; +using process::Future; +using process::PID; +using process::Owned; + +using cgroups2::memory::Stats; + +using mesos::slave::ContainerConfig; +using mesos::slave::ContainerLimitation; + +using std::ostringstream; +using std::string; + +namespace mesos { +namespace internal { +namespace slave { + +Try> MemoryControllerProcess::create(const Flags& flags) +{ + return Owned(new MemoryControllerProcess(flags)); +} + + +MemoryControllerProcess::MemoryControllerProcess(const Flags& _flags) + : ProcessBase(process::ID::generate("cgroups-v2-memory-controller")), + ControllerProcess(_flags) {} + + +string MemoryControllerProcess::name() const +{ + return CGROUPS_V2_CONTROLLER_MEMORY_NAME; +} + + +Future MemoryControllerProcess::prepare( + const ContainerID& containerId, + const string& cgroup, + const ContainerConfig& containerConfig) +{ + if (infos.contains(containerId)) { + return Failure("Already prepared"); + } + + infos.put(containerId, Info()); + + return Nothing(); +} + + +Future MemoryControllerProcess::isolate( + const ContainerID& containerId, + const string& cgroup, + pid_t pid) +{ + if (!infos.contains(containerId)) { + return Failure("Unknown container"); + } + + // TODO(dleamy): Implement manual OOM score adjustment, similar to as it done + // in the cgroups v1 isolator. + + return Nothing(); +} + + +Future MemoryControllerProcess::recover( + const ContainerID& containerId, + const string& cgroup) +{ + if (infos.contains(containerId)) { + return Failure("Already recovered"); + } + + infos.put(containerId, Info()); + infos[containerId].hardLimitUpdated = true; + + return Nothing(); +} + + +Future MemoryControllerProcess::update( + const ContainerID& containerId, + const string& cgroup, + const Resources& resourceRequests, + const google::protobuf::Map& resourceLimits) +{ + if (!infos.contains(containerId)) { + return Failure("Unknown container"); + } + + if (resourceRequests.mem().isNone()) { + return Failure("No memory resources requested"); + } + + Bytes memory = *resourceRequests.mem(); + Bytes softLimit = std::max(memory, MIN_MEMORY); + + // Set the soft memory limit. + Try high = cgroups2::memory::set_high(cgroup, softLimit); + if (high.isError()) { + return Failure("Failed to set soft memory limit: " + high.error()); + } + + LOG(INFO) << "Updated soft memory limit to " << softLimit << " for container " + << containerId; + + // Determine the new hard memory limit. + Option newHardLimit = [&resourceLimits, &softLimit]() -> Option + { + if (resourceLimits.count("mem") > 0) { + double requestedLimit = resourceLimits.at("mem").value(); + if (std::isinf(requestedLimit)) { + return None(); + } + + return std::max( + Megabytes(static_cast(requestedLimit)), MIN_MEMORY); + } + + return softLimit; + }(); + + Result currentHardLimit = cgroups2::memory::max(cgroup); + if (currentHardLimit.isError()) { + return Failure("Failed to get current hard memory limit: " + + currentHardLimit.error()); + } + + // We only update the hard limit if: + // 1) The hard limit has not yet been set for the container, or + // 2) The new hard limit is greater than the existing hard limit. + // + // This is done to avoid the chance of triggering an OOM by reducing the + // hard limit to below the current memory usage. + + bool updateHardLimit = !infos[containerId].hardLimitUpdated + || newHardLimit.isNone() // infinite memory limit + || *newHardLimit > *currentHardLimit; + + if (updateHardLimit) { + Try max = cgroups2::memory::set_max(cgroup, newHardLimit); + if (max.isError()) { + return Failure("Failed to set hard memory limit: " + max.error()); + } + + infos[containerId].hardLimitUpdated = true; + } + + return Nothing(); +} + + +Future MemoryControllerProcess::cleanup( + const ContainerID& containerId, + const string& cgroup) +{ + if (!infos.contains(containerId)) { + LOG(INFO) << "Ignoring memory cleanup for unknown container " + << containerId; + + return Nothing(); + } + + infos.erase(containerId); + + return Nothing(); +} + + +} // namespace slave { +} // namespace internal { +} // namespace mesos { diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp index 2f005747d4a..2e60b2c19a7 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp @@ -17,4 +17,69 @@ #ifndef __MEMORY_HPP__ #define __MEMORY_HPP__ +#include + +#include + +#include + +#include "slave/flags.hpp" +#include "slave/containerizer/mesos/isolators/cgroups2/controller.hpp" + +namespace mesos { +namespace internal { +namespace slave { + +class MemoryControllerProcess : public ControllerProcess +{ +public: + static Try> create( + const Flags& flags); + + ~MemoryControllerProcess() override = default; + + std::string name() const override; + + process::Future prepare( + const ContainerID& containerId, + const std::string& cgroup, + const mesos::slave::ContainerConfig& containerConfig) override; + + process::Future isolate( + const ContainerID& containerId, + const std::string& cgroup, + pid_t pid) override; + + process::Future recover( + const ContainerID& containerId, + const std::string& cgroup) override; + + process::Future update( + const ContainerID& containerId, + const std::string& cgroup, + const Resources& resourceRequests, + const google::protobuf::Map< + std::string, Value::Scalar>& resourceLimits = {}) override; + + process::Future cleanup( + const ContainerID& containerId, + const std::string& cgroup) override; + +private: + struct Info + { + // Check if the hard memory limit has been updated for the container. + // Also true if the container was recovered. + bool hardLimitUpdated = false; + }; + + MemoryControllerProcess(const Flags& flags); + + hashmap infos; +}; + +} // namespace slave { +} // namespace internal { +} // namespace mesos { + #endif // __MEMORY_HPP__ From 180ae5512bf641019de39474848920bd4172070c Mon Sep 17 00:00:00 2001 From: Devin Leamy Date: Fri, 26 Apr 2024 12:14:00 -0400 Subject: [PATCH 3/5] [cgroups2] Introduces the MemoryControllerProcess. Introduces the `MemoryControllerProcess`, the cgroups v2 memory isolator, which will be used by the `Cgroups2IsolatorProcess`. Unlike the `MemorySubsystemProcess`, the cgroups v1 memory isolator, we: - Don't allow limits on swap memory to be set. - Don't report memory pressure levels (this facility is no longer part of the cgroups memory controller's API) Future work may include: - Adding support for swap memory, and - Reporting the (now available) memory pressure stall information --- .../mesos/isolators/cgroups2/controllers/memory.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp index 2e60b2c19a7..dfe64c8fd81 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp @@ -61,6 +61,13 @@ class MemoryControllerProcess : public ControllerProcess const google::protobuf::Map< std::string, Value::Scalar>& resourceLimits = {}) override; +<<<<<<< HEAD +======= + process::Future usage( + const ContainerID& containerId, + const std::string& cgroup) override; + +>>>>>>> 1a92a6a2c ([cgroups2] Introduces the MemoryControllerProcess.) process::Future cleanup( const ContainerID& containerId, const std::string& cgroup) override; From f522e5202bfcd2afd069b47ba962b3c44f8a1f92 Mon Sep 17 00:00:00 2001 From: Devin Leamy Date: Fri, 26 Apr 2024 12:24:34 -0400 Subject: [PATCH 4/5] [cgroups2] Add OOM listening to the MemoryControllerProcess. Introduces OOM listening to the MemoryControllerProcess so that we detect, report, and respond to OOM events. --- .../isolators/cgroups2/controllers/memory.cpp | 135 +++++++++++++++++- .../isolators/cgroups2/controllers/memory.hpp | 26 ++-- 2 files changed, 152 insertions(+), 9 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp index 7908540de3d..38b9c5a14c9 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp @@ -74,6 +74,8 @@ Future MemoryControllerProcess::prepare( infos.put(containerId, Info()); + oomListen(containerId, cgroup); + return Nothing(); } @@ -105,10 +107,24 @@ Future MemoryControllerProcess::recover( infos.put(containerId, Info()); infos[containerId].hardLimitUpdated = true; + oomListen(containerId, cgroup); + return Nothing(); } +Future MemoryControllerProcess::watch( + const ContainerID& containerId, + const string& cgroup) +{ + if (!infos.contains(containerId)) { + return Failure("Unknown container"); + } + + return infos[containerId].limitation.future(); +} + + Future MemoryControllerProcess::update( const ContainerID& containerId, const string& cgroup, @@ -192,12 +208,129 @@ Future MemoryControllerProcess::cleanup( return Nothing(); } + if (infos[containerId].oom.isPending()) { + infos[containerId].oom.discard(); + } + infos.erase(containerId); return Nothing(); } +void MemoryControllerProcess::oomListen( + const ContainerID& containerId, + const string& cgroup) +{ + if (!infos.contains(containerId)) { + LOG(ERROR) << "Cannot listen for OOM events for unknown container " + << containerId; + return; + } + + infos[containerId].oom = cgroups2::memory::oom(cgroup); + + LOG(INFO) << "Listening for OOM events for container " + << containerId; + + infos[containerId].oom.onAny( + defer(PID(this), + &MemoryControllerProcess::oomed, + containerId, + cgroup, + lambda::_1)); +} + + +void MemoryControllerProcess::oomed( + const ContainerID& containerId, + const string& cgroup, + const Future& oom) +{ + if (oom.isDiscarded()) { + LOG(INFO) << "OOM event listener discarded"; + return; + } + + if (oom.isFailed()) { + LOG(ERROR) << "OOM event listener failed: " << oom.failure(); + return; + } + + if (!infos.contains(containerId)) { + // It is likely that process exited is executed before this + // function (e.g. The kill and OOM events happen at the same time, + // and the process exit event arrives first). Therefore, we should + // not report a fatal error here. + LOG(INFO) << "OOM event received for terminated container"; + return; + } + + LOG(INFO) << "OOM detected for container" << containerId; + + // Construct a message for the limitation to help with debugging + // the OOM. + + ostringstream limitMessage; + limitMessage << "Memory limit exceeded: "; + + // TODO(dleamy): Report the peak memory usage of the container. The + // 'memory.peak' control is only available on newer Linux + // kernels. + + // Report memory statistics. + limitMessage << "\nMEMORY STATISTICS: \n"; + Try memoryStats = cgroups2::memory::stats(cgroup); + if (memoryStats.isError()) { + LOG(ERROR) << "Failed to get cgroup memory stats: " << memoryStats.error(); + return; + } + + limitMessage << "Anon: " << memoryStats->anon << "\n"; + limitMessage << "File: " << memoryStats->file << "\n"; + limitMessage << "Kernel: " << memoryStats->kernel << "\n"; + limitMessage << "Kernel TCP: " << memoryStats->sock << "\n"; + limitMessage << "Kernel Stack: " << memoryStats->kernel_stack << "\n"; + limitMessage << "Page Tables: " << memoryStats->pagetables << "\n"; + limitMessage << "VMalloc: " << memoryStats->vmalloc << "\n"; + + // TODO(dleamy): Report 'total_mapped_file' cgroups v2 equivalent. + // "The amount of file backed memory, in bytes, mapped into + // a process' address space." + + LOG(INFO) << limitMessage.str(); + + Result hardLimit = cgroups2::memory::max(cgroup); + if (hardLimit.isError()) { + LOG(ERROR) << "Failed to get hard memory limit: " << hardLimit.error(); + return; + } + + if (hardLimit.isNone()) { + LOG(ERROR) << "No hard limit was set for the container - unexpected OOM"; + return; + } + + // Complete the container limitation promise with a memory resource + // limitation. + // + // TODO(jieyu): This is not accurate if the memory resource is from + // a non-star role or spans roles (e.g., "*" and "role"). Ideally, + // we should save the resources passed in and report it here. + // + // TODO(dleamy): We report the hard limit because not all machines have + // access to 'memory.peak', the peak memory usage of the + // cgroup. + double megabytes = (double) hardLimit->bytes() / Bytes::MEGABYTES; + Resources memory = *Resources::parse("mem", stringify(megabytes), "*"); + + infos[containerId].limitation.set( + protobuf::slave::createContainerLimitation( + memory, + limitMessage.str(), + TaskStatus::REASON_CONTAINER_LIMITATION_MEMORY)); +} + } // namespace slave { } // namespace internal { -} // namespace mesos { +} // namespace mesos { \ No newline at end of file diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp index dfe64c8fd81..3dbfca91796 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp @@ -54,6 +54,10 @@ class MemoryControllerProcess : public ControllerProcess const ContainerID& containerId, const std::string& cgroup) override; + process::Future watch( + const ContainerID& containerId, + const std::string& cgroup) override; + process::Future update( const ContainerID& containerId, const std::string& cgroup, @@ -61,13 +65,6 @@ class MemoryControllerProcess : public ControllerProcess const google::protobuf::Map< std::string, Value::Scalar>& resourceLimits = {}) override; -<<<<<<< HEAD -======= - process::Future usage( - const ContainerID& containerId, - const std::string& cgroup) override; - ->>>>>>> 1a92a6a2c ([cgroups2] Introduces the MemoryControllerProcess.) process::Future cleanup( const ContainerID& containerId, const std::string& cgroup) override; @@ -75,6 +72,10 @@ class MemoryControllerProcess : public ControllerProcess private: struct Info { + process::Future oom; + + process::Promise limitation; + // Check if the hard memory limit has been updated for the container. // Also true if the container was recovered. bool hardLimitUpdated = false; @@ -82,6 +83,15 @@ class MemoryControllerProcess : public ControllerProcess MemoryControllerProcess(const Flags& flags); + void oomListen( + const ContainerID& containerId, + const std::string& cgroup); + + void oomed( + const ContainerID& containerId, + const std::string& cgroup, + const process::Future& oomFuture); + hashmap infos; }; @@ -89,4 +99,4 @@ class MemoryControllerProcess : public ControllerProcess } // namespace internal { } // namespace mesos { -#endif // __MEMORY_HPP__ +#endif // __MEMORY_HPP__ \ No newline at end of file From 31e00d7d7dbe5b358601cf3c2b06adf3a5e6deda Mon Sep 17 00:00:00 2001 From: Devin Leamy Date: Fri, 26 Apr 2024 12:27:54 -0400 Subject: [PATCH 5/5] [cgroups2] Add memory usage reporting to the MemoryControllerProcess Introduces `::usage` to the MemoryControllerProcess to report the total memory usage of a cgroup as well as memory usage statistics provided by `cgroups2::memory:stats`. --- .../isolators/cgroups2/controllers/memory.cpp | 58 +++++++++++++++++++ .../isolators/cgroups2/controllers/memory.hpp | 4 ++ .../containerizer/memory_isolator_tests.cpp | 12 ++-- src/tests/mesos.cpp | 4 +- 4 files changed, 71 insertions(+), 7 deletions(-) diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp index 38b9c5a14c9..5a69cb95acf 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.cpp @@ -197,6 +197,64 @@ Future MemoryControllerProcess::update( } +Future MemoryControllerProcess::usage( + const ContainerID& containerId, + const string& cgroup) +{ + if (!infos.contains(containerId)) { + return Failure("Cannot report resource usage for unknown container " + "'" + stringify(containerId) + "'"); + } + + LOG(INFO) << "Collecting 'memory' usage statistics for container " + << containerId; + + ResourceStatistics stats; + + // User memory usage. + Try usage = cgroups2::memory::usage(cgroup); + if (usage.isError()) { + return Failure("Failed to get user memory usage: " + usage.error()); + } + + stats.set_mem_total_bytes(usage->bytes()); + stats.set_mem_rss_bytes(usage->bytes()); + + // Kernel memory usage. + Try memoryStats = cgroups2::memory::stats(cgroup); + if (memoryStats.isError()) { + return Failure("Failed to get cgroup memory stats: " + memoryStats.error()); + } + + stats.set_mem_kmem_usage_bytes(memoryStats->kernel.bytes()); + + // Kernel TCP buffers usage. + stats.set_mem_kmem_tcp_usage_bytes(memoryStats->sock.bytes()); + + // Page cache usage. + stats.set_mem_file_bytes(memoryStats->file.bytes()); + stats.set_mem_cache_bytes(memoryStats->file.bytes()); + + // Anonymous memory usage. + stats.set_mem_anon_bytes(memoryStats->anon.bytes()); + + // File mapped memory usage. + stats.set_mem_mapped_file_bytes(memoryStats->file_mapped.bytes()); + + // Total unevictable memory. + // Equivalent to 'memory.min', the minimum amount of memory that will never + // be reclaimed by the kernel. + Try min = cgroups2::memory::min(cgroup); + if (min.isError()) { + return Failure("Failed to get unrevocable memory size: " + min.error()); + } + + stats.set_mem_unevictable_bytes(min->bytes()); + + return stats; +} + + Future MemoryControllerProcess::cleanup( const ContainerID& containerId, const string& cgroup) diff --git a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp index 3dbfca91796..4a80717e670 100644 --- a/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp +++ b/src/slave/containerizer/mesos/isolators/cgroups2/controllers/memory.hpp @@ -65,6 +65,10 @@ class MemoryControllerProcess : public ControllerProcess const google::protobuf::Map< std::string, Value::Scalar>& resourceLimits = {}) override; + process::Future usage( + const ContainerID& containerId, + const std::string& cgroup) override; + process::Future cleanup( const ContainerID& containerId, const std::string& cgroup) override; diff --git a/src/tests/containerizer/memory_isolator_tests.cpp b/src/tests/containerizer/memory_isolator_tests.cpp index ec0f359a253..be8475f515b 100644 --- a/src/tests/containerizer/memory_isolator_tests.cpp +++ b/src/tests/containerizer/memory_isolator_tests.cpp @@ -45,7 +45,7 @@ namespace internal { namespace tests { class MemoryIsolatorTest - : public MesosTest, + : public ContainerizerTest, public WithParamInterface {}; @@ -152,6 +152,11 @@ TEST_P(MemoryIsolatorTest, ROOT_MemUsage) // Metrics for kmem are only enabled with memory isolation. if (GetParam() == "cgroups/mem") { + // Check that at least one page of kernel memory is used. Each page is + // 4096 bytes so we expect at least that much memory to be used. +#ifdef ENABLE_CGROUPS_V2 + ASSERT_LT(4096u, usage->mem_kmem_usage_bytes()); +#else Result hierarchy = cgroups::hierarchy("memory"); ASSERT_SOME(hierarchy); @@ -160,11 +165,10 @@ TEST_P(MemoryIsolatorTest, ROOT_MemUsage) ASSERT_SOME(kmemExists); if (kmemExists.get()) { - // We can assume more than 4096 bytes here, since a kernel page size is - // 4kB and we are allocating at least one. ASSERT_LT(4096u, usage->mem_kmem_usage_bytes()); } - } +#endif // ENABLE_CGROUPS_V2 + } driver.stop(); driver.join(); diff --git a/src/tests/mesos.cpp b/src/tests/mesos.cpp index 762200cda73..ffbb30b9551 100644 --- a/src/tests/mesos.cpp +++ b/src/tests/mesos.cpp @@ -586,9 +586,7 @@ slave::Flags ContainerizerTest::CreateSlaveFlags() // Use cgroup isolators if they're available and we're root. // TODO(idownes): Refactor the cgroups/non-cgroups code. if (cgroupsV2() && *user == "root") { - // TODO(dleamy): Add the memory isolator once it's supported by the cgroups - // v2 isolator. - flags.isolation = "cgroups/cpu"; + flags.isolation = "cgroups/cpu,cgroups/mem"; flags.cgroups_hierarchy = "/sys/fs/cgroup"; flags.cgroups_root = TEST_CGROUPS_ROOT; } else if (cgroups::enabled() && *user == "root") {