Skip to content

Commit

Permalink
Merge pull request #50 from andreas-kupries/ak-gosigar-48-kubecf-1312…
Browse files Browse the repository at this point in the history
…-mem-limits

fix: Query cgroup memory data and fuse with /proc data for more reliable information.
  • Loading branch information
klakin-pivotal authored Apr 30, 2021
2 parents 6efc1e3 + 34f6fb9 commit e52716b
Show file tree
Hide file tree
Showing 2 changed files with 1,002 additions and 56 deletions.
288 changes: 288 additions & 0 deletions sigar_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package sigar
import (
"bufio"
"bytes"
"errors"
"io"
"io/ioutil"
"os"
Expand All @@ -19,11 +20,47 @@ var system struct {
}

// Procd is the directory the proc filesystem files are read from.
// init sets it to "/proc".
var Procd string
// Sysd1 is the mount point of the cgroup v1 memory controller. init
// fills it in via determineControllerMounts and falls back to
// "/sys/fs/cgroup/memory" when no mount point was found.
var Sysd1 string
// Sysd2 is the mount point of the cgroup v2 hierarchy. init fills it
// in via determineControllerMounts and falls back to
// "/sys/fs/cgroup/unified" when no mount point was found.
var Sysd2 string

// Files in system directories used here
// - Procd
// - /stat
// - /meminfo
// - /self/cgroup | 'grep :memory:' | split ':' | last => cgroup
// - /self/cgroup | 'grep ::' | split ':' | last => cgroup/fallback
// - /self/mounts
// - Sysd1 (cgroup v1)
// - memory/<cgroup>/memory.limit_in_bytes
// - memory/<cgroup>/memory.stat
// - Sysd2 (cgroup v2)
// - <cgroup>/memory.high
// - <cgroup>/memory.current
// - <cgroup>/memory.swap.current
//
// While Procd is fixed to `/proc`, the `Sysd*` directories are
// dynamic: although there are semi-standard mount points for the
// cgroup controllers, these are only a convention, and the
// controllers can be mounted anywhere. The file `/proc/self/mounts`
// contains the information needed to locate them.

func init() {
system.ticks = 100 // C.sysconf(C._SC_CLK_TCK)

Procd = "/proc"
Sysd1 = ""
Sysd2 = ""

determineControllerMounts(&Sysd1, &Sysd2)

// Fallbacks for cgroup controller mount points if nothing was
// found in /proc/self/mounts
if Sysd1 == "" {
Sysd1 = "/sys/fs/cgroup/memory"
}
if Sysd2 == "" {
Sysd2 = "/sys/fs/cgroup/unified"
}

// grab system boot time
readFile(Procd+"/stat", func(line string) bool {
Expand Down Expand Up @@ -86,6 +123,70 @@ func (self *Mem) Get() error {
self.Used = self.Total - self.Free
self.ActualUsed = self.Total - self.ActualFree

// Instead of detecting if this code is run within a container
// or not (*), we simply attempt to retrieve the cgroup
// information about memory limits and usage and if present
// incorporate them into the results.
//
// 0. If we are unable to determine the Cgroup for the process
// we ignore it and stay with the host data.
//
// 1. If the cgroup limit is not available we ignore it and
// stay with the host data.
//
// 2. Note that we are taking the smaller of host total and
// cgroup limit, as the safer value for the total. The
// reason here is that there are Linux systems which report
// something like 8 EiB (Exa!) (**) as the cgroup limit, on
// systems which have only 64 GiB (Giga) of physical RAM.
//
// (*) There does not seem to be a truly reliable and portable
// means of detecting execution inside a container vs
// outside. Between all the platforms (macos, linux,
// windows), and container runtimes (docker, lxc, oci, ...).
//
// (**) The exact value actually is 2^63 - 4096, i.e
// 8 EiB - 4 KiB. This is, as far as is known, the
// maximum limit of the Linux virtual memory system.

var cgroup string
if err := determineSelfCgroup(&cgroup); err != nil {
// Unable to determine process' Cgroup
return nil
}

cgroupLimit, err := determineMemoryLimit(cgroup)
// (x) If the limit is not available or bogus we keep the host data as limit.

if err == nil && cgroupLimit < self.Total {
// See (2) above why only a cgroup limit less than the
// host total is accepted as the new total available
// memory in the cgroup.
self.Total = cgroupLimit
}

rss, err := determineMemoryUsage(cgroup)

if err != nil {
return nil
}

swap, err := determineSwapUsage(cgroup)
if err != nil {
// Swap information is optional. I.e. the kernel may
// have swap accounting disabled. Because of this any
// kind of trouble determining the swap usage is
// mapped to `no swap used`. This allows us to limp
// on with some inaccuracies, instead of aborting.
swap = 0
}

self.Used = rss + swap
self.Free = self.Total - self.Used

self.ActualUsed = self.Used
self.ActualFree = self.Free

return nil
}

Expand Down Expand Up @@ -316,6 +417,119 @@ func (self *ProcExe) Get(pid int) error {
return nil
}

// determineSwapUsage returns the swap usage recorded for the given
// cgroup path.
//
// The cgroup v2 file `memory.swap.current` below Sysd2 is preferred;
// when it can be read, its first line is converted with strtoull and
// that result is returned as is. Otherwise the `swap` entry of the
// cgroup v1 `memory.stat` file below Sysd1 is used.
//
// An error is returned when the v1 file cannot be read, or when it
// holds no usable `swap` entry ("no data found"). The function itself
// never maps a failure to zero; that decision is left to the caller.
func determineSwapUsage(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	if raw, err := ioutil.ReadFile(Sysd2 + cgroup + "/memory.swap.current"); err == nil {
		firstLine := strings.Split(string(raw), "\n")[0]
		return strtoull(firstLine)
	}

	// cgroup v1: pick the value out of memory.stat.
	var swapUsed uint64
	err, found := parseCgroupMeminfo(Sysd1+cgroup, map[string]*uint64{
		"swap": &swapUsed,
	})
	if err != nil {
		return 0, err
	}
	if !found {
		return 0, errors.New("no data found")
	}
	return swapUsed, nil
}

// determineMemoryUsage returns the memory usage recorded for the given
// cgroup path.
//
// The cgroup v2 file `memory.current` below Sysd2 is preferred; when it
// can be read, its first line is converted with strtoull and that
// result is returned as is. Otherwise the `total_rss` entry of the
// cgroup v1 `memory.stat` file below Sysd1 is used.
//
// An error is returned when the v1 file cannot be read, or when it
// holds no usable `total_rss` entry ("no data found").
func determineMemoryUsage(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	if raw, err := ioutil.ReadFile(Sysd2 + cgroup + "/memory.current"); err == nil {
		firstLine := strings.Split(string(raw), "\n")[0]
		return strtoull(firstLine)
	}

	// cgroup v1: pick the value out of memory.stat.
	var rssUsed uint64
	err, found := parseCgroupMeminfo(Sysd1+cgroup, map[string]*uint64{
		"total_rss": &rssUsed,
	})
	if err != nil {
		return 0, err
	}
	if !found {
		return 0, errors.New("no data found")
	}
	return rssUsed, nil
}

// determineMemoryLimit returns the memory limit configured for the
// given cgroup path.
//
// The cgroup v2 file `memory.high` below Sysd2 is preferred. When it
// can be read and its first line is the literal `max`, the error
// "no limit" is returned; any other first line is converted with
// strtoull. When the v2 file cannot be read, the cgroup v1 file
// `memory.limit_in_bytes` below Sysd1 is consulted instead, and the
// read error is returned if that fails too.
func determineMemoryLimit(cgroup string) (uint64, error) {
	// cgroup v2 takes precedence over v1.
	if raw, err := ioutil.ReadFile(Sysd2 + cgroup + "/memory.high"); err == nil {
		firstLine := strings.Split(string(raw), "\n")[0]
		if firstLine != "max" {
			return strtoull(firstLine)
		}
		// `max` means the cgroup is unlimited; report that as an error.
		return 0, errors.New("no limit")
	}

	raw, err := ioutil.ReadFile(Sysd1 + cgroup + "/memory.limit_in_bytes")
	if err != nil {
		return 0, err
	}
	return strtoull(strings.Split(string(raw), "\n")[0])
}

// determineSelfCgroup looks up the control group of the current
// process in Procd+"/self/cgroup" and stores its path in *cgroup.
//
// Each line of that file is expected to have the form
// `id:controllers:path`; lines with fewer than three fields are
// ignored. Only the first two colons separate fields, so a path that
// itself contains a colon is kept intact.
//
// A cgroup v1 entry whose comma-separated controller list contains
// `memory` is preferred. Failing that, the cgroup v2 entry (`0::path`)
// is used. When several lines match, the last one wins.
//
// An error is returned when the file cannot be read or when neither
// kind of entry yields a non-empty path. In that case *cgroup is left
// untouched.
func determineSelfCgroup(cgroup *string) error {
	var v1Path, v2Path string

	// A single pass over the file collects both candidates.
	err := readFile(Procd+"/self/cgroup", func(line string) bool {
		// Limit the split to three fields: the path is everything
		// after the second colon.
		fields := strings.SplitN(line, ":", 3)
		if len(fields) < 3 {
			return true
		}
		path := strings.Trim(fields[2], " ")

		switch {
		case stringSliceContains(strings.Split(fields[1], ","), "memory"):
			// Match: `*:memory:/path` and `*:a,memory,b:/path`
			v1Path = path
		case fields[0] == "0" && fields[1] == "":
			// Match: `0::/path`
			v2Path = path
		}
		return true
	})
	if err != nil {
		return err
	}

	switch {
	case v1Path != "":
		*cgroup = v1Path
	case v2Path != "":
		*cgroup = v2Path
	default:
		return errors.New("unable to determine control group")
	}
	return nil
}

func parseMeminfo(table map[string]*uint64) error {
return readFile(Procd+"/meminfo", func(line string) bool {
fields := strings.Split(line, ":")
Expand All @@ -332,6 +546,27 @@ func parseMeminfo(table map[string]*uint64) error {
})
}

// parseCgroupMeminfo reads cgroupDir+"/memory.stat" and, for every line
// whose first whitespace-separated field is a key of table, stores the
// value parsed from the second field through the matching pointer.
//
// Lines without a value field, lines whose key has a nil or missing
// table entry, and values strtoull rejects are skipped rather than
// treated as errors.
//
// It returns the error from reading the file, if any, and whether at
// least one table entry was filled in. Note the unusual result order
// (error first), which is kept for the existing callers.
func parseCgroupMeminfo(cgroupDir string, table map[string]*uint64) (error, bool) {
	var found bool
	err := readFile(cgroupDir+"/memory.stat", func(line string) bool {
		// strings.Fields tolerates repeated or trailing separators;
		// the length check guards against a key with no value, which
		// would otherwise index past the end of the slice.
		fields := strings.Fields(line)
		if len(fields) < 2 {
			return true
		}

		ptr := table[fields[0]]
		if ptr == nil {
			return true
		}

		if val, err := strtoull(fields[1]); err == nil {
			*ptr = val
			found = true
		}
		return true
	})
	if err != nil {
		return err, false
	}
	return nil, found
}

func parseCpuStat(self *Cpu, line string) error {
fields := strings.Fields(line)

Expand Down Expand Up @@ -390,3 +625,56 @@ func readProcFile(pid int, name string) ([]byte, error) {

return contents, err
}

// determineControllerMounts scans Procd+"/self/mounts" for the cgroup
// mount points and stores them through sysd1 and sysd2.
//
// Entries have the form `device path type options ...`, with the
// elements separated by single spaces; entries with fewer than four
// elements are ignored.
//
//	v2: `path` of an entry with `type == "cgroup2"`            -> *sysd2
//	v1: `path` of an entry with `type == "cgroup"` whose
//	    comma-separated options contain `memory`               -> *sysd1
//
// NOTE: The `device` column can be anything. It cannot be used to pare
// down the set of entries going into the full check.
//
// A target that is already non-empty is never overwritten, so the
// first matching entry wins and further matches are ignored. This
// function is called from init(), where a panic on a second mount
// point would take down every program importing the package.
//
// When the file cannot be read, or holds no matching entry, the
// corresponding target is left unchanged.
func determineControllerMounts(sysd1, sysd2 *string) {
	// Best effort: the read error is deliberately dropped, the targets
	// simply keep their current values in that case.
	_ = readFile(Procd+"/self/mounts", func(line string) bool {
		fields := strings.Split(line, " ")
		if len(fields) < 4 {
			return true
		}
		mpath, mtype, moptions := fields[1], fields[2], fields[3]

		switch mtype {
		case "cgroup2":
			if *sysd2 == "" {
				*sysd2 = mpath
			}
		case "cgroup":
			if *sysd1 == "" && stringSliceContains(strings.Split(moptions, ","), "memory") {
				*sysd1 = mpath
			}
		}
		return true
	})
}

// stringSliceContains reports whether x is equal to at least one
// element of a. A nil or empty slice contains nothing.
func stringSliceContains(a []string, x string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] == x {
			return true
		}
	}
	return false
}
Loading

0 comments on commit e52716b

Please sign in to comment.