From 835cad50fcc9bf6b19c1c32277f35ff707dc60d6 Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 25 Jul 2024 13:19:35 +0530 Subject: [PATCH 1/5] make metrics build specific --- .../opentelemetry/internal/metrics/metrics.go | 67 ++++++++++ .../internal/metrics/metricsImpl.go | 100 +++++++++++++++ .../internal/metrics/metricsNoop.go | 21 +++ .../internal/metrics/system_metrics.go | 120 ------------------ 4 files changed, 188 insertions(+), 120 deletions(-) create mode 100644 instrumentation/opentelemetry/internal/metrics/metrics.go create mode 100644 instrumentation/opentelemetry/internal/metrics/metricsImpl.go create mode 100644 instrumentation/opentelemetry/internal/metrics/metricsNoop.go delete mode 100644 instrumentation/opentelemetry/internal/metrics/system_metrics.go diff --git a/instrumentation/opentelemetry/internal/metrics/metrics.go b/instrumentation/opentelemetry/internal/metrics/metrics.go new file mode 100644 index 0000000..73841eb --- /dev/null +++ b/instrumentation/opentelemetry/internal/metrics/metrics.go @@ -0,0 +1,67 @@ +package metrics + +import ( + "context" + "fmt" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/metric" + "log" +) + +const meterName = "goagent.hypertrace.org/metrics" + +type systemMetrics interface { + getMemory() (float64, error) + getCPU() (float64, error) + getCurrentMetrics() error +} + +func InitializeSystemMetrics() { + meterProvider := otel.GetMeterProvider() + meter := meterProvider.Meter(meterName) + err := setUpMetricRecorder(meter) + if err != nil { + log.Printf("error initializing metrics, failed to setup metric recorder: %v\n", err) + } +} + +func setUpMetricRecorder(meter metric.Meter) error { + if meter == nil { + return fmt.Errorf("error while setting up metric recorder: meter is nil") + } + cpuSeconds, err := meter.Float64ObservableCounter("hypertrace.agent.cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds")) + if err != nil { + return fmt.Errorf("error while setting up cpu seconds metric counter: %v", err) + } + memory, err := meter.Float64ObservableGauge("hypertrace.agent.memory", metric.WithDescription("Metric to monitor memory usage")) + if err != nil { + return fmt.Errorf("error while setting up memory metric counter: %v", err) + } + // Register the callback function for both cpu_seconds and memory observable gauges + _, err = meter.RegisterCallback( + func(ctx context.Context, result metric.Observer) error { + sysMetrics := newSystemMetrics() + err := sysMetrics.getCurrentMetrics() + if err != nil { + return err + } + cpus, err := sysMetrics.getCPU() + if err != nil { + return err + } + mem, err := sysMetrics.getMemory() + if err != nil { + return err + } + result.ObserveFloat64(cpuSeconds, cpus) + result.ObserveFloat64(memory, mem) + return nil + }, + cpuSeconds, memory, + ) + if err != nil { + log.Fatalf("failed to register callback: %v", err) + return err + } + return nil +} diff --git a/instrumentation/opentelemetry/internal/metrics/metricsImpl.go b/instrumentation/opentelemetry/internal/metrics/metricsImpl.go new file mode 100644 index 0000000..4139291 --- /dev/null +++ b/instrumentation/opentelemetry/internal/metrics/metricsImpl.go @@ -0,0 +1,100 @@ +//go:build linux + +package metrics + +import ( + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/tklauser/go-sysconf" +) + +const procStatArrayLength = 52 + +var ( + clkTck = getClockTicks() + pageSize = float64(os.Getpagesize()) +) + +type processStats struct { + utime float64 + stime float64 + cutime float64 + cstime float64 + rss float64 +} + +type linuxMetrics struct { + memory float64 + cpuSecondsTotal float64 +} + +func newSystemMetrics() systemMetrics { + return &linuxMetrics{} +} + +func (lm *linuxMetrics) getCurrentMetrics() error { + stats, err := lm.processStatsFromPid(os.Getpid()) + if err != nil { + return err + } + lm.memory = stats.rss * pageSize + lm.cpuSecondsTotal = (stats.stime + stats.utime + stats.cstime + stats.cutime) / clkTck + return nil +} + +func (lm *linuxMetrics) getMemory() (float64, error) { + return lm.memory, nil +} + +func (lm *linuxMetrics) getCPU() (float64, error) { + return lm.cpuSecondsTotal, nil +} + +func (lm *linuxMetrics) processStatsFromPid(pid int) (*processStats, error) { + procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat") + var err error + if procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)); err == nil { + if stat, err := lm.parseProcStatFile(procStatFileBytes, procFilepath); err == nil { + if err != nil { + return nil, err + } + return stat, nil + } + return nil, err + } + return nil, err +} + +// ref: /proc/pid/stat section of https://man7.org/linux/man-pages/man5/proc.5.html +func (lm *linuxMetrics) parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, error) { + infos := strings.Split(string(bytesArr), " ") + if len(infos) != procStatArrayLength { + return nil, fmt.Errorf("%s file could not be parsed", procFilepath) + } + return &processStats{ + utime: parseFloat(infos[13]), + stime: parseFloat(infos[14]), + cutime: parseFloat(infos[15]), + cstime: parseFloat(infos[16]), + rss: parseFloat(infos[23]), + }, nil +} + +func parseFloat(val string) float64 { + floatVal, _ := strconv.ParseFloat(val, 64) + return floatVal +} + +// sysconf for go. claims to work without cgo or external binaries +// https://pkg.go.dev/github.com/tklauser/go-sysconf@v0.3.14#section-readme +func getClockTicks() float64 { + clktck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK) + if err != nil { + return float64(100) + } + return float64(clktck) +} diff --git a/instrumentation/opentelemetry/internal/metrics/metricsNoop.go b/instrumentation/opentelemetry/internal/metrics/metricsNoop.go new file mode 100644 index 0000000..2bfea27 --- /dev/null +++ b/instrumentation/opentelemetry/internal/metrics/metricsNoop.go @@ -0,0 +1,21 @@ +//go:build !linux + +package metrics + +type noopMetrics struct{} + +func newSystemMetrics() systemMetrics { + return &noopMetrics{} +} + +func (nm *noopMetrics) getMemory() (float64, error) { + return 0, nil +} + +func (nm *noopMetrics) getCPU() (float64, error) { + return 0, nil +} + +func (nm *noopMetrics) getCurrentMetrics() error { + return nil +} diff --git a/instrumentation/opentelemetry/internal/metrics/system_metrics.go b/instrumentation/opentelemetry/internal/metrics/system_metrics.go deleted file mode 100644 index 6557f56..0000000 --- a/instrumentation/opentelemetry/internal/metrics/system_metrics.go +++ /dev/null @@ -1,120 +0,0 @@ -package metrics - -import ( - "context" - "fmt" - "log" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/tklauser/go-sysconf" - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/metric" -) - -const meterName = "goagent.hypertrace.org/metrics" - -type systemMetrics struct { - memory float64 - cpuSecondsTotal float64 -} - -type processStats struct { - utime float64 - stime float64 - cutime float64 - cstime float64 - rss float64 -} - -const procStatArrayLength = 52 - -var ( - clkTck = getClockTicks() - pageSize = float64(os.Getpagesize()) -) - -func InitializeSystemMetrics() { - meterProvider := otel.GetMeterProvider() - meter := meterProvider.Meter(meterName) - err := setUpMetricRecorder(meter) - if err != nil { - log.Printf("error initialising metrics, failed to setup metric recorder: %v\n", err) - } -} - -func processStatsFromPid(pid int) (*systemMetrics, error) { - sysInfo := &systemMetrics{} - procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat") - var err error - if procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)); err == nil { - if stat, err := parseProcStatFile(procStatFileBytes, procFilepath); err == nil { - sysInfo.memory = stat.rss * pageSize - sysInfo.cpuSecondsTotal = (stat.stime + stat.utime + stat.cstime + stat.cutime) / clkTck - return sysInfo, nil - } - return nil, err - } - return nil, err -} - -// ref: /proc/pid/stat section of https://man7.org/linux/man-pages/man5/proc.5.html -func parseProcStatFile(bytesArr []byte, procFilepath string) (*processStats, error) { - infos := strings.Split(string(bytesArr), " ") - if len(infos) != procStatArrayLength { - return nil, fmt.Errorf("%s file could not be parsed", procFilepath) - } - return &processStats{ - utime: parseFloat(infos[13]), - stime: parseFloat(infos[14]), - cutime: parseFloat(infos[15]), - cstime: parseFloat(infos[16]), - rss: parseFloat(infos[23]), - }, nil -} - -func parseFloat(val string) float64 { - floatVal, _ := strconv.ParseFloat(val, 64) - return floatVal -} - -// sysconf for go. claims to work without cgo or external binaries -// https://pkg.go.dev/github.com/tklauser/go-sysconf@v0.3.14#section-readme -func getClockTicks() float64 { - clktck, err := sysconf.Sysconf(sysconf.SC_CLK_TCK) - if err != nil { - return float64(100) - } - return float64(clktck) -} - -func setUpMetricRecorder(meter metric.Meter) error { - if meter == nil { - return fmt.Errorf("error while setting up metric recorder: meter is nil") - } - cpuSeconds, err := meter.Float64ObservableCounter("hypertrace.agent.cpu.seconds.total", metric.WithDescription("Metric to monitor total CPU seconds")) - if err != nil { - return fmt.Errorf("error while setting up cpu seconds metric counter: %v", err) - } - memory, err := meter.Float64ObservableGauge("hypertrace.agent.memory", metric.WithDescription("Metric to monitor memory usage")) - if err != nil { - return fmt.Errorf("error while setting up memory metric counter: %v", err) - } - // Register the callback function for both cpu_seconds and memory observable gauges - _, err = meter.RegisterCallback( - func(ctx context.Context, result metric.Observer) error { - systemMetrics, err := processStatsFromPid(os.Getpid()) - result.ObserveFloat64(cpuSeconds, systemMetrics.cpuSecondsTotal) - result.ObserveFloat64(memory, systemMetrics.memory) - return err - }, - cpuSeconds, memory, - ) - if err != nil { - log.Fatalf("failed to register callback: %v", err) - return err - } - return nil -} From 8e845884c2b7ae28b3d38790c75a1723033552ab Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 25 Jul 2024 13:35:03 +0530 Subject: [PATCH 2/5] refactoring file names --- .../internal/metrics/{metricsImpl.go => linux_metrics.go} | 0 .../internal/metrics/{metricsNoop.go => noop_metrics.go} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename instrumentation/opentelemetry/internal/metrics/{metricsImpl.go => linux_metrics.go} (100%) rename instrumentation/opentelemetry/internal/metrics/{metricsNoop.go => noop_metrics.go} (100%) diff --git a/instrumentation/opentelemetry/internal/metrics/metricsImpl.go b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go similarity index 100% rename from instrumentation/opentelemetry/internal/metrics/metricsImpl.go rename to instrumentation/opentelemetry/internal/metrics/linux_metrics.go diff --git a/instrumentation/opentelemetry/internal/metrics/metricsNoop.go b/instrumentation/opentelemetry/internal/metrics/noop_metrics.go similarity index 100% rename from instrumentation/opentelemetry/internal/metrics/metricsNoop.go rename to instrumentation/opentelemetry/internal/metrics/noop_metrics.go From 0646f586d93f697edae82ad19ce6dca1fed675dc Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 25 Jul 2024 14:04:41 +0530 Subject: [PATCH 3/5] review comments fix --- .../internal/metrics/linux_metrics.go | 19 ++++++++---------- .../opentelemetry/internal/metrics/metrics.go | 20 +++++-------------- .../internal/metrics/noop_metrics.go | 16 ++++++--------- 3 files changed, 19 insertions(+), 36 deletions(-) diff --git a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go index 4139291..6ac10eb 100644 --- a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go @@ -32,26 +32,23 @@ type linuxMetrics struct { cpuSecondsTotal float64 } -func newSystemMetrics() systemMetrics { - return &linuxMetrics{} -} - -func (lm *linuxMetrics) getCurrentMetrics() error { +func newSystemMetrics() (systemMetrics, error) { + lm := &linuxMetrics{} stats, err := lm.processStatsFromPid(os.Getpid()) if err != nil { - return err + return nil, err } lm.memory = stats.rss * pageSize lm.cpuSecondsTotal = (stats.stime + stats.utime + stats.cstime + stats.cutime) / clkTck - return nil + return lm, nil } -func (lm *linuxMetrics) getMemory() (float64, error) { - return lm.memory, nil +func (lm *linuxMetrics) getMemory() float64 { + return lm.memory } -func (lm *linuxMetrics) getCPU() (float64, error) { - return lm.cpuSecondsTotal, nil +func (lm *linuxMetrics) getCPU() float64 { + return lm.cpuSecondsTotal } func (lm *linuxMetrics) processStatsFromPid(pid int) (*processStats, error) { diff --git a/instrumentation/opentelemetry/internal/metrics/metrics.go b/instrumentation/opentelemetry/internal/metrics/metrics.go index 73841eb..1661cb6 100644 --- a/instrumentation/opentelemetry/internal/metrics/metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/metrics.go @@ -11,9 +11,8 @@ import ( const meterName = "goagent.hypertrace.org/metrics" type systemMetrics interface { - getMemory() (float64, error) - getCPU() (float64, error) - getCurrentMetrics() error + getMemory() float64 + getCPU() float64 } func InitializeSystemMetrics() { @@ -40,21 +39,12 @@ func setUpMetricRecorder(meter metric.Meter) error { // Register the callback function for both cpu_seconds and memory observable gauges _, err = meter.RegisterCallback( func(ctx context.Context, result metric.Observer) error { - sysMetrics := newSystemMetrics() - err := sysMetrics.getCurrentMetrics() + sysMetrics, err := newSystemMetrics() if err != nil { return err } - cpus, err := sysMetrics.getCPU() - if err != nil { - return err - } - mem, err := sysMetrics.getMemory() - if err != nil { - return err - } - result.ObserveFloat64(cpuSeconds, cpus) - result.ObserveFloat64(memory, mem) + result.ObserveFloat64(cpuSeconds, sysMetrics.getCPU()) + result.ObserveFloat64(memory, sysMetrics.getMemory()) return nil }, cpuSeconds, memory, diff --git a/instrumentation/opentelemetry/internal/metrics/noop_metrics.go b/instrumentation/opentelemetry/internal/metrics/noop_metrics.go index 2bfea27..0688b52 100644 --- a/instrumentation/opentelemetry/internal/metrics/noop_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/noop_metrics.go @@ -4,18 +4,14 @@ package metrics type noopMetrics struct{} -func newSystemMetrics() systemMetrics { - return &noopMetrics{} +func newSystemMetrics() (systemMetrics, error) { + return &noopMetrics{}, nil } -func (nm *noopMetrics) getMemory() (float64, error) { - return 0, nil +func (nm *noopMetrics) getMemory() float64 { + return 0 } -func (nm *noopMetrics) getCPU() (float64, error) { - return 0, nil -} - -func (nm *noopMetrics) getCurrentMetrics() error { - return nil +func (nm *noopMetrics) getCPU() float64 { + return 0 } From 8f4114cb95600b6779b918607162f43cb3ea4df8 Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 25 Jul 2024 14:18:30 +0530 Subject: [PATCH 4/5] review comments fix --- .../internal/metrics/linux_metrics.go | 15 +++++++-------- .../opentelemetry/internal/metrics/metrics.go | 3 ++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go index 6ac10eb..bf38cd6 100644 --- a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go @@ -54,16 +54,15 @@ func (lm *linuxMetrics) getCPU() float64 { func (lm *linuxMetrics) processStatsFromPid(pid int) (*processStats, error) { procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat") var err error - if procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)); err == nil { - if stat, err := lm.parseProcStatFile(procStatFileBytes, procFilepath); err == nil { - if err != nil { - return nil, err - } - return stat, nil - } + procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)) + if err != nil { + return nil, err + } + stat, err := lm.parseProcStatFile(procStatFileBytes, procFilepath) + if err != nil { return nil, err } - return nil, err + return stat, nil } // ref: /proc/pid/stat section of https://man7.org/linux/man-pages/man5/proc.5.html diff --git a/instrumentation/opentelemetry/internal/metrics/metrics.go b/instrumentation/opentelemetry/internal/metrics/metrics.go index 1661cb6..7855a7b 100644 --- a/instrumentation/opentelemetry/internal/metrics/metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/metrics.go @@ -3,9 +3,10 @@ package metrics import ( "context" "fmt" + "log" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/metric" - "log" ) const meterName = "goagent.hypertrace.org/metrics" From 0572059c6a0855f759193a7ced1e6a3914ca9227 Mon Sep 17 00:00:00 2001 From: Yashwant Date: Thu, 25 Jul 2024 15:36:55 +0530 Subject: [PATCH 5/5] adding test --- .../internal/metrics/linux_metrics.go | 1 - .../internal/metrics/linux_metrics_test.go | 28 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 instrumentation/opentelemetry/internal/metrics/linux_metrics_test.go diff --git a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go index bf38cd6..fc16166 100644 --- a/instrumentation/opentelemetry/internal/metrics/linux_metrics.go +++ b/instrumentation/opentelemetry/internal/metrics/linux_metrics.go @@ -53,7 +53,6 @@ func (lm *linuxMetrics) getCPU() float64 { func (lm *linuxMetrics) processStatsFromPid(pid int) (*processStats, error) { procFilepath := filepath.Join("/proc", strconv.Itoa(pid), "stat") - var err error procStatFileBytes, err := os.ReadFile(filepath.Clean(procFilepath)) if err != nil { return nil, err diff --git a/instrumentation/opentelemetry/internal/metrics/linux_metrics_test.go b/instrumentation/opentelemetry/internal/metrics/linux_metrics_test.go new file mode 100644 index 0000000..a8600db --- /dev/null +++ b/instrumentation/opentelemetry/internal/metrics/linux_metrics_test.go @@ -0,0 +1,28 @@ +//go:build linux + +package metrics + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +const ( + // Mock data simulating /proc/[pid]/stat content + mockData = "85 (md) I 2 0 0 0 -1 698880 0 0 0 0 100 200 300 400 0 -20 1 0 223 0 550 18437615 0 0 0 0 0 0 0 2647 0 1 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0" +) + +func TestParseProcStatFile(t *testing.T) { + lm := &linuxMetrics{} + procFilepath := "/proc/123/stat" + + stat, err := lm.parseProcStatFile([]byte(mockData), procFilepath) + assert.NoError(t, err, "unexpected error while parsing proc stat file") + + assert.Equal(t, 100.0, stat.utime, "utime does not match expected value") + assert.Equal(t, 200.0, stat.stime, "stime does not match expected value") + assert.Equal(t, 300.0, stat.cutime, "cutime does not match expected value") + assert.Equal(t, 400.0, stat.cstime, "cstime does not match expected value") + assert.Equal(t, 550.0, stat.rss, "rss does not match expected value") +}