diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index cd9cafea971f8b..a33ea3f5c3cc26 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -237,6 +237,7 @@ /cmd/system-probe/modules/dynamic_instrumentation* @DataDog/debugger /cmd/system-probe/windows_resources/ @DataDog/windows-kernel-integrations /cmd/system-probe/main_windows*.go @DataDog/windows-kernel-integrations +/cmd/system-probe/api/client/client_windows.go @DataDog/windows-kernel-integrations /cmd/systray/ @DataDog/windows-agent /cmd/security-agent/ @DataDog/agent-security /cmd/installer/ @DataDog/fleet @DataDog/windows-agent diff --git a/cmd/system-probe/api/client/client.go b/cmd/system-probe/api/client/client.go index 81f8efd38bebb3..4cfc13909aaf2c 100644 --- a/cmd/system-probe/api/client/client.go +++ b/cmd/system-probe/api/client/client.go @@ -7,10 +7,31 @@ package client import ( + "errors" "net/http" + "time" + + "github.com/DataDog/datadog-agent/pkg/util/funcs" +) + +var ( + // ErrNotImplemented is an error used when system-probe is attempted to be accessed on an unsupported OS + ErrNotImplemented = errors.New("system-probe unsupported") ) // Get returns a http client configured to talk to the system-probe -func Get(socketPath string) *http.Client { - return newSystemProbeClient(socketPath) +var Get = funcs.MemoizeArgNoError[string, *http.Client](get) + +func get(socketPath string) *http.Client { + return &http.Client{ + Timeout: 10 * time.Second, + Transport: &http.Transport{ + MaxIdleConns: 2, + IdleConnTimeout: idleConnTimeout, + DialContext: DialContextFunc(socketPath), + TLSHandshakeTimeout: 1 * time.Second, + ResponseHeaderTimeout: 5 * time.Second, + ExpectContinueTimeout: 50 * time.Millisecond, + }, + } } diff --git a/cmd/system-probe/api/client/client_linux.go b/cmd/system-probe/api/client/client_linux.go deleted file mode 100644 index b644838e805448..00000000000000 --- a/cmd/system-probe/api/client/client_linux.go +++ /dev/null @@ -1,37 +0,0 @@ -// Unless explicitly stated 
otherwise all files in this repository are licensed -// under the Apache License Version 2.0. -// This product includes software developed at Datadog (https://www.datadoghq.com/). -// Copyright 2016-present Datadog, Inc. - -//go:build linux - -package client - -import ( - "context" - "net" - "net/http" - "time" -) - -const ( - netType = "unix" -) - -// newSystemProbeClient returns a http client configured to talk to the system-probe -// Linux is unable to import pkg/process/net due to size restrictions. -func newSystemProbeClient(socketPath string) *http.Client { - return &http.Client{ - Timeout: 10 * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 2, - IdleConnTimeout: 30 * time.Second, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial(netType, socketPath) - }, - TLSHandshakeTimeout: 1 * time.Second, - ResponseHeaderTimeout: 5 * time.Second, - ExpectContinueTimeout: 50 * time.Millisecond, - }, - } -} diff --git a/cmd/system-probe/api/client/client_others.go b/cmd/system-probe/api/client/client_others.go index 1204bb37e7d1a8..61938de68e46c6 100644 --- a/cmd/system-probe/api/client/client_others.go +++ b/cmd/system-probe/api/client/client_others.go @@ -3,34 +3,23 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). // Copyright 2016-present Datadog, Inc. 
-//go:build !linux && !windows +//go:build !unix && !windows package client import ( "context" "net" - "net/http" "time" ) const ( - netType = "tcp" + idleConnTimeout = 30 * time.Second ) -// newSystemProbeClient returns a http client configured to talk to the system-probe -func newSystemProbeClient(socketPath string) *http.Client { - return &http.Client{ - Timeout: 10 * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 2, - IdleConnTimeout: 30 * time.Second, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial(netType, socketPath) - }, - TLSHandshakeTimeout: 1 * time.Second, - ResponseHeaderTimeout: 5 * time.Second, - ExpectContinueTimeout: 50 * time.Millisecond, - }, +// DialContextFunc is not supported on this platform. +func DialContextFunc(_ string) func(context.Context, string, string) (net.Conn, error) { + return func(_ context.Context, _, _ string) (net.Conn, error) { + return nil, ErrNotImplemented } } diff --git a/cmd/system-probe/api/client/client_unix.go b/cmd/system-probe/api/client/client_unix.go new file mode 100644 index 00000000000000..9fc8a00855f559 --- /dev/null +++ b/cmd/system-probe/api/client/client_unix.go @@ -0,0 +1,26 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build unix + +package client + +import ( + "context" + "net" + "time" +) + +const ( + idleConnTimeout = 30 * time.Second +) + +// DialContextFunc returns a function to be used in http.Transport.DialContext for connecting to system-probe. +// The result will be OS-specific. 
+func DialContextFunc(socketPath string) func(context.Context, string, string) (net.Conn, error) { + return func(_ context.Context, _, _ string) (net.Conn, error) { + return net.Dial("unix", socketPath) + } +} diff --git a/cmd/system-probe/api/client/client_windows.go b/cmd/system-probe/api/client/client_windows.go index d4649c90bd5c62..1e8d667af08791 100644 --- a/cmd/system-probe/api/client/client_windows.go +++ b/cmd/system-probe/api/client/client_windows.go @@ -8,14 +8,43 @@ package client import ( - "net/http" + "context" + "fmt" + "net" + "time" - processNet "github.com/DataDog/datadog-agent/pkg/process/net" + "github.com/Microsoft/go-winio" + + "github.com/DataDog/datadog-agent/pkg/util/log" +) + +const ( + idleConnTimeout = 5 * time.Second ) -// newSystemProbeClient returns a http client configured to talk to the system-probe -// This is a simple wrapper around process_net.NewSystemProbeHttpClient because -// Linux is unable to import pkg/process/net due to size restrictions. -func newSystemProbeClient(_ string) *http.Client { - return processNet.NewSystemProbeClient() +var ( + // SystemProbePipeName is the production named pipe for system probe + SystemProbePipeName = `\\.\pipe\dd_system_probe` +) + +// DialContextFunc returns a function to be used in http.Transport.DialContext for connecting to system-probe. +// The result will be OS-specific. +func DialContextFunc(_ string) func(context.Context, string, string) (net.Conn, error) { + return func(_ context.Context, _, _ string) (net.Conn, error) { + // Go clients do not immediately close (named pipe) connections when done, + // they keep connections idle for a while. Make sure the idle time + // is not too high and the timeout is generous enough for pending connections. + var timeout = 30 * time.Second + + namedPipe, err := winio.DialPipe(SystemProbePipeName, &timeout) + if err != nil { + // This important error may not get reported upstream, making connection failures + // very difficult to diagnose. 
Explicitly log the error here too for diagnostics. + var namedPipeErr = fmt.Errorf("error connecting to named pipe %q: %s", SystemProbePipeName, err) + log.Error(namedPipeErr.Error()) + return nil, namedPipeErr + } + + return namedPipe, nil + } } diff --git a/pkg/process/net/common_linux.go b/pkg/process/net/common_linux.go index 330010c6bcfb78..aa4efa7cd7860f 100644 --- a/pkg/process/net/common_linux.go +++ b/pkg/process/net/common_linux.go @@ -8,14 +8,13 @@ package net import ( - "context" + "errors" "fmt" "io" - "net" "net/http" "os" - "time" + "github.com/DataDog/datadog-agent/cmd/system-probe/api/client" sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config" ) @@ -34,14 +33,13 @@ const ( conntrackCachedURL = "http://unix/" + string(sysconfig.NetworkTracerModule) + "/debug/conntrack/cached" conntrackHostURL = "http://unix/" + string(sysconfig.NetworkTracerModule) + "/debug/conntrack/host" ebpfBTFLoaderURL = "http://unix/debug/ebpf_btf_loader_info" - netType = "unix" ) // CheckPath is used in conjunction with calling the stats endpoint, since we are calling this // From the main agent and want to ensure the socket exists func CheckPath(path string) error { if path == "" { - return fmt.Errorf("socket path is empty") + return errors.New("socket path is empty") } if _, err := os.Stat(path); err != nil { @@ -53,34 +51,18 @@ func CheckPath(path string) error { // newSystemProbe creates a group of clients to interact with system-probe. 
func newSystemProbe(path string) *RemoteSysProbeUtil { return &RemoteSysProbeUtil{ - path: path, - httpClient: http.Client{ - Timeout: 10 * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 2, - IdleConnTimeout: 30 * time.Second, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial(netType, path) - }, - TLSHandshakeTimeout: 1 * time.Second, - ResponseHeaderTimeout: 5 * time.Second, - ExpectContinueTimeout: 50 * time.Millisecond, - }, - }, + path: path, + httpClient: *client.Get(path), pprofClient: http.Client{ Transport: &http.Transport{ - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial(netType, path) - }, + DialContext: client.DialContextFunc(path), }, }, tracerouteClient: http.Client{ // no timeout set here, the expected usage of this client // is that the caller will set a timeout on each request Transport: &http.Transport{ - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return net.Dial(netType, path) - }, + DialContext: client.DialContextFunc(path), }, }, } diff --git a/pkg/process/net/common_windows.go b/pkg/process/net/common_windows.go index ccf70d591b1acc..49a7d740ad0b89 100644 --- a/pkg/process/net/common_windows.go +++ b/pkg/process/net/common_windows.go @@ -8,13 +8,11 @@ package net import ( - "context" "errors" - "fmt" - "net" "net/http" "time" + "github.com/DataDog/datadog-agent/cmd/system-probe/api/client" sysconfig "github.com/DataDog/datadog-agent/cmd/system-probe/config" ) @@ -26,7 +24,6 @@ const ( statsURL = "http://localhost:3333/debug/stats" pprofURL = "http://localhost:3333/debug/pprof" tracerouteURL = "http://localhost:3333/" + string(sysconfig.TracerouteModule) + "/traceroute/" - netType = "tcp" telemetryURL = "http://localhost:3333/telemetry" // discovery* is not used on Windows, the value is added to avoid a compilation error @@ -40,9 +37,6 @@ const ( // conntrackHostURL is not used on Windows, the value is added to avoid a 
compilation error conntrackHostURL = "http://localhost:3333/" + string(sysconfig.NetworkTracerModule) + "/debug/conntrack/host" - // SystemProbePipeName is the production named pipe for system probe - SystemProbePipeName = `\\.\pipe\dd_system_probe` - // systemProbeMaxIdleConns sets the maximum number of idle named pipe connections. systemProbeMaxIdleConns = 2 @@ -55,40 +49,21 @@ const ( // CheckPath is used to make sure the globalSocketPath has been set before attempting to connect func CheckPath(path string) error { if path == "" { - return fmt.Errorf("socket path is empty") + return errors.New("socket path is empty") } return nil } -// NewSystemProbeClient returns a http client configured to talk to the system-probe -func NewSystemProbeClient() *http.Client { - return &http.Client{ - Timeout: 10 * time.Second, - Transport: &http.Transport{ - MaxIdleConns: systemProbeMaxIdleConns, - IdleConnTimeout: systemProbeIdleConnTimeout, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return DialSystemProbe() - }, - TLSHandshakeTimeout: 1 * time.Second, - ResponseHeaderTimeout: 2 * time.Second, - ExpectContinueTimeout: 50 * time.Millisecond, - }, - } -} - // newSystemProbe creates a group of clients to interact with system-probe. 
func newSystemProbe(path string) *RemoteSysProbeUtil { return &RemoteSysProbeUtil{ path: path, - httpClient: *NewSystemProbeClient(), + httpClient: *client.Get(path), pprofClient: http.Client{ Transport: &http.Transport{ MaxIdleConns: systemProbeMaxIdleConns, IdleConnTimeout: systemProbeIdleConnTimeout, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return DialSystemProbe() - }, + DialContext: client.DialContextFunc(path), }, }, tracerouteClient: http.Client{ @@ -97,9 +72,7 @@ func newSystemProbe(path string) *RemoteSysProbeUtil { Transport: &http.Transport{ MaxIdleConns: systemProbeMaxIdleConns, IdleConnTimeout: systemProbeIdleConnTimeout, - DialContext: func(_ context.Context, _, _ string) (net.Conn, error) { - return DialSystemProbe() - }, + DialContext: client.DialContextFunc(path), }, }, } diff --git a/pkg/process/net/windows_pipe.go b/pkg/process/net/windows_pipe.go index 2d2baea60532a9..3b5b63d8cbd9c4 100644 --- a/pkg/process/net/windows_pipe.go +++ b/pkg/process/net/windows_pipe.go @@ -10,10 +10,10 @@ package net import ( "fmt" "net" - "time" - "github.com/DataDog/datadog-agent/pkg/util/log" "github.com/Microsoft/go-winio" + + "github.com/DataDog/datadog-agent/cmd/system-probe/api/client" ) const ( @@ -36,9 +36,6 @@ type WindowsPipeListener struct { pipePath string } -// systemProbePipeName is the effective named pipe path for system probe -var systemProbePipeName = SystemProbePipeName - // systemProbePipSecurityDescriptor has the effective DACL for the system probe named pipe. 
var systemProbePipSecurityDescriptor = namedPipeSecurityDescriptor @@ -60,12 +57,12 @@ func newPipeListener(namedPipeName string) (net.Listener, error) { func NewSystemProbeListener(_ string) (*WindowsPipeListener, error) { // socketAddr not used - namedPipe, err := newPipeListener(systemProbePipeName) + namedPipe, err := newPipeListener(client.SystemProbePipeName) if err != nil { - return nil, fmt.Errorf("error named pipe %s : %s", systemProbePipeName, err) + return nil, fmt.Errorf("error named pipe %s : %s", client.SystemProbePipeName, err) } - return &WindowsPipeListener{namedPipe, systemProbePipeName}, nil + return &WindowsPipeListener{namedPipe, client.SystemProbePipeName}, nil } // GetListener will return underlying Listener's conn @@ -77,22 +74,3 @@ func (wp *WindowsPipeListener) GetListener() net.Listener { func (wp *WindowsPipeListener) Stop() { wp.conn.Close() } - -// DialSystemProbe connects to the system-probe service endpoint -func DialSystemProbe() (net.Conn, error) { - // Go clients do not immediately close (named pipe) connections when done, - // they keep connections idle for a while. Make sure the idle time - // is not too high and the timeout is generous enough for pending connections. - var timeout = time.Duration(30 * time.Second) - - namedPipe, err := winio.DialPipe(systemProbePipeName, &timeout) - if err != nil { - // This important error may not get reported upstream, making connection failures - // very difficult to diagnose. Explicitly log the error here too for diagnostics. 
- var namedPipeErr = fmt.Errorf("error connecting to named pipe %s : %s", systemProbePipeName, err) - log.Errorf("%s", namedPipeErr.Error()) - return nil, namedPipeErr - } - - return namedPipe, nil -} diff --git a/pkg/process/net/windows_pipe_testutil.go b/pkg/process/net/windows_pipe_testutil.go index c0656efeb29038..d3e1a9413db23d 100644 --- a/pkg/process/net/windows_pipe_testutil.go +++ b/pkg/process/net/windows_pipe_testutil.go @@ -7,12 +7,14 @@ package net +import "github.com/DataDog/datadog-agent/cmd/system-probe/api/client" + // OverrideSystemProbeNamedPipeConfig sets the active named pipe path and its DACL for // System Probe connections. // This is used by tests only to avoid conflicts with an existing locally installed Datadog agent. func OverrideSystemProbeNamedPipeConfig(path string, securityDescriptor string) { if path != "" { - systemProbePipeName = path + client.SystemProbePipeName = path } if securityDescriptor != "" { diff --git a/pkg/util/funcs/memoize_arg_noerror.go b/pkg/util/funcs/memoize_arg_noerror.go new file mode 100644 index 00000000000000..bbdf2578c22432 --- /dev/null +++ b/pkg/util/funcs/memoize_arg_noerror.go @@ -0,0 +1,33 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2024-present Datadog, Inc. + +package funcs + +import "sync" + +type memoizedArgNoErrorFunc[K comparable, T any] struct { + sync.Mutex + fn func(K) T + results map[K]T +} + +func (mf *memoizedArgNoErrorFunc[K, T]) do(arg K) T { + mf.Lock() + defer mf.Unlock() + + res, ok := mf.results[arg] + if !ok { + res = mf.fn(arg) + mf.results[arg] = res + } + return res +} + +// MemoizeArgNoError memoizes the result of a function call based on the argument. 
+// +// fn is only ever called once for each argument +func MemoizeArgNoError[K comparable, T any](fn func(K) T) func(K) T { + return (&memoizedArgNoErrorFunc[K, T]{fn: fn, results: make(map[K]T)}).do +} diff --git a/tools/NamedPipeCmd/main.go b/tools/NamedPipeCmd/main.go index 01d02b8aa03ab1..503cdf591c5546 100644 --- a/tools/NamedPipeCmd/main.go +++ b/tools/NamedPipeCmd/main.go @@ -18,8 +18,9 @@ import ( "os" "time" - "github.com/DataDog/datadog-agent/pkg/api/util" winio "github.com/Microsoft/go-winio" + + "github.com/DataDog/datadog-agent/pkg/api/util" ) var ( @@ -59,7 +60,7 @@ func main() { flag.Parse() // This should match SystemProbePipeName in - // "github.com/DataDog/datadog-agent/pkg/process/net" + // "github.com/DataDog/datadog-agent/cmd/system-probe/api/client" pipePath := `\\.\pipe\dd_system_probe` fprintf("Connecting to named pipe %s ... ", pipePath)