Skip to content

Commit

Permalink
Reapply "[ASCII-2587] Migrating TraceAgent to use IPC cert" (DataDog#…
Browse files Browse the repository at this point in the history
  • Loading branch information
misteriaud authored Dec 19, 2024
1 parent 3763407 commit 88a7672
Show file tree
Hide file tree
Showing 14 changed files with 81 additions and 30 deletions.
22 changes: 15 additions & 7 deletions cmd/agent/subcommands/flare/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,10 +175,18 @@ func readProfileData(seconds int) (flare.ProfileData, error) {

type pprofGetter func(path string) ([]byte, error)

tcpGet := func(portConfig string) pprofGetter {
pprofURL := fmt.Sprintf("http://127.0.0.1:%d/debug/pprof", pkgconfigsetup.Datadog().GetInt(portConfig))
tcpGet := func(portConfig string, onHTTPS bool) pprofGetter {
endpoint := url.URL{
Scheme: "http",
Host: net.JoinHostPort("127.0.0.1", strconv.Itoa(pkgconfigsetup.Datadog().GetInt(portConfig))),
Path: "/debug/pprof",
}
if onHTTPS {
endpoint.Scheme = "https"
}

return func(path string) ([]byte, error) {
return util.DoGet(c, pprofURL+path, util.LeaveConnectionOpen)
return util.DoGet(c, endpoint.String()+path, util.LeaveConnectionOpen)
}
}

Expand Down Expand Up @@ -228,15 +236,15 @@ func readProfileData(seconds int) (flare.ProfileData, error) {
}

agentCollectors := map[string]agentProfileCollector{
"core": serviceProfileCollector(tcpGet("expvar_port"), seconds),
"security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port"), seconds),
"core": serviceProfileCollector(tcpGet("expvar_port", false), seconds),
"security-agent": serviceProfileCollector(tcpGet("security_agent.expvar_port", false), seconds),
}

if pkgconfigsetup.Datadog().GetBool("process_config.enabled") ||
pkgconfigsetup.Datadog().GetBool("process_config.container_collection.enabled") ||
pkgconfigsetup.Datadog().GetBool("process_config.process_collection.enabled") {

agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port"), seconds)
agentCollectors["process"] = serviceProfileCollector(tcpGet("process_config.expvar_port", false), seconds)
}

if pkgconfigsetup.Datadog().GetBool("apm_config.enabled") {
Expand All @@ -249,7 +257,7 @@ func readProfileData(seconds int) (flare.ProfileData, error) {
traceCpusec = 4
}

agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port"), traceCpusec)
agentCollectors["trace"] = serviceProfileCollector(tcpGet("apm_config.debug.port", true), traceCpusec)
}

if pkgconfigsetup.SystemProbe().GetBool("system_probe_config.enabled") {
Expand Down
18 changes: 14 additions & 4 deletions cmd/agent/subcommands/flare/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ type commandTestSuite struct {
suite.Suite
sysprobeSocketPath string
tcpServer *httptest.Server
tcpTLSServer *httptest.Server
unixServer *httptest.Server
systemProbeServer *httptest.Server
}
Expand All @@ -42,13 +43,17 @@ func (c *commandTestSuite) SetupSuite() {
// This should be called by each test that requires them.
func (c *commandTestSuite) startTestServers() {
t := c.T()
c.tcpServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer()
c.tcpServer, c.tcpTLSServer, c.unixServer, c.systemProbeServer = c.getPprofTestServer()

t.Cleanup(func() {
if c.tcpServer != nil {
c.tcpServer.Close()
c.tcpServer = nil
}
if c.tcpTLSServer != nil {
c.tcpTLSServer.Close()
c.tcpTLSServer = nil
}
if c.unixServer != nil {
c.unixServer.Close()
c.unixServer = nil
Expand Down Expand Up @@ -82,12 +87,13 @@ func newMockHandler() http.HandlerFunc {
})
}

func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) {
func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, tcpTLSServer *httptest.Server, unixServer *httptest.Server, sysProbeServer *httptest.Server) {
var err error
t := c.T()

handler := newMockHandler()
tcpServer = httptest.NewServer(handler)
tcpTLSServer = httptest.NewTLSServer(handler)
if runtime.GOOS == "linux" {
unixServer = httptest.NewUnstartedServer(handler)
unixServer.Listener, err = net.Listen("unix", c.sysprobeSocketPath)
Expand All @@ -101,7 +107,7 @@ func (c *commandTestSuite) getPprofTestServer() (tcpServer *httptest.Server, uni
sysProbeServer.Start()
}

return tcpServer, unixServer, sysProbeServer
return tcpServer, tcpTLSServer, unixServer, sysProbeServer
}

func TestCommandTestSuite(t *testing.T) {
Expand All @@ -116,10 +122,14 @@ func (c *commandTestSuite) TestReadProfileData() {
require.NoError(t, err)
port := u.Port()

u, err = url.Parse(c.tcpTLSServer.URL)
require.NoError(t, err)
httpsPort := u.Port()

mockConfig := configmock.New(t)
mockConfig.SetWithoutSource("expvar_port", port)
mockConfig.SetWithoutSource("apm_config.enabled", true)
mockConfig.SetWithoutSource("apm_config.debug.port", port)
mockConfig.SetWithoutSource("apm_config.debug.port", httpsPort)
mockConfig.SetWithoutSource("apm_config.receiver_timeout", "10")
mockConfig.SetWithoutSource("process_config.expvar_port", port)
mockConfig.SetWithoutSource("security_agent.expvar_port", port)
Expand Down
2 changes: 1 addition & 1 deletion cmd/agent/subcommands/secret/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func traceAgentSecretRefresh(conf config.Component) ([]byte, error) {
c := apiutil.GetClient(false)
c.Timeout = conf.GetDuration("server_timeout") * time.Second

url := fmt.Sprintf("http://127.0.0.1:%d/secret/refresh", port)
url := fmt.Sprintf("https://127.0.0.1:%d/secret/refresh", port)
res, err := apiutil.DoGet(c, url, apiutil.CloseConnection)
if err != nil {
return nil, fmt.Errorf("could not contact trace-agent: %s", err)
Expand Down
4 changes: 4 additions & 0 deletions comp/trace/agent/impl/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"go.opentelemetry.io/collector/pdata/ptrace"
"go.uber.org/fx"

"github.com/DataDog/datadog-agent/comp/api/authtoken"
"github.com/DataDog/datadog-agent/comp/core/secrets"
tagger "github.com/DataDog/datadog-agent/comp/core/tagger/def"
"github.com/DataDog/datadog-agent/comp/dogstatsd/statsd"
Expand Down Expand Up @@ -68,6 +69,7 @@ type dependencies struct {
Statsd statsd.Component
Tagger tagger.Component
Compressor compression.Component
At authtoken.Component
}

var _ traceagent.Component = (*component)(nil)
Expand All @@ -93,6 +95,7 @@ type component struct {
params *Params
tagger tagger.Component
telemetryCollector telemetry.TelemetryCollector
at authtoken.Component
wg *sync.WaitGroup
}

Expand All @@ -115,6 +118,7 @@ func NewAgent(deps dependencies) (traceagent.Component, error) {
params: deps.Params,
telemetryCollector: deps.TelemetryCollector,
tagger: deps.Tagger,
at: deps.At,
wg: &sync.WaitGroup{},
}
statsdCl, err := setupMetrics(deps.Statsd, c.config, c.telemetryCollector)
Expand Down
3 changes: 3 additions & 0 deletions comp/trace/agent/impl/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,9 @@ func runAgentSidekicks(ag component) error {
}))
}

// Configure the Trace Agent Debug server to use the IPC certificate
ag.Agent.DebugServer.SetTLSConfig(ag.at.GetTLSServerConfig())

log.Infof("Trace agent running on host %s", tracecfg.Hostname)
if pcfg := profilingConfig(tracecfg); pcfg != nil {
if err := profiling.Start(*pcfg); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions comp/trace/bundle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"github.com/stretchr/testify/require"
"go.uber.org/fx"

"github.com/DataDog/datadog-agent/comp/api/authtoken/createandfetchimpl"
"github.com/DataDog/datadog-agent/comp/api/authtoken/fetchonlyimpl"
"github.com/DataDog/datadog-agent/comp/core"
coreconfig "github.com/DataDog/datadog-agent/comp/core/config"
log "github.com/DataDog/datadog-agent/comp/core/log/def"
Expand Down Expand Up @@ -45,6 +47,7 @@ func TestBundleDependencies(t *testing.T) {
zstdfx.Module(),
taggerfx.Module(tagger.Params{}),
fx.Supply(&traceagentimpl.Params{}),
createandfetchimpl.Module(),
)
}

Expand Down Expand Up @@ -75,6 +78,7 @@ func TestMockBundleDependencies(t *testing.T) {
fx.Invoke(func(_ traceagent.Component) {}),
MockBundle(),
taggerfx.Module(tagger.Params{}),
fetchonlyimpl.MockModule(),
))

require.NotNil(t, cfg.Object())
Expand Down
2 changes: 1 addition & 1 deletion comp/trace/status/statusimpl/status.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func (s statusProvider) populateStatus() map[string]interface{} {
port := s.Config.GetInt("apm_config.debug.port")

c := client()
url := fmt.Sprintf("http://localhost:%d/debug/vars", port)
url := fmt.Sprintf("https://localhost:%d/debug/vars", port)
resp, err := apiutil.DoGet(c, url, apiutil.CloseConnection)
if err != nil {
return map[string]interface{}{
Expand Down
2 changes: 1 addition & 1 deletion pkg/config/fetcher/from_processes.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TraceAgentConfig(config config.Reader) (string, error) {
c := util.GetClient(false)
c.Timeout = config.GetDuration("server_timeout") * time.Second

ipcAddressWithPort := fmt.Sprintf("http://127.0.0.1:%d/config", port)
ipcAddressWithPort := fmt.Sprintf("https://127.0.0.1:%d/config", port)

client := settingshttp.NewClient(c, ipcAddressWithPort, "trace-agent", settingshttp.NewHTTPClientOptions(util.CloseConnection))
return client.FullConfig()
Expand Down
2 changes: 1 addition & 1 deletion pkg/flare/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ func getExpVar(fb flaretypes.FlareBuilder) error {

apmDebugPort := pkgconfigsetup.Datadog().GetInt("apm_config.debug.port")
f := filepath.Join("expvar", "trace-agent")
resp, err := http.Get(fmt.Sprintf("http://127.0.0.1:%d/debug/vars", apmDebugPort))
resp, err := http.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", apmDebugPort))
if err != nil {
return fb.AddFile(f, []byte(fmt.Sprintf("Error retrieving vars: %v", err)))
}
Expand Down
18 changes: 15 additions & 3 deletions pkg/trace/api/api_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1042,14 +1042,26 @@ func TestExpvar(t *testing.T) {
}

c := newTestReceiverConfig()
c.DebugServerPort = 5012
c.DebugServerPort = 6789
info.InitInfo(c)

// Starting a TLS httptest server to retrieve tlsCert
ts := httptest.NewTLSServer(http.HandlerFunc(func(_ http.ResponseWriter, _ *http.Request) {}))
tlsConfig := ts.TLS.Clone()
// Setting a client with the proper TLS configuration
client := ts.Client()
ts.Close()

// Starting Debug Server
s := NewDebugServer(c)
s.SetTLSConfig(tlsConfig)

// Starting the Debug server
s.Start()
defer s.Stop()

resp, err := http.Get("http://127.0.0.1:5012/debug/vars")
assert.NoError(t, err)
resp, err := client.Get(fmt.Sprintf("https://127.0.0.1:%d/debug/vars", c.DebugServerPort))
require.NoError(t, err)
defer resp.Body.Close()

t.Run("read-expvars", func(t *testing.T) {
Expand Down
18 changes: 13 additions & 5 deletions pkg/trace/api/debug_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ package api

import (
"context"
"crypto/tls"
"expvar"
"fmt"
"net"
Expand All @@ -29,9 +30,10 @@ const (

// DebugServer serves /debug/* endpoints
type DebugServer struct {
conf *config.AgentConfig
server *http.Server
mux *http.ServeMux
conf *config.AgentConfig
server *http.Server
mux *http.ServeMux
tlsConfig *tls.Config
}

// NewDebugServer returns a debug server
Expand All @@ -53,13 +55,14 @@ func (ds *DebugServer) Start() {
WriteTimeout: defaultTimeout,
Handler: ds.setupMux(),
}
listener, err := net.Listen("tcp", fmt.Sprintf("127.0.0.1:%d", ds.conf.DebugServerPort))
listener, err := net.Listen("tcp", net.JoinHostPort("127.0.0.1", strconv.Itoa(ds.conf.DebugServerPort)))
if err != nil {
log.Errorf("Error creating debug server listener: %s", err)
return
}
tlsListener := tls.NewListener(listener, ds.tlsConfig)
go func() {
if err := ds.server.Serve(listener); err != nil && err != http.ErrServerClosed {
if err := ds.server.Serve(tlsListener); err != nil && err != http.ErrServerClosed {
log.Errorf("Could not start debug server: %s. Debug server disabled.", err)
}
}()
Expand All @@ -82,6 +85,11 @@ func (ds *DebugServer) AddRoute(route string, handler http.Handler) {
ds.mux.Handle(route, handler)
}

// SetTLSConfig adds the provided tls.Config to the internal http.Server
func (ds *DebugServer) SetTLSConfig(config *tls.Config) {
ds.tlsConfig = config
}

func (ds *DebugServer) setupMux() *http.ServeMux {
ds.mux.HandleFunc("/debug/pprof/", pprof.Index)
ds.mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
Expand Down
6 changes: 4 additions & 2 deletions pkg/trace/info/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package info

import (
"bytes"
"crypto/tls"
"encoding/json"
"expvar" // automatically publish `/debug/vars` on HTTP port
"fmt"
Expand Down Expand Up @@ -236,8 +237,9 @@ func getProgramBanner(version string) (string, string) {
// If error is nil, means the program is running.
// If not, it displays a pretty-printed message anyway (for support)
func Info(w io.Writer, conf *config.AgentConfig) error {
url := fmt.Sprintf("http://127.0.0.1:%d/debug/vars", conf.DebugServerPort)
client := http.Client{Timeout: 3 * time.Second}
url := fmt.Sprintf("https://127.0.0.1:%d/debug/vars", conf.DebugServerPort)
tr := &http.Transport{TLSClientConfig: &tls.Config{InsecureSkipVerify: true}}
client := http.Client{Timeout: 3 * time.Second, Transport: tr}
resp, err := client.Get(url)
if err != nil {
// OK, here, we can't even make an http call on the agent port,
Expand Down
8 changes: 4 additions & 4 deletions pkg/trace/info/info_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {

func testServer(t *testing.T, testFile string) *httptest.Server {
t.Helper()
server := httptest.NewServer(&testServerHandler{t: t, testFile: testFile})
server := httptest.NewTLSServer(&testServerHandler{t: t, testFile: testFile})
t.Logf("test server (serving fake yet valid data) listening on %s", server.URL)
return server
}
Expand Down Expand Up @@ -94,7 +94,7 @@ func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ
}

func testServerWarning(t *testing.T) *httptest.Server {
server := httptest.NewServer(&testServerWarningHandler{t: t})
server := httptest.NewTLSServer(&testServerWarningHandler{t: t})
t.Logf("test server (serving data containing worrying values) listening on %s", server.URL)
return server
}
Expand All @@ -119,7 +119,7 @@ func (h *testServerErrorHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques
}

func testServerError(t *testing.T) *httptest.Server {
server := httptest.NewServer(&testServerErrorHandler{t: t})
server := httptest.NewTLSServer(&testServerErrorHandler{t: t})
t.Logf("test server (serving bad data to trigger errors) listening on %s", server.URL)
return server
}
Expand Down Expand Up @@ -331,7 +331,7 @@ func TestError(t *testing.T) {
assert.Equal(len(lines[1]), len(lines[2]))
assert.Equal("", lines[3])
assert.Regexp(regexp.MustCompile(`^ Error: .*$`), lines[4])
assert.Equal(fmt.Sprintf(" URL: http://127.0.0.1:%d/debug/vars", port), lines[5])
assert.Equal(fmt.Sprintf(" URL: https://127.0.0.1:%d/debug/vars", port), lines[5])
assert.Equal("", lines[6])
assert.Equal("", lines[7])
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ type agentConfigEndpointInfo struct {
}

func traceConfigEndpoint(port int) agentConfigEndpointInfo {
return agentConfigEndpointInfo{"trace-agent", "http", port, "/config"}
return agentConfigEndpointInfo{"trace-agent", "https", port, "/config"}
}

func processConfigEndpoint(port int) agentConfigEndpointInfo {
Expand Down

0 comments on commit 88a7672

Please sign in to comment.