From cfaf5ee0ba972b929169d2e1d24b61e0b2f50bd2 Mon Sep 17 00:00:00 2001 From: Sylvain Afchain Date: Mon, 25 Nov 2024 14:22:17 +0100 Subject: [PATCH 01/12] [CWS] add kernel bpf filter for raw packet (#30288) --- LICENSE-3rdparty.csv | 1 + go.mod | 3 +- go.sum | 6 +- .../ebpf/c/include/constants/custom.h | 8 +- pkg/security/ebpf/c/include/helpers/network.h | 5 - .../ebpf/c/include/hooks/network/dns.h | 4 +- .../ebpf/c/include/hooks/network/raw.h | 55 ++-- .../ebpf/c/include/hooks/network/router.h | 4 +- .../ebpf/c/include/hooks/network/tc.h | 39 ++- pkg/security/ebpf/c/include/maps.h | 5 +- pkg/security/ebpf/c/include/structs/network.h | 2 +- .../tests/activity_dump_ratelimiter_test.h | 6 +- pkg/security/ebpf/c/include/tests/baloum.h | 1 + .../ebpf/c/include/tests/discarders_test.h | 8 +- .../ebpf/c/include/tests/raw_packet_test.h | 30 +++ pkg/security/ebpf/c/include/tests/tests.h | 1 + pkg/security/ebpf/probes/all.go | 8 +- pkg/security/ebpf/probes/const.go | 15 +- pkg/security/ebpf/probes/raw_packet.go | 15 ++ .../ebpf/probes/rawpacket/bpffilter.go | 19 ++ pkg/security/ebpf/probes/rawpacket/pcap.go | 251 ++++++++++++++++++ .../ebpf/probes/rawpacket/pcap_unsupported.go | 39 +++ pkg/security/ebpf/probes/tc.go | 49 ++-- .../tests/activity_dump_ratelimiter_test.go | 8 +- pkg/security/ebpf/tests/discarders_test.go | 8 +- pkg/security/ebpf/tests/raw_packet_test.go | 194 ++++++++++++++ pkg/security/probe/model_ebpf.go | 10 +- pkg/security/probe/probe_ebpf.go | 121 +++++++-- ...ilter_unix.go => oo_packet_filter_unix.go} | 14 +- ...ted.go => oo_packet_filter_unsupported.go} | 0 pkg/security/tests/network_test.go | 2 +- tasks/security_agent.py | 17 +- 32 files changed, 805 insertions(+), 143 deletions(-) create mode 100644 pkg/security/ebpf/c/include/tests/raw_packet_test.h create mode 100644 pkg/security/ebpf/probes/raw_packet.go create mode 100644 pkg/security/ebpf/probes/rawpacket/bpffilter.go create mode 100644 pkg/security/ebpf/probes/rawpacket/pcap.go create mode 100644 pkg/security/ebpf/probes/rawpacket/pcap_unsupported.go create mode 100644 pkg/security/ebpf/tests/raw_packet_test.go rename pkg/security/secl/model/{packet_filter_unix.go => oo_packet_filter_unix.go} (83%) rename pkg/security/secl/model/{packet_filter_unsupported.go => oo_packet_filter_unsupported.go} (100%) diff --git a/LICENSE-3rdparty.csv b/LICENSE-3rdparty.csv index 0c14f7e40d15b..3bd5a574fa3fe 100644 --- a/LICENSE-3rdparty.csv +++ b/LICENSE-3rdparty.csv @@ -690,6 +690,7 @@ core,github.com/cilium/ebpf/perf,MIT,"Copyright (c) 2017 Nathan Sweet | Copyrigh core,github.com/cilium/ebpf/ringbuf,MIT,"Copyright (c) 2017 Nathan Sweet | Copyright (c) 2018, 2019 Cloudflare | Copyright (c) 2019 Authors of Cilium" core,github.com/cilium/ebpf/rlimit,MIT,"Copyright (c) 2017 Nathan Sweet | Copyright (c) 2018, 2019 Cloudflare | Copyright (c) 2019 Authors of Cilium" core,github.com/clbanning/mxj,MIT,Copyright (c) 2012-2016 Charles Banning . All rights reserved | Copyright 2009 The Go Authors. All rights reserved +core,github.com/cloudflare/cbpfc,BSD-3-Clause,"Copyright (c) 2019, Cloudflare. All rights reserved" core,github.com/cloudflare/circl/dh/x25519,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved | Copyright (c) 2019 Cloudflare. All rights reserved core,github.com/cloudflare/circl/dh/x448,BSD-3-Clause,Copyright (c) 2009 The Go Authors. All rights reserved | Copyright (c) 2019 Cloudflare. All rights reserved core,github.com/cloudflare/circl/ecc/goldilocks,BSD-3-Clause,Copyright (c) 2009 The Go Authors. 
All rights reserved | Copyright (c) 2019 Cloudflare. All rights reserved diff --git a/go.mod b/go.mod index 26930c22eacb8..d39bc25dbbadd 100644 --- a/go.mod +++ b/go.mod @@ -510,7 +510,7 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect github.com/rs/cors v1.11.1 // indirect - github.com/safchain/baloum v0.0.0-20221229104256-b1fc8f70a86b + github.com/safchain/baloum v0.0.0-20241120122234-f22c9bd19f3b github.com/saracen/walker v0.1.3 // indirect github.com/sassoftware/go-rpmutils v0.3.0 // indirect github.com/secure-systems-lab/go-securesystemslib v0.8.0 // indirect @@ -604,6 +604,7 @@ require ( github.com/DataDog/datadog-agent/pkg/util/defaultpaths v0.0.0-00010101000000-000000000000 github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 github.com/NVIDIA/go-nvml v0.12.4-0 + github.com/cloudflare/cbpfc v0.0.0-20240920015331-ff978e94500b github.com/containerd/containerd/api v1.8.0 github.com/containerd/errdefs v1.0.0 github.com/distribution/reference v0.6.0 diff --git a/go.sum b/go.sum index 799c0bcda1aba..a8cc5f6743724 100644 --- a/go.sum +++ b/go.sum @@ -463,6 +463,8 @@ github.com/circonus-labs/circonusllhist v0.1.3/go.mod h1:kMXHVDlOchFAehlya5ePtbp github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I= github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudflare/cbpfc v0.0.0-20240920015331-ff978e94500b h1:EgR1t4Lnq6uP6QxJQ+oIFtENOHUY3/7gMOE76vL0KcA= +github.com/cloudflare/cbpfc v0.0.0-20240920015331-ff978e94500b/go.mod h1:X/9cHz8JVzKlvoZyKBgMgrogKZlLf+pWjmm5gSUm5dI= github.com/cloudflare/circl v1.3.7 h1:qlCDlTPz2n9fu58M0Nh1J/JzcFpfgkFHHX3O35r5vcU= github.com/cloudflare/circl v1.3.7/go.mod h1:sRTcRWXGLrKw6yIGJ+l7amYJFfAXbZG0kBSc8r4zxgA= github.com/cloudfoundry-community/go-cfclient/v2 v2.0.1-0.20230503155151-3d15366c5820 h1:ixkQUDJYG6eSxgUEl6LLE2l2TD2C5AYmlm+fVhsr6Zs= @@ -1638,8 +1640,8 @@ github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkBk= github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= -github.com/safchain/baloum v0.0.0-20221229104256-b1fc8f70a86b h1:cTiH46CYvPhgOlE0t82N+rgQw44b7vB39ay+P+wiVz8= -github.com/safchain/baloum v0.0.0-20221229104256-b1fc8f70a86b/go.mod h1:1+GWOH32bsIEAHknYja6/H1efcDs+/Q2XrtYMM200Ho= +github.com/safchain/baloum v0.0.0-20241120122234-f22c9bd19f3b h1:ZeznXGJOGRRGKuU7GEUmNobE4swH0PbMqukrQS3XCLE= +github.com/safchain/baloum v0.0.0-20241120122234-f22c9bd19f3b/go.mod h1:azfM30OkV7er0g2EIbpI+Jl4P6T5RMpsED0+7Up/Gog= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= diff --git a/pkg/security/ebpf/c/include/constants/custom.h b/pkg/security/ebpf/c/include/constants/custom.h index dd522026b761c..88be17fa3c80b 100644 --- a/pkg/security/ebpf/c/include/constants/custom.h +++ b/pkg/security/ebpf/c/include/constants/custom.h @@ -61,12 +61,14 @@ enum DENTRY_ERPC_RESOLUTION_CODE enum TC_TAIL_CALL_KEYS { - UNKNOWN, - DNS_REQUEST, + DNS_REQUEST = 1, 
DNS_REQUEST_PARSER, IMDS_REQUEST, - RAW_PACKET, +}; + +enum TC_RAWPACKET_KEYS { RAW_PACKET_FILTER, + // reserved keys for raw packet filter tail calls }; #define DNS_MAX_LENGTH 256 diff --git a/pkg/security/ebpf/c/include/helpers/network.h b/pkg/security/ebpf/c/include/helpers/network.h index 0899d6e88e30c..1198e49d41071 100644 --- a/pkg/security/ebpf/c/include/helpers/network.h +++ b/pkg/security/ebpf/c/include/helpers/network.h @@ -82,10 +82,6 @@ __attribute__((always_inline)) void fill_network_context(struct network_context_ fill_network_device_context(&net_ctx->device, skb, pkt); } -__attribute__((always_inline)) void tail_call_to_classifier(struct __sk_buff *skb, int classifier_id) { - bpf_tail_call_compat(skb, &classifier_router, classifier_id); -} - __attribute__((always_inline)) void parse_tuple(struct nf_conntrack_tuple *tuple, struct flow_t *flow) { flow->sport = tuple->src.u.all; flow->dport = tuple->dst.u.all; @@ -94,7 +90,6 @@ __attribute__((always_inline)) void parse_tuple(struct nf_conntrack_tuple *tuple bpf_probe_read(&flow->daddr, sizeof(flow->daddr), &tuple->dst.u3.all); } - __attribute__((always_inline)) struct packet_t * parse_packet(struct __sk_buff *skb, int direction) { struct cursor c = {}; tc_cursor_init(&c, skb); diff --git a/pkg/security/ebpf/c/include/hooks/network/dns.h b/pkg/security/ebpf/c/include/hooks/network/dns.h index d6f40c2277ea5..46fd79393fa7d 100644 --- a/pkg/security/ebpf/c/include/hooks/network/dns.h +++ b/pkg/security/ebpf/c/include/hooks/network/dns.h @@ -80,7 +80,7 @@ int classifier_dns_request(struct __sk_buff *skb) { evt->id = htons(header.id); // tail call to the dns request parser - tail_call_to_classifier(skb, DNS_REQUEST_PARSER); + bpf_tail_call_compat(skb, &classifier_router, DNS_REQUEST_PARSER); // tail call failed, ignore packet return ACT_OK; @@ -116,7 +116,7 @@ int classifier_dns_request_parser(struct __sk_buff *skb) { send_event_with_size_ptr(skb, EVENT_DNS, evt, offsetof(struct dns_event_t, name) + qname_length); if (!is_dns_request_parsing_done(skb, pkt)) { - tail_call_to_classifier(skb, DNS_REQUEST_PARSER); + bpf_tail_call_compat(skb, &classifier_router, DNS_REQUEST_PARSER); } return ACT_OK; diff --git a/pkg/security/ebpf/c/include/hooks/network/raw.h b/pkg/security/ebpf/c/include/hooks/network/raw.h index 2b707952a177a..6f46f6b4eb1a2 100644 --- a/pkg/security/ebpf/c/include/hooks/network/raw.h +++ b/pkg/security/ebpf/c/include/hooks/network/raw.h @@ -4,61 +4,44 @@ #include "helpers/network.h" #include "perf_ring.h" -__attribute__((always_inline)) struct raw_packet_t *get_raw_packet_event() { +__attribute__((always_inline)) struct raw_packet_event_t *get_raw_packet_event() { u32 key = 0; - return bpf_map_lookup_elem(&raw_packets, &key); + return bpf_map_lookup_elem(&raw_packet_event, &key); } -SEC("classifier/raw_packet") -int classifier_raw_packet(struct __sk_buff *skb) { +SEC("classifier/raw_packet_sender") +int classifier_raw_packet_sender(struct __sk_buff *skb) { struct packet_t *pkt = get_packet(); if (pkt == NULL) { // should never happen return ACT_OK; } - struct raw_packet_t *evt = get_raw_packet_event(); - if ((evt == NULL) || (skb == NULL)) { + struct raw_packet_event_t *evt = get_raw_packet_event(); + if (evt == NULL || skb == NULL || evt->len == 0) { // should never happen return ACT_OK; } - bpf_skb_pull_data(skb, 0); + // process context + fill_network_process_context(&evt->process, pkt); - u32 len = *(u32 *)(skb + offsetof(struct __sk_buff, len)); - if (len > sizeof(evt->data)) { - len = sizeof(evt->data); + struct 
proc_cache_t *entry = get_proc_cache(evt->process.pid); + if (entry == NULL) { + evt->container.container_id[0] = 0; + } else { + copy_container_id_no_tracing(entry->container.container_id, &evt->container.container_id); } - // NOTE(safchain) inline asm because clang isn't generating the proper instructions for : - // if (len == 0) return ACT_OK; - /*asm ("r4 = %[len]\n" - "if r4 > 0 goto + 2\n" - "r0 = 0\n" - "exit\n" :: [len]"r"((u64)len));*/ - - if (len > 1) { - if (bpf_skb_load_bytes(skb, 0, evt->data, len) < 0) { - return ACT_OK; - } - evt->len = skb->len; - - // process context - fill_network_process_context(&evt->process, pkt); - - struct proc_cache_t *entry = get_proc_cache(evt->process.pid); - if (entry == NULL) { - evt->container.container_id[0] = 0; - } else { - copy_container_id_no_tracing(entry->container.container_id, &evt->container.container_id); - } + fill_network_device_context(&evt->device, skb, pkt); - fill_network_device_context(&evt->device, skb, pkt); - - u32 size = offsetof(struct raw_packet_t, data) + len; - send_event_with_size_ptr(skb, EVENT_RAW_PACKET, evt, size); + u32 len = evt->len; + if (len > sizeof(evt->data)) { + len = sizeof(evt->data); } + send_event_with_size_ptr(skb, EVENT_RAW_PACKET, evt, offsetof(struct raw_packet_event_t, data) + len); + return ACT_OK; } diff --git a/pkg/security/ebpf/c/include/hooks/network/router.h b/pkg/security/ebpf/c/include/hooks/network/router.h index e2b8361869c46..93cca5f4889ee 100644 --- a/pkg/security/ebpf/c/include/hooks/network/router.h +++ b/pkg/security/ebpf/c/include/hooks/network/router.h @@ -9,14 +9,14 @@ __attribute__((always_inline)) int route_pkt(struct __sk_buff *skb, struct packe // route DNS requests if (is_event_enabled(EVENT_DNS)) { if (pkt->l4_protocol == IPPROTO_UDP && pkt->translated_ns_flow.flow.dport == htons(53)) { - tail_call_to_classifier(skb, DNS_REQUEST); + bpf_tail_call_compat(skb, &classifier_router, DNS_REQUEST); } } // route IMDS requests if (is_event_enabled(EVENT_IMDS)) { if (pkt->l4_protocol == IPPROTO_TCP && ((pkt->ns_flow.flow.saddr[0] & 0xFFFFFFFF) == get_imds_ip() || (pkt->ns_flow.flow.daddr[0] & 0xFFFFFFFF) == get_imds_ip())) { - tail_call_to_classifier(skb, IMDS_REQUEST); + bpf_tail_call_compat(skb, &classifier_router, IMDS_REQUEST); } } diff --git a/pkg/security/ebpf/c/include/hooks/network/tc.h b/pkg/security/ebpf/c/include/hooks/network/tc.h index 92222aff85474..2bb8f8b5791c8 100644 --- a/pkg/security/ebpf/c/include/hooks/network/tc.h +++ b/pkg/security/ebpf/c/include/hooks/network/tc.h @@ -26,6 +26,33 @@ int classifier_egress(struct __sk_buff *skb) { return route_pkt(skb, pkt, EGRESS); }; +__attribute__((always_inline)) int prepare_raw_packet_event(struct __sk_buff *skb) { + struct raw_packet_event_t *evt = get_raw_packet_event(); + if (evt == NULL) { + // should never happen + return ACT_OK; + } + + bpf_skb_pull_data(skb, 0); + + u32 len = *(u32 *)(skb + offsetof(struct __sk_buff, len)); + if (len > sizeof(evt->data)) { + len = sizeof(evt->data); + } + + if (len > 1) { + if (bpf_skb_load_bytes(skb, 0, evt->data, len) < 0) { + return ACT_OK; + } + evt->len = skb->len; + } else { + evt->len = 0; + } + + return ACT_OK; +} + + SEC("classifier/ingress") int classifier_raw_packet_ingress(struct __sk_buff *skb) { struct packet_t *pkt = parse_packet(skb, INGRESS); @@ -33,7 +60,11 @@ int classifier_raw_packet_ingress(struct __sk_buff *skb) { return ACT_OK; } - tail_call_to_classifier(skb, RAW_PACKET_FILTER); + if (prepare_raw_packet_event(skb) != ACT_OK) { + return ACT_OK; + } + + 
bpf_tail_call_compat(skb, &raw_packet_classifier_router, RAW_PACKET_FILTER); return ACT_OK; }; @@ -45,7 +76,11 @@ int classifier_raw_packet_egress(struct __sk_buff *skb) { return ACT_OK; } - tail_call_to_classifier(skb, RAW_PACKET_FILTER); + if (prepare_raw_packet_event(skb) != ACT_OK) { + return ACT_OK; + } + + bpf_tail_call_compat(skb, &raw_packet_classifier_router, RAW_PACKET_FILTER); return ACT_OK; }; diff --git a/pkg/security/ebpf/c/include/maps.h b/pkg/security/ebpf/c/include/maps.h index 02974e0286dd5..c5050fd5545c7 100644 --- a/pkg/security/ebpf/c/include/maps.h +++ b/pkg/security/ebpf/c/include/maps.h @@ -88,14 +88,15 @@ BPF_PERCPU_ARRAY_MAP(packets, struct packet_t, 1) BPF_PERCPU_ARRAY_MAP(selinux_write_buffer, struct selinux_write_buffer_t, 1) BPF_PERCPU_ARRAY_MAP(is_new_kthread, u32, 1) BPF_PERCPU_ARRAY_MAP(syscalls_stats, struct syscalls_stats_t, EVENT_MAX) -BPF_PERCPU_ARRAY_MAP(raw_packets, struct raw_packet_t, 1) +BPF_PERCPU_ARRAY_MAP(raw_packet_event, struct raw_packet_event_t, 1) BPF_PROG_ARRAY(args_envs_progs, 3) BPF_PROG_ARRAY(dentry_resolver_kprobe_or_fentry_callbacks, EVENT_MAX) BPF_PROG_ARRAY(dentry_resolver_tracepoint_callbacks, EVENT_MAX) BPF_PROG_ARRAY(dentry_resolver_kprobe_or_fentry_progs, 6) BPF_PROG_ARRAY(dentry_resolver_tracepoint_progs, 3) -BPF_PROG_ARRAY(classifier_router, 100) +BPF_PROG_ARRAY(classifier_router, 10) BPF_PROG_ARRAY(sys_exit_progs, 64) +BPF_PROG_ARRAY(raw_packet_classifier_router, 32) #endif diff --git a/pkg/security/ebpf/c/include/structs/network.h b/pkg/security/ebpf/c/include/structs/network.h index d212e09d1b3c9..9efed0aa257b3 100644 --- a/pkg/security/ebpf/c/include/structs/network.h +++ b/pkg/security/ebpf/c/include/structs/network.h @@ -83,7 +83,7 @@ struct network_context_t { u16 l4_protocol; }; -struct raw_packet_t { +struct raw_packet_event_t { struct kevent_t event; struct process_context_t process; struct span_context_t span; diff --git a/pkg/security/ebpf/c/include/tests/activity_dump_ratelimiter_test.h b/pkg/security/ebpf/c/include/tests/activity_dump_ratelimiter_test.h index f65e82e941805..6d6ab486f8011 100644 --- a/pkg/security/ebpf/c/include/tests/activity_dump_ratelimiter_test.h +++ b/pkg/security/ebpf/c/include/tests/activity_dump_ratelimiter_test.h @@ -39,7 +39,7 @@ int test_ad_ratelimiter_basic() { assert_zero(activity_dump_rate_limiter_allow(&config, cookie, now, 0), "event allowed which should not be"); } - return 0; + return 1; } SEC("test/ad_ratelimiter_basic_half") @@ -73,7 +73,7 @@ int test_ad_ratelimiter_basic_half() { assert_zero(activity_dump_rate_limiter_allow(&config, cookie, now, 0), "event allowed which should not be"); } - return 0; + return 1; } __attribute__((always_inline)) int test_ad_ratelimiter_variable_droprate(int algo) { @@ -106,7 +106,7 @@ __attribute__((always_inline)) int test_ad_ratelimiter_variable_droprate(int alg assert_greater_than(total_allowed, AD_RL_TEST_RATE * 3 / 4, "nope"); assert_lesser_than(total_allowed, AD_RL_TEST_RATE / 10, "nope"); } - return 0; + return 1; } SEC("test/ad_ratelimiter_decreasing_droprate") diff --git a/pkg/security/ebpf/c/include/tests/baloum.h b/pkg/security/ebpf/c/include/tests/baloum.h index 5b6f263c18b73..4128525735140 100644 --- a/pkg/security/ebpf/c/include/tests/baloum.h +++ b/pkg/security/ebpf/c/include/tests/baloum.h @@ -15,6 +15,7 @@ static int (*baloum_call)(struct baloum_ctx *ctx, const char *section) = (void * static int (*baloum_strcmp)(const char *s1, const char *s2) = (void *)0xfffd; static int (*baloum_memcmp)(const void *b1, const void *b2, __u32 
size) = (void *)0xfffc; static int (*baloum_sleep)(__u64 ns) = (void *)0xfffb; +static int (*baloum_memcpy)(const void *b1, const void *b2, __u32 size) = (void *)0xfffa; #define assert_memcmp(b1, b2, s, msg) \ if (baloum_memcmp(b1, b2, s) != 0) { \ diff --git a/pkg/security/ebpf/c/include/tests/discarders_test.h b/pkg/security/ebpf/c/include/tests/discarders_test.h index 6738e64eca392..24970f491920a 100644 --- a/pkg/security/ebpf/c/include/tests/discarders_test.h +++ b/pkg/security/ebpf/c/include/tests/discarders_test.h @@ -53,7 +53,7 @@ int test_discarders_event_mask() { ret = _is_discarded_by_inode(EVENT_CHMOD, mount_id, inode); assert_not_zero(ret, "inode should be discarded"); - return 0; + return 1; } SEC("test/discarders_retention") @@ -93,7 +93,7 @@ int test_discarders_retention() { ret = _is_discarded_by_inode(EVENT_OPEN, mount_id, inode); assert_not_zero(ret, "inode should be discarded"); - return 0; + return 1; } SEC("test/discarders_revision") @@ -142,7 +142,7 @@ int test_discarders_revision() { ret = _is_discarded_by_inode(EVENT_OPEN, mount_id1, inode1); assert_not_zero(ret, "inode should be discarded"); - return 0; + return 1; } SEC("test/discarders_mount_revision") @@ -183,7 +183,7 @@ int test_discarders_mount_revision() { ret = _is_discarded_by_inode(EVENT_OPEN, mount_id1, inode1); assert_not_zero(ret, "inode should be discarded"); - return 0; + return 1; } #endif diff --git a/pkg/security/ebpf/c/include/tests/raw_packet_test.h b/pkg/security/ebpf/c/include/tests/raw_packet_test.h new file mode 100644 index 0000000000000..a00f55225b6ea --- /dev/null +++ b/pkg/security/ebpf/c/include/tests/raw_packet_test.h @@ -0,0 +1,30 @@ +#ifndef _RAW_PACKET_TEST_H +#define _RAW_PACKET_TEST_H + +#include "helpers/network.h" +#include "baloum.h" + +SEC("test/raw_packet_tail_calls") +int raw_packet_tail_calls(struct __sk_buff *skb) { + struct raw_packet_event_t *evt = get_raw_packet_event(); + assert_not_null(evt, "unable to get raw packet event") + + // tcp dst port 5555 and tcp[tcpflags] == tcp-syn + unsigned char data[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x10, + 0x00, 0x30, 0xf4, 0xa2, 0x40, 0x00, 0x40, 0x06, + 0x48, 0x13, 0x7f, 0x00, 0x00, 0x01, 0x7f, 0x00, + 0x00, 0x01, 0xa2, 0x36, 0x15, 0xb3, 0x1c, 0x5b, + 0x89, 0x33, 0x00, 0x00, 0x00, 0x00, 0x70, 0x02, + 0xff, 0xd7, 0xfe, 0x24, 0x00, 0x00, 0x02, 0x04, + 0xff, 0xd7, 0x01, 0x03, 0x03, 0x07 + }; + baloum_memcpy(evt->data, data, sizeof(data)); + + bpf_tail_call_compat(skb, &raw_packet_classifier_router, RAW_PACKET_FILTER); + + return 1; +} + +#endif diff --git a/pkg/security/ebpf/c/include/tests/tests.h b/pkg/security/ebpf/c/include/tests/tests.h index 9e9652d486857..2abe84b70854f 100644 --- a/pkg/security/ebpf/c/include/tests/tests.h +++ b/pkg/security/ebpf/c/include/tests/tests.h @@ -3,5 +3,6 @@ #include "discarders_test.h" #include "activity_dump_ratelimiter_test.h" +#include "raw_packet_test.h" #endif diff --git a/pkg/security/ebpf/probes/all.go b/pkg/security/ebpf/probes/all.go index e501a61ff926f..770d883b64dd7 100644 --- a/pkg/security/ebpf/probes/all.go +++ b/pkg/security/ebpf/probes/all.go @@ -131,7 +131,7 @@ func AllMaps() []*manager.Map { {Name: "syscalls_stats_enabled"}, {Name: "kill_list"}, // used by raw packet filters - {Name: "packets"}, + {Name: "raw_packet_event"}, } } @@ -252,14 +252,14 @@ func AllRingBuffers() []*manager.RingBuffer { } // AllTailRoutes returns the list of all the tail call routes -func AllTailRoutes(ERPCDentryResolutionEnabled, 
networkEnabled, supportMmapableMaps bool) []manager.TailCallRoute {
+func AllTailRoutes(eRPCDentryResolutionEnabled, networkEnabled, rawPacketEnabled, supportMmapableMaps bool) []manager.TailCallRoute {
 	var routes []manager.TailCallRoute
 
 	routes = append(routes, getExecTailCallRoutes()...)
-	routes = append(routes, getDentryResolverTailCallRoutes(ERPCDentryResolutionEnabled, supportMmapableMaps)...)
+	routes = append(routes, getDentryResolverTailCallRoutes(eRPCDentryResolutionEnabled, supportMmapableMaps)...)
 	routes = append(routes, getSysExitTailCallRoutes()...)
 
 	if networkEnabled {
-		routes = append(routes, getTCTailCallRoutes()...)
+		routes = append(routes, getTCTailCallRoutes(rawPacketEnabled)...)
 	}
 
 	return routes
diff --git a/pkg/security/ebpf/probes/const.go b/pkg/security/ebpf/probes/const.go
index 7b10d86ca29ae..3d984a02b9b0b 100644
--- a/pkg/security/ebpf/probes/const.go
+++ b/pkg/security/ebpf/probes/const.go
@@ -77,6 +77,11 @@ const (
 	DentryResolverCGroupWriteCallbackTracepointKey
 )
 
+const (
+	// RawPacketFilterMaxTailCall defines the maximum number of tail calls
+	RawPacketFilterMaxTailCall = 5
+)
+
 const (
 	// TCDNSRequestKey is the key to the DNS request program
 	TCDNSRequestKey uint32 = iota + 1
@@ -84,10 +89,14 @@
 	TCDNSRequestParserKey
 	// TCIMDSRequestParserKey is the key to the IMDS request program
 	TCIMDSRequestParserKey
-	// TCRawPacketParserKey is the key to the raw packet program
-	TCRawPacketParserKey
+)
+
+const (
 	// TCRawPacketFilterKey is the key to the raw packet filter program
-	TCRawPacketFilterKey
+	// reserve 5 tail calls for the filtering
+	TCRawPacketFilterKey uint32 = iota
+	// TCRawPacketParserSenderKey is the key to the raw packet sender program
+	TCRawPacketParserSenderKey = TCRawPacketFilterKey + RawPacketFilterMaxTailCall // reserved key for filter tail calls
 )
 
 const (
diff --git a/pkg/security/ebpf/probes/raw_packet.go b/pkg/security/ebpf/probes/raw_packet.go
new file mode 100644
index 0000000000000..18dc27434d043
--- /dev/null
+++ b/pkg/security/ebpf/probes/raw_packet.go
@@ -0,0 +1,15 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build linux
+
+// Package probes holds probes related files
+package probes
+
+// RawPacketTCProgram returns the list of TC classifier sections
+var RawPacketTCProgram = []string{
+	"classifier_raw_packet_egress",
+	"classifier_raw_packet_ingress",
+}
diff --git a/pkg/security/ebpf/probes/rawpacket/bpffilter.go b/pkg/security/ebpf/probes/rawpacket/bpffilter.go
new file mode 100644
index 0000000000000..b01e57dffa64f
--- /dev/null
+++ b/pkg/security/ebpf/probes/rawpacket/bpffilter.go
@@ -0,0 +1,19 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build linux
+
+// Package rawpacket holds rawpacket related files
+package rawpacket
+
+import (
+	"github.com/DataDog/datadog-agent/pkg/security/secl/compiler/eval"
+)
+
+// Filter defines a raw packet filter
+type Filter struct {
+	RuleID    eval.RuleID
+	BPFFilter string
+}
diff --git a/pkg/security/ebpf/probes/rawpacket/pcap.go b/pkg/security/ebpf/probes/rawpacket/pcap.go
new file mode 100644
index 0000000000000..8ffc7c451c6ab
--- /dev/null
+++ b/pkg/security/ebpf/probes/rawpacket/pcap.go
@@ -0,0 +1,251 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build linux && pcap && cgo
+
+// Package rawpacket holds rawpacket related files
+package rawpacket
+
+import (
+	"errors"
+	"fmt"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/asm"
+	"github.com/cloudflare/cbpfc"
+	"github.com/google/gopacket/layers"
+	"github.com/google/gopacket/pcap"
+	"github.com/hashicorp/go-multierror"
+	"golang.org/x/net/bpf"
+
+	"github.com/DataDog/datadog-agent/pkg/security/ebpf/probes"
+)
+
+const (
+	// progPrefix is the prefix used for raw packet filter programs
+	progPrefix = "raw_packet_prog_"
+
+	// packetCaptureSize see kernel definition
+	packetCaptureSize = 256
+)
+
+// ProgOpts defines options
+type ProgOpts struct {
+	*cbpfc.EBPFOpts
+
+	// MaxTailCalls maximum number of tail calls generated
+	MaxTailCalls int
+	// MaxProgSize maximum number of instructions per program
+	MaxProgSize int
+	// NopInstLen number of nop instructions inserted in each program
+	NopInstLen int
+
+	// internals
+	sendEventLabel string
+	ctxSave        asm.Register
+	tailCallMapFd  int
+}
+
+// DefaultProgOpts default options
+var DefaultProgOpts = ProgOpts{
+	EBPFOpts: &cbpfc.EBPFOpts{
+		PacketStart: asm.R1,
+		PacketEnd:   asm.R2,
+		Result:      asm.R3,
+		Working: [4]asm.Register{
+			asm.R4,
+			asm.R5,
+			asm.R6,
+			asm.R7,
+		},
+		StackOffset: 16, // adapt using the stack size used outside of the filter itself, ex: map_lookup
+	},
+	sendEventLabel: "send_event",
+	ctxSave:        asm.R9,
+	MaxTailCalls:   probes.RawPacketFilterMaxTailCall,
+	MaxProgSize:    4000,
+}
+
+// BPFFilterToInsts compiles a bpf filter expression
+func BPFFilterToInsts(index int, filter string, opts ProgOpts) (asm.Instructions, error) {
+	pcapBPF, err := pcap.CompileBPFFilter(layers.LinkTypeEthernet, 256, filter)
+	if err != nil {
+		return nil, err
+	}
+	bpfInsts := make([]bpf.Instruction, len(pcapBPF))
+	for i, ri := range pcapBPF {
+		bpfInsts[i] = bpf.RawInstruction{Op: ri.Code, Jt: ri.Jt, Jf: ri.Jf, K: ri.K}.Disassemble()
+	}
+
+	var cbpfcOpts cbpfc.EBPFOpts
+	if opts.EBPFOpts != nil {
+		// make a copy so that we can modify the labels
+		cbpfcOpts = *opts.EBPFOpts
+	}
+	cbpfcOpts.LabelPrefix = fmt.Sprintf("cbpfc_%d_", index)
+	cbpfcOpts.ResultLabel = fmt.Sprintf("check_result_%d", index)
+
+	insts, err := cbpfc.ToEBPF(bpfInsts, cbpfcOpts)
+	if err != nil {
+		return nil, err
+	}
+
+	resultLabel := cbpfcOpts.ResultLabel
+
+	// add nop insts, used to test the max insts and artificially generate tail calls
+	for i := 0; i != opts.NopInstLen; i++ {
+		insts = append(insts,
+			asm.JEq.Imm(asm.R9, 0, opts.sendEventLabel).WithSymbol(resultLabel),
+		)
+		resultLabel = ""
+	}
+
+	// filter result
+	insts = append(insts,
+		asm.JNE.Imm(cbpfcOpts.Result, 0, opts.sendEventLabel).WithSymbol(resultLabel),
+	)
+
+	return insts, nil
+}
+
+func filtersToProgs(filters []Filter, opts ProgOpts, headerInsts, senderInsts asm.Instructions) ([]asm.Instructions, *multierror.Error) {
+	var (
+		progInsts []asm.Instructions
+		mErr      *multierror.Error
+		tailCalls int
+		header    bool
+	)
+
+	// prepend a return instruction in case of failure
+	footerInsts := append(asm.Instructions{
+		asm.Return(),
+	}, senderInsts...)
+
+	isMaxSizeExceeded := func(filterInsts, tailCallInsts asm.Instructions) bool {
+		return len(filterInsts)+len(tailCallInsts)+len(footerInsts) > opts.MaxProgSize
+	}
+
+	for i, filter := range filters {
+		filterInsts, err := BPFFilterToInsts(i, filter.BPFFilter, opts)
+		if err != nil {
+			mErr = multierror.Append(mErr, fmt.Errorf("unable to generate eBPF bytecode for rule `%s`: %s", filter.RuleID, err))
+			continue
+		}
+
+		var tailCallInsts asm.Instructions
+
+		// insert tail call to the current filter if not the last prog
+		if i+1 < len(filters) {
+			tailCallInsts = asm.Instructions{
+				asm.Mov.Reg(asm.R1, opts.ctxSave),
+				asm.LoadMapPtr(asm.R2, opts.tailCallMapFd),
+				asm.Mov.Imm(asm.R3, int32(probes.TCRawPacketFilterKey+uint32(tailCalls)+1)),
+				asm.FnTailCall.Call(),
+			}
+		}
+
+		// single program exceeded the limit
+		if isMaxSizeExceeded(filterInsts, tailCallInsts) {
+			mErr = multierror.Append(mErr, fmt.Errorf("max number of instructions exceeded for rule `%s`", filter.RuleID))
+			continue
+		}
+
+		if !header {
+			progInsts = append(progInsts, headerInsts)
+			header = true
+		}
+		progInsts[tailCalls] = append(progInsts[tailCalls], filterInsts...)
+
+		// max size exceeded, generate a new tail call
+		if isMaxSizeExceeded(progInsts[tailCalls], tailCallInsts) {
+			if opts.MaxTailCalls != 0 && tailCalls >= opts.MaxTailCalls {
+				mErr = multierror.Append(mErr, fmt.Errorf("maximum allowed tail calls reached: %d vs %d", tailCalls, opts.MaxTailCalls))
+				break
+			}
+
+			// append the tail call to the next filter program
+			progInsts[tailCalls] = append(progInsts[tailCalls], tailCallInsts...)
+
+			// insert the event sender instructions
+			progInsts[tailCalls] = append(progInsts[tailCalls], footerInsts...)
+
+			// start a new program
+			header = false
+			tailCalls++
+		}
+	}
+
+	if tailCalls < len(progInsts) && header {
+		progInsts[tailCalls] = append(progInsts[tailCalls], footerInsts...)
+	}
+
+	return progInsts, mErr
+}
+
+// FiltersToProgramSpecs returns the list of program specs built from the raw packet filter definitions
+func FiltersToProgramSpecs(rawPacketEventMapFd, clsRouterMapFd int, filters []Filter, opts ProgOpts) ([]*ebpf.ProgramSpec, error) {
+	var mErr *multierror.Error
+
+	const (
+		// raw packet data, see kernel definition
+		dataSize   = 256
+		dataOffset = 164
+	)
+
+	opts.tailCallMapFd = clsRouterMapFd
+
+	headerInsts := append(asm.Instructions{},
+		// save ctx
+		asm.Mov.Reg(opts.ctxSave, asm.R1),
+		// load raw event
+		asm.Mov.Reg(asm.R2, asm.RFP),
+		asm.Add.Imm(asm.R2, -4),
+		asm.StoreImm(asm.R2, 0, 0, asm.Word), // index 0
+		asm.LoadMapPtr(asm.R1, rawPacketEventMapFd),
+		asm.FnMapLookupElem.Call(),
+		asm.JNE.Imm(asm.R0, 0, "raw-packet-event-not-null"),
+		asm.Return(),
+		// place the packet data bounds in the start and end registers
+		asm.Mov.Reg(opts.PacketStart, asm.R0).WithSymbol("raw-packet-event-not-null"),
+		asm.Add.Imm(opts.PacketStart, dataOffset),
+		asm.Mov.Reg(opts.PacketEnd, opts.PacketStart),
+		asm.Add.Imm(opts.PacketEnd, dataSize),
+	)
+
+	senderInsts := asm.Instructions{
+		asm.Mov.Reg(asm.R1, opts.ctxSave).WithSymbol(opts.sendEventLabel),
+		asm.LoadMapPtr(asm.R2, clsRouterMapFd),
+		asm.Mov.Imm(asm.R3, int32(probes.TCRawPacketParserSenderKey)),
+		asm.FnTailCall.Call(),
+		asm.Mov.Imm(asm.R0, 0),
+		asm.Return(),
+	}
+
+	// compile and convert to eBPF progs
+	progInsts, err := filtersToProgs(filters, opts, headerInsts, senderInsts)
+	if err.ErrorOrNil() != nil {
+		mErr = multierror.Append(mErr, err)
+	}
+
+	// should not be possible
+	if len(progInsts) == 0 {
+		return nil, errors.New("no programs were generated")
+	}
+
+	progSpecs := make([]*ebpf.ProgramSpec, len(progInsts))
+
+	for i, insts := range progInsts {
+		name := fmt.Sprintf("%s%d", progPrefix, i)
+
+		progSpecs[i] = &ebpf.ProgramSpec{
+			Name:         name,
+			Type:         ebpf.SchedCLS,
+			Instructions: insts,
+			License:      "GPL",
+		}
+	}
+
+	return progSpecs, mErr.ErrorOrNil()
+}
diff --git a/pkg/security/ebpf/probes/rawpacket/pcap_unsupported.go b/pkg/security/ebpf/probes/rawpacket/pcap_unsupported.go
new file mode 100644
index 0000000000000..f2d8896930dea
--- /dev/null
+++ b/pkg/security/ebpf/probes/rawpacket/pcap_unsupported.go
@@ -0,0 +1,39 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2016-present Datadog, Inc.
+
+//go:build linux && !(pcap && cgo)
+
+// Package rawpacket holds rawpacket related files
+package rawpacket
+
+import (
+	"errors"
+
+	"github.com/cilium/ebpf"
+	"github.com/cilium/ebpf/asm"
+)
+
+// ProgOpts defines options
+type ProgOpts struct {
+	// MaxTailCalls maximum number of tail calls generated
+	MaxTailCalls int
+	// MaxProgSize maximum number of instructions per program
+	MaxProgSize int
+	// NopInstLen number of nop instructions inserted in each program
+	NopInstLen int
+}
+
+// DefaultProgOpts default options
+var DefaultProgOpts ProgOpts
+
+// BPFFilterToInsts compiles a bpf filter expression
+func BPFFilterToInsts(_ int, _ string, _ ProgOpts) (asm.Instructions, error) {
+	return asm.Instructions{}, errors.New("not supported")
+}
+
+// FiltersToProgramSpecs returns the list of program specs built from the raw packet filter definitions
+func FiltersToProgramSpecs(_, _ int, _ []Filter, _ ProgOpts) ([]*ebpf.ProgramSpec, error) {
+	return nil, errors.New("not supported")
+}
diff --git a/pkg/security/ebpf/probes/tc.go b/pkg/security/ebpf/probes/tc.go
index 0a26151ce7590..e0a721986e9f5 100644
--- a/pkg/security/ebpf/probes/tc.go
+++ b/pkg/security/ebpf/probes/tc.go
@@ -10,8 +10,6 @@ package probes
 
 import (
 	manager "github.com/DataDog/ebpf-manager"
-	"github.com/cilium/ebpf"
-	"github.com/cilium/ebpf/asm"
 	"golang.org/x/sys/unix"
 )
 
@@ -68,27 +66,12 @@ func GetTCProbes(withNetworkIngress bool, withRawPacket bool) []*manager.Probe {
 	return out
 }
 
-// RawPacketTCProgram returns the list of TC classifier sections
-var RawPacketTCProgram = []string{
-	"classifier_raw_packet_egress",
-	"classifier_raw_packet_ingress",
-}
-
-// GetRawPacketTCFilterProg returns a first tc filter
-func GetRawPacketTCFilterProg(_, clsRouterMapFd int) (*ebpf.ProgramSpec, error) {
-	insts := asm.Instructions{
-		asm.LoadMapPtr(asm.R2, clsRouterMapFd),
-		asm.Mov.Imm(asm.R3, int32(TCRawPacketParserKey)),
-		asm.FnTailCall.Call(),
-		asm.Mov.Imm(asm.R0, 0),
-		asm.Return(),
+// GetRawPacketTCProgramFunctions returns the raw packet functions
+func GetRawPacketTCProgramFunctions() []string {
+	return []string{
+		"classifier_raw_packet",
+		"classifier_raw_packet_sender",
 	}
-
-	return &ebpf.ProgramSpec{
-		Type:         ebpf.SchedCLS,
-		Instructions: insts,
-		License:      "GPL",
-	}, nil
 }
 
 // GetAllTCProgramFunctions returns the list of TC classifier sections
@@ -97,9 +80,10 @@ func GetAllTCProgramFunctions() []string {
 		"classifier_dns_request_parser",
 		"classifier_dns_request",
 		"classifier_imds_request",
-		"classifier_raw_packet",
 	}
 
+	output = append(output, GetRawPacketTCProgramFunctions()...)
+ for _, tcProbe := range GetTCProbes(true, true) { output = append(output, tcProbe.EBPFFuncName) } @@ -115,8 +99,8 @@ func GetAllTCProgramFunctions() []string { return output } -func getTCTailCallRoutes() []manager.TailCallRoute { - return []manager.TailCallRoute{ +func getTCTailCallRoutes(withRawPacket bool) []manager.TailCallRoute { + tcr := []manager.TailCallRoute{ { ProgArrayName: "classifier_router", Key: TCDNSRequestKey, @@ -138,12 +122,17 @@ func getTCTailCallRoutes() []manager.TailCallRoute { EBPFFuncName: "classifier_imds_request", }, }, - { - ProgArrayName: "classifier_router", - Key: TCRawPacketParserKey, + } + + if withRawPacket { + tcr = append(tcr, manager.TailCallRoute{ + ProgArrayName: "raw_packet_classifier_router", + Key: TCRawPacketParserSenderKey, ProbeIdentificationPair: manager.ProbeIdentificationPair{ - EBPFFuncName: "classifier_raw_packet", + EBPFFuncName: "classifier_raw_packet_sender", }, - }, + }) } + + return tcr } diff --git a/pkg/security/ebpf/tests/activity_dump_ratelimiter_test.go b/pkg/security/ebpf/tests/activity_dump_ratelimiter_test.go index 15b099d093980..06a30a1a3ce1a 100644 --- a/pkg/security/ebpf/tests/activity_dump_ratelimiter_test.go +++ b/pkg/security/ebpf/tests/activity_dump_ratelimiter_test.go @@ -17,7 +17,7 @@ import ( func TestActivityDumpRateLimiterBasic(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/ad_ratelimiter_basic") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -25,7 +25,7 @@ func TestActivityDumpRateLimiterBasic(t *testing.T) { func TestActivityDumpRateLimiterBasicHalf(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/ad_ratelimiter_basic_half") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -33,7 +33,7 @@ func TestActivityDumpRateLimiterBasicHalf(t *testing.T) { func TestActivityDumpRateLimiterDecreasingDroprate(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/ad_ratelimiter_decreasing_droprate") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -41,7 +41,7 @@ func TestActivityDumpRateLimiterDecreasingDroprate(t *testing.T) { func TestActivityDumpRateLimiterIncreasingDroprate(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/ad_ratelimiter_increasing_droprate") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } diff --git a/pkg/security/ebpf/tests/discarders_test.go b/pkg/security/ebpf/tests/discarders_test.go index 11fdb6f63272f..c044a2d752bc0 100644 --- a/pkg/security/ebpf/tests/discarders_test.go +++ b/pkg/security/ebpf/tests/discarders_test.go @@ -17,7 +17,7 @@ import ( func TestDiscarderEventMask(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/discarders_event_mask") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -25,7 +25,7 @@ func TestDiscarderEventMask(t *testing.T) { func TestDiscarderRetention(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/discarders_retention") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -33,7 +33,7 @@ func TestDiscarderRetention(t *testing.T) { func 
TestDiscarderRevision(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/discarders_revision") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } @@ -41,7 +41,7 @@ func TestDiscarderRevision(t *testing.T) { func TestDiscarderMountRevision(t *testing.T) { var ctx baloum.StdContext code, err := newVM(t).RunProgram(&ctx, "test/discarders_mount_revision") - if err != nil || code != 0 { + if err != nil || code != 1 { t.Errorf("unexpected error: %v, %d", err, code) } } diff --git a/pkg/security/ebpf/tests/raw_packet_test.go b/pkg/security/ebpf/tests/raw_packet_test.go new file mode 100644 index 0000000000000..e0238036542e5 --- /dev/null +++ b/pkg/security/ebpf/tests/raw_packet_test.go @@ -0,0 +1,194 @@ +// Unless explicitly stated otherwise all files in this repository are licensed +// under the Apache License Version 2.0. +// This product includes software developed at Datadog (https://www.datadoghq.com/). +// Copyright 2016-present Datadog, Inc. + +//go:build linux && ebpf_bindata && pcap && cgo + +// Package tests holds tests related files +package tests + +import ( + "testing" + + "github.com/cilium/ebpf" + "github.com/cilium/ebpf/asm" + "github.com/safchain/baloum/pkg/baloum" + "github.com/stretchr/testify/assert" + + "github.com/DataDog/datadog-agent/pkg/security/ebpf/probes" + "github.com/DataDog/datadog-agent/pkg/security/ebpf/probes/rawpacket" +) + +func testRawPacketFilter(t *testing.T, filters []rawpacket.Filter, expRetCode int64, expProgNum int, opts rawpacket.ProgOpts, catchCompilerError bool) { + var ctx baloum.StdContext + + vm := newVM(t) + + rawPacketEventMap, err := vm.LoadMap("raw_packet_event") + assert.Nil(t, err, "map not found") + + routerMap, err := vm.LoadMap("raw_packet_classifier_router") + assert.Nil(t, err, "map not found") + + progSpecs, err := rawpacket.FiltersToProgramSpecs(rawPacketEventMap.FD(), routerMap.FD(), filters, opts) + if err != nil { + if catchCompilerError { + t.Fatal(err) + } else { + t.Log(err) + } + } + + assert.Equal(t, expProgNum, len(progSpecs), "number of expected programs") + + for i, progSpec := range progSpecs { + fd := vm.AddProgram(progSpec) + + _, err := routerMap.Update(probes.TCRawPacketFilterKey+uint32(i), fd, baloum.BPF_ANY) + assert.Nil(t, err, "map update error") + } + + // override the TCRawPacketParserSenderKey program with a test program + sendProgSpec := ebpf.ProgramSpec{ + Type: ebpf.SchedCLS, + Instructions: asm.Instructions{ + asm.Mov.Imm(asm.R0, 2), // put 2 as a success return value + asm.Return(), + }, + License: "GPL", + } + sendProgFD := vm.AddProgram(&sendProgSpec) + + _, err = routerMap.Update(probes.TCRawPacketParserSenderKey, sendProgFD, baloum.BPF_ANY) + assert.Nil(t, err, "map update error") + + code, err := vm.RunProgram(&ctx, "test/raw_packet_tail_calls", ebpf.SchedCLS) + if expRetCode != -1 { + assert.Nil(t, err, "program execution error") + } + assert.Equal(t, expRetCode, code, "return code error: %v", err) +} + +func TestRawPacketTailCalls(t *testing.T) { + t.Run("syn-port-std-ok", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ok", + BPFFilter: "tcp dst port 5555 and tcp[tcpflags] == tcp-syn", + }, + } + testRawPacketFilter(t, filters, 2, 1, rawpacket.DefaultProgOpts, true) + }) + + t.Run("syn-port-std-ko", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ko", + BPFFilter: "tcp dst port 6666 and tcp[tcpflags] == tcp-syn", + }, + } + testRawPacketFilter(t, filters, 0, 1, 
rawpacket.DefaultProgOpts, true) + }) + + t.Run("syn-port-std-limit-ko", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ko", + BPFFilter: "tcp dst port 5555 and tcp[tcpflags] == tcp-syn", + }, + } + + opts := rawpacket.DefaultProgOpts + opts.NopInstLen = opts.MaxProgSize + + testRawPacketFilter(t, filters, -1, 0, opts, false) + }) + + t.Run("syn-port-std-syntax-err", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ok", + BPFFilter: "tcp dst port number and tcp[tcpflags] == tcp-syn", + }, + } + testRawPacketFilter(t, filters, -1, 0, rawpacket.DefaultProgOpts, false) + }) + + t.Run("syn-port-multi-ok", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ko", + BPFFilter: "tcp dst port 6666 and tcp[tcpflags] == tcp-syn", + }, + { + RuleID: "ok", + BPFFilter: "tcp dst port 5555 and tcp[tcpflags] == tcp-syn", + }, + } + + opts := rawpacket.DefaultProgOpts + opts.NopInstLen = opts.MaxProgSize - 50 + + testRawPacketFilter(t, filters, 2, 2, opts, true) + }) + + t.Run("syn-port-multi-ko", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ko1", + BPFFilter: "tcp dst port 6666 and tcp[tcpflags] == tcp-syn", + }, + { + RuleID: "ko2", + BPFFilter: "tcp dst port 7777 and tcp[tcpflags] == tcp-syn", + }, + } + + opts := rawpacket.DefaultProgOpts + opts.NopInstLen = opts.MaxProgSize - 50 + + testRawPacketFilter(t, filters, 0, 2, opts, true) + }) + + t.Run("syn-port-multi-syntax-err", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ko", + BPFFilter: "tcp dst port number and tcp[tcpflags] == tcp-syn", + }, + { + RuleID: "ok", + BPFFilter: "tcp dst port 5555 and tcp[tcpflags] == tcp-syn", + }, + } + + opts := rawpacket.DefaultProgOpts + opts.NopInstLen = opts.MaxProgSize - 50 + + testRawPacketFilter(t, filters, 2, 1, opts, false) + }) + + t.Run("syn-port-multi-limit-ok", func(t *testing.T) { + filters := []rawpacket.Filter{ + { + RuleID: "ok", + BPFFilter: "tcp dst port 5555 and tcp[tcpflags] == tcp-syn", + }, + { + RuleID: "ko1", + BPFFilter: "tcp dst port number and tcp[tcpflags] == tcp-syn", + }, + { + RuleID: "ko2", + BPFFilter: "tcp dst port 7777 and tcp[tcpflags] == tcp-syn", + }, + } + + opts := rawpacket.DefaultProgOpts + opts.MaxTailCalls = 0 + opts.NopInstLen = opts.MaxProgSize - 50 + + testRawPacketFilter(t, filters, 2, 2, opts, false) + }) +} diff --git a/pkg/security/probe/model_ebpf.go b/pkg/security/probe/model_ebpf.go index 2854cc2f8cf4b..c26e96a0b91f3 100644 --- a/pkg/security/probe/model_ebpf.go +++ b/pkg/security/probe/model_ebpf.go @@ -12,6 +12,7 @@ import ( "fmt" "time" + "github.com/DataDog/datadog-agent/pkg/security/ebpf/probes/rawpacket" "github.com/DataDog/datadog-agent/pkg/security/probe/constantfetch" "github.com/DataDog/datadog-agent/pkg/security/secl/compiler/eval" "github.com/DataDog/datadog-agent/pkg/security/secl/model" @@ -20,7 +21,7 @@ import ( // NewEBPFModel returns a new model with some extra field validation func NewEBPFModel(probe *EBPFProbe) *model.Model { return &model.Model{ - ExtraValidateFieldFnc: func(field eval.Field, _ eval.FieldValue) error { + ExtraValidateFieldFnc: func(field eval.Field, value eval.FieldValue) error { switch field { case "bpf.map.name": if offset, found := probe.constantOffsets[constantfetch.OffsetNameBPFMapStructName]; !found || offset == constantfetch.ErrorSentinel { @@ -31,6 +32,13 @@ func NewEBPFModel(probe *EBPFProbe) *model.Model { if offset, found := probe.constantOffsets[constantfetch.OffsetNameBPFProgAuxStructName]; !found || offset == 
constantfetch.ErrorSentinel {
 				return fmt.Errorf("%s is not available on this kernel version", field)
 			}
+		case "packet.filter":
+			if probe.isRawPacketNotSupported() {
+				return fmt.Errorf("%s is not available on this kernel version", field)
+			}
+			if _, err := rawpacket.BPFFilterToInsts(0, value.Value.(string), rawpacket.DefaultProgOpts); err != nil {
+				return err
+			}
 		}
 
 		return nil
diff --git a/pkg/security/probe/probe_ebpf.go b/pkg/security/probe/probe_ebpf.go
index ae258025ee7f5..37db8edde5422 100644
--- a/pkg/security/probe/probe_ebpf.go
+++ b/pkg/security/probe/probe_ebpf.go
@@ -40,6 +40,7 @@ import (
 	"github.com/DataDog/datadog-agent/pkg/security/ebpf"
 	"github.com/DataDog/datadog-agent/pkg/security/ebpf/kernel"
 	"github.com/DataDog/datadog-agent/pkg/security/ebpf/probes"
+	"github.com/DataDog/datadog-agent/pkg/security/ebpf/probes/rawpacket"
 	"github.com/DataDog/datadog-agent/pkg/security/events"
 	"github.com/DataDog/datadog-agent/pkg/security/metrics"
 	pconfig "github.com/DataDog/datadog-agent/pkg/security/probe/config"
@@ -117,8 +118,9 @@ type EBPFProbe struct {
 	cancelFnc context.CancelFunc
 	wg        sync.WaitGroup
 
-	// TC Classifier
-	newTCNetDevices chan model.NetDevice
+	// TC Classifier & raw packets
+	newTCNetDevices           chan model.NetDevice
+	rawPacketFilterCollection *lib.Collection
 
 	// Ring
 	eventStream EventStream
@@ -197,6 +199,14 @@ func (p *EBPFProbe) selectFentryMode() {
 	p.useFentry = supported
 }
 
+func (p *EBPFProbe) isNetworkNotSupported() bool {
+	return p.kernelVersion.IsRH7Kernel()
+}
+
+func (p *EBPFProbe) isRawPacketNotSupported() bool {
+	return p.isNetworkNotSupported() || (p.kernelVersion.IsAmazonLinuxKernel() && p.kernelVersion.Code < kernel.Kernel4_15)
+}
+
 func (p *EBPFProbe) sanityChecks() error {
 	// make sure debugfs is mounted
 	if _, err := tracefs.Root(); err != nil {
@@ -207,11 +217,16 @@ func (p *EBPFProbe) sanityChecks() error {
 		return errors.New("eBPF not supported in lockdown `confidentiality` mode")
 	}
 
-	if p.config.Probe.NetworkEnabled && p.kernelVersion.IsRH7Kernel() {
-		seclog.Warnf("The network feature of CWS isn't supported on Centos7, setting event_monitoring_config.network.enabled to false")
+	if p.config.Probe.NetworkEnabled && p.isNetworkNotSupported() {
+		seclog.Warnf("the network feature of CWS isn't supported on this kernel version")
 		p.config.Probe.NetworkEnabled = false
 	}
 
+	if p.config.Probe.NetworkRawPacketEnabled && p.isRawPacketNotSupported() {
+		seclog.Warnf("the raw packet feature of CWS isn't supported on this kernel version")
+		p.config.Probe.NetworkRawPacketEnabled = false
+	}
+
 	return nil
 }
 
@@ -350,39 +365,87 @@ func (p *EBPFProbe) IsRuntimeCompiled() bool {
 	return p.runtimeCompiled
 }
 
-func (p *EBPFProbe) setupRawPacketProgs() error {
-	packetsMap, _, err := p.Manager.GetMap("packets")
+func (p *EBPFProbe) setupRawPacketProgs(rs *rules.RuleSet) error {
+	rawPacketEventMap, _, err := p.Manager.GetMap("raw_packet_event")
 	if err != nil {
 		return err
 	}
+	if rawPacketEventMap == nil {
+		return errors.New("unable to find `raw_packet_event` map")
+	}
+
-	routerMap, _, err := p.Manager.GetMap("classifier_router")
+	routerMap, _, err := p.Manager.GetMap("raw_packet_classifier_router")
 	if err != nil {
 		return err
 	}
+	if routerMap == nil {
+		return errors.New("unable to find `raw_packet_classifier_router` map")
+	}
 
-	progSpec, err := probes.GetRawPacketTCFilterProg(packetsMap.FD(), routerMap.FD())
+	var rawPacketFilters []rawpacket.Filter
+	for id, rule := range rs.GetRules() {
+		for _, field := range rule.GetFieldValues("packet.filter") {
+			rawPacketFilters = append(rawPacketFilters, rawpacket.Filter{
+				RuleID:    id,
+				BPFFilter: field.Value.(string),
+			})
+		}
+	}
+
+	// unload the previous collection, if any
+	if p.rawPacketFilterCollection != nil {
+		p.rawPacketFilterCollection.Close()
+		ddebpf.RemoveNameMappingsCollection(p.rawPacketFilterCollection)
+	}
+
+	// adapt the max instruction limit depending on the kernel version
+	opts := rawpacket.DefaultProgOpts
+	if p.kernelVersion.Code >= kernel.Kernel5_2 {
+		opts.MaxProgSize = 1_000_000
+	}
+
+	seclog.Debugf("generate raw packet filter programs with a limit of %d max instructions", opts.MaxProgSize)
+
+	// compile the filters
+	progSpecs, err := rawpacket.FiltersToProgramSpecs(rawPacketEventMap.FD(), routerMap.FD(), rawPacketFilters, opts)
 	if err != nil {
 		return err
 	}
 
+	if len(progSpecs) == 0 {
+		return nil
+	}
+
 	colSpec := lib.CollectionSpec{
-		Programs: map[string]*lib.ProgramSpec{
-			progSpec.Name: progSpec,
-		},
+		Programs: make(map[string]*lib.ProgramSpec),
+	}
+	for _, progSpec := range progSpecs {
+		colSpec.Programs[progSpec.Name] = progSpec
 	}
 
 	col, err := lib.NewCollection(&colSpec)
 	if err != nil {
 		return fmt.Errorf("failed to load program: %w", err)
 	}
+	p.rawPacketFilterCollection = col
+
+	// check that the sender program is not overridden. The default opts should avoid this.
+	if probes.TCRawPacketFilterKey+uint32(len(progSpecs)) >= probes.TCRawPacketParserSenderKey {
+		return fmt.Errorf("sender program overridden")
+	}
 
-	return p.Manager.UpdateTailCallRoutes(
-		manager.TailCallRoute{
+	// setup tail calls
+	for i, progSpec := range progSpecs {
+		if err := p.Manager.UpdateTailCallRoutes(manager.TailCallRoute{
 			Program:       col.Programs[progSpec.Name],
-			Key:           probes.TCRawPacketFilterKey,
-			ProgArrayName: "classifier_router",
-		},
-	)
+			Key:           probes.TCRawPacketFilterKey + uint32(i),
+			ProgArrayName: "raw_packet_classifier_router",
+		}); err != nil {
+			return err
+		}
+	}
+
+	return nil
 }
 
 // Setup the probe
@@ -402,12 +465,6 @@ func (p *EBPFProbe) Setup() error {
 
 	p.profileManagers.Start(p.ctx, &p.wg)
 
-	if p.probe.IsNetworkRawPacketEnabled() {
-		if err := p.setupRawPacketProgs(); err != nil {
-			return err
-		}
-	}
-
 	return nil
 }
 
@@ -1500,6 +1557,10 @@ func (p *EBPFProbe) Close() error {
 	// we wait until both the reorderer and the monitor are stopped
 	p.wg.Wait()
 
+	if p.rawPacketFilterCollection != nil {
+		p.rawPacketFilterCollection.Close()
+	}
+
 	ddebpf.RemoveNameMappings(p.Manager)
 	ebpftelemetry.UnregisterTelemetry(p.Manager)
 	// Stopping the manager will stop the perf map reader and unload eBPF programs
@@ -1720,6 +1781,12 @@ func (p *EBPFProbe) ApplyRuleSet(rs *rules.RuleSet) (*kfilters.ApplyRuleSetRepor
 		}
 	}
 
+	if p.probe.IsNetworkRawPacketEnabled() {
+		if err := p.setupRawPacketProgs(rs); err != nil {
+			seclog.Errorf("unable to load raw packet filter programs: %v", err)
+		}
+	}
+
 	// do not replay the snapshot if we are in the first rule set version, this was already done in the start method
 	if p.ruleSetVersion != 0 {
 		p.playSnapShotState.Store(true)
@@ -1990,15 +2057,17 @@ func NewEBPFProbe(probe *Probe, config *config.Config, opts Opts, telemetry tele
 	}
 
 	// tail calls
-	p.managerOptions.TailCallRouter = probes.AllTailRoutes(config.Probe.ERPCDentryResolutionEnabled, config.Probe.NetworkEnabled, useMmapableMaps)
+	p.managerOptions.TailCallRouter = probes.AllTailRoutes(config.Probe.ERPCDentryResolutionEnabled, config.Probe.NetworkEnabled, config.Probe.NetworkRawPacketEnabled, useMmapableMaps)
 
 	if !config.Probe.ERPCDentryResolutionEnabled || useMmapableMaps {
 		// exclude the programs that use the bpf_probe_write_user
helper p.managerOptions.ExcludedFunctions = probes.AllBPFProbeWriteUserProgramFunctions() } - if !config.Probe.NetworkEnabled { - // prevent all TC classifiers from loading + // prevent some TC classifiers from loading + if !p.config.Probe.NetworkEnabled { p.managerOptions.ExcludedFunctions = append(p.managerOptions.ExcludedFunctions, probes.GetAllTCProgramFunctions()...) + } else if !p.config.Probe.NetworkRawPacketEnabled { + p.managerOptions.ExcludedFunctions = append(p.managerOptions.ExcludedFunctions, probes.GetRawPacketTCProgramFunctions()...) } if p.useFentry { diff --git a/pkg/security/secl/model/packet_filter_unix.go b/pkg/security/secl/model/oo_packet_filter_unix.go similarity index 83% rename from pkg/security/secl/model/packet_filter_unix.go rename to pkg/security/secl/model/oo_packet_filter_unix.go index af1329d84040f..18f3d72bec50a 100644 --- a/pkg/security/secl/model/packet_filter_unix.go +++ b/pkg/security/secl/model/oo_packet_filter_unix.go @@ -32,7 +32,7 @@ func errorNonStaticPacketFilterField(a eval.Evaluator, b eval.Evaluator) error { return fmt.Errorf("field `%s` only supports matching a single static value", field) } -func newPacketFilterEvaluator(field string, value string) (*eval.BoolEvaluator, error) { +func newPacketFilterEvaluator(field string, value string, state *eval.State) (*eval.BoolEvaluator, error) { switch field { case "packet.filter": captureLength := 256 // sizeof(struct raw_packet_t.data) @@ -40,6 +40,12 @@ func newPacketFilterEvaluator(field string, value string) (*eval.BoolEvaluator, if err != nil { return nil, fmt.Errorf("failed to compile packet filter `%s` on field `%s`: %v", value, field, err) } + + // needed to track filter values and to apply tc filters + if err := state.UpdateFieldValues(field, eval.FieldValue{Value: value, Type: eval.ScalarValueType}); err != nil { + return nil, err + } + return &eval.BoolEvaluator{ EvalFnc: func(ctx *eval.Context) bool { ev := ctx.Event.(*Event) @@ -53,11 +59,11 @@ func newPacketFilterEvaluator(field string, value string) (*eval.BoolEvaluator, // PacketFilterMatching is a set of overrides for packet filter fields, it only supports matching a single static value var PacketFilterMatching = &eval.OpOverrides{ - StringEquals: func(a *eval.StringEvaluator, b *eval.StringEvaluator, _ *eval.State) (*eval.BoolEvaluator, error) { + StringEquals: func(a *eval.StringEvaluator, b *eval.StringEvaluator, state *eval.State) (*eval.BoolEvaluator, error) { if a.IsStatic() { - return newPacketFilterEvaluator(b.GetField(), a.Value) + return newPacketFilterEvaluator(b.GetField(), a.Value, state) } else if b.IsStatic() { - return newPacketFilterEvaluator(a.GetField(), b.Value) + return newPacketFilterEvaluator(a.GetField(), b.Value, state) } return nil, errorNonStaticPacketFilterField(a, b) }, diff --git a/pkg/security/secl/model/packet_filter_unsupported.go b/pkg/security/secl/model/oo_packet_filter_unsupported.go similarity index 100% rename from pkg/security/secl/model/packet_filter_unsupported.go rename to pkg/security/secl/model/oo_packet_filter_unsupported.go diff --git a/pkg/security/tests/network_test.go b/pkg/security/tests/network_test.go index 0f2e27ad5baf4..e094e20ba528f 100644 --- a/pkg/security/tests/network_test.go +++ b/pkg/security/tests/network_test.go @@ -82,7 +82,7 @@ func TestRawPacket(t *testing.T) { checkKernelCompatibility(t, "RHEL, SLES, SUSE and Oracle kernels", func(kv *kernel.Version) bool { // TODO: Oracle because we are missing offsets // OpenSUSE distributions are missing the dummy kernel module - 
return kv.IsRH7Kernel() || kv.IsOracleUEKKernel() || kv.IsSLESKernel() || kv.IsOpenSUSELeapKernel() + return kv.IsRH7Kernel() || kv.IsOracleUEKKernel() || kv.IsSLESKernel() || kv.IsOpenSUSELeapKernel() || (kv.IsAmazonLinuxKernel() && kv.Code < kernel.Kernel4_15) }) if testEnvironment != DockerEnvironment && !env.IsContainerized() { diff --git a/tasks/security_agent.py b/tasks/security_agent.py index 6b5f20ac20092..3d0dc7e752d95 100644 --- a/tasks/security_agent.py +++ b/tasks/security_agent.py @@ -727,12 +727,23 @@ def e2e_prepare_win(ctx): @task -def run_ebpf_unit_tests(ctx, verbose=False, trace=False): +def run_ebpf_unit_tests(ctx, verbose=False, trace=False, testflags=''): build_cws_object_files( ctx, major_version='7', kernel_release=None, with_unit_test=True, bundle_ebpf=True, arch=CURRENT_ARCH ) - flags = '-tags ebpf_bindata' + env = {"CGO_ENABLED": "1"} + + build_libpcap(ctx) + cgo_flags = get_libpcap_cgo_flags(ctx) + # append libpcap cgo-related environment variables to any existing ones + for k, v in cgo_flags.items(): + if k in env: + env[k] += f" {v}" + else: + env[k] = v + + flags = '-tags ebpf_bindata,cgo,pcap' if verbose: flags += " -test.v" @@ -740,7 +751,7 @@ def run_ebpf_unit_tests(ctx, verbose=False, trace=False): if trace: args += " -trace" - ctx.run(f"go test {flags} ./pkg/security/ebpf/tests/... {args}") + ctx.run(f"go test {flags} ./pkg/security/ebpf/tests/... {args} {testflags}", env=env) @task From a608272947ad5c278665018ccee2b3bdefd406a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hugo=20Beauz=C3=A9e-Luyssen?= Date: Mon, 25 Nov 2024 15:13:36 +0100 Subject: [PATCH 02/12] CI: remove constant --major-version parameter (#31286) --- .gitlab/binary_build/cluster_agent_cloudfoundry.yml | 2 +- .gitlab/binary_build/linux.yml | 12 ++++++------ .gitlab/deploy_containers/deploy_containers_a7.yml | 4 ++-- .../deploy_cws_instrumentation.yml | 2 +- .gitlab/deploy_dca/deploy_dca.yml | 2 +- .gitlab/deploy_packages/nix.yml | 4 ++-- .gitlab/e2e/e2e.yml | 2 +- .gitlab/e2e_install_packages/common.yml | 4 ++-- .gitlab/e2e_install_packages/suse.yml | 2 +- .gitlab/e2e_install_packages/ubuntu.yml | 4 ++-- .gitlab/e2e_install_packages/windows.yml | 8 ++++---- .gitlab/kitchen_testing/common.yml | 7 +++---- .gitlab/package_build/dmg.yml | 4 +--- .gitlab/package_build/heroku.yml | 3 +-- .gitlab/package_build/installer.yml | 6 +----- .gitlab/package_build/linux.yml | 8 +++----- .gitlab/package_build/windows.yml | 6 ++---- .gitlab/packaging/deb.yml | 5 ++--- .gitlab/packaging/oci.yml | 6 +++--- .gitlab/packaging/rpm.yml | 3 +-- .gitlab/trigger_release/trigger_release.yml | 2 +- 21 files changed, 41 insertions(+), 55 deletions(-) diff --git a/.gitlab/binary_build/cluster_agent_cloudfoundry.yml b/.gitlab/binary_build/cluster_agent_cloudfoundry.yml index 02cbb2d572914..1bfce1afa4923 100644 --- a/.gitlab/binary_build/cluster_agent_cloudfoundry.yml +++ b/.gitlab/binary_build/cluster_agent_cloudfoundry.yml @@ -21,5 +21,5 @@ cluster_agent_cloudfoundry-build_amd64: - inv -e cluster-agent-cloudfoundry.build - cd $CI_PROJECT_DIR/$CLUSTER_AGENT_CLOUDFOUNDRY_BINARIES_DIR - mkdir -p $OMNIBUS_PACKAGE_DIR - - PACKAGE_VERSION=$(inv agent.version --url-safe --major-version 7) || exit $? + - PACKAGE_VERSION=$(inv agent.version --url-safe) || exit $? 
- tar cf $OMNIBUS_PACKAGE_DIR/datadog-cluster-agent-cloudfoundry-$PACKAGE_VERSION-$ARCH.tar.xz datadog-cluster-agent-cloudfoundry diff --git a/.gitlab/binary_build/linux.yml b/.gitlab/binary_build/linux.yml index b9dd1d9f69439..6973d6b9d4517 100644 --- a/.gitlab/binary_build/linux.yml +++ b/.gitlab/binary_build/linux.yml @@ -10,7 +10,7 @@ build_dogstatsd_static-binary_x64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e dogstatsd.build --static --major-version 7 + - inv -e dogstatsd.build --static - $S3_CP_CMD $CI_PROJECT_DIR/$STATIC_BINARIES_DIR/dogstatsd $S3_ARTIFACTS_URI/static/dogstatsd.$ARCH build_dogstatsd_static-binary_arm64: @@ -27,7 +27,7 @@ build_dogstatsd_static-binary_arm64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e dogstatsd.build --static --major-version 7 + - inv -e dogstatsd.build --static - $S3_CP_CMD $CI_PROJECT_DIR/$STATIC_BINARIES_DIR/dogstatsd $S3_ARTIFACTS_URI/static/dogstatsd.$ARCH build_dogstatsd-binary_x64: @@ -42,7 +42,7 @@ build_dogstatsd-binary_x64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e dogstatsd.build --major-version 7 + - inv -e dogstatsd.build - $S3_CP_CMD $CI_PROJECT_DIR/$DOGSTATSD_BINARIES_DIR/dogstatsd $S3_ARTIFACTS_URI/dogstatsd/dogstatsd build_dogstatsd-binary_arm64: @@ -59,7 +59,7 @@ build_dogstatsd-binary_arm64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e dogstatsd.build --major-version 7 + - inv -e dogstatsd.build - $S3_CP_CMD $CI_PROJECT_DIR/$DOGSTATSD_BINARIES_DIR/dogstatsd $S3_ARTIFACTS_URI/dogstatsd/dogstatsd.$ARCH # IoT Agent builds to make sure the build is not broken because of build flags @@ -77,7 +77,7 @@ build_iot_agent-binary_x64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e agent.build --flavor iot --major-version 7 + - inv -e agent.build --flavor iot - $S3_CP_CMD $CI_PROJECT_DIR/$AGENT_BINARIES_DIR/agent $S3_ARTIFACTS_URI/iot/agent build_iot_agent-binary_arm64: @@ -93,5 +93,5 @@ build_iot_agent-binary_arm64: - !reference [.retrieve_linux_go_deps] script: - inv check-go-version - - inv -e agent.build --flavor iot --major-version 7 + - inv -e agent.build --flavor iot diff --git a/.gitlab/deploy_containers/deploy_containers_a7.yml b/.gitlab/deploy_containers/deploy_containers_a7.yml index 2d18d21dcf1ba..62c845a825aae 100644 --- a/.gitlab/deploy_containers/deploy_containers_a7.yml +++ b/.gitlab/deploy_containers/deploy_containers_a7.yml @@ -25,7 +25,7 @@ include: stage: deploy_containers dependencies: [] before_script: - - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --major-version 7 --url-safe --pipeline-id $PARENT_PIPELINE_ID)" || exit $?; fi + - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --url-safe --pipeline-id $PARENT_PIPELINE_ID)" || exit $?; fi - export IMG_BASE_SRC="${SRC_AGENT}:v${PARENT_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}" - export IMG_LINUX_SOURCES="${IMG_BASE_SRC}-7${JMX}-amd64,${IMG_BASE_SRC}-7${JMX}-arm64" - export IMG_WINDOWS_SOURCES="${IMG_BASE_SRC}-7${JMX}-win1809${FLAVOR}-amd64,${IMG_BASE_SRC}-7${JMX}-winltsc2022${FLAVOR}-amd64" @@ -132,7 +132,7 @@ deploy_containers-dogstatsd: !reference [.manual_on_deploy_auto_on_rc] dependencies: [] before_script: - - VERSION="$(inv agent.version --major-version 7 --url-safe --pipeline-id $PARENT_PIPELINE_ID)" || exit $? + - VERSION="$(inv agent.version --url-safe --pipeline-id $PARENT_PIPELINE_ID)" || exit $? 
- export IMG_SOURCES="${SRC_DSD}:v${PARENT_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-amd64,${SRC_DSD}:v${PARENT_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}-arm64" - export IMG_DESTINATIONS="${DSD_REPOSITORY}:${VERSION}" diff --git a/.gitlab/deploy_cws_instrumentation/deploy_cws_instrumentation.yml b/.gitlab/deploy_cws_instrumentation/deploy_cws_instrumentation.yml index 330c60bb239ea..03dfa9440f353 100644 --- a/.gitlab/deploy_cws_instrumentation/deploy_cws_instrumentation.yml +++ b/.gitlab/deploy_cws_instrumentation/deploy_cws_instrumentation.yml @@ -11,7 +11,7 @@ include: stage: deploy_cws_instrumentation dependencies: [] before_script: - - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --major-version 7 --url-safe)" || exit $?; fi + - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --url-safe)" || exit $?; fi - if [[ "$CWS_INSTRUMENTATION_REPOSITORY" == "" ]]; then export CWS_INSTRUMENTATION_REPOSITORY="cws-instrumentation"; fi - export IMG_BASE_SRC="${SRC_CWS_INSTRUMENTATION}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}" - export IMG_SOURCES="${IMG_BASE_SRC}-amd64,${IMG_BASE_SRC}-arm64" diff --git a/.gitlab/deploy_dca/deploy_dca.yml b/.gitlab/deploy_dca/deploy_dca.yml index 63ef1ed56d74c..fcd7b471025c8 100644 --- a/.gitlab/deploy_dca/deploy_dca.yml +++ b/.gitlab/deploy_dca/deploy_dca.yml @@ -15,7 +15,7 @@ include: - job: "docker_build_cluster_agent_arm64" artifacts: false before_script: - - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --major-version 7 --url-safe)" || exit $?; fi + - if [[ "$VERSION" == "" ]]; then VERSION="$(inv agent.version --url-safe)" || exit $?; fi - if [[ "$CLUSTER_AGENT_REPOSITORY" == "" ]]; then export CLUSTER_AGENT_REPOSITORY="cluster-agent"; fi - export IMG_BASE_SRC="${SRC_DCA}:v${CI_PIPELINE_ID}-${CI_COMMIT_SHORT_SHA}" - export IMG_SOURCES="${IMG_BASE_SRC}-amd64,${IMG_BASE_SRC}-arm64" diff --git a/.gitlab/deploy_packages/nix.yml b/.gitlab/deploy_packages/nix.yml index 2e7c06ef6e36e..8f16b27a38b55 100644 --- a/.gitlab/deploy_packages/nix.yml +++ b/.gitlab/deploy_packages/nix.yml @@ -132,7 +132,7 @@ deploy_staging_dsd: needs: ["build_dogstatsd-binary_x64"] script: - $S3_CP_CMD $S3_ARTIFACTS_URI/dogstatsd/dogstatsd ./dogstatsd - - PACKAGE_VERSION=$(inv agent.version --url-safe --major-version 7) || exit $? + - PACKAGE_VERSION=$(inv agent.version --url-safe) || exit $? - $S3_CP_CMD ./dogstatsd $S3_DSD6_URI/linux/dogstatsd-$PACKAGE_VERSION --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers full=id=3a6e02b08553fd157ae3fb918945dd1eaae5a1aa818940381ef07a430cf25732 # deploy iot-agent x64 binary to staging bucket @@ -145,7 +145,7 @@ deploy_staging_iot_agent: needs: ["build_iot_agent-binary_x64"] script: - $S3_CP_CMD $S3_ARTIFACTS_URI/iot/agent ./agent - - PACKAGE_VERSION=$(inv agent.version --url-safe --major-version 7) || exit $? + - PACKAGE_VERSION=$(inv agent.version --url-safe) || exit $? 
- $S3_CP_CMD ./agent $S3_DSD6_URI/linux/iot/agent-$PACKAGE_VERSION --grants read=uri=http://acs.amazonaws.com/groups/global/AllUsers full=id=3a6e02b08553fd157ae3fb918945dd1eaae5a1aa818940381ef07a430cf25732 # Datadog Installer diff --git a/.gitlab/e2e/e2e.yml b/.gitlab/e2e/e2e.yml index 11e95e75e5580..b48c9d5c4d41c 100644 --- a/.gitlab/e2e/e2e.yml +++ b/.gitlab/e2e/e2e.yml @@ -410,7 +410,7 @@ new-e2e-installer-windows: # Must run before new_e2e_template changes the aws profile # Note: this is similar to the WINDOWS_AGENT_VERSION in new-e2e_windows_msi but this job is running cross platforms # Note 2: new_e2e_template does not define AGENT_MAJOR_VERSION, so define it as 7 below. - - CURRENT_AGENT_VERSION=$(invoke agent.version --major-version 7) || exit $?; export CURRENT_AGENT_VERSION + - CURRENT_AGENT_VERSION=$(invoke agent.version) || exit $?; export CURRENT_AGENT_VERSION - export STABLE_AGENT_VERSION_PACKAGE=$(curl -sS https://hub.docker.com/v2/namespaces/datadog/repositories/agent-package/tags | jq -r '.results[] | .name' | sort | tail -n 2 | head -n 1) - export STABLE_INSTALLER_VERSION_PACKAGE=$(curl -sS https://hub.docker.com/v2/namespaces/datadog/repositories/installer-package/tags | jq -r '.results[] | .name' | sort | tail -n 2 | head -n 1) - !reference [.new_e2e_template, before_script] diff --git a/.gitlab/e2e_install_packages/common.yml b/.gitlab/e2e_install_packages/common.yml index 64abe046a1135..35b2b0b1f4619 100644 --- a/.gitlab/e2e_install_packages/common.yml +++ b/.gitlab/e2e_install_packages/common.yml @@ -8,14 +8,14 @@ variables: TARGETS: ./tests/agent-platform/install-script TEAM: agent-delivery - EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --major-version $AGENT_MAJOR_VERSION --arch $E2E_ARCH --flavor $FLAVOR --no-verbose + EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR --no-verbose .new-e2e_step_by_step: stage: e2e_install_packages variables: TARGETS: ./tests/agent-platform/step-by-step TEAM: agent-delivery - EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --major-version $AGENT_MAJOR_VERSION --arch $E2E_ARCH --flavor $FLAVOR + EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR .new-e2e_script_upgrade7: stage: e2e_install_packages diff --git a/.gitlab/e2e_install_packages/suse.yml b/.gitlab/e2e_install_packages/suse.yml index ced5856448a5b..fd25763ed2363 100644 --- a/.gitlab/e2e_install_packages/suse.yml +++ b/.gitlab/e2e_install_packages/suse.yml @@ -6,7 +6,7 @@ variables: TARGETS: ./tests/agent-platform/install-script TEAM: agent-delivery - EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --major-version $AGENT_MAJOR_VERSION --arch $E2E_ARCH --flavor $FLAVOR + EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR .new-e2e_suse_a7_x86_64: variables: diff --git a/.gitlab/e2e_install_packages/ubuntu.yml b/.gitlab/e2e_install_packages/ubuntu.yml index a7678a9f6ee69..8ae1604230d75 100644 --- a/.gitlab/e2e_install_packages/ubuntu.yml +++ b/.gitlab/e2e_install_packages/ubuntu.yml @@ -7,13 +7,13 @@ variables: TARGETS: ./tests/agent-platform/install-script TEAM: agent-delivery - 
EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --major-version $AGENT_MAJOR_VERSION --arch $E2E_ARCH --flavor $FLAVOR + EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR .new-e2e_step_by_step: variables: TARGETS: ./tests/agent-platform/step-by-step TEAM: agent-delivery - EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --major-version $AGENT_MAJOR_VERSION --arch $E2E_ARCH --flavor $FLAVOR + EXTRA_PARAMS: --osversion $E2E_OSVERS --platform $E2E_PLATFORM --cws-supported-osversion $E2E_CWS_SUPPORTED_OSVERS --arch $E2E_ARCH --flavor $FLAVOR .new-e2e_ubuntu_a7_x86_64: variables: diff --git a/.gitlab/e2e_install_packages/windows.yml b/.gitlab/e2e_install_packages/windows.yml index a5585774c9aa5..223812e0e0c4e 100644 --- a/.gitlab/e2e_install_packages/windows.yml +++ b/.gitlab/e2e_install_packages/windows.yml @@ -10,11 +10,11 @@ before_script: # WINDOWS_AGENT_VERSION is used to verify the installed agent version # Must run before new_e2e_template changes the aws profile - - WINDOWS_AGENT_VERSION=$(invoke agent.version --major-version $AGENT_MAJOR_VERSION) || exit $?; export WINDOWS_AGENT_VERSION + - WINDOWS_AGENT_VERSION=$(invoke agent.version) || exit $?; export WINDOWS_AGENT_VERSION - !reference [.new_e2e_template, before_script] script: # LAST_STABLE_VERSION is used for upgrade test - - export LAST_STABLE_VERSION=$(invoke release.get-release-json-value "last_stable::$AGENT_MAJOR_VERSION") + - export LAST_STABLE_VERSION=$(invoke release.get-release-json-value "last_stable::7") - !reference [.new_e2e_template, script] .new-e2e_windows_domain_test: @@ -24,11 +24,11 @@ before_script: # WINDOWS_AGENT_VERSION is used to verify the installed agent version # Must run before new_e2e_template changes the aws profile - - WINDOWS_AGENT_VERSION=$(invoke agent.version --major-version $AGENT_MAJOR_VERSION) || exit $?; export WINDOWS_AGENT_VERSION + - WINDOWS_AGENT_VERSION=$(invoke agent.version) || exit $?; export WINDOWS_AGENT_VERSION - !reference [.new_e2e_template, before_script] script: # LAST_STABLE_VERSION is used for upgrade test - - export LAST_STABLE_VERSION=$(invoke release.get-release-json-value "last_stable::$AGENT_MAJOR_VERSION") + - export LAST_STABLE_VERSION=$(invoke release.get-release-json-value "last_stable::7") - !reference [.new_e2e_template, script] .new-e2e_windows_installer_v7_tests: diff --git a/.gitlab/kitchen_testing/common.yml b/.gitlab/kitchen_testing/common.yml index 2e8a593a1b96d..e99f4b9d42f00 100644 --- a/.gitlab/kitchen_testing/common.yml +++ b/.gitlab/kitchen_testing/common.yml @@ -81,7 +81,6 @@ rules: !reference [.on_kitchen_tests] variables: - AGENT_MAJOR_VERSION: 7 DD_PIPELINE_ID: $CI_PIPELINE_ID-a7 @@ -90,16 +89,16 @@ .kitchen_test_chef: script: - - tasks/run-test-kitchen.sh chef-test $AGENT_MAJOR_VERSION + - tasks/run-test-kitchen.sh chef-test 7 .kitchen_test_upgrade5: script: - - tasks/run-test-kitchen.sh upgrade5-test $AGENT_MAJOR_VERSION + - tasks/run-test-kitchen.sh upgrade5-test 7 .kitchen_test_upgrade7: script: - export LAST_STABLE_VERSION=$(cd ../.. 
&& invoke release.get-release-json-value "last_stable::7") - - tasks/run-test-kitchen.sh upgrade7-test $AGENT_MAJOR_VERSION + - tasks/run-test-kitchen.sh upgrade7-test 7 # Kitchen: Agent flavor # ------------------------------- diff --git a/.gitlab/package_build/dmg.yml b/.gitlab/package_build/dmg.yml index e676e6255ecaa..fd800a910ab65 100644 --- a/.gitlab/package_build/dmg.yml +++ b/.gitlab/package_build/dmg.yml @@ -9,7 +9,7 @@ - $S3_CP_CMD $S3_ARTIFACTS_URI/agent-version.cache . - export VERSION_CACHE_CONTENT=$(cat agent-version.cache | base64 -) - python3 -m pip install -r tasks/libs/requirements-github.txt - - inv -e github.trigger-macos --workflow-type "build" --datadog-agent-ref "$CI_COMMIT_SHA" --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --destination "$OMNIBUS_PACKAGE_DIR" --version-cache "$VERSION_CACHE_CONTENT" --integrations-core-ref "$INTEGRATIONS_CORE_VERSION" + - inv -e github.trigger-macos --workflow-type "build" --datadog-agent-ref "$CI_COMMIT_SHA" --release-version "$RELEASE_VERSION" --destination "$OMNIBUS_PACKAGE_DIR" --version-cache "$VERSION_CACHE_CONTENT" --integrations-core-ref "$INTEGRATIONS_CORE_VERSION" - !reference [.upload_sbom_artifacts] timeout: 3h # MacOS builds can take 1h~2h, increase the timeout to avoid timeout flakes artifacts: @@ -29,8 +29,6 @@ agent_dmg-x64-a7: image: registry.ddbuild.io/ci/datadog-agent-buildimages/deb_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES tags: ["arch:amd64"] needs: ["go_mod_tidy_check"] - variables: - AGENT_MAJOR_VERSION: 7 timeout: 6h before_script: - export RELEASE_VERSION=$RELEASE_VERSION_7 diff --git a/.gitlab/package_build/heroku.yml b/.gitlab/package_build/heroku.yml index 1231ac0eed8d4..07fa1ad5c77b5 100644 --- a/.gitlab/package_build/heroku.yml +++ b/.gitlab/package_build/heroku.yml @@ -28,7 +28,7 @@ - $S3_CP_CMD $S3_PERMANENT_ARTIFACTS_URI/llc-$CLANG_LLVM_VER.${PACKAGE_ARCH} /tmp/system-probe/llc-bpf - cp $CI_PROJECT_DIR/minimized-btfs.tar.xz /tmp/system-probe/minimized-btfs.tar.xz - chmod 0744 /tmp/system-probe/clang-bpf /tmp/system-probe/llc-bpf - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor heroku + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor heroku - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.lint_linux_packages] - !reference [.upload_sbom_artifacts] @@ -53,4 +53,3 @@ agent_heroku_deb-x64-a7: variables: DESTINATION_DBG_DEB: "datadog-heroku-agent-dbg_7_amd64.deb" RELEASE_VERSION: $RELEASE_VERSION_7 - AGENT_MAJOR_VERSION: 7 diff --git a/.gitlab/package_build/installer.yml b/.gitlab/package_build/installer.yml index 7331712a9bdf0..8e253c48b6df0 100644 --- a/.gitlab/package_build/installer.yml +++ b/.gitlab/package_build/installer.yml @@ -21,7 +21,7 @@ - chmod 0744 /tmp/system-probe/clang-bpf /tmp/system-probe/llc-bpf # NOTE: for now, we consider "ociru" to be a "redhat_target" in omnibus/lib/ostools.rb # if we ever start building on a different platform, that might need to change - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe 
--host-distribution=ociru --install-directory="$INSTALL_DIR" + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --host-distribution=ociru --install-directory="$INSTALL_DIR" - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] variables: @@ -51,7 +51,6 @@ datadog-agent-oci-x64-a7: "generate_minimized_btfs_x64", ] variables: - AGENT_MAJOR_VERSION: 7 PACKAGE_ARCH: amd64 DESTINATION_OCI: "datadog-agent-7-remote-updater-amd64.tar.xz" DD_CC: 'x86_64-unknown-linux-gnu-gcc' @@ -76,7 +75,6 @@ datadog-agent-oci-arm64-a7: "generate_minimized_btfs_arm64", ] variables: - AGENT_MAJOR_VERSION: 7 PACKAGE_ARCH: arm64 DESTINATION_OCI: "datadog-agent-7-remote-updater-arm64.tar.xz" DD_CC: 'aarch64-unknown-linux-gnu-gcc' @@ -97,7 +95,6 @@ bootstrapper_build: tags: ["arch:amd64"] needs: ["go_mod_tidy_check", "go_deps"] variables: - AGENT_MAJOR_VERSION: 7 KUBERNETES_CPU_REQUEST: 8 KUBERNETES_MEMORY_REQUEST: "16Gi" KUBERNETES_MEMORY_LIMIT: "16Gi" @@ -132,7 +129,6 @@ bootstrapper_build: - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] variables: - AGENT_MAJOR_VERSION: 7 KUBERNETES_CPU_REQUEST: 8 KUBERNETES_MEMORY_REQUEST: "16Gi" KUBERNETES_MEMORY_LIMIT: "16Gi" diff --git a/.gitlab/package_build/linux.yml b/.gitlab/package_build/linux.yml index 6528757b0ad2c..d129676f3bc3c 100644 --- a/.gitlab/package_build/linux.yml +++ b/.gitlab/package_build/linux.yml @@ -12,7 +12,7 @@ - $S3_CP_CMD $S3_PERMANENT_ARTIFACTS_URI/llc-$CLANG_LLVM_VER.${PACKAGE_ARCH} /tmp/system-probe/llc-bpf - cp $CI_PROJECT_DIR/minimized-btfs.tar.xz /tmp/system-probe/minimized-btfs.tar.xz - chmod 0744 /tmp/system-probe/clang-bpf /tmp/system-probe/llc-bpf - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor "$FLAVOR" --config-directory "$CONFIG_DIR" --install-directory "$INSTALL_DIR" + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor "$FLAVOR" --config-directory "$CONFIG_DIR" --install-directory "$INSTALL_DIR" - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] @@ -57,14 +57,12 @@ .agent_7_build: variables: - AGENT_MAJOR_VERSION: 7 FLAVOR: base before_script: - export RELEASE_VERSION=$RELEASE_VERSION_7 .ot_agent_7_build: variables: - AGENT_MAJOR_VERSION: 7 FLAVOR: ot before_script: - export RELEASE_VERSION=$RELEASE_VERSION_7 @@ -94,7 +92,7 @@ datadog-ot-agent-7-arm64: - !reference [.cache_omnibus_ruby_deps, setup] # remove artifacts from previous pipelines that may come from the cache - rm -rf $OMNIBUS_PACKAGE_DIR/* - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version 7 --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor iot + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --system-probe-bin=/tmp/system-probe --flavor iot - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] before_script: @@ -138,7 +136,7 @@ iot-agent-armhf: - !reference [.cache_omnibus_ruby_deps, setup] # remove artifacts from previous 
pipelines that may come from the cache - rm -rf $OMNIBUS_PACKAGE_DIR/* - - inv -e omnibus.build --release-version $RELEASE_VERSION_7 --major-version 7 --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --target-project dogstatsd + - inv -e omnibus.build --release-version $RELEASE_VERSION_7 --base-dir $OMNIBUS_BASE_DIR ${USE_S3_CACHING} --skip-deps --go-mod-cache="$GOPATH/pkg/mod" --target-project dogstatsd - ls -la $OMNIBUS_PACKAGE_DIR - !reference [.upload_sbom_artifacts] variables: diff --git a/.gitlab/package_build/windows.yml b/.gitlab/package_build/windows.yml index 2985d1da3a413..f1e8c2a47f34a 100644 --- a/.gitlab/package_build/windows.yml +++ b/.gitlab/package_build/windows.yml @@ -21,7 +21,7 @@ -e OMNIBUS_TARGET=${OMNIBUS_TARGET} -e WINDOWS_BUILDER=true -e RELEASE_VERSION="$RELEASE_VERSION" - -e MAJOR_VERSION="$AGENT_MAJOR_VERSION" + -e MAJOR_VERSION="7" -e INTEGRATIONS_CORE_VERSION="$INTEGRATIONS_CORE_VERSION" -e GOMODCACHE="c:\modcache" -e AWS_NETWORKING=true @@ -60,7 +60,6 @@ windows_msi_and_bosh_zip_x64-a7: - when: on_success variables: ARCH: "x64" - AGENT_MAJOR_VERSION: 7 before_script: - set RELEASE_VERSION $RELEASE_VERSION_7 timeout: 2h @@ -75,7 +74,6 @@ windows_zip_agent_binaries_x64-a7: needs: ["go_mod_tidy_check", "go_deps"] variables: ARCH: "x64" - AGENT_MAJOR_VERSION: 7 OMNIBUS_TARGET: agent_binaries before_script: - set RELEASE_VERSION $RELEASE_VERSION_7 @@ -96,7 +94,7 @@ windows_zip_agent_binaries_x64-a7: -e OMNIBUS_TARGET=${OMNIBUS_TARGET} -e WINDOWS_BUILDER=true -e RELEASE_VERSION="$RELEASE_VERSION" - -e MAJOR_VERSION="$AGENT_MAJOR_VERSION" + -e MAJOR_VERSION="7" -e INTEGRATIONS_CORE_VERSION="$INTEGRATIONS_CORE_VERSION" -e GOMODCACHE="c:\modcache" -e AWS_NETWORKING=true diff --git a/.gitlab/packaging/deb.yml b/.gitlab/packaging/deb.yml index 55f7026d8df79..a133b70d749a1 100644 --- a/.gitlab/packaging/deb.yml +++ b/.gitlab/packaging/deb.yml @@ -5,7 +5,7 @@ - !reference [.cache_omnibus_ruby_deps, setup] - echo "About to package for $RELEASE_VERSION" - !reference [.setup_deb_signing_key] - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project ${DD_PROJECT} ${OMNIBUS_EXTRA_ARGS} + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project ${DD_PROJECT} ${OMNIBUS_EXTRA_ARGS} - !reference [.lint_linux_packages] artifacts: expire_in: 2 weeks @@ -37,7 +37,6 @@ .package_deb_agent_7: variables: RELEASE_VERSION: $RELEASE_VERSION_7 - AGENT_MAJOR_VERSION: 7 agent_deb-x64-a7: extends: [.package_deb_common, .package_deb_x86, .package_deb_agent_7] @@ -63,7 +62,7 @@ agent_deb-arm64-a7: - !reference [.cache_omnibus_ruby_deps, setup] - echo "About to package for $RELEASE_VERSION" - !reference [.setup_deb_signing_key] - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project ${DD_PROJECT} --flavor ot ${OMNIBUS_EXTRA_ARGS} + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project ${DD_PROJECT} --flavor ot ${OMNIBUS_EXTRA_ARGS} - !reference [.lint_linux_packages] ot_agent_deb-x64-a7: diff --git a/.gitlab/packaging/oci.yml b/.gitlab/packaging/oci.yml index 6848d24fc7080..e96ece68316fc 100644 --- a/.gitlab/packaging/oci.yml +++ b/.gitlab/packaging/oci.yml @@ -6,7 +6,7 @@ image: 
registry.ddbuild.io/ci/datadog-agent-buildimages/deb_x64$DATADOG_AGENT_BUILDIMAGES_SUFFIX:$DATADOG_AGENT_BUILDIMAGES tags: ["arch:amd64"] before_script: - - PACKAGE_VERSION="$(inv agent.version --url-safe --major-version 7)-1" || exit $? + - PACKAGE_VERSION="$(inv agent.version --url-safe)-1" || exit $? - export INSTALL_DIR=/opt/datadog-packages/${OCI_PRODUCT}/${PACKAGE_VERSION} variables: KUBERNETES_CPU_REQUEST: 16 @@ -17,8 +17,8 @@ - rm -f $OMNIBUS_PACKAGE_DIR/*-dbg-*.tar.xz - ls -l $OMNIBUS_PACKAGE_DIR # Python 3.12 changes default behavior how packages are installed. - # In particular, --break-system-packages command line option is - # required to use the old behavior or use a virtual env. https://github.com/actions/runner-images/issues/8615 + # In particular, --break-system-packages command line option is + # required to use the old behavior or use a virtual env. https://github.com/actions/runner-images/issues/8615 - python3 -m pip install -r tasks/libs/requirements-github.txt --break-system-packages - set +x - git config --global url."https://gitlab-ci-token:${CI_JOB_TOKEN}@gitlab.ddbuild.io/DataDog/".insteadOf "https://github.com/DataDog/" diff --git a/.gitlab/packaging/rpm.yml b/.gitlab/packaging/rpm.yml index f1c147ef52e79..9444f7062ad89 100644 --- a/.gitlab/packaging/rpm.yml +++ b/.gitlab/packaging/rpm.yml @@ -11,7 +11,7 @@ - printf -- "$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $RPM_GPG_KEY)" | gpg --import --batch - EXIT="${PIPESTATUS[0]}"; if [ $EXIT -ne 0 ]; then echo "Unable to locate credentials needs gitlab runner restart"; exit $EXIT; fi - RPM_SIGNING_PASSPHRASE=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $RPM_SIGNING_PASSPHRASE) || exit $?; export RPM_SIGNING_PASSPHRASE - - inv -e omnibus.build --release-version "$RELEASE_VERSION" --major-version "$AGENT_MAJOR_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project=${DD_PROJECT} ${OMNIBUS_EXTRA_ARGS} + - inv -e omnibus.build --release-version "$RELEASE_VERSION" --base-dir $OMNIBUS_BASE_DIR --skip-deps --target-project=${DD_PROJECT} ${OMNIBUS_EXTRA_ARGS} - ls -la $OMNIBUS_PACKAGE_DIR/ - !reference [.lint_linux_packages] stage: packaging @@ -45,7 +45,6 @@ .package_rpm_agent_7: variables: RELEASE_VERSION: $RELEASE_VERSION_7 - AGENT_MAJOR_VERSION: 7 .package_suse_rpm_common: extends: .package_rpm_common diff --git a/.gitlab/trigger_release/trigger_release.yml b/.gitlab/trigger_release/trigger_release.yml index 67cd717f9e1d5..69f8a53b1df99 100644 --- a/.gitlab/trigger_release/trigger_release.yml +++ b/.gitlab/trigger_release/trigger_release.yml @@ -18,7 +18,7 @@ script: # agent-release-management creates pipeline for both Agent 6 and Agent 7 # when triggered with major version 7 - - RELEASE_VERSION="$(inv agent.version --major-version 7 --url-safe --omnibus-format)-1" || exit $?; export RELEASE_VERSION + - RELEASE_VERSION="$(inv agent.version --url-safe --omnibus-format)-1" || exit $?; export RELEASE_VERSION - GITLAB_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $GITLAB_TOKEN write_api) || exit $?; export GITLAB_TOKEN - 'inv pipeline.trigger-child-pipeline --project-name "DataDog/agent-release-management" --git-ref "main" --variable ACTION From d65aa23354f884875747a9d4fc9b2b6245642da9 Mon Sep 17 00:00:00 2001 From: Kangyi LI Date: Mon, 25 Nov 2024 15:31:04 +0100 Subject: [PATCH 03/12] change log level (#31317) --- comp/core/workloadmeta/collectors/internal/ecs/v4parser.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/comp/core/workloadmeta/collectors/internal/ecs/v4parser.go 
b/comp/core/workloadmeta/collectors/internal/ecs/v4parser.go
index 9fe87042b9ac3..817878d6e7794 100644
--- a/comp/core/workloadmeta/collectors/internal/ecs/v4parser.go
+++ b/comp/core/workloadmeta/collectors/internal/ecs/v4parser.go
@@ -77,7 +77,8 @@ func (c *collector) getTaskWithTagsFromV4Endpoint(ctx context.Context, task v1.T
 	if metaURI == "" {
 		err := fmt.Sprintf("failed to get client for metadata v4 API from task %s and the following containers: %v", task.Arn, task.Containers)
-		log.Error(err)
+		// log this as debug since it's expected that some recently created or deleted tasks won't have containers yet
+		log.Debug(err)
 		return v1TaskToV4Task(task), errors.New(err)
 	}
 
From a4abc0d85a2b33cc43ec5968ec6ab731bf815027 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch
Date: Mon, 25 Nov 2024 15:47:24 +0100
Subject: [PATCH 04/12] discovery: Fallback to script name for node (#31385)

---
 .../corechecks/servicediscovery/usm/nodejs.go | 7 ++++++-
 .../servicediscovery/usm/service_test.go      | 15 +++++++++++++--
 .../usm/testdata/root/testdata/inner/app.js   | 0
 .../usm/testdata/root/testdata/inner/link     | 1 +
 4 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/app.js
 create mode 120000 pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/link

diff --git a/pkg/collector/corechecks/servicediscovery/usm/nodejs.go b/pkg/collector/corechecks/servicediscovery/usm/nodejs.go
index dd0f31d3986a5..f48972f1a4aa0 100644
--- a/pkg/collector/corechecks/servicediscovery/usm/nodejs.go
+++ b/pkg/collector/corechecks/servicediscovery/usm/nodejs.go
@@ -63,7 +63,12 @@ func (n nodeDetector) detect(args []string) (ServiceMetadata, bool) {
 			if ok {
 				return NewServiceMetadata(value), true
 			}
-			break
+
+			// We couldn't find a package.json, fall back to the script/link
+			// name since it should be better than just using "node".
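+			// Illustrative examples (editor's addition, not part of the patch):
+			// "/srv/app/main.js" would yield the service name "main", and a
+			// symlink "/srv/app/run" pointing at app.js would yield "run".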
+ base := filepath.Base(absFile) + name := strings.TrimSuffix(base, path.Ext(base)) + return NewServiceMetadata(name), true } } } diff --git a/pkg/collector/corechecks/servicediscovery/usm/service_test.go b/pkg/collector/corechecks/servicediscovery/usm/service_test.go index a5f2b96ac10b0..ae964b2d75f73 100644 --- a/pkg/collector/corechecks/servicediscovery/usm/service_test.go +++ b/pkg/collector/corechecks/servicediscovery/usm/service_test.go @@ -222,10 +222,21 @@ func TestExtractServiceMetadata(t *testing.T) { name: "node js with a broken package.json", cmdline: []string{ "/usr/bin/node", - "./testdata/inner/index.js", + "./testdata/inner/app.js", }, lang: language.Node, - expectedGeneratedName: "node", + expectedGeneratedName: "app", + fs: &subUsmTestData, + }, + { + name: "node js with a broken package.json", + cmdline: []string{ + "/usr/bin/node", + "./testdata/inner/link", + }, + lang: language.Node, + expectedGeneratedName: "link", + fs: &subUsmTestData, }, { name: "node js with a valid package.json", diff --git a/pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/app.js b/pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/app.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/link b/pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/link new file mode 120000 index 0000000000000..f9602e9dc51d1 --- /dev/null +++ b/pkg/collector/corechecks/servicediscovery/usm/testdata/root/testdata/inner/link @@ -0,0 +1 @@ +app.js \ No newline at end of file From 88729a6617d7d407b00246f50c5f6e459a7fab1f Mon Sep 17 00:00:00 2001 From: Paul Cacheux Date: Mon, 25 Nov 2024 16:09:41 +0100 Subject: [PATCH 05/12] remove `bundle_ebpf` flag from `inv agent.build` (#31422) --- tasks/agent.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tasks/agent.py b/tasks/agent.py index b3e80cc14c360..af988249697a8 100644 --- a/tasks/agent.py +++ b/tasks/agent.py @@ -135,7 +135,6 @@ def build( go_mod="mod", windows_sysprobe=False, cmake_options='', - bundle_ebpf=False, agent_bin=None, run_on=None, # noqa: U100, F841. Used by the run_on_devcontainer decorator ): From a7e433469a51a5302690489849b0e51d1dde5ec6 Mon Sep 17 00:00:00 2001 From: Len Gamburg <98782554+iglendd@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:04:59 -0500 Subject: [PATCH 06/12] Add `agent diagnose show-metadata agent-telemetry` CLI command (#31258) Co-authored-by: Olivier G <52180542+ogaca-dd@users.noreply.github.com> Co-authored-by: Jen Gilbert --- cmd/agent/subcommands/diagnose/command.go | 14 + .../subcommands/diagnose/command_test.go | 10 + .../agenttelemetry/impl/agenttelemetry.go | 114 ++++-- .../impl/agenttelemetry_test.go | 340 ++++++++++++++++-- comp/core/agenttelemetry/impl/config.go | 64 ++-- comp/core/agenttelemetry/impl/sender.go | 123 +++---- ...nt-telemetry-cli-cmd-926f814513a02d74.yaml | 14 + 7 files changed, 531 insertions(+), 148 deletions(-) create mode 100644 releasenotes/notes/agent-telemetry-cli-cmd-926f814513a02d74.yaml diff --git a/cmd/agent/subcommands/diagnose/command.go b/cmd/agent/subcommands/diagnose/command.go index 341166d496a06..8ed4bf53699e8 100644 --- a/cmd/agent/subcommands/diagnose/command.go +++ b/cmd/agent/subcommands/diagnose/command.go @@ -269,6 +269,19 @@ This command print the security-agent metadata payload. 
This payload is used by }, } + agentTelemetryCmd := &cobra.Command{ + Use: "agent-telemetry", + Short: "[internal] Print agent telemetry payloads sent by the agent.", + Long: `.`, + RunE: func(_ *cobra.Command, _ []string) error { + return fxutil.OneShot(printPayload, + fx.Supply(payloadName("agent-telemetry")), + fx.Supply(command.GetDefaultCoreBundleParams(cliParams.GlobalParams)), + core.Bundle(), + ) + }, + } + showPayloadCommand.AddCommand(payloadV5Cmd) showPayloadCommand.AddCommand(payloadGohaiCmd) showPayloadCommand.AddCommand(payloadInventoriesAgentCmd) @@ -278,6 +291,7 @@ This command print the security-agent metadata payload. This payload is used by showPayloadCommand.AddCommand(payloadInventoriesPkgSigningCmd) showPayloadCommand.AddCommand(payloadSystemProbeCmd) showPayloadCommand.AddCommand(payloadSecurityAgentCmd) + showPayloadCommand.AddCommand(agentTelemetryCmd) diagnoseCommand.AddCommand(showPayloadCommand) return []*cobra.Command{diagnoseCommand} diff --git a/cmd/agent/subcommands/diagnose/command_test.go b/cmd/agent/subcommands/diagnose/command_test.go index 3316432edf833..9c1f2dcb049d4 100644 --- a/cmd/agent/subcommands/diagnose/command_test.go +++ b/cmd/agent/subcommands/diagnose/command_test.go @@ -115,3 +115,13 @@ func TestShowMetadataSecurityAgentCommand(t *testing.T) { require.Equal(t, false, secretParams.Enabled) }) } + +func TestShowAgentTelemetryCommand(t *testing.T) { + fxutil.TestOneShotSubcommand(t, + Commands(&command.GlobalParams{}), + []string{"diagnose", "show-metadata", "agent-telemetry"}, + printPayload, + func(payload payloadName) { + require.Equal(t, payloadName("agent-telemetry"), payload) + }) +} diff --git a/comp/core/agenttelemetry/impl/agenttelemetry.go b/comp/core/agenttelemetry/impl/agenttelemetry.go index 3ca0c0e663268..de05da82a5b79 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry.go @@ -8,22 +8,26 @@ package agenttelemetryimpl import ( "context" + "encoding/json" + "errors" + "fmt" + "net/http" "strconv" "golang.org/x/exp/maps" + api "github.com/DataDog/datadog-agent/comp/api/api/def" agenttelemetry "github.com/DataDog/datadog-agent/comp/core/agenttelemetry/def" "github.com/DataDog/datadog-agent/comp/core/config" log "github.com/DataDog/datadog-agent/comp/core/log/def" "github.com/DataDog/datadog-agent/comp/core/telemetry" compdef "github.com/DataDog/datadog-agent/comp/def" + httputils "github.com/DataDog/datadog-agent/pkg/util/http" + "github.com/DataDog/datadog-agent/pkg/util/scrubber" dto "github.com/prometheus/client_model/go" ) -// Embed one or more rendering templated into this binary as a resource -// to be used at runtime. - type atel struct { cfgComp config.Component logComp log.Component @@ -38,7 +42,7 @@ type atel struct { cancel context.CancelFunc } -// Requires defines the dependencies for the agenttelemtry component +// Requires defines the dependencies for the agenttelemetry component type Requires struct { compdef.In @@ -46,7 +50,15 @@ type Requires struct { Config config.Component Telemetry telemetry.Component - Lifecycle compdef.Lifecycle + Lc compdef.Lifecycle +} + +// Provides defines the output of the agenttelemetry component +type Provides struct { + compdef.Out + + Comp agenttelemetry.Component + Endpoint api.AgentEndpointProvider } // Interfacing with runner. @@ -120,19 +132,18 @@ func createAtel( } // NewComponent creates a new agent telemetry component. 
-func NewComponent(req Requires) agenttelemetry.Component { - // Wire up the agent telemetry provider (TODO: use FX for sender, client and runner?) +func NewComponent(deps Requires) Provides { a := createAtel( - req.Config, - req.Log, - req.Telemetry, + deps.Config, + deps.Log, + deps.Telemetry, nil, nil, ) // If agent telemetry is enabled and configured properly add the start and stop hooks if a.enabled { - req.Lifecycle.Append(compdef.Hook{ + deps.Lc.Append(compdef.Hook{ OnStart: func(_ context.Context) error { return a.start() }, @@ -142,7 +153,10 @@ func NewComponent(req Requires) agenttelemetry.Component { }) } - return a + return Provides{ + Comp: a, + Endpoint: api.NewAgentEndpointProvider(a.writePayload, "/metadata/agent-telemetry", "GET"), + } } func (a *atel) aggregateMetricTags(mCfg *MetricConfig, mt dto.MetricType, ms []*dto.Metric) []*dto.Metric { @@ -288,7 +302,7 @@ func (a *atel) transformMetricFamily(p *Profile, mfam *dto.MetricFamily) *agentm } } -func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { +func (a *atel) reportAgentMetrics(session *senderSession, pms []*telemetry.MetricFamily, p *Profile) { // If no metrics are configured nothing to report if len(p.metricsMap) == 0 { return @@ -296,15 +310,6 @@ func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { a.logComp.Debugf("Collect Agent Metric telemetry for profile %s", p.Name) - // Gather all prom metrircs. Currently Gather() does not allow filtering by - // matric name, so we need to gather all metrics and filter them on our own. - // pms, err := a.telemetry.Gather(false) - pms, err := a.telComp.Gather(false) - if err != nil { - a.logComp.Errorf("failed to get filtered telemetry metrics: %s", err) - return - } - // ... and filter them according to the profile configuration var metrics []*agentmetric for _, pm := range pms { @@ -322,38 +327,77 @@ func (a *atel) reportAgentMetrics(session *senderSession, p *Profile) { // Send the metrics if any were filtered a.logComp.Debugf("Reporting Agent Metric telemetry for profile %s", p.Name) - err = a.sender.sendAgentMetricPayloads(session, metrics) + a.sender.sendAgentMetricPayloads(session, metrics) +} + +func (a *atel) loadPayloads(profiles []*Profile) (*senderSession, error) { + // Gather all prom metrics. Currently Gather() does not allow filtering by + // metric name, so we need to gather all metrics and filter them on our own. + // pms, err := a.telemetry.Gather(false) + pms, err := a.telComp.Gather(false) if err != nil { - a.logComp.Errorf("failed to get filtered telemetry metrics: %s", err) + a.logComp.Errorf("failed to get filtered telemetry metrics: %v", err) + return nil, err + } + + session := a.sender.startSession(a.cancelCtx) + for _, p := range profiles { + a.reportAgentMetrics(session, pms, p) } + return session, nil } // run runs the agent telemetry for a given profile. It is triggered by the runner // according to the profiles schedule. 
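+// (Editor's flow summary, inferred from the code above: loadPayloads gathers
+// all prom metrics once via telComp.Gather, filters and aggregates them per
+// profile into a sender session, and flushSession then serializes, scrubs,
+// and posts the batched result.)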
func (a *atel) run(profiles []*Profile) { - if a.sender == nil { - a.logComp.Errorf("Agent telemetry sender is not initialized") + a.logComp.Info("Starting agent telemetry run") + session, err := a.loadPayloads(profiles) + if err != nil { + a.logComp.Errorf("failed to load agent telemetry session: %s", err) return } - a.logComp.Info("Starting agent telemetry run") - - session := a.sender.startSession(a.cancelCtx) + err = a.sender.flushSession(session) + if err != nil { + a.logComp.Errorf("failed to flush agent telemetry session: %s", err) + return + } +} - for _, p := range profiles { - a.reportAgentMetrics(session, p) +func (a *atel) writePayload(w http.ResponseWriter, _ *http.Request) { + if !a.enabled { + httputils.SetJSONError(w, errors.New("agent-telemetry is not enabled. See https://docs.datadoghq.com/data_security/agent/?tab=datadogyaml#telemetry-collection for more info"), 400) + return } - err := a.sender.flushSession(session) + a.logComp.Info("Showing agent telemetry payload") + payload, err := a.GetAsJSON() if err != nil { - a.logComp.Errorf("failed to flush agent telemetry session: %s", err) + httputils.SetJSONError(w, a.logComp.Error(err.Error()), 500) return } + + w.Write(payload) } -// TODO: implement when CLI tool will be implemented func (a *atel) GetAsJSON() ([]byte, error) { - return nil, nil + session, err := a.loadPayloads(a.atelCfg.Profiles) + if err != nil { + return nil, fmt.Errorf("unable to load agent telemetry payload: %w", err) + } + payload := session.flush() + + jsonPayload, err := json.MarshalIndent(payload, "", " ") + if err != nil { + return nil, fmt.Errorf("unable to marshal agent telemetry payload: %w", err) + } + + jsonPayloadScrubbed, err := scrubber.ScrubJSONString(string(jsonPayload)) + if err != nil { + return nil, fmt.Errorf("unable to scrub agent telemetry payload: %w", err) + } + + return []byte(jsonPayloadScrubbed), nil } // start is called by FX when the application starts. diff --git a/comp/core/agenttelemetry/impl/agenttelemetry_test.go b/comp/core/agenttelemetry/impl/agenttelemetry_test.go index a1cdfe85409a9..7cd659a2a45f1 100644 --- a/comp/core/agenttelemetry/impl/agenttelemetry_test.go +++ b/comp/core/agenttelemetry/impl/agenttelemetry_test.go @@ -7,6 +7,7 @@ package agenttelemetryimpl import ( "context" + "encoding/json" "fmt" "io" "net/http" @@ -18,6 +19,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" "github.com/DataDog/datadog-agent/comp/core/config" log "github.com/DataDog/datadog-agent/comp/core/log/def" @@ -55,9 +57,8 @@ func (s *senderMock) startSession(_ context.Context) *senderSession { func (s *senderMock) flushSession(_ *senderSession) error { return nil } -func (s *senderMock) sendAgentMetricPayloads(_ *senderSession, metrics []*agentmetric) error { +func (s *senderMock) sendAgentMetricPayloads(_ *senderSession, metrics []*agentmetric) { s.sentMetrics = append(s.sentMetrics, metrics...) 
-	return nil
 }
 
 // Runner mock (TODO: use mock.Mock)
@@ -87,7 +88,8 @@ func newRunnerMock() runner {
 	return &runnerMock{}
 }
 
-// utilities
+// ------------------------------
+// Utility functions
 func convertYamlStrToMap(t *testing.T, cfgStr string) map[string]any {
 	var c map[string]any
 	err := yaml.Unmarshal([]byte(cfgStr), &c)
@@ -196,6 +198,76 @@ func getCommonOverrideConfig(enabled bool, site string) map[string]any {
 	}
 }
 
+// This is a unit-test helper; do not use it in production code (at least yet),
+// since it is not a 100% complete implementation of the unmarshalling
+func (p *Payload) UnmarshalJSON(b []byte) (err error) {
+	var itfPayload map[string]interface{}
+	if err := json.Unmarshal(b, &itfPayload); err != nil {
+		return err
+	}
+
+	requestType, ok := itfPayload["request_type"]
+	if !ok {
+		return fmt.Errorf("request_type not found")
+	}
+	if requestType.(string) == "agent-metrics" {
+		p.RequestType = requestType.(string)
+		p.APIVersion = itfPayload["api_version"].(string)
+		p.EventTime = int64(itfPayload["event_time"].(float64))
+		p.DebugFlag = itfPayload["debug"].(bool)
+
+		var metricsItfPayload map[string]interface{}
+		metricsItfPayload, ok = itfPayload["payload"].(map[string]interface{})
+		if !ok {
+			return fmt.Errorf("payload not found")
+		}
+		var metricsItf map[string]interface{}
+		metricsItf, ok = metricsItfPayload["metrics"].(map[string]interface{})
+		if !ok {
+			return fmt.Errorf("metrics not found")
+		}
+
+		var metricsPayload AgentMetricsPayload
+		metricsPayload.Metrics = make(map[string]interface{})
+		for k, v := range metricsItf {
+			if k == "agent_metadata" {
+				// Re(un)marshal the metadata
+				var metadata AgentMetadataPayload
+				var metadataBytes []byte
+				if metadataBytes, err = json.Marshal(v); err != nil {
+					return err
+				}
+				if err = json.Unmarshal(metadataBytes, &metadata); err != nil {
+					return err
+				}
+				metricsPayload.Metrics[k] = metadata
+			} else {
+				// Re(un)marshal the metric
+				var metric MetricPayload
+				var metricBytes []byte
+				if metricBytes, err = json.Marshal(v); err != nil {
+					return err
+				}
+				if err = json.Unmarshal(metricBytes, &metric); err != nil {
+					return err
+				}
+				metricsPayload.Metrics[k] = metric
+			}
+		}
+		p.Payload = metricsPayload
+		return nil
+	}
+
+	if requestType.(string) == "message-batch" {
+		return fmt.Errorf("message-batch request_type is not supported yet")
+	}
+
+	return fmt.Errorf("request_type should be either agent-metrics or message-batch")
+}
+
+// ------------------------------
+// Tests
+
 func TestEnabled(t *testing.T) {
 	o := getCommonOverrideConfig(true, "foo.bar")
 	a := getTestAtel(t, nil, o, nil, nil, nil)
@@ -250,10 +322,14 @@ func TestRun(t *testing.T) {
 
 	a.start()
 
-	// default configuration has 2 job with 2 profiles (more configurations needs to be tested)
-	// will be improved in future by providing deterministic configuration
+	// Default configuration has 2 jobs.
One with 3 profiles and another with 1 profile + // Profiles with the same schedule are lumped into the same job assert.Equal(t, 2, len(r.(*runnerMock).jobs)) - assert.Equal(t, 1, len(r.(*runnerMock).jobs[0].profiles)) + + // The order is not deterministic + profile0Len := len(r.(*runnerMock).jobs[0].profiles) + profile1Len := len(r.(*runnerMock).jobs[1].profiles) + assert.True(t, (profile0Len == 1 && profile1Len == 3) || (profile0Len == 3 && profile1Len == 1)) } func TestReportMetricBasic(t *testing.T) { @@ -265,7 +341,7 @@ func TestReportMetricBasic(t *testing.T) { c := newClientMock() r := newRunnerMock() a := getTestAtel(t, tel, o, nil, c, r) - assert.True(t, a.enabled) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() @@ -297,7 +373,7 @@ func TestNoTagSpecifiedAggregationCounter(t *testing.T) { r := newRunnerMock() o := convertYamlStrToMap(t, c) a := getTestAtel(t, tel, o, s, nil, r) - assert.True(t, a.enabled) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() @@ -337,7 +413,7 @@ func TestNoTagSpecifiedAggregationGauge(t *testing.T) { s := &senderMock{} r := newRunnerMock() a := getTestAtel(t, tel, o, s, nil, r) - assert.True(t, a.enabled) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() @@ -378,13 +454,15 @@ func TestNoTagSpecifiedAggregationHistogram(t *testing.T) { s := &senderMock{} r := newRunnerMock() a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() r.(*runnerMock).run() // 1 metric sent - assert.Equal(t, 1, len(s.sentMetrics)) + require.Equal(t, 1, len(s.sentMetrics)) + require.True(t, len(s.sentMetrics[0].metrics) > 0) // aggregated to 10 + 20 + 30 = 60 m := s.sentMetrics[0].metrics[0] @@ -420,24 +498,25 @@ func TestTagSpecifiedAggregationCounter(t *testing.T) { s := &senderMock{} r := newRunnerMock() a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() r.(*runnerMock).run() // 2 metric should be sent - assert.Equal(t, 1, len(s.sentMetrics)) - assert.Equal(t, 2, len(s.sentMetrics[0].metrics)) + require.Equal(t, 1, len(s.sentMetrics)) + require.Equal(t, 2, len(s.sentMetrics[0].metrics)) // order is not deterministic, use label key to identify the metrics metrics := makeStableMetricMap(s.sentMetrics[0].metrics) // aggregated - assert.Contains(t, metrics, "tag1:a1:") + require.Contains(t, metrics, "tag1:a1:") m1 := metrics["tag1:a1:"] assert.Equal(t, float64(30), m1.Counter.GetValue()) - assert.Contains(t, metrics, "tag1:a2:") + require.Contains(t, metrics, "tag1:a2:") m2 := metrics["tag1:a2:"] assert.Equal(t, float64(30), m2.Counter.GetValue()) } @@ -471,36 +550,199 @@ func TestTagAggregateTotalCounter(t *testing.T) { s := &senderMock{} r := newRunnerMock() a := getTestAtel(t, tel, o, s, nil, r) + require.True(t, a.enabled) // run the runner to trigger the telemetry report a.start() r.(*runnerMock).run() // 4 metric sent - assert.Equal(t, 1, len(s.sentMetrics)) - assert.Equal(t, 4, len(s.sentMetrics[0].metrics)) + require.Equal(t, 1, len(s.sentMetrics)) + require.Equal(t, 4, len(s.sentMetrics[0].metrics)) // order is not deterministic, use label key to identify the metrics metrics := makeStableMetricMap(s.sentMetrics[0].metrics) // aggregated - assert.Contains(t, metrics, "tag1:a1:") + require.Contains(t, metrics, "tag1:a1:") m1 := metrics["tag1:a1:"] assert.Equal(t, float64(30), m1.Counter.GetValue()) - 
assert.Contains(t, metrics, "tag1:a2:")
+	require.Contains(t, metrics, "tag1:a2:")
 	m2 := metrics["tag1:a2:"]
 	assert.Equal(t, float64(30), m2.Counter.GetValue())
 
-	assert.Contains(t, metrics, "tag1:a3:")
+	require.Contains(t, metrics, "tag1:a3:")
 	m3 := metrics["tag1:a3:"]
 	assert.Equal(t, float64(150), m3.Counter.GetValue())
 
-	assert.Contains(t, metrics, "total:6:")
+	require.Contains(t, metrics, "total:6:")
 	m4 := metrics["total:6:"]
 	assert.Equal(t, float64(210), m4.Counter.GetValue())
 }
 
+func TestTwoProfilesOnTheSameScheduleGenerateSinglePayload(t *testing.T) {
+	var c = `
+    agent_telemetry:
+      enabled: true
+      profiles:
+        - name: foo
+          metric:
+            metrics:
+              - name: bar.bar
+                aggregate_tags:
+                  - tag1
+        - name: bar
+          metric:
+            metrics:
+              - name: foo.foo
+                aggregate_tags:
+                  - tag1
+    `
+	// setup and initiate atel
+	tel := makeTelMock(t)
+	counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "")
+	counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"})
+	counter2 := tel.NewCounter("foo", "foo", []string{"tag1", "tag2", "tag3"}, "")
+	counter2.AddWithTags(20, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"})
+
+	o := convertYamlStrToMap(t, c)
+	s := makeSenderImpl(t, c)
+	r := newRunnerMock()
+	a := getTestAtel(t, tel, o, s, nil, r)
+	require.True(t, a.enabled)
+
+	// Get payload
+	payloadJSON, err := a.GetAsJSON()
+	assert.NoError(t, err)
+	var payload Payload
+	err = json.Unmarshal(payloadJSON, &payload)
+	require.NoError(t, err)
+
+	// -----------------------
+	// for 2 profiles there are 2 metrics, but 1 payload (test is currently payload schema dependent, improve in future)
+	// Single payload which has sub-payloads for each metric
+	// 2 metrics
+	metrics := payload.Payload.(AgentMetricsPayload).Metrics
+	assert.Contains(t, metrics, "bar.bar")
+	assert.Contains(t, metrics, "foo.foo")
+}
+
+func TestOneProfileWithOneMetricMultipleContextsGenerateTwoPayloads(t *testing.T) {
+	var c = `
+    agent_telemetry:
+      enabled: true
+      profiles:
+        - name: foo
+          metric:
+            metrics:
+              - name: bar.bar
+                aggregate_tags:
+                  - tag1
+    `
+	// setup and initiate atel
+	tel := makeTelMock(t)
+	counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "")
+	counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"})
+	counter1.AddWithTags(20, map[string]string{"tag1": "a2", "tag2": "b2", "tag3": "c2"})
+
+	o := convertYamlStrToMap(t, c)
+	s := makeSenderImpl(t, c)
+	r := newRunnerMock()
+	a := getTestAtel(t, tel, o, s, nil, r)
+	require.True(t, a.enabled)
+
+	payloadJSON, err := a.GetAsJSON()
+	assert.NoError(t, err)
+	var payload map[string]interface{}
+	err = json.Unmarshal(payloadJSON, &payload)
+	require.NoError(t, err)
+
+	// -----------------------
+	// for 1 profile with 1 metric in 2 tag contexts there are 2 payloads batched into a single message (test is currently payload schema dependent, improve in future)
+
+	// Two payloads, each carrying the same metric with different tags
+	requestType, ok := payload["request_type"]
+	require.True(t, ok)
+	assert.Equal(t, "message-batch", requestType)
+	metricPayloads, ok := payload["payload"].([]interface{})
+	require.True(t, ok)
+
+	// ---------
+	// 2 payloads
+	// 1-st
+	payload1, ok := metricPayloads[0].(map[string]interface{})
+	require.True(t, ok)
+	requestType1, ok := payload1["request_type"]
+	require.True(t, ok)
+	assert.Equal(t, "agent-metrics", requestType1)
+	metricsPayload1, ok := payload1["payload"].(map[string]interface{})
+	require.True(t, ok)
+	metrics1, ok :=
metricsPayload1["metrics"].(map[string]interface{})
+	require.True(t, ok)
+	_, ok11 := metrics1["bar.bar"]
+	_, ok12 := metrics1["foo.foo"]
+	assert.True(t, (ok11 && !ok12) || (!ok11 && ok12))
+
+	// 2-nd
+	payload2, ok := metricPayloads[1].(map[string]interface{})
+	require.True(t, ok)
+	requestType2, ok := payload2["request_type"]
+	require.True(t, ok)
+	assert.Equal(t, "agent-metrics", requestType2)
+	metricsPayload2, ok := payload2["payload"].(map[string]interface{})
+	require.True(t, ok)
+	metrics2, ok := metricsPayload2["metrics"].(map[string]interface{})
+	require.True(t, ok)
+	_, ok21 := metrics2["bar.bar"]
+	_, ok22 := metrics2["foo.foo"]
+	assert.True(t, (ok21 && !ok22) || (!ok21 && ok22))
+}
+
+func TestOneProfileWithTwoMetricGenerateSinglePayloads(t *testing.T) {
+	var c = `
+    agent_telemetry:
+      enabled: true
+      profiles:
+        - name: foobar
+          metric:
+            metrics:
+              - name: bar.bar
+                aggregate_tags:
+                  - tag1
+              - name: foo.foo
+                aggregate_tags:
+                  - tag1
+    `
+	// setup and initiate atel
+	tel := makeTelMock(t)
+	counter1 := tel.NewCounter("bar", "bar", []string{"tag1", "tag2", "tag3"}, "")
+	counter1.AddWithTags(10, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"})
+	counter2 := tel.NewCounter("foo", "foo", []string{"tag1", "tag2", "tag3"}, "")
+	counter2.AddWithTags(20, map[string]string{"tag1": "a1", "tag2": "b1", "tag3": "c1"})
+
+	o := convertYamlStrToMap(t, c)
+	s := makeSenderImpl(t, c)
+	r := newRunnerMock()
+	a := getTestAtel(t, tel, o, s, nil, r)
+	require.True(t, a.enabled)
+
+	// Get payload
+	payloadJSON, err := a.GetAsJSON()
+	assert.NoError(t, err)
+	var payload Payload
+	err = json.Unmarshal(payloadJSON, &payload)
+	require.NoError(t, err)
+
+	// -----------------------
+	// for 1 profile with 2 metrics there is 1 payload (test is currently payload schema dependent, improve in future)
+	// 2 metrics
+	metrics := payload.Payload.(AgentMetricsPayload).Metrics
+	assert.Contains(t, metrics, "bar.bar")
+	assert.Contains(t, metrics, "foo.foo")
+}
+
 func TestSenderConfigNoConfig(t *testing.T) {
 	c := `
 agent_telemetry:
@@ -634,3 +876,61 @@ func TestSenderConfigDDUrlWithEmptyAdditionalPoint(t *testing.T) {
 	url := buildURL(sndr.(*senderImpl).endpoints.Endpoints[0])
 	assert.Equal(t, "https://instrumentation-telemetry-intake.us5.datadoghq.com./api/v2/apmtelemetry", url)
 }
+
+func TestGetAsJSONScrub(t *testing.T) {
+	var c = `
+    agent_telemetry:
+      enabled: true
+      profiles:
+        - name: xxx
+          metric:
+            metrics:
+              - name: foo.bar_auth
+                aggregate_tags:
+                  - password
+              - name: foo.bar_key
+                aggregate_tags:
+                  - api_key
+              - name: foo.bar_text
+                aggregate_tags:
+                  - text
+    `
+
+	// setup and initiate atel
+	tel := makeTelMock(t)
+	counter1 := tel.NewCounter("foo", "bar_auth", []string{"password"}, "")
+	counter2 := tel.NewCounter("foo", "bar_key", []string{"api_key"}, "")
+	counter3 := tel.NewCounter("foo", "bar_text", []string{"text"}, "")
+
+	// Default scrubber scrubs at least ...
+	// api key, bearer key, app key, url, password, snmp, certificate
+	counter1.AddWithTags(10, map[string]string{"password": "1234567890"})
+	counter2.AddWithTags(11, map[string]string{"api_key": "1234567890"})
+	counter3.AddWithTags(11, map[string]string{"text": "test"})
+
+	o := convertYamlStrToMap(t, c)
+	s := makeSenderImpl(t, c)
+	r := newRunnerMock()
+	a := getTestAtel(t, tel, o, s, nil, r)
+	require.True(t, a.enabled)
+
+	// Get payload
+	payloadJSON, err := a.GetAsJSON()
+	assert.NoError(t, err)
+	var payload Payload
+	err = json.Unmarshal(payloadJSON, &payload)
+	require.NoError(t, err)
+
+	// Check the scrubbing
+	metrics := payload.Payload.(AgentMetricsPayload).Metrics
+
+	metric, ok := metrics["foo.bar_auth"]
+	require.True(t, ok)
+	assert.Equal(t, "********", metric.(MetricPayload).Tags["password"])
+	metric, ok = metrics["foo.bar_key"]
+	require.True(t, ok)
+	assert.Equal(t, "********", metric.(MetricPayload).Tags["api_key"])
+	metric, ok = metrics["foo.bar_text"]
+	require.True(t, ok)
+	assert.Equal(t, "test", metric.(MetricPayload).Tags["text"])
+}
diff --git a/comp/core/agenttelemetry/impl/config.go b/comp/core/agenttelemetry/impl/config.go
index 39bd822dd9f28..29c7df96ff515 100644
--- a/comp/core/agenttelemetry/impl/config.go
+++ b/comp/core/agenttelemetry/impl/config.go
@@ -22,8 +22,8 @@ const (
 
 // Config is the top-level config for agent telemetry
 type Config struct {
-	Enabled  bool      `yaml:"enabled"`
-	Profiles []Profile `yaml:"profiles"`
+	Enabled  bool       `yaml:"enabled"`
+	Profiles []*Profile `yaml:"profiles"`
 
 	// compiled
 	schedule map[Schedule][]*Profile
@@ -127,7 +127,7 @@ type Schedule struct {
 // configured payloads will be generated and emitted on the following schedule (the details
 // are described in the comments below).
 //
-// (legend - 300s=5m, 900s=15m, 1800s=30m, 3600s=1h, 86400s=1d)
+// (legend - 300s=5m, 900s=15m, 1800s=30m, 3600s=1h, 14400s=4h, 86400s=1d)
 //
 //	schedule:
 //	  start_after: 30
@@ -155,7 +155,7 @@ type Schedule struct {
 // Note: If "aggregate_tags" are not specified, metric will be aggregated without any tags.
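[Editor's aside, illustrative only and not part of the patch: the `schedule map[Schedule][]*Profile` field above works because Schedule is a comparable struct, so profiles sharing a schedule can key the same map entry and run as one job. A minimal, self-contained sketch of that grouping; the "daily" profile name and the helper names are assumptions:]

package main

import "fmt"

type Schedule struct {
	StartAfter, Iterations, Period uint
}

type Profile struct {
	Name     string
	Schedule *Schedule
}

// groupBySchedule mirrors the compileSchedules idea: one job per distinct schedule.
func groupBySchedule(profiles []*Profile) map[Schedule][]*Profile {
	jobs := make(map[Schedule][]*Profile)
	for _, p := range profiles {
		jobs[*p.Schedule] = append(jobs[*p.Schedule], p)
	}
	return jobs
}

func main() {
	every15m := &Schedule{StartAfter: 30, Iterations: 0, Period: 900}
	profiles := []*Profile{
		{Name: "checks", Schedule: every15m},
		{Name: "logs-and-metrics", Schedule: every15m},
		{Name: "database", Schedule: every15m},
		{Name: "daily", Schedule: &Schedule{StartAfter: 30, Iterations: 0, Period: 86400}},
	}
	for sched, ps := range groupBySchedule(profiles) {
		fmt.Printf("one job: period=%ds, %d profile(s)\n", sched.Period, len(ps))
	}
}

[End of editor's aside.]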
var defaultProfiles = ` profiles: - - name: core-metrics + - name: checks metric: exclude: zero_metric: true @@ -171,37 +171,53 @@ var defaultProfiles = ` aggregate_tags: - check_name - name: pymem.inuse - - name: pymem.alloc + schedule: + start_after: 30 + iterations: 0 + period: 900 + - name: logs-and-metrics + metric: + exclude: + zero_metric: true + metrics: + - name: dogstatsd.udp_packets_bytes + - name: dogstatsd.uds_packets_bytes + - name: logs.bytes_missed + - name: logs.bytes_sent - name: logs.decoded - - name: logs.processed - - name: logs.sent - name: logs.dropped - - name: logs.sender_latency - - name: logs.bytes_sent - name: logs.encoded_bytes_sent - - name: logs.bytes_missed + - name: logs.processed + - name: logs.sent - name: point.sent - name: point.dropped + - name: transactions.input_count + - name: transactions.requeued + - name: transactions.retries + schedule: + start_after: 30 + iterations: 0 + period: 900 + - name: database + metric: + exclude: + zero_metric: true + metrics: - name: oracle.activity_samples_count - name: oracle.activity_latency - name: oracle.statement_metrics - name: oracle.statement_plan_errors - - name: postgres.schema_tables_elapsed_ms - - name: postgres.schema_tables_count + - name: postgres.collect_activity_snapshot_ms - name: postgres.collect_relations_autodiscovery_ms + - name: postgres.collect_statement_samples_ms + - name: postgres.collect_statement_samples_count - name: postgres.collect_stat_autodiscovery_ms - - name: postgres.get_new_pg_stat_activity_ms - - name: postgres.get_new_pg_stat_activity_count - name: postgres.get_active_connections_ms - name: postgres.get_active_connections_count - - name: postgres.collect_activity_snapshot_ms - - name: postgres.collect_statement_samples_ms - - name: postgres.collect_statement_samples_count - - name: transactions.input_count - - name: transactions.requeued - - name: transactions.retries - - name: dogstatsd.udp_packets_bytes - - name: dogstatsd.uds_packets_bytes + - name: postgres.get_new_pg_stat_activity_count + - name: postgres.get_new_pg_stat_activity_ms + - name: postgres.schema_tables_elapsed_ms + - name: postgres.schema_tables_count schedule: start_after: 30 iterations: 0 @@ -337,7 +353,7 @@ func compileSchedules(cfg *Config) error { cfg.schedule = make(map[Schedule][]*Profile) for i := 0; i < len(cfg.Profiles); i++ { - p := &cfg.Profiles[i] + p := cfg.Profiles[i] // Setup default schedule if it is not specified partially or at all if p.Schedule == nil { @@ -374,7 +390,7 @@ func compileSchedules(cfg *Config) error { // Compile agent telemetry config func compileConfig(cfg *Config) error { for i := 0; i < len(cfg.Profiles); i++ { - err := compileProfile(&cfg.Profiles[i]) + err := compileProfile(cfg.Profiles[i]) if err != nil { return err } diff --git a/comp/core/agenttelemetry/impl/sender.go b/comp/core/agenttelemetry/impl/sender.go index eaf1125d6a2e7..d715863f862e4 100644 --- a/comp/core/agenttelemetry/impl/sender.go +++ b/comp/core/agenttelemetry/impl/sender.go @@ -23,6 +23,7 @@ import ( logconfig "github.com/DataDog/datadog-agent/comp/logs/agent/config" metadatautils "github.com/DataDog/datadog-agent/comp/metadata/host/hostimpl/utils" httputils "github.com/DataDog/datadog-agent/pkg/util/http" + "github.com/DataDog/datadog-agent/pkg/util/scrubber" "github.com/DataDog/datadog-agent/pkg/version" ) @@ -35,7 +36,6 @@ const ( httpClientResetInterval = 5 * time.Minute httpClientTimeout = 10 * time.Second - maximumNumberOfPayloads = 50 ) // --------------- @@ -43,7 +43,7 @@ const ( type sender 
interface {
 	startSession(cancelCtx context.Context) *senderSession
 	flushSession(ss *senderSession) error
-	sendAgentMetricPayloads(ss *senderSession, metrics []*agentmetric) error
+	sendAgentMetricPayloads(ss *senderSession, metrics []*agentmetric)
 }
 
 type client interface {
@@ -103,10 +103,11 @@ type payloadInfo struct {
 	payload     interface{}
 }
 
-// senderSession is also use to batch payloads
+// senderSession stores and serializes one or more payloads
 type senderSession struct {
-	cancelCtx context.Context
-	payloads  []payloadInfo
+	cancelCtx       context.Context
+	payloadTemplate Payload
+	metricPayloads  []*AgentMetricsPayload
 }
 
 // BatchPayloadWrapper exported so it can be turned into json
@@ -268,53 +269,65 @@ func (s *senderImpl) addMetricPayload(
 
 func (s *senderImpl) startSession(cancelCtx context.Context) *senderSession {
 	return &senderSession{
-		cancelCtx: cancelCtx,
+		cancelCtx:       cancelCtx,
+		payloadTemplate: s.payloadTemplate,
 	}
 }
 
-func (s *senderImpl) flushSession(ss *senderSession) error {
-	// There is nothing to do if there are no payloads
-	if len(ss.payloads) == 0 {
-		return nil
-	}
-
-	s.logComp.Infof("Flushing Agent Telemetery session with %d payloads", len(ss.payloads))
-
-	// Defer cleanup of payloads. Even if there is an error, we want to cleanup
-	// but in future we may want to add retry logic.
+func (ss *senderSession) flush() Payload {
 	defer func() {
-		ss.payloads = nil
+		// Clear the payloads
+		ss.metricPayloads = nil
 	}()
 
 	// Create a payload with a single message or batch of messages
-	payload := s.payloadTemplate
+	payload := ss.payloadTemplate
 	payload.EventTime = time.Now().Unix()
 
-	if len(ss.payloads) == 1 {
+	if len(ss.metricPayloads) == 1 {
 		// Single payload will be sent directly using the request type of the payload
-		payload.RequestType = ss.payloads[0].requestType
-		payload.Payload = ss.payloads[0].payload
-	} else {
-		// Batch up multiple payloads into single "batch" payload type
-		payload.RequestType = "message-batch"
-		payloadWrappers := make([]BatchPayloadWrapper, 0)
-		for _, p := range ss.payloads {
-			payloadWrappers = append(payloadWrappers,
-				BatchPayloadWrapper{
-					RequestType: p.requestType,
-					Payload:     p.payload,
-				})
-		}
-		payload.Payload = payloadWrappers
+		mp := ss.metricPayloads[0]
+		payload.RequestType = "agent-metrics"
+		payload.Payload = payloadInfo{"agent-metrics", mp}.payload
+		return payload
+	}
+
+	// Batch up multiple payloads into a single "batch" payload type
+	batch := make([]BatchPayloadWrapper, 0)
+	for _, mp := range ss.metricPayloads {
+		batch = append(batch,
+			BatchPayloadWrapper{
+				RequestType: "agent-metrics",
+				Payload:     payloadInfo{"agent-metrics", mp}.payload,
+			})
+	}
+	payload.RequestType = "message-batch"
+	payload.Payload = batch
+	return payload
+}
+
+func (s *senderImpl) flushSession(ss *senderSession) error {
+	// There is nothing to do if there are no payloads
+	if len(ss.metricPayloads) == 0 {
+		return nil
 	}
 
-	// Marshal the payload to a byte array
-	reqBody, err := json.Marshal(payload)
+	s.logComp.Infof("Flushing Agent Telemetry session with %d payloads", len(ss.metricPayloads))
+
+	payloads := ss.flush()
+	payloadJSON, err := json.Marshal(payloads)
+	if err != nil {
+		return fmt.Errorf("failed to marshal agent telemetry payload: %w", err)
+	}
+
+	reqBody, err := scrubber.ScrubBytes(payloadJSON)
 	if err != nil {
-		return err
+		return fmt.Errorf("failed to scrub agent telemetry payload: %w", err)
 	}
 
 	// Send the payload to all endpoints
 	var errs error
+	reqType := payloads.RequestType
+	bodyLen := strconv.Itoa(len(reqBody))
	for _, ep := range s.endpoints.Endpoints {
 		url := buildURL(ep)
 		req, err := http.NewRequest("POST", url, bytes.NewReader(reqBody))
@@ -322,7 +335,7 @@ func (s *senderImpl) flushSession(ss *senderSession) error {
 			errs = errors.Join(errs, err)
 			continue
 		}
-		s.addHeaders(req, payload.RequestType, ep.GetAPIKey(), strconv.Itoa(len(reqBody)))
+		s.addHeaders(req, reqType, ep.GetAPIKey(), bodyLen)
 		resp, err := s.client.Do(req.WithContext(ss.cancelCtx))
 		if err != nil {
 			errs = errors.Join(errs, err)
@@ -345,12 +358,7 @@ func (s *senderImpl) flushSession(ss *senderSession) error {
 	return errs
 }
 
-func (s *senderImpl) sendAgentMetricPayloads(ss *senderSession, metrics []*agentmetric) error {
-	// Are there any metrics
-	if len(metrics) == 0 {
-		return nil
-	}
-
+func (s *senderImpl) sendAgentMetricPayloads(ss *senderSession, metrics []*agentmetric) {
 	// Create one or more metric payloads batching different metrics into a single payload,
 	// but the same metric (with multiple tag sets) into different payloads. This is needed
 	// to avoid creating JSON payloads which contain arrays (otherwise we could not
@@ -360,44 +368,21 @@ func (s *senderImpl) sendAgentMetricPayloads(ss *senderSession, metrics []*agent
 	// message/payload contains multiple metrics for a single index of tag set. Essentially
 	// the number of messages/payloads is equal to the maximum number of tag sets for a single
 	// metric.
-	var payloads []*AgentMetricsPayload
 	for _, am := range metrics {
 		for idx, m := range am.metrics {
 			var payload *AgentMetricsPayload
 
 			// reuse or add a payload
-			if idx+1 > len(payloads) {
+			if idx+1 > len(ss.metricPayloads) {
 				newPayload := s.agentMetricsPayloadTemplate
 				newPayload.Metrics = make(map[string]interface{}, 0)
 				newPayload.Metrics["agent_metadata"] = s.metadataPayloadTemplate
-				payloads = append(payloads, &newPayload)
+				ss.metricPayloads = append(ss.metricPayloads, &newPayload)
 			}
-			payload = payloads[idx]
+			payload = ss.metricPayloads[idx]
 			s.addMetricPayload(am.name, am.family, m, payload)
 		}
 	}
-
-	// We will batch multiples metrics payloads into single "batch" payload type
-	// but for now send it one by one
-	for _, payload := range payloads {
-		if err := s.sendPayload(ss, "agent-metrics", payload); err != nil {
-			return err
-		}
-	}
-
-	return nil
-}
-
-func (s *senderImpl) sendPayload(ss *senderSession, requestType string, payload interface{}) error {
-	// Add payload to session
-	ss.payloads = append(ss.payloads, payloadInfo{requestType, payload})
-
-	// Flush session if it is full
-	if len(ss.payloads) >= maximumNumberOfPayloads {
-		return s.flushSession(ss)
-	}
-
-	return nil
 }
 
 func (s *senderImpl) addHeaders(req *http.Request, requesttype, apikey, bodylen string) {
diff --git a/releasenotes/notes/agent-telemetry-cli-cmd-926f814513a02d74.yaml b/releasenotes/notes/agent-telemetry-cli-cmd-926f814513a02d74.yaml
new file mode 100644
index 0000000000000..6f48d478937c1
--- /dev/null
+++ b/releasenotes/notes/agent-telemetry-cli-cmd-926f814513a02d74.yaml
@@ -0,0 +1,14 @@
+# Each section from every release note are combined when the
+# CHANGELOG.rst is rendered. So the text needs to be worded so that
+# it does not depend on any information only available in another
+# section. This may mean repeating some details, but each section
+# must be readable independently of the other.
+#
+# Each section note must be formatted as reStructuredText.
+---
+enhancements:
+  - |
+    Add the ability to show the Agent telemetry payloads that would be sent
+    by the Agent if telemetry is enabled.
One can run it with the following command: + `agent diagnose show-metadata agent-telemetry`. See + `docs ` for more details. \ No newline at end of file From 2706272163c3131745d847dd1f534de245542c7d Mon Sep 17 00:00:00 2001 From: Brian Floersch Date: Mon, 25 Nov 2024 11:18:26 -0500 Subject: [PATCH 07/12] Rework logs pipeline utilization metrics to improve performance. (#31287) Co-authored-by: remeh --- cmd/serverless/dependencies_linux_amd64.txt | 1 - cmd/serverless/dependencies_linux_arm64.txt | 1 - comp/otelcol/ddflareextension/impl/go.mod | 1 - comp/otelcol/logsagentpipeline/go.mod | 1 - .../logsagentpipelineimpl/go.mod | 1 - .../exporter/datadogexporter/go.mod | 1 - pkg/logs/client/go.mod | 1 - pkg/logs/client/http/destination.go | 1 - pkg/logs/metrics/go.mod | 3 +- pkg/logs/metrics/utilization_monitor.go | 96 ++++++++----------- pkg/logs/metrics/utilization_monitor_test.go | 48 ++++++++-- pkg/logs/pipeline/go.mod | 1 - pkg/logs/processor/go.mod | 1 - pkg/logs/processor/processor.go | 1 - pkg/logs/sender/batch_strategy.go | 1 - pkg/logs/sender/go.mod | 1 - pkg/logs/sender/sender.go | 1 - test/otel/go.mod | 1 - 18 files changed, 80 insertions(+), 82 deletions(-) diff --git a/cmd/serverless/dependencies_linux_amd64.txt b/cmd/serverless/dependencies_linux_amd64.txt index 4bcca62d0f61c..68065a8d5744c 100644 --- a/cmd/serverless/dependencies_linux_amd64.txt +++ b/cmd/serverless/dependencies_linux_amd64.txt @@ -315,7 +315,6 @@ github.com/DataDog/datadog-agent/pkg/util/sync github.com/DataDog/datadog-agent/pkg/util/system github.com/DataDog/datadog-agent/pkg/util/system/socket github.com/DataDog/datadog-agent/pkg/util/tmplvar -github.com/DataDog/datadog-agent/pkg/util/utilizationtracker github.com/DataDog/datadog-agent/pkg/util/uuid github.com/DataDog/datadog-agent/pkg/version github.com/DataDog/datadog-api-client-go/v2 diff --git a/cmd/serverless/dependencies_linux_arm64.txt b/cmd/serverless/dependencies_linux_arm64.txt index edda30b758215..a088a7cef0b9d 100644 --- a/cmd/serverless/dependencies_linux_arm64.txt +++ b/cmd/serverless/dependencies_linux_arm64.txt @@ -315,7 +315,6 @@ github.com/DataDog/datadog-agent/pkg/util/sync github.com/DataDog/datadog-agent/pkg/util/system github.com/DataDog/datadog-agent/pkg/util/system/socket github.com/DataDog/datadog-agent/pkg/util/tmplvar -github.com/DataDog/datadog-agent/pkg/util/utilizationtracker github.com/DataDog/datadog-agent/pkg/util/uuid github.com/DataDog/datadog-agent/pkg/version github.com/DataDog/datadog-api-client-go/v2 diff --git a/comp/otelcol/ddflareextension/impl/go.mod b/comp/otelcol/ddflareextension/impl/go.mod index 8051c364fbdf4..930c7cacfed08 100644 --- a/comp/otelcol/ddflareextension/impl/go.mod +++ b/comp/otelcol/ddflareextension/impl/go.mod @@ -257,7 +257,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-api-client-go/v2 v2.31.0 // indirect github.com/DataDog/datadog-go/v5 v5.5.0 // indirect diff --git a/comp/otelcol/logsagentpipeline/go.mod b/comp/otelcol/logsagentpipeline/go.mod index 7fe6d0865adf0..a261d2ea98810 100644 --- a/comp/otelcol/logsagentpipeline/go.mod +++ b/comp/otelcol/logsagentpipeline/go.mod @@ -104,7 +104,6 @@ require ( 
github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.56.0-rc.3 // indirect github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42 // indirect diff --git a/comp/otelcol/logsagentpipeline/logsagentpipelineimpl/go.mod b/comp/otelcol/logsagentpipeline/logsagentpipelineimpl/go.mod index ee142226df975..f3e0040fcd070 100644 --- a/comp/otelcol/logsagentpipeline/logsagentpipelineimpl/go.mod +++ b/comp/otelcol/logsagentpipeline/logsagentpipelineimpl/go.mod @@ -119,7 +119,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.56.0-rc.3 // indirect github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42 // indirect diff --git a/comp/otelcol/otlp/components/exporter/datadogexporter/go.mod b/comp/otelcol/otlp/components/exporter/datadogexporter/go.mod index 4b4a4f6f694cf..61493384ded09 100644 --- a/comp/otelcol/otlp/components/exporter/datadogexporter/go.mod +++ b/comp/otelcol/otlp/components/exporter/datadogexporter/go.mod @@ -195,7 +195,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.57.1 // indirect github.com/DataDog/datadog-api-client-go/v2 v2.26.0 // indirect diff --git a/pkg/logs/client/go.mod b/pkg/logs/client/go.mod index 2a724520207ce..f5cb07a952639 100644 --- a/pkg/logs/client/go.mod +++ b/pkg/logs/client/go.mod @@ -88,7 +88,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/viper v1.13.5 // indirect github.com/Microsoft/go-winio v0.6.1 // indirect diff --git a/pkg/logs/client/http/destination.go b/pkg/logs/client/http/destination.go index 7553b7e91d913..a307a64cc49b2 100644 --- a/pkg/logs/client/http/destination.go +++ b/pkg/logs/client/http/destination.go @@ -217,7 +217,6 @@ func (d *Destination) run(input chan *message.Payload, output chan *message.Payl d.wg.Wait() d.updateRetryState(nil, isRetrying) - d.utilization.Cancel() stopChan <- struct{}{} } diff --git a/pkg/logs/metrics/go.mod b/pkg/logs/metrics/go.mod index b33f05e60bac0..d7a851d653b61 100644 --- a/pkg/logs/metrics/go.mod +++ b/pkg/logs/metrics/go.mod @@ -13,7 +13,7 @@ replace ( 
require ( github.com/DataDog/datadog-agent/pkg/telemetry v0.56.0-rc.3 - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 + github.com/benbjohnson/clock v1.3.5 github.com/stretchr/testify v1.9.0 ) @@ -22,7 +22,6 @@ require ( github.com/DataDog/datadog-agent/comp/def v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/fxutil v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/optional v0.55.0 // indirect - github.com/benbjohnson/clock v1.3.5 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.2.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect diff --git a/pkg/logs/metrics/utilization_monitor.go b/pkg/logs/metrics/utilization_monitor.go index 704681d784f10..6cf460e773435 100644 --- a/pkg/logs/metrics/utilization_monitor.go +++ b/pkg/logs/metrics/utilization_monitor.go @@ -8,14 +8,13 @@ package metrics import ( "time" - "github.com/DataDog/datadog-agent/pkg/util/utilizationtracker" + "github.com/benbjohnson/clock" ) // UtilizationMonitor is an interface for monitoring the utilization of a component. type UtilizationMonitor interface { Start() Stop() - Cancel() } // NoopUtilizationMonitor is a no-op implementation of UtilizationMonitor. @@ -27,33 +26,38 @@ func (n *NoopUtilizationMonitor) Start() {} // Stop does nothing. func (n *NoopUtilizationMonitor) Stop() {} -// Cancel does nothing. -func (n *NoopUtilizationMonitor) Cancel() {} - // TelemetryUtilizationMonitor is a UtilizationMonitor that reports utilization metrics as telemetry. type TelemetryUtilizationMonitor struct { - name string - instance string - started bool - ut *utilizationtracker.UtilizationTracker - cancel func() + inUse time.Duration + idle time.Duration + startIdle time.Time + startInUse time.Time + lastSample time.Time + sampleRate time.Duration + avg float64 + name string + instance string + started bool + clock clock.Clock } // NewTelemetryUtilizationMonitor creates a new TelemetryUtilizationMonitor. func NewTelemetryUtilizationMonitor(name, instance string) *TelemetryUtilizationMonitor { + return newTelemetryUtilizationMonitorWithSampleRateAndClock(name, instance, 1*time.Second, clock.New()) +} - utilizationTracker := utilizationtracker.NewUtilizationTracker(1*time.Second, ewmaAlpha) - cancel := startTrackerTicker(utilizationTracker, 1*time.Second) - - t := &TelemetryUtilizationMonitor{ - name: name, - instance: instance, - started: false, - ut: utilizationTracker, - cancel: cancel, +func newTelemetryUtilizationMonitorWithSampleRateAndClock(name, instance string, sampleRate time.Duration, clock clock.Clock) *TelemetryUtilizationMonitor { + return &TelemetryUtilizationMonitor{ + name: name, + instance: instance, + startIdle: clock.Now(), + startInUse: clock.Now(), + lastSample: clock.Now(), + sampleRate: sampleRate, + avg: 0, + started: false, + clock: clock, } - t.startUtilizationUpdater() - return t } // Start tracks a start event in the utilization tracker. @@ -62,7 +66,9 @@ func (u *TelemetryUtilizationMonitor) Start() { return } u.started = true - u.ut.Started() + u.idle += u.clock.Since(u.startIdle) + u.startInUse = u.clock.Now() + u.reportIfNeeded() } // Stop tracks a finish event in the utilization tracker. @@ -71,43 +77,17 @@ func (u *TelemetryUtilizationMonitor) Stop() { return } u.started = false - u.ut.Finished() -} - -// Cancel stops the monitor. 
-func (u *TelemetryUtilizationMonitor) Cancel() { - u.cancel() - u.ut.Stop() + u.inUse += u.clock.Since(u.startInUse) + u.startIdle = u.clock.Now() + u.reportIfNeeded() } -func startTrackerTicker(ut *utilizationtracker.UtilizationTracker, interval time.Duration) func() { - ticker := time.NewTicker(interval) - cancel := make(chan struct{}, 1) - done := make(chan struct{}) - go func() { - defer ticker.Stop() - defer close(done) - for { - select { - case <-ticker.C: - ut.Tick() - case <-cancel: - return - } - } - }() - - return func() { - cancel <- struct{}{} - <-done // make sure Tick will not be called after we return. +func (u *TelemetryUtilizationMonitor) reportIfNeeded() { + if u.clock.Since(u.lastSample) >= u.sampleRate { + u.avg = ewma(float64(u.inUse)/float64(u.idle+u.inUse), u.avg) + TlmUtilizationRatio.Set(u.avg, u.name, u.instance) + u.idle = 0 + u.inUse = 0 + u.lastSample = u.clock.Now() } } - -func (u *TelemetryUtilizationMonitor) startUtilizationUpdater() { - TlmUtilizationRatio.Set(0, u.name, u.instance) - go func() { - for value := range u.ut.Output { - TlmUtilizationRatio.Set(value, u.name, u.instance) - } - }() -} diff --git a/pkg/logs/metrics/utilization_monitor_test.go b/pkg/logs/metrics/utilization_monitor_test.go index c549cfaaab55a..f1418614822d0 100644 --- a/pkg/logs/metrics/utilization_monitor_test.go +++ b/pkg/logs/metrics/utilization_monitor_test.go @@ -7,13 +7,47 @@ package metrics import ( "testing" + "time" + + "github.com/benbjohnson/clock" + "github.com/stretchr/testify/require" ) -func TestUtilizationMonitorLifecycle(_ *testing.T) { - // The core logic of the UtilizationMonitor is tested in the utilizationtracker package. - // This test just ensures the lifecycle methods don't block. - um := NewTelemetryUtilizationMonitor("", "") - um.Start() - um.Stop() - um.Cancel() +func TestUtilizationMonitorLifecycle(t *testing.T) { + clock := clock.NewMock() + um := newTelemetryUtilizationMonitorWithSampleRateAndClock("name", "instance", 2*time.Second, clock) + + // Converge on 50% utilization + for i := 0; i < 100; i++ { + um.Start() + clock.Add(1 * time.Second) + + um.Stop() + clock.Add(1 * time.Second) + } + + require.InDelta(t, 0.5, um.avg, 0.01) + + // Converge on 100% utilization + for i := 0; i < 100; i++ { + um.Start() + clock.Add(1 * time.Second) + + um.Stop() + clock.Add(1 * time.Millisecond) + } + + require.InDelta(t, 0.99, um.avg, 0.01) + + // Converge on 0% utilization + for i := 0; i < 200; i++ { + um.Start() + clock.Add(1 * time.Millisecond) + + um.Stop() + clock.Add(1 * time.Second) + } + + require.InDelta(t, 0.0, um.avg, 0.01) + } diff --git a/pkg/logs/pipeline/go.mod b/pkg/logs/pipeline/go.mod index 1f891bb34d799..9c57c8e1ced62 100644 --- a/pkg/logs/pipeline/go.mod +++ b/pkg/logs/pipeline/go.mod @@ -104,7 +104,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.56.0-rc.3 // indirect github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42 // indirect diff --git a/pkg/logs/processor/go.mod b/pkg/logs/processor/go.mod index 8893206f0a550..0730014d056f8 100644 --- a/pkg/logs/processor/go.mod +++ b/pkg/logs/processor/go.mod @@ -84,7 +84,6 @@ 
require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.56.0-rc.3 // indirect github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42 // indirect diff --git a/pkg/logs/processor/processor.go b/pkg/logs/processor/processor.go index 186b022fc0572..f4fd7649e074e 100644 --- a/pkg/logs/processor/processor.go +++ b/pkg/logs/processor/processor.go @@ -128,7 +128,6 @@ func (p *Processor) Flush(ctx context.Context) { func (p *Processor) run() { defer func() { p.done <- struct{}{} - p.utilization.Cancel() }() for { diff --git a/pkg/logs/sender/batch_strategy.go b/pkg/logs/sender/batch_strategy.go index cfb2ef8655d82..47ccbaf86009b 100644 --- a/pkg/logs/sender/batch_strategy.go +++ b/pkg/logs/sender/batch_strategy.go @@ -107,7 +107,6 @@ func (s *batchStrategy) Start() { defer func() { s.flushBuffer(s.outputChan) flushTicker.Stop() - s.utilization.Cancel() close(s.stopChan) }() for { diff --git a/pkg/logs/sender/go.mod b/pkg/logs/sender/go.mod index f3b64ee87a320..7be87afc0198e 100644 --- a/pkg/logs/sender/go.mod +++ b/pkg/logs/sender/go.mod @@ -88,7 +88,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.56.0-rc.3 // indirect github.com/DataDog/viper v1.13.5 // indirect diff --git a/pkg/logs/sender/sender.go b/pkg/logs/sender/sender.go index 48e9c6b22d936..51ec837383db4 100644 --- a/pkg/logs/sender/sender.go +++ b/pkg/logs/sender/sender.go @@ -152,7 +152,6 @@ func (s *Sender) run() { destSender.Stop() } close(sink) - s.utilization.Cancel() s.done <- struct{}{} } diff --git a/test/otel/go.mod b/test/otel/go.mod index d19f6753c7930..cdace6a7f240a 100644 --- a/test/otel/go.mod +++ b/test/otel/go.mod @@ -171,7 +171,6 @@ require ( github.com/DataDog/datadog-agent/pkg/util/statstracker v0.56.0-rc.3 // indirect github.com/DataDog/datadog-agent/pkg/util/system v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/util/system/socket v0.59.0 // indirect - github.com/DataDog/datadog-agent/pkg/util/utilizationtracker v0.0.0 // indirect github.com/DataDog/datadog-agent/pkg/util/winutil v0.59.0 // indirect github.com/DataDog/datadog-agent/pkg/version v0.57.1 // indirect github.com/DataDog/datadog-api-client-go/v2 v2.26.0 // indirect From 499fc907d381152206a45e68ce54d466622d06ff Mon Sep 17 00:00:00 2001 From: Stuart Geipel Date: Mon, 25 Nov 2024 11:57:14 -0500 Subject: [PATCH 08/12] [ebpfless] Refactor TCP Seq logic to include sequence bump from SYN and FIN (#31382) --- .../connection/ebpfless/tcp_processor.go | 63 +++++++++++++------ .../connection/ebpfless/tcp_processor_test.go | 4 +- .../tracer/connection/ebpfless/tcp_utils.go | 7 +++ pkg/network/tracer/tracer_test.go | 4 +- 4 files changed, 55 insertions(+), 23 deletions(-) diff --git a/pkg/network/tracer/connection/ebpfless/tcp_processor.go 
b/pkg/network/tracer/connection/ebpfless/tcp_processor.go index 6bb89638193ef..009e183e409be 100644 --- a/pkg/network/tracer/connection/ebpfless/tcp_processor.go +++ b/pkg/network/tracer/connection/ebpfless/tcp_processor.go @@ -61,6 +61,35 @@ func NewTCPProcessor() *TCPProcessor { //nolint:revive // TODO } } +// calcNextSeq returns the seq "after" this segment, aka, what the ACK will be once this segment is received +func calcNextSeq(tcp *layers.TCP, payloadLen uint16) uint32 { + nextSeq := tcp.Seq + uint32(payloadLen) + if tcp.SYN || tcp.FIN { + nextSeq++ + } + return nextSeq +} + +func checkInvalidTCP(tcp *layers.TCP) bool { + noFlagsCombo := !tcp.SYN && !tcp.FIN && !tcp.ACK && !tcp.RST + if noFlagsCombo { + // no flags at all (I think this can happen for expanding the TCP window sometimes?) + statsTelemetry.missingTCPFlags.Inc() + return true + } else if tcp.SYN && tcp.FIN { + statsTelemetry.tcpSynAndFin.Inc() + return true + } else if tcp.RST && tcp.SYN { + statsTelemetry.tcpRstAndSyn.Inc() + return true + } else if tcp.RST && tcp.FIN { + statsTelemetry.tcpRstAndFin.Inc() + return true + } + + return false +} + func (t *TCPProcessor) updateSynFlag(conn *network.ConnectionStats, st *connectionState, pktType uint8, tcp *layers.TCP, payloadLen uint16) { //nolint:revive // TODO if tcp.RST { return @@ -85,14 +114,14 @@ func (t *TCPProcessor) updateSynFlag(conn *network.ConnectionStats, st *connecti // updateTcpStats is designed to mirror the stat tracking in the windows driver's handleFlowProtocolTcp // https://github.com/DataDog/datadog-windows-filter/blob/d7560d83eb627117521d631a4c05cd654a01987e/ddfilter/flow/flow_tcp.c#L91 func (t *TCPProcessor) updateTcpStats(conn *network.ConnectionStats, st *connectionState, pktType uint8, tcp *layers.TCP, payloadLen uint16) { //nolint:revive // TODO - payloadSeq := tcp.Seq + uint32(payloadLen) + nextSeq := calcNextSeq(tcp, payloadLen) if pktType == unix.PACKET_OUTGOING { conn.Monotonic.SentPackets++ - if !st.hasSentPacket || isSeqBefore(st.maxSeqSent, payloadSeq) { + if !st.hasSentPacket || isSeqBefore(st.maxSeqSent, nextSeq) { st.hasSentPacket = true conn.Monotonic.SentBytes += uint64(payloadLen) - st.maxSeqSent = payloadSeq + st.maxSeqSent = nextSeq } ackOutdated := !st.hasLocalAck || isSeqBefore(st.lastLocalAck, tcp.Ack) @@ -100,9 +129,9 @@ func (t *TCPProcessor) updateTcpStats(conn *network.ConnectionStats, st *connect // wait until data comes in via SynStateAcked if st.hasLocalAck && st.remoteSynState == SynStateAcked { ackDiff := tcp.Ack - st.lastLocalAck - // if this is ack'ing a fin packet, there is an extra sequence number to cancel out - isFinAck := st.hasRemoteFin && tcp.Ack == st.remoteFinSeq+1 + isFinAck := st.hasRemoteFin && tcp.Ack == st.remoteFinSeq if isFinAck { + // if this is ack'ing a fin packet, there is an extra sequence number to cancel out ackDiff-- } conn.Monotonic.RecvBytes += uint64(ackDiff) @@ -123,21 +152,21 @@ func (t *TCPProcessor) updateTcpStats(conn *network.ConnectionStats, st *connect } func (t *TCPProcessor) updateFinFlag(conn *network.ConnectionStats, st *connectionState, pktType uint8, tcp *layers.TCP, payloadLen uint16) { - payloadSeq := tcp.Seq + uint32(payloadLen) + nextSeq := calcNextSeq(tcp, payloadLen) // update FIN sequence numbers if tcp.FIN { if pktType == unix.PACKET_OUTGOING { st.hasLocalFin = true - st.localFinSeq = payloadSeq + st.localFinSeq = nextSeq } else { st.hasRemoteFin = true - st.remoteFinSeq = payloadSeq + st.remoteFinSeq = nextSeq } } // if both fins have been sent and ack'd, then mark the 
connection closed - localFinIsAcked := st.hasLocalFin && isSeqBefore(st.localFinSeq, st.lastRemoteAck) - remoteFinIsAcked := st.hasRemoteFin && isSeqBefore(st.remoteFinSeq, st.lastLocalAck) + localFinIsAcked := st.hasLocalFin && isSeqBeforeEq(st.localFinSeq, st.lastRemoteAck) + remoteFinIsAcked := st.hasRemoteFin && isSeqBeforeEq(st.remoteFinSeq, st.lastLocalAck) if st.tcpState == ConnStatEstablished && localFinIsAcked && remoteFinIsAcked { *st = connectionState{ tcpState: ConnStatClosed, @@ -156,11 +185,13 @@ func (t *TCPProcessor) updateRstFlag(conn *network.ConnectionStats, st *connecti reason = syscall.ECONNREFUSED } + if st.tcpState == ConnStatEstablished { + conn.Monotonic.TCPClosed++ + } *st = connectionState{ tcpState: ConnStatClosed, } conn.TCPFailures[uint16(reason)]++ - conn.Monotonic.TCPClosed++ } // Process handles a TCP packet, calculating stats and keeping track of its state according to the @@ -179,15 +210,7 @@ func (t *TCPProcessor) Process(conn *network.ConnectionStats, pktType uint8, ip4 }) // skip invalid packets we don't recognize: - noFlagsCombo := !tcp.SYN && !tcp.FIN && !tcp.ACK && !tcp.RST - if noFlagsCombo { - // no flags at all (I think this can happen for expanding the TCP window sometimes?) - statsTelemetry.missingTCPFlags.Inc() - return nil - } - synFinCombo := tcp.SYN && tcp.FIN - if synFinCombo { - statsTelemetry.tcpSynAndFin.Inc() + if checkInvalidTCP(tcp) { return nil } diff --git a/pkg/network/tracer/connection/ebpfless/tcp_processor_test.go b/pkg/network/tracer/connection/ebpfless/tcp_processor_test.go index c88bea17c2361..efbda6ff1037a 100644 --- a/pkg/network/tracer/connection/ebpfless/tcp_processor_test.go +++ b/pkg/network/tracer/connection/ebpfless/tcp_processor_test.go @@ -536,7 +536,7 @@ func TestConnRefusedSyn(t *testing.T) { RecvPackets: 1, Retransmits: 0, TCPEstablished: 0, - TCPClosed: 1, + TCPClosed: 0, } require.Equal(t, expectedStats, f.conn.Monotonic) } @@ -569,7 +569,7 @@ func TestConnRefusedSynAck(t *testing.T) { RecvPackets: 1, Retransmits: 0, TCPEstablished: 0, - TCPClosed: 1, + TCPClosed: 0, } require.Equal(t, expectedStats, f.conn.Monotonic) } diff --git a/pkg/network/tracer/connection/ebpfless/tcp_utils.go b/pkg/network/tracer/connection/ebpfless/tcp_utils.go index 22e7eb976f127..3a02ea3b09e29 100644 --- a/pkg/network/tracer/connection/ebpfless/tcp_utils.go +++ b/pkg/network/tracer/connection/ebpfless/tcp_utils.go @@ -25,10 +25,14 @@ var statsTelemetry = struct { missedTCPConnections telemetry.Counter missingTCPFlags telemetry.Counter tcpSynAndFin telemetry.Counter + tcpRstAndSyn telemetry.Counter + tcpRstAndFin telemetry.Counter }{ telemetry.NewCounter(ebpflessModuleName, "missed_tcp_connections", []string{}, "Counter measuring the number of TCP connections where we missed the SYN handshake"), telemetry.NewCounter(ebpflessModuleName, "missing_tcp_flags", []string{}, "Counter measuring packets encountered with none of SYN, FIN, ACK, RST set"), telemetry.NewCounter(ebpflessModuleName, "tcp_syn_and_fin", []string{}, "Counter measuring packets encountered with SYN+FIN together"), + telemetry.NewCounter(ebpflessModuleName, "tcp_rst_and_syn", []string{}, "Counter measuring packets encountered with RST+SYN together"), + telemetry.NewCounter(ebpflessModuleName, "tcp_rst_and_fin", []string{}, "Counter measuring packets encountered with RST+FIN together"), } const tcpSeqMidpoint = 0x80000000 @@ -86,6 +90,9 @@ func isSeqBefore(prev, cur uint32) bool { // constrain the maximum difference to half the number space return diff > 0 && diff < 
tcpSeqMidpoint
 }
+func isSeqBeforeEq(prev, cur uint32) bool {
+	return prev == cur || isSeqBefore(prev, cur)
+}
 
 func debugPacketDir(pktType uint8) string {
 	switch pktType {
diff --git a/pkg/network/tracer/tracer_test.go b/pkg/network/tracer/tracer_test.go
index 54cb15d3cf2f7..b032aabe44c5f 100644
--- a/pkg/network/tracer/tracer_test.go
+++ b/pkg/network/tracer/tracer_test.go
@@ -1172,7 +1172,9 @@ func (s *TracerSuite) TestUnconnectedUDPSendIPv4() {
 			return cs.DPort == uint16(remotePort)
 		})
 
-		assert.Len(ct, outgoing, 1)
+		if !assert.Len(ct, outgoing, 1) {
+			return
+		}
 		assert.Equal(ct, bytesSent, int(outgoing[0].Monotonic.SentBytes))
 	}, 3*time.Second, 100*time.Millisecond)
 }
 
From 942529bd31dd57427e34c600a2db58a0d5aad7f4 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch
Date: Mon, 25 Nov 2024 18:03:46 +0100
Subject: [PATCH 09/12] usm: Enable event monitor if USM needs it (#31420)

---
 cmd/system-probe/config/adjust_usm.go  | 10 ++++++++++
 cmd/system-probe/config/config.go      |  1 +
 cmd/system-probe/config/config_test.go |  4 ++++
 3 files changed, 15 insertions(+)

diff --git a/cmd/system-probe/config/adjust_usm.go b/cmd/system-probe/config/adjust_usm.go
index d7e164a9020f1..e71679aa592ec 100644
--- a/cmd/system-probe/config/adjust_usm.go
+++ b/cmd/system-probe/config/adjust_usm.go
@@ -10,6 +10,7 @@ import (
 	"runtime"
 
 	"github.com/DataDog/datadog-agent/pkg/config/model"
+	"github.com/DataDog/datadog-agent/pkg/util/log"
 )
 
 const (
@@ -53,6 +54,15 @@ func adjustUSM(cfg model.Config) {
 		applyDefault(cfg, spNS("process_service_inference", "enabled"), false)
 	}
 
+	// Similar to the check in adjustNPM(). The process event data stream and USM have the same
+	// minimum kernel version requirement, but USM's check for that is done
+	// later. This check here prevents the EventMonitorModule from getting
+	// enabled on unsupported kernels by load() in config.go.
+ if cfg.GetBool(smNS("enable_event_stream")) && !ProcessEventDataStreamSupported() { + log.Warn("disabling USM event stream as it is not supported for this kernel version") + cfg.Set(smNS("enable_event_stream"), false, model.SourceAgentRuntime) + } + applyDefault(cfg, spNS("process_service_inference", "use_windows_service_name"), true) applyDefault(cfg, smNS("enable_ring_buffers"), true) applyDefault(cfg, smNS("max_postgres_stats_buffered"), 100000) diff --git a/cmd/system-probe/config/config.go b/cmd/system-probe/config/config.go index 49fd3e49d290c..c67bd69c18aca 100644 --- a/cmd/system-probe/config/config.go +++ b/cmd/system-probe/config/config.go @@ -137,6 +137,7 @@ func load() (*types.Config, error) { if cfg.GetBool(secNS("enabled")) || cfg.GetBool(secNS("fim_enabled")) || cfg.GetBool(evNS("process.enabled")) || + (usmEnabled && cfg.GetBool(smNS("enable_event_stream"))) || (c.ModuleIsEnabled(NetworkTracerModule) && cfg.GetBool(evNS("network_process.enabled")) || gpuEnabled) { c.EnabledModules[EventMonitorModule] = struct{}{} diff --git a/cmd/system-probe/config/config_test.go b/cmd/system-probe/config/config_test.go index 4b44204851a0b..9611f6f9eed87 100644 --- a/cmd/system-probe/config/config_test.go +++ b/cmd/system-probe/config/config_test.go @@ -24,6 +24,7 @@ func TestEventMonitor(t *testing.T) { for i, tc := range []struct { cws, fim, processEvents, networkEvents, gpu bool + usmEvents bool enabled bool }{ {cws: false, fim: false, processEvents: false, networkEvents: false, enabled: false}, @@ -43,6 +44,7 @@ func TestEventMonitor(t *testing.T) { {cws: true, fim: true, processEvents: false, networkEvents: true, enabled: true}, {cws: true, fim: true, processEvents: true, networkEvents: true, enabled: true}, {cws: false, fim: false, processEvents: false, networkEvents: false, gpu: true, enabled: true}, + {usmEvents: true, enabled: true}, } { t.Run(fmt.Sprintf("%d", i), func(t *testing.T) { t.Logf("%+v\n", tc) @@ -52,6 +54,8 @@ func TestEventMonitor(t *testing.T) { t.Setenv("DD_SYSTEM_PROBE_EVENT_MONITORING_NETWORK_PROCESS_ENABLED", strconv.FormatBool(tc.networkEvents)) t.Setenv("DD_SYSTEM_PROBE_NETWORK_ENABLED", strconv.FormatBool(tc.networkEvents)) t.Setenv("DD_GPU_MONITORING_ENABLED", strconv.FormatBool(tc.gpu)) + t.Setenv("DD_SYSTEM_PROBE_SERVICE_MONITORING_ENABLED", strconv.FormatBool(tc.usmEvents)) + t.Setenv("DD_SERVICE_MONITORING_CONFIG_ENABLE_EVENT_STREAM", strconv.FormatBool(tc.usmEvents)) cfg, err := New("/doesnotexist", "") t.Logf("%+v\n", cfg) From 84c20fb34180942675e72452b700e6065c04d0cd Mon Sep 17 00:00:00 2001 From: Ethan Wood-Thomas Date: Mon, 25 Nov 2024 12:51:53 -0500 Subject: [PATCH 10/12] [CONTINT-4499] Include empty layers in container image metadata on containerd (#31384) --- .../collectors/internal/containerd/image.go | 50 +++++++++++++------ .../internal/containerd/image_test.go | 39 +++++++++++++-- 2 files changed, 71 insertions(+), 18 deletions(-) diff --git a/comp/core/workloadmeta/collectors/internal/containerd/image.go b/comp/core/workloadmeta/collectors/internal/containerd/image.go index 7b6bda3fd873d..f7ebff23f92a2 100644 --- a/comp/core/workloadmeta/collectors/internal/containerd/image.go +++ b/comp/core/workloadmeta/collectors/internal/containerd/image.go @@ -441,37 +441,57 @@ func extractPlatform(platform *ocispec.Platform, outImage *workloadmeta.Containe func getLayersWithHistory(ocispecImage ocispec.Image, manifest ocispec.Manifest) []workloadmeta.ContainerImageLayer { var layers []workloadmeta.ContainerImageLayer - // The layers in the manifest 
don't include the history, and the only way to - // match the history with each layer is to rely on the order and take into - // account that some history objects don't have an associated layer - // (emptyLayer = true). - // History is optional in OCI Spec, so we have no guarantee to be able to get it. + // If history is present, we use it to associate additional metadata with each layer. + // Layers marked as "empty" in history are appended before processing the + // corresponding layer. History is optional in the OCI specification, so if no history is available, + // the function still processes all layers. Any remaining empty layers in history that + // do not correspond to a layer are appended at the end. historyIndex := 0 for _, manifestLayer := range manifest.Layers { - // Look for next history point with emptyLayer = false - historyFound := false - for ; historyIndex < len(ocispecImage.History); historyIndex++ { - if !ocispecImage.History[historyIndex].EmptyLayer { - historyFound = true + // Append all empty layers encountered before a non-empty layer + for historyIndex < len(ocispecImage.History) { + history := ocispecImage.History[historyIndex] + if history.EmptyLayer { + layers = append(layers, workloadmeta.ContainerImageLayer{ + History: &history, + }) + historyIndex++ + } else { + // Stop at the first non-empty layer break } } + // Match the non-empty history to this manifest layer, if available + var history *ocispec.History + if historyIndex < len(ocispecImage.History) { + history = &ocispecImage.History[historyIndex] + historyIndex++ + } + + // Create and append the layer with manifest and matched history layer := workloadmeta.ContainerImageLayer{ MediaType: manifestLayer.MediaType, Digest: manifestLayer.Digest.String(), SizeBytes: manifestLayer.Size, URLs: manifestLayer.URLs, + History: history, } - if historyFound { - layer.History = &ocispecImage.History[historyIndex] - historyIndex++ - } - layers = append(layers, layer) } + // Append any remaining empty layers after processing all manifest layers + for historyIndex < len(ocispecImage.History) { + history := ocispecImage.History[historyIndex] + if history.EmptyLayer { + layers = append(layers, workloadmeta.ContainerImageLayer{ + History: &history, + }) + } + historyIndex++ + } + return layers } diff --git a/comp/core/workloadmeta/collectors/internal/containerd/image_test.go b/comp/core/workloadmeta/collectors/internal/containerd/image_test.go index 9f05a421fc937..d75dc38e84bda 100644 --- a/comp/core/workloadmeta/collectors/internal/containerd/image_test.go +++ b/comp/core/workloadmeta/collectors/internal/containerd/image_test.go @@ -147,7 +147,20 @@ func TestGetLayersWithHistory(t *testing.T) { Digest: digest.FromString("foo").String(), SizeBytes: 1, History: &ocispec.History{ - Comment: "not-empty1", + Comment: "not-empty1", + EmptyLayer: false, + }, + }, + { + History: &ocispec.History{ + Comment: "empty1", + EmptyLayer: true, + }, + }, + { + History: &ocispec.History{ + Comment: "empty2", + EmptyLayer: true, }, }, { @@ -155,7 +168,14 @@ func TestGetLayersWithHistory(t *testing.T) { Digest: digest.FromString("bar").String(), SizeBytes: 2, History: &ocispec.History{ - Comment: "not-empty2", + Comment: "not-empty2", + EmptyLayer: false, + }, + }, + { + History: &ocispec.History{ + Comment: "empty3", + EmptyLayer: true, }, }, { @@ -163,7 +183,20 @@ func TestGetLayersWithHistory(t *testing.T) { Digest: digest.FromString("baz").String(), SizeBytes: 3, History: &ocispec.History{ - Comment: "not-empty3", + Comment: 
"not-empty3", + EmptyLayer: false, + }, + }, + { + History: &ocispec.History{ + Comment: "empty4", + EmptyLayer: true, + }, + }, + { + History: &ocispec.History{ + Comment: "empty5", + EmptyLayer: true, }, }, { From b65ed90478795391b52319ae8dab6ec891fb6a85 Mon Sep 17 00:00:00 2001 From: Jennifer Chen <32009013+jennchenn@users.noreply.github.com> Date: Mon, 25 Nov 2024 13:02:11 -0500 Subject: [PATCH 11/12] [pkg/util/cloudproviders/gce] Add host tag for `DD_PROVIDER_KIND` (#31363) --- pkg/config/setup/config.go | 2 ++ pkg/util/cloudproviders/gce/gce_tags.go | 4 ++++ pkg/util/cloudproviders/gce/gce_tags_test.go | 18 ++++++++++++++++++ ...add-provider-kind-tag-518f76ee283bc1ff.yaml | 11 +++++++++++ 4 files changed, 35 insertions(+) create mode 100644 releasenotes/notes/add-provider-kind-tag-518f76ee283bc1ff.yaml diff --git a/pkg/config/setup/config.go b/pkg/config/setup/config.go index 977b2e346a286..4197723b67aa9 100644 --- a/pkg/config/setup/config.go +++ b/pkg/config/setup/config.go @@ -804,6 +804,8 @@ func InitConfig(config pkgconfigmodel.Setup) { config.SetKnown("cluster_name") config.SetKnown("listeners") + config.BindEnv("provider_kind") + // Orchestrator Explorer DCA and core agent config.BindEnvAndSetDefault("orchestrator_explorer.enabled", true) // enabling/disabling the environment variables & command scrubbing from the container specs diff --git a/pkg/util/cloudproviders/gce/gce_tags.go b/pkg/util/cloudproviders/gce/gce_tags.go index 1f5b05011aab5..31c986277bc97 100644 --- a/pkg/util/cloudproviders/gce/gce_tags.go +++ b/pkg/util/cloudproviders/gce/gce_tags.go @@ -101,6 +101,10 @@ func GetTags(ctx context.Context) ([]string, error) { } } + if providerKind := pkgconfigsetup.Datadog().GetString("provider_kind"); providerKind != "" { + tags = append(tags, fmt.Sprintf("provider_kind:%s", providerKind)) + } + // save tags to the cache in case we exceed quotas later cache.Cache.Set(tagsCacheKey, tags, cache.NoExpiration) diff --git a/pkg/util/cloudproviders/gce/gce_tags_test.go b/pkg/util/cloudproviders/gce/gce_tags_test.go index 2eae7092d0b92..a971d4305adb3 100644 --- a/pkg/util/cloudproviders/gce/gce_tags_test.go +++ b/pkg/util/cloudproviders/gce/gce_tags_test.go @@ -57,6 +57,7 @@ var ( "google-compute-enable-pcid:true", "instance-template:projects/111111111111/global/instanceTemplates/gke-test-cluster-default-pool-0012834b", } + expectedTagsWithProviderKind = append(expectedFullTags, "provider_kind:test-provider") ) func mockMetadataRequest(t *testing.T) *httptest.Server { @@ -146,3 +147,20 @@ func TestGetHostTagsWithNonDefaultTagFilters(t *testing.T) { require.NoError(t, err) testTags(t, tags, expectedExcludedTags) } + +func TestGetHostTagsWithProviderKind(t *testing.T) { + ctx := context.Background() + mockConfig := configmock.New(t) + defaultProviderKind := mockConfig.GetString("provider_kind") + defer mockConfig.SetWithoutSource("provider_kind", defaultProviderKind) + + mockConfig.SetWithoutSource("provider_kind", "test-provider") + + server := mockMetadataRequest(t) + defer server.Close() + defer cache.Cache.Delete(tagsCacheKey) + + tags, err := GetTags(ctx) + require.NoError(t, err) + testTags(t, tags, expectedTagsWithProviderKind) +} diff --git a/releasenotes/notes/add-provider-kind-tag-518f76ee283bc1ff.yaml b/releasenotes/notes/add-provider-kind-tag-518f76ee283bc1ff.yaml new file mode 100644 index 0000000000000..208c60445e6c4 --- /dev/null +++ b/releasenotes/notes/add-provider-kind-tag-518f76ee283bc1ff.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined 
when the
+# CHANGELOG.rst is rendered. So the text needs to be worded so that
+# it does not depend on any information only available in another
+# section. This may mean repeating some details, but each section
+# must be readable independently of the other.
+#
+# Each section note must be formatted as reStructuredText.
+---
+enhancements:
+  - |
+    Add new host tag `provider_kind` from the value of `DD_PROVIDER_KIND` for Agents running in GCE.

From a22a1658c9a9ba62aa163c6771523fb36a5a3495 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=C3=A9na=C3=AFc=20Huard?=
Date: Mon, 25 Nov 2024 19:10:31 +0100
Subject: [PATCH 12/12] Check for world writable files in the agent docker
 image (#31410)

---
 Dockerfiles/agent/Dockerfile             |  8 ++---
 Dockerfiles/agent/test_image_contents.py | 42 +++++++++++++++++-------
 2 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/Dockerfiles/agent/Dockerfile b/Dockerfiles/agent/Dockerfile
index b7cc98210c40e..894a29f2d250e 100644
--- a/Dockerfiles/agent/Dockerfile
+++ b/Dockerfiles/agent/Dockerfile
@@ -245,13 +245,11 @@ COPY --from=nosys-seccomp /tmp/nosys.so /opt/lib/nosys.so
 ENV LD_PRELOAD=/opt/lib/nosys.so
 
 # Single entrypoint
-COPY entrypoint.sh /bin/entrypoint.sh
-COPY entrypoint.d /opt/entrypoints
-RUN chmod 755 /bin/entrypoint.sh \
-    && chmod 755 -R /opt/entrypoints
+COPY --chmod=755 entrypoint.sh /bin/entrypoint.sh
+COPY --chmod=755 entrypoint.d /opt/entrypoints
 
 CMD ["/bin/entrypoint.sh"]
 
 FROM release AS test
 
-COPY test_image_contents.py /tmp/test_image_contents.py
+COPY --chmod=755 test_image_contents.py /tmp/test_image_contents.py
 RUN ./tmp/test_image_contents.py && rm -f ./tmp/test_image_contents.py
diff --git a/Dockerfiles/agent/test_image_contents.py b/Dockerfiles/agent/test_image_contents.py
index 22edd4be1a4f4..f67684d149e82 100755
--- a/Dockerfiles/agent/test_image_contents.py
+++ b/Dockerfiles/agent/test_image_contents.py
@@ -1,7 +1,9 @@
 #!/opt/datadog-agent/embedded/bin/python
 
+import grp
 import os
 import os.path
+import pwd
 import stat
 import unittest
 from hashlib import sha256
@@ -47,18 +49,34 @@ def test_files_checksums(self):
             self.assertEqual(sha.hexdigest(), digest, file + " checksum mismatch")
 
     def test_files_permissions(self):
-        def has_write_permissions(path):
-            try:
-                return bool(os.stat(path).st_mode & stat.S_IWOTH)
-            except Exception:
-                return False
-
-        for root, dirs, files in os.walk("/etc"):
-            for name in files:
-                self.assertFalse(has_write_permissions(os.path.join(root, name)))
-            for name in dirs:
-                os.path.join(root, name)
-                self.assertFalse(has_write_permissions(os.path.join(root, name)))
+        for root, dirs, files in os.walk("/"):
+            dirs[:] = filter(
+                lambda dir: not os.path.ismount(os.path.join(root, dir)), dirs
+            )
+
+            for name in dirs + files:
+                f = os.path.join(root, name)
+
+                try:
+                    s = os.stat(f)
+                except FileNotFoundError:
+                    # skip files that disappear mid-walk; `pass` would fall
+                    # through and reference `s` while it is unbound
+                    continue
+                except Exception as e:
+                    self.fail(f"Failed to stat {f}: {e}")
+
+                self.assertFalse(
+                    s.st_mode & (stat.S_IWOTH | stat.S_ISVTX) == stat.S_IWOTH,
+                    f"{f} should not be world-writable",
+                )
+
+                try:
+                    pwd.getpwuid(s.st_uid)
+                except KeyError:
+                    self.fail(f"Unknown user {s.st_uid} for {f}")
+
+                try:
+                    grp.getgrgid(s.st_gid)
+                except KeyError:
+                    self.fail(f"Unknown group {s.st_gid} for {f}")
 
 
 if __name__ == "__main__":
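For context on the permission test above: a path is flagged only when it is world-writable and the sticky bit is not set, since sticky, world-writable directories such as /tmp are expected. A standalone Go sketch of the same check (illustrative only; the root path here is hypothetical, and the patched test walks "/" and also validates file owners and groups):

    package main

    import (
    	"fmt"
    	"io/fs"
    	"os"
    	"path/filepath"
    )

    func main() {
    	// Report world-writable paths that lack the sticky bit, mirroring
    	// the Python check above.
    	root := "/etc"
    	_ = filepath.WalkDir(root, func(path string, _ fs.DirEntry, err error) error {
    		if err != nil {
    			return nil // the entry may have vanished mid-walk; skip it
    		}
    		info, err := os.Lstat(path)
    		if err != nil {
    			return nil
    		}
    		worldWritable := info.Mode().Perm()&0o002 != 0
    		sticky := info.Mode()&fs.ModeSticky != 0
    		if worldWritable && !sticky {
    			fmt.Printf("%s should not be world-writable\n", path)
    		}
    		return nil
    	})
    }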