diff --git a/README.md b/README.md index e19fd47d..239d39a9 100644 --- a/README.md +++ b/README.md @@ -175,6 +175,7 @@ Flags: --print Print parsed packet output, even if the raw packets are being saved to a file with the -w flag -r, --read-file string Read packets from file (which was created with the -w option). e.g. ptcpdump.pcapng -c, --receive-count uint Exit after receiving count packets + -s, --snapshot-length uint32 Snarf snaplen bytes of data from each packet rather than the default of 262144 bytes (default 262144) -v, --verbose count When parsing and printing, produce (slightly more) verbose output --version Print the ptcpdump and libpcap version strings and exit -w, --write-file string Write the raw packets to file rather than parsing and printing them out. They can later be printed with the -r option. Standard output is used if file is '-'. e.g. ptcpdump.pcapng @@ -198,6 +199,7 @@ Flags: | --pname *process_name* | | ✅ | | --container-id *container_id* | | ✅ | | --container-name *container_name* | | ✅ | +| --pod-name *pod_name.namespace* | | ✅ | | -f, --follow-forks | | ✅ | | -- *command [args]* | | ✅ | | --oneline | | ✅ | @@ -237,7 +239,7 @@ Flags: | -O, --no-optimize | ✅ | | | -p, --no-promiscuous-mode | ✅ | ⛔ | | -S, --absolute-tcp-sequence-numbers | ✅ | | -| -s *snaplen*, --snapshot-length=*snaplen* | ✅ | | +| -s *snaplen*, --snapshot-length=*snaplen* | ✅ | ✅ | | -T *type* | ✅ | | | -t | ✅ | ✅ | | -tt | ✅ | | diff --git a/bpf/bpf.go b/bpf/bpf.go index d85b30b7..d5ef2731 100644 --- a/bpf/bpf.go +++ b/bpf/bpf.go @@ -50,14 +50,15 @@ type BPF struct { } type Options struct { - Pid uint32 - Comm [16]int8 - filterComm uint8 - FollowForks uint8 - PcapFilter string - mntns_id uint32 - pidns_id uint32 - netns_id uint32 + Pid uint32 + Comm [16]int8 + filterComm uint8 + FollowForks uint8 + PcapFilter string + mntnsId uint32 + pidnsId uint32 + netnsId uint32 + maxPayloadSize uint32 } func NewBPF() (*BPF, error) { @@ -73,12 +74,13 @@ func NewBPF() (*BPF, error) { } func NewOptions(pid uint, comm string, followForks bool, pcapFilter string, - mntns_id uint32, pidns_id uint32, netns_id uint32) Options { + mntnsId uint32, pidnsId uint32, netnsId uint32, maxPayloadSize uint32) Options { opts := Options{ - Pid: uint32(pid), - mntns_id: mntns_id, - pidns_id: pidns_id, - netns_id: netns_id, + Pid: uint32(pid), + mntnsId: mntnsId, + pidnsId: pidnsId, + netnsId: netnsId, + maxPayloadSize: maxPayloadSize, } opts.Comm = [16]int8{} if len(comm) > 0 { @@ -107,9 +109,10 @@ func (b *BPF) Load(opts Options) error { "filter_comm": opts.Comm, "filter_comm_enable": opts.filterComm, "filter_follow_forks": opts.FollowForks, - "filter_mntns_id": opts.mntns_id, - "filter_netns_id": opts.netns_id, - "filter_pidns_id": opts.pidns_id, + "filter_mntns_id": opts.mntnsId, + "filter_netns_id": opts.netnsId, + "filter_pidns_id": opts.pidnsId, + "max_payload_size": opts.maxPayloadSize, }) if err != nil { return xerrors.Errorf("rewrite constants: %w", err) diff --git a/bpf/bpf_arm64_bpfel.go b/bpf/bpf_arm64_bpfel.go index 759964cc..e5883057 100644 --- a/bpf/bpf_arm64_bpfel.go +++ b/bpf/bpf_arm64_bpfel.go @@ -47,11 +47,7 @@ type BpfPacketEventMetaT struct { _ [4]byte } -type BpfPacketEventT struct { - Meta BpfPacketEventMetaT - Payload [1500]uint8 - _ [4]byte -} +type BpfPacketEventT struct{ Meta BpfPacketEventMetaT } type BpfProcessMetaT struct { Pid uint32 diff --git a/bpf/bpf_arm64_bpfel.o b/bpf/bpf_arm64_bpfel.o index 1d370511..364346a4 100644 Binary files a/bpf/bpf_arm64_bpfel.o and b/bpf/bpf_arm64_bpfel.o differ diff --git a/bpf/bpf_x86_bpfel.go b/bpf/bpf_x86_bpfel.go index be3a8810..1efd0aa9 100644 --- a/bpf/bpf_x86_bpfel.go +++ b/bpf/bpf_x86_bpfel.go @@ -47,11 +47,7 @@ type BpfPacketEventMetaT struct { _ [4]byte } -type BpfPacketEventT struct { - Meta BpfPacketEventMetaT - Payload [1500]uint8 - _ [4]byte -} +type BpfPacketEventT struct{ Meta BpfPacketEventMetaT } type BpfProcessMetaT struct { Pid uint32 diff --git a/bpf/bpf_x86_bpfel.o b/bpf/bpf_x86_bpfel.o index 7fc24061..00ece416 100644 Binary files a/bpf/bpf_x86_bpfel.o and b/bpf/bpf_x86_bpfel.o differ diff --git a/bpf/event.go b/bpf/event.go index e66125e4..df9eedf3 100644 --- a/bpf/event.go +++ b/bpf/event.go @@ -5,6 +5,8 @@ import ( "context" "encoding/binary" "errors" + "io" + "os" "unsafe" "github.com/cilium/ebpf/perf" @@ -13,12 +15,26 @@ import ( "github.com/mozillazg/ptcpdump/internal/log" ) -func (b *BPF) PullPacketEvents(ctx context.Context, chanSize int) (<-chan BpfPacketEventT, error) { - reader, err := perf.NewReader(b.objs.PacketEvents, 1500*1000) +type BpfPacketEventWithPayloadT struct { + BpfPacketEventT + Payload []byte +} + +func (b *BPF) PullPacketEvents(ctx context.Context, chanSize int, maxPacketSize int) (<-chan BpfPacketEventWithPayloadT, error) { + perCPUBuffer := os.Getpagesize() + log.Debugf("pagesize is %d", perCPUBuffer) + perCPUBuffer = perCPUBuffer * 64 + eventSize := int(unsafe.Sizeof(BpfPacketEventT{})) + maxPacketSize + if eventSize >= perCPUBuffer { + perCPUBuffer = perCPUBuffer * (1 + (eventSize / perCPUBuffer)) + } + log.Debugf("use %d as perCPUBuffer", perCPUBuffer) + + reader, err := perf.NewReader(b.objs.PacketEvents, perCPUBuffer) if err != nil { return nil, xerrors.Errorf(": %w", err) } - ch := make(chan BpfPacketEventT, chanSize) + ch := make(chan BpfPacketEventWithPayloadT, chanSize) go func() { defer close(ch) defer reader.Close() @@ -28,7 +44,7 @@ func (b *BPF) PullPacketEvents(ctx context.Context, chanSize int) (<-chan BpfPac return ch, nil } -func (b *BPF) handlePacketEvents(ctx context.Context, reader *perf.Reader, ch chan<- BpfPacketEventT) { +func (b *BPF) handlePacketEvents(ctx context.Context, reader *perf.Reader, ch chan<- BpfPacketEventWithPayloadT) { for { select { case <-ctx.Done(): @@ -41,6 +57,10 @@ func (b *BPF) handlePacketEvents(ctx context.Context, reader *perf.Reader, ch ch if errors.Is(err, perf.ErrClosed) { return } + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + log.Debugf("got EOF error: %s", err) + continue + } log.Errorf("read packet event failed: %s", err) continue } @@ -56,16 +76,25 @@ func (b *BPF) handlePacketEvents(ctx context.Context, reader *perf.Reader, ch ch } } -func parsePacketEvent(rawSample []byte) (*BpfPacketEventT, error) { - event := BpfPacketEventT{} +func parsePacketEvent(rawSample []byte) (*BpfPacketEventWithPayloadT, error) { + event := BpfPacketEventWithPayloadT{} if err := binary.Read(bytes.NewBuffer(rawSample), binary.LittleEndian, &event.Meta); err != nil { return nil, xerrors.Errorf("parse meta: %w", err) } - copy(event.Payload[:], rawSample[unsafe.Offsetof(event.Payload):]) + event.Payload = make([]byte, int(event.Meta.PacketSize)) + copy(event.Payload[:], rawSample[unsafe.Sizeof(BpfPacketEventT{}):]) return &event, nil } func (b *BPF) PullExecEvents(ctx context.Context, chanSize int) (<-chan BpfExecEventT, error) { + perCPUBuffer := os.Getpagesize() + log.Debugf("pagesize is %d", perCPUBuffer) + eventSize := int(unsafe.Sizeof(BpfExecEventT{})) + if eventSize >= perCPUBuffer { + perCPUBuffer = perCPUBuffer * (1 + (eventSize / perCPUBuffer)) + } + log.Debugf("use %d as perCPUBuffer", perCPUBuffer) + reader, err := perf.NewReader(b.objs.ExecEvents, 1024*256) if err != nil { return nil, xerrors.Errorf(": %w", err) @@ -93,6 +122,10 @@ func (b *BPF) handleExecEvents(ctx context.Context, reader *perf.Reader, ch chan if errors.Is(err, perf.ErrClosed) { return } + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + log.Debugf("got EOF error: %s", err) + continue + } log.Errorf("read exec event failed: %s", err) continue } diff --git a/bpf/ptcpdump.c b/bpf/ptcpdump.c index d449aeab..c2998995 100644 --- a/bpf/ptcpdump.c +++ b/bpf/ptcpdump.c @@ -23,7 +23,6 @@ #define TC_ACT_SHOT 2 #define AF_INET 2 #define AF_INET6 10 -#define MAX_PAYLOAD_SIZE 1500 #define INGRESS_PACKET 0 #define EGRESS_PACKET 1 #define EXEC_FILENAME_LEN 512 @@ -36,6 +35,7 @@ static volatile const u8 filter_comm_enable = 0; static volatile const u32 filter_mntns_id = 0; static volatile const u32 filter_netns_id = 0; static volatile const u32 filter_pidns_id = 0; +static volatile const u32 max_payload_size = 0; static const u8 u8_zero = 0; static const u32 u32_zero = 0; @@ -97,7 +97,6 @@ struct packet_event_meta_t { struct packet_event_t { struct packet_event_meta_t meta; - u8 payload[MAX_PAYLOAD_SIZE]; }; struct exec_event_t { @@ -786,11 +785,13 @@ static __always_inline void handle_tc(struct __sk_buff *skb, bool egress) { u64 payload_len = (u64)skb->len; event->meta.packet_size = payload_len; - payload_len = payload_len < MAX_PAYLOAD_SIZE ? payload_len : MAX_PAYLOAD_SIZE; + if (max_payload_size > 0) { + payload_len = payload_len < max_payload_size ? payload_len : max_payload_size; + } event->meta.payload_len = payload_len; bpf_perf_event_output(skb, &packet_events, BPF_F_CURRENT_CPU | (payload_len << 32), event, - offsetof(struct packet_event_t, payload)); + sizeof(struct packet_event_t)); return; } diff --git a/cmd/capture.go b/cmd/capture.go index a2f854ea..3d42912f 100644 --- a/cmd/capture.go +++ b/cmd/capture.go @@ -67,7 +67,7 @@ func capture(ctx context.Context, stop context.CancelFunc, opts Options) error { } defer bf.Close() - packetEvensCh, err := bf.PullPacketEvents(ctx, int(opts.eventChanSize)) + packetEvensCh, err := bf.PullPacketEvents(ctx, int(opts.eventChanSize), int(opts.snapshotLength)) if err != nil { return err } @@ -113,11 +113,13 @@ func headerTips(opts Options) { if len(opts.ifaces) > 1 { interfaces = fmt.Sprintf("[%s]", strings.Join(opts.ifaces, ", ")) } + msg := fmt.Sprintf("capturing on %s, link-type EN10MB (Ethernet), snapshot length %d bytes", + interfaces, opts.snapshotLength) if opts.verbose < 1 { log.Warn("ptcpdump: verbose output suppressed, use -v[v]... for verbose output") - log.Warnf("capturing on %s, link-type EN10MB (Ethernet)", interfaces) + log.Warn(msg) } else { - log.Warnf("tcpdump: capturing on %s, link-type EN10MB (Ethernet)", interfaces) + log.Warnf("tcpdump: %s", msg) } } @@ -149,34 +151,34 @@ func getCaptureCounts(bf *bpf.BPF, c *consumer.PacketEventConsumer) []string { func getCurrentConnects(ctx context.Context, pcache *metadata.ProcessCache, opts Options) []metadata.Connection { var pids []int - var filter_pid bool + var filterPid bool if opts.pid != 0 { - filter_pid = true + filterPid = true pids = append(pids, int(opts.pid)) } if opts.comm != "" { - filter_pid = true + filterPid = true ps := pcache.GetPidsByComm(opts.comm) pids = append(pids, ps...) } - if opts.pidns_id > 0 { - filter_pid = true - ps := pcache.GetPidsByPidNsId(int64(opts.pidns_id)) + if opts.pidnsId > 0 { + filterPid = true + ps := pcache.GetPidsByPidNsId(int64(opts.pidnsId)) pids = append(pids, ps...) } - if opts.mntns_id > 0 { - filter_pid = true - ps := pcache.GetPidsByPidNsId(int64(opts.mntns_id)) + if opts.mntnsId > 0 { + filterPid = true + ps := pcache.GetPidsByPidNsId(int64(opts.mntnsId)) pids = append(pids, ps...) } - if opts.netns_id > 0 { - filter_pid = true - ps := pcache.GetPidsByPidNsId(int64(opts.netns_id)) + if opts.netnsId > 0 { + filterPid = true + ps := pcache.GetPidsByPidNsId(int64(opts.netnsId)) pids = append(pids, ps...) } - if filter_pid { + if filterPid { if len(pids) == 0 { return nil } diff --git a/cmd/container.go b/cmd/container.go index 37caa253..389315f8 100644 --- a/cmd/container.go +++ b/cmd/container.go @@ -56,13 +56,13 @@ func applyContainerFilter(ctx context.Context, opts *Options) (*metadata.Contain for _, container := range containers { log.Debugf("filter by container %#v", container) if container.PidNamespace > 0 && container.PidNamespace != metadata.HostPidNs { - opts.pidns_id = uint32(container.PidNamespace) + opts.pidnsId = uint32(container.PidNamespace) } if container.MountNamespace > 0 && container.MountNamespace != metadata.HostMntNs { - opts.mntns_id = uint32(container.MountNamespace) + opts.mntnsId = uint32(container.MountNamespace) } if container.NetworkNamespace > 0 && container.NetworkNamespace != metadata.HostNetNs { - opts.netns_id = uint32(container.NetworkNamespace) + opts.netnsId = uint32(container.NetworkNamespace) } opts.followForks = true } diff --git a/cmd/ebpf.go b/cmd/ebpf.go index 334c5ee3..41ed30a8 100644 --- a/cmd/ebpf.go +++ b/cmd/ebpf.go @@ -26,7 +26,7 @@ func attachHooks(currentConns []metadata.Connection, opts Options) (*bpf.BPF, er return nil, err } bpfopts := bpf.NewOptions(opts.pid, opts.comm, opts.followForks, opts.pcapFilter, - opts.mntns_id, opts.pidns_id, opts.netns_id) + opts.mntnsId, opts.pidnsId, opts.netnsId, opts.snapshotLength) if err := bf.Load(bpfopts); err != nil { return nil, err } diff --git a/cmd/options.go b/cmd/options.go index bd9abc9a..f5db3699 100644 --- a/cmd/options.go +++ b/cmd/options.go @@ -43,6 +43,7 @@ type Options struct { delayBeforeHandlePacketEvents time.Duration execEventsWorkerNumber uint logLevel string + snapshotLength uint32 dockerEndpoint string containerdEndpoint string @@ -50,9 +51,9 @@ type Options struct { subProgArgs []string - mntns_id uint32 - netns_id uint32 - pidns_id uint32 + mntnsId uint32 + netnsId uint32 + pidnsId uint32 } func (o Options) filterByContainer() bool { diff --git a/cmd/root.go b/cmd/root.go index 0b0567cb..ac2d0417 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -96,6 +96,8 @@ func init() { "Address of CRI container runtime service "+ fmt.Sprintf("(default: uses in order the first successful one of [%s])", strings.Join(getDefaultCriRuntimeEndpoint(), ", "))) + rootCmd.Flags().Uint32VarP(&opts.snapshotLength, "snapshot-length", "s", 262144, + "Snarf snaplen bytes of data from each packet rather than the default of 262144 bytes") } diff --git a/internal/consumer/net.go b/internal/consumer/net.go index 0290f716..ea40699b 100644 --- a/internal/consumer/net.go +++ b/internal/consumer/net.go @@ -23,7 +23,7 @@ func NewPacketEventConsumer(writers []writer.PacketWriter) *PacketEventConsumer } } -func (c *PacketEventConsumer) Start(ctx context.Context, ch <-chan bpf.BpfPacketEventT, maxPacketCount uint) { +func (c *PacketEventConsumer) Start(ctx context.Context, ch <-chan bpf.BpfPacketEventWithPayloadT, maxPacketCount uint) { for { select { case <-ctx.Done(): @@ -38,7 +38,7 @@ func (c *PacketEventConsumer) Start(ctx context.Context, ch <-chan bpf.BpfPacket } } -func (c *PacketEventConsumer) handlePacketEvent(pt bpf.BpfPacketEventT) { +func (c *PacketEventConsumer) handlePacketEvent(pt bpf.BpfPacketEventWithPayloadT) { pevent, err := event.ParsePacketEvent(c.devices, pt) if err != nil { log.Errorf("[PacketEventConsumer] parse event failed: %s", err) diff --git a/internal/event/net.go b/internal/event/net.go index 863fb7c3..bdadceee 100644 --- a/internal/event/net.go +++ b/internal/event/net.go @@ -34,7 +34,7 @@ type Packet struct { CgroupName string } -func ParsePacketEvent(devices map[int]dev.Device, event bpf.BpfPacketEventT) (*Packet, error) { +func ParsePacketEvent(devices map[int]dev.Device, event bpf.BpfPacketEventWithPayloadT) (*Packet, error) { var p Packet if t, err := convertBpfKTimeNs(event.Meta.Timestamp); err != nil { log.Errorf("convert bpf time failed: %s", err)