diff --git a/plugins/in_ebpf/in_ebpf.c b/plugins/in_ebpf/in_ebpf.c
index bf38838f8a6..88b0e38689c 100644
--- a/plugins/in_ebpf/in_ebpf.c
+++ b/plugins/in_ebpf/in_ebpf.c
@@ -262,7 +262,7 @@ static struct flb_config_map config_map[] = {
     {
      FLB_CONFIG_MAP_STR, "Trace", NULL,
      FLB_CONFIG_MAP_MULT, FLB_FALSE, 0,
-     "Set the eBPF trace to enable (for example, bind, exec, malloc, signal, vfs, tcp). Can be set multiple times"
+     "Set the eBPF trace to enable (for example, bind, dns, exec, malloc, signal, vfs, tcp). Can be set multiple times"
     },
     /* EOF */
     {0}
diff --git a/plugins/in_ebpf/traces/dns/bpf.c b/plugins/in_ebpf/traces/dns/bpf.c
new file mode 100644
index 00000000000..9a944bec35d
--- /dev/null
+++ b/plugins/in_ebpf/traces/dns/bpf.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include
+
+#define _LINUX_TYPES_H
+#define _LINUX_POSIX_TYPES_H
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "common/events.h"
+
+#ifndef AF_INET
+#define AF_INET 2
+#endif
+#ifndef AF_INET6
+#define AF_INET6 10
+#endif
+
+#ifndef DNS_PORT
+#define DNS_PORT 53
+#endif
+
+/* Size in bytes of the fixed DNS message header. */
+#define DNS_HEADER_SIZE 12
+
+/*
+ * Key of dns_queries: one in-flight query per (pid, fd, txid).
+ * The struct has trailing padding, so it must be fully zeroed before
+ * it is used as a hash key.
+ */
+struct dns_query_key {
+    __u32 pid;
+    __s32 fd;
+    __u16 txid;
+};
+
+/* Key of dns_sockets: a socket that connected to a DNS port. */
+struct dns_socket_key {
+    __u32 pid;
+    __s32 fd;
+};
+
+/* Query data kept between the send and the matching response. */
+struct dns_query_state {
+    __u64 start_ns;
+    __u64 mntns_id;
+    __u16 txid;
+    __u16 query_raw_len;
+    __u8 query_raw[DNS_QUERY_RAW_MAX];
+};
+
+/* recvfrom() arguments saved at syscall entry for the exit probe. */
+struct dns_recv_args {
+    int fd;
+    const void *buf;
+};
+
+/* connect() arguments saved at syscall entry for the exit probe. */
+struct dns_connect_args {
+    int fd;
+    __u16 family;
+    __u16 port;
+};
+
+/* In-flight queries; LRU so unanswered queries get evicted. */
+struct {
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
+    __uint(max_entries, 16384);
+    __type(key, struct dns_query_key);
+    __type(value, struct dns_query_state);
+} dns_queries SEC(".maps");
+
+/* Per-thread recvfrom() arguments, keyed by tid. */
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 16384);
+    __type(key, __u32);
+    __type(value, struct dns_recv_args);
+} dns_recv SEC(".maps");
+
+/* Per-thread connect() arguments, keyed by tid. */
+struct {
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(max_entries, 16384);
+    __type(key, __u32);
+    __type(value, struct dns_connect_args);
+} dns_connect_pending SEC(".maps");
+
+/*
+ * Sockets connected to a DNS port, mapped to the mount namespace id seen
+ * at connect time. LRU so that entries left behind by processes that exit
+ * without calling close() cannot fill the map.
+ */
+struct {
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
+    __uint(max_entries, 16384);
+    __type(key, struct dns_socket_key);
+    __type(value, __u64);
+} dns_sockets SEC(".maps");
+
+GADGET_TRACER_MAP(events, 1024 * 256);
+
+/* Fill the common event header from the current task. */
+static __always_inline void fill_common(struct event *event, __u64 mntns_id)
+{
+    __u64 pid_tgid;
+    __u64 uid_gid;
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    uid_gid = bpf_get_current_uid_gid();
+
+    event->common.timestamp_raw = bpf_ktime_get_boot_ns();
+    event->common.pid = (__u32) (pid_tgid >> 32);
+    event->common.tid = (__u32) pid_tgid;
+    event->common.uid = (__u32) uid_gid;
+    event->common.gid = (__u32) (uid_gid >> 32);
+    event->common.mntns_id = mntns_id;
+    bpf_get_current_comm(event->common.comm, sizeof(event->common.comm));
+}
+
+/*
+ * Read the DNS header at `payload` and accept it only if it is a query
+ * (QR bit clear) with at least one question. Returns 0 and sets *txid on
+ * success, -1 otherwise.
+ */
+static __always_inline int parse_dns_query_header(const void *payload, __u64 len,
+                                                  __u16 *txid)
+{
+    __u8 header[DNS_HEADER_SIZE];
+    __u16 flags;
+    __u16 qdcount;
+
+    if (len < DNS_HEADER_SIZE) {
+        return -1;
+    }
+
+    if (bpf_probe_read_user(header, sizeof(header), payload) != 0) {
+        return -1;
+    }
+
+    *txid = ((__u16) header[0] << 8) | header[1];
+    flags = ((__u16) header[2] << 8) | header[3];
+    qdcount = ((__u16) header[4] << 8) | header[5];
+
+    if ((flags & 0x8000) != 0 || qdcount == 0) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Read the DNS header at `payload` and accept it only if it is a response
+ * (QR bit set). Returns 0 and sets *txid and *rcode on success, -1 otherwise.
+ */
+static __always_inline int parse_dns_response(const void *payload, __u64 len,
+                                              __u16 *txid, __u8 *rcode)
+{
+    __u8 header[DNS_HEADER_SIZE];
+    __u16 flags;
+
+    if (len < DNS_HEADER_SIZE) {
+        return -1;
+    }
+
+    if (bpf_probe_read_user(header, sizeof(header), payload) != 0) {
+        return -1;
+    }
+
+    *txid = ((__u16) header[0] << 8) | header[1];
+    flags = ((__u16) header[2] << 8) | header[3];
+
+    if ((flags & 0x8000) == 0) {
+        return -1;
+    }
+
+    *rcode = (__u8) (flags & 0x000f);
+
+    return 0;
+}
+
+/* Return 1 if the user-space sockaddr is IPv4/IPv6 with port DNS_PORT. */
+static __always_inline int is_dns_destination(const struct sockaddr *addr, __u32 addrlen)
+{
+    struct sockaddr_in dst;
+    struct sockaddr_in6 dst6;
+
+    if (!addr) {
+        return 0;
+    }
+
+    if (addrlen >= sizeof(struct sockaddr_in)) {
+        if (bpf_probe_read_user(&dst, sizeof(dst), addr) == 0) {
+            if (dst.sin_family == AF_INET && bpf_ntohs(dst.sin_port) == DNS_PORT) {
+                return 1;
+            }
+        }
+    }
+
+    if (addrlen < sizeof(struct sockaddr_in6)) {
+        return 0;
+    }
+
+    if (bpf_probe_read_user(&dst6, sizeof(dst6), addr) != 0) {
+        return 0;
+    }
+
+    if (dst6.sin6_family != AF_INET6 || bpf_ntohs(dst6.sin6_port) != DNS_PORT) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Return 1 if family is AF_INET/AF_INET6 and port is DNS_PORT. */
+static __always_inline int is_dns_family_port(__u16 family, __u16 port)
+{
+    if ((family != AF_INET && family != AF_INET6) || port != DNS_PORT) {
+        return 0;
+    }
+
+    return 1;
+}
+
+/* Save the fd, address family and port passed to connect(). */
+SEC("tracepoint/syscalls/sys_enter_connect")
+int trace_dns_connect_enter(struct syscall_trace_enter *ctx)
+{
+    __u32 tid;
+    __u64 pid_tgid;
+    struct dns_connect_args args;
+    __u16 family;
+    __u16 port;
+    struct sockaddr sa;
+    struct sockaddr_in sa4;
+    struct sockaddr_in6 sa6;
+
+    args.fd = (int) ctx->args[0];
+    family = 0;
+    port = 0;
+
+    if (!ctx->args[1]) {
+        return 0;
+    }
+
+    if (bpf_probe_read_user(&sa, sizeof(sa), (const void *) ctx->args[1]) != 0) {
+        return 0;
+    }
+
+    family = sa.sa_family;
+    if (family == AF_INET && (__u32) ctx->args[2] >= sizeof(struct sockaddr_in)) {
+        if (bpf_probe_read_user(&sa4, sizeof(sa4), (const void *) ctx->args[1]) == 0) {
+            port = bpf_ntohs(sa4.sin_port);
+        }
+    }
+    else if (family == AF_INET6 && (__u32) ctx->args[2] >= sizeof(struct sockaddr_in6)) {
+        if (bpf_probe_read_user(&sa6, sizeof(sa6), (const void *) ctx->args[1]) == 0) {
+            port = bpf_ntohs(sa6.sin6_port);
+        }
+    }
+
+    args.family = family;
+    args.port = port;
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    tid = (__u32) pid_tgid;
+
+    bpf_map_update_elem(&dns_connect_pending, &tid, &args, BPF_ANY);
+
+    return 0;
+}
+
+/*
+ * On connect() exit, remember the socket in dns_sockets if it connected
+ * successfully to a DNS port; otherwise forget any previous entry.
+ */
+SEC("tracepoint/syscalls/sys_exit_connect")
+int trace_dns_connect_exit(struct syscall_trace_exit *ctx)
+{
+    __u32 tid;
+    __u32 pid;
+    __u64 pid_tgid;
+    __u64 mntns_id;
+    __s64 ret;
+    struct dns_connect_args *args;
+    struct dns_socket_key key;
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    tid = (__u32) pid_tgid;
+    pid = (__u32) (pid_tgid >> 32);
+
+    args = bpf_map_lookup_elem(&dns_connect_pending, &tid);
+    if (!args) {
+        return 0;
+    }
+
+    ret = ctx->ret;
+    key.pid = pid;
+    key.fd = args->fd;
+
+    if (ret == 0 && is_dns_family_port(args->family, args->port)) {
+        mntns_id = gadget_get_mntns_id();
+        if (!gadget_should_discard_mntns_id(mntns_id)) {
+            bpf_map_update_elem(&dns_sockets, &key, &mntns_id, BPF_ANY);
+        }
+    }
+    else {
+        bpf_map_delete_elem(&dns_sockets, &key);
+    }
+
+    bpf_map_delete_elem(&dns_connect_pending, &tid);
+
+    return 0;
+}
+
+/* Drop the socket from dns_sockets when its fd is closed. */
+SEC("tracepoint/syscalls/sys_enter_close")
+int trace_dns_close_enter(struct syscall_trace_enter *ctx)
+{
+    __u64 pid_tgid;
+    __u32 pid;
+    struct dns_socket_key key;
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    pid = (__u32) (pid_tgid >> 32);
+
+    key.pid = pid;
+    key.fd = (__s32) ((int) ctx->args[0]);
+
+    bpf_map_delete_elem(&dns_sockets, &key);
+
+    return 0;
+}
+
+/*
+ * Record an outgoing DNS query (explicit DNS destination or a socket
+ * tracked in dns_sockets), store it in dns_queries and emit a query event.
+ */
+SEC("tracepoint/syscalls/sys_enter_sendto")
+int trace_dns_sendto_enter(struct syscall_trace_enter *ctx)
+{
+    __u64 pid_tgid;
+    __u32 pid;
+    __u16 txid;
+    __u16 raw_len;
+    __u64 mntns_id;
+    __u64 len;
+    int fd;
+    __u32 addrlen;
+    const struct sockaddr *addr;
+    const void *payload;
+    struct dns_query_key key;
+    struct dns_socket_key socket_key;
+    __u64 *socket_mntns_id;
+    struct dns_query_state state = {0};
+    struct event *event;
+
+    mntns_id = gadget_get_mntns_id();
+    if (gadget_should_discard_mntns_id(mntns_id)) {
+        return 0;
+    }
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    pid = (__u32) (pid_tgid >> 32);
+
+    fd = (int) ctx->args[0];
+    payload = (const void *) ctx->args[1];
+    len = (__u64) ctx->args[2];
+    addr = (const struct sockaddr *) ctx->args[4];
+    addrlen = (__u32) ctx->args[5];
+
+    if (!payload || len < DNS_HEADER_SIZE) {
+        return 0;
+    }
+
+    socket_key.pid = pid;
+    socket_key.fd = fd;
+    socket_mntns_id = bpf_map_lookup_elem(&dns_sockets, &socket_key);
+
+    if (!is_dns_destination(addr, addrlen) && !socket_mntns_id) {
+        return 0;
+    }
+
+    if (parse_dns_query_header(payload, len, &txid) != 0) {
+        return 0;
+    }
+
+    /* Zero the padding so equal keys hash and compare equal. */
+    __builtin_memset(&key, 0, sizeof(key));
+    key.pid = pid;
+    key.fd = fd;
+    key.txid = txid;
+
+    state.start_ns = bpf_ktime_get_ns();
+    if (socket_mntns_id) {
+        state.mntns_id = *socket_mntns_id;
+    }
+    else {
+        state.mntns_id = mntns_id;
+    }
+    state.txid = txid;
+
+    raw_len = (__u16) (len > DNS_QUERY_RAW_MAX ? DNS_QUERY_RAW_MAX : len);
+    state.query_raw_len = raw_len;
+
+    if (raw_len > 0) {
+        if (bpf_probe_read_user(state.query_raw, raw_len, payload) != 0) {
+            return 0;
+        }
+    }
+
+    bpf_map_update_elem(&dns_queries, &key, &state, BPF_ANY);
+
+    event = gadget_reserve_buf(&events, sizeof(*event));
+    if (!event) {
+        return 0;
+    }
+
+    fill_common(event, state.mntns_id);
+    event->type = EVENT_TYPE_DNS;
+    event->details.dns.txid = txid;
+    event->details.dns.query_type = 0;
+    event->details.dns.rcode = 0;
+    event->details.dns.response = 0;
+    event->details.dns.latency_ns = 0;
+    event->details.dns.error_raw = 0;
+    event->details.dns.query[0] = '\0';
+    event->details.dns.query_raw_len = state.query_raw_len;
+    __builtin_memcpy(event->details.dns.query_raw,
+                     state.query_raw,
+                     sizeof(event->details.dns.query_raw));
+
+    gadget_submit_buf(ctx, &events, event, sizeof(*event));
+
+    return 0;
+}
+
+/* Save the recvfrom() fd and buffer pointer for the exit probe. */
+SEC("tracepoint/syscalls/sys_enter_recvfrom")
+int trace_dns_recvfrom_enter(struct syscall_trace_enter *ctx)
+{
+    __u32 tid;
+    __u64 pid_tgid;
+    struct dns_recv_args args;
+
+    /* Zero the padding between fd and buf before storing the value. */
+    __builtin_memset(&args, 0, sizeof(args));
+    args.fd = (int) ctx->args[0];
+    args.buf = (const void *) ctx->args[1];
+
+    if (!args.buf) {
+        return 0;
+    }
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    tid = (__u32) pid_tgid;
+
+    bpf_map_update_elem(&dns_recv, &tid, &args, BPF_ANY);
+
+    return 0;
+}
+
+/*
+ * On recvfrom() exit, match a DNS response with its pending query and
+ * emit a response event carrying rcode and latency.
+ */
+SEC("tracepoint/syscalls/sys_exit_recvfrom")
+int trace_dns_recvfrom_exit(struct syscall_trace_exit *ctx)
+{
+    __u32 tid;
+    __u32 pid;
+    __u64 now_ns;
+    __u64 pid_tgid;
+    __s64 ret;
+    __u16 txid;
+    __u8 rcode;
+    struct dns_recv_args *args;
+    struct dns_query_key key;
+    struct dns_query_state *state;
+    struct event *event;
+
+    pid_tgid = bpf_get_current_pid_tgid();
+    tid = (__u32) pid_tgid;
+    pid = (__u32) (pid_tgid >> 32);
+
+    args = bpf_map_lookup_elem(&dns_recv, &tid);
+    if (!args) {
+        return 0;
+    }
+
+    ret = ctx->ret;
+    if (ret <= 0) {
+        bpf_map_delete_elem(&dns_recv, &tid);
+        return 0;
+    }
+
+    if (parse_dns_response(args->buf, (__u64) ret, &txid, &rcode) != 0) {
+        bpf_map_delete_elem(&dns_recv, &tid);
+        return 0;
+    }
+
+    /* Must be built exactly like the key stored by the sendto probe. */
+    __builtin_memset(&key, 0, sizeof(key));
+    key.pid = pid;
+    key.fd = args->fd;
+    key.txid = txid;
+
+    state = bpf_map_lookup_elem(&dns_queries, &key);
+    if (!state) {
+        bpf_map_delete_elem(&dns_recv, &tid);
+        return 0;
+    }
+
+    event = gadget_reserve_buf(&events, sizeof(*event));
+    if (!event) {
+        bpf_map_delete_elem(&dns_queries, &key);
+        bpf_map_delete_elem(&dns_recv, &tid);
+        return 0;
+    }
+
+    now_ns = bpf_ktime_get_ns();
+
+    fill_common(event, state->mntns_id);
+    event->type = EVENT_TYPE_DNS;
+    event->details.dns.txid = txid;
+    event->details.dns.query_type = 0;
+    event->details.dns.rcode = rcode;
+    event->details.dns.response = 1;
+    event->details.dns.latency_ns = now_ns - state->start_ns;
+    event->details.dns.error_raw = 0;
+    event->details.dns.query[0] = '\0';
+    event->details.dns.query_raw_len = state->query_raw_len;
+    __builtin_memcpy(event->details.dns.query_raw,
+                     state->query_raw,
+                     sizeof(event->details.dns.query_raw));
+
+    gadget_submit_buf(ctx, &events, event, sizeof(*event));
+
+    bpf_map_delete_elem(&dns_queries, &key);
+    bpf_map_delete_elem(&dns_recv, &tid);
+
+    return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/plugins/in_ebpf/traces/dns/handler.c b/plugins/in_ebpf/traces/dns/handler.c
new file mode 100644
index 00000000000..877202c0f53
--- /dev/null
+++ b/plugins/in_ebpf/traces/dns/handler.c
@@ -0,0 +1,238 @@
+#include
+#include
+#include
+#include
+
+#include "common/events.h"
+#include "common/event_context.h"
+#include "common/encoder.h"
+
+#include "handler.h"
+
+#define DNS_HEADER_SIZE 12
+
+/* Placeholder name for responses whose query cannot be decoded. */
+#define DNS_UNKNOWN_NAME "<unknown>"
+
+/*
+ * Decode the first question of a raw DNS message into a dotted name.
+ *
+ * raw/raw_len: DNS message starting at the header.
+ * out/out_len: destination buffer, NUL-terminated on success.
+ * query_type:  optional, receives the 16-bit type that follows the name.
+ *
+ * Compressed labels are rejected. Returns 0 on success, -1 on malformed or
+ * truncated input or if the name does not fit in out.
+ */
+static int decode_dns_query_name(const uint8_t *raw, size_t raw_len, char *out, size_t out_len,
+                                 uint16_t *query_type)
+{
+    size_t cursor;
+    size_t out_i;
+    size_t label_end;
+    uint8_t label_len;
+
+    if (!raw || raw_len < DNS_HEADER_SIZE || !out || out_len == 0) {
+        return -1;
+    }
+
+    cursor = DNS_HEADER_SIZE;
+    out_i = 0;
+
+    while (cursor < raw_len) {
+        label_len = raw[cursor++];
+
+        if (label_len == 0) {
+            break;
+        }
+
+        if ((label_len & 0xc0) != 0 || cursor + label_len > raw_len) {
+            return -1;
+        }
+
+        if (out_i > 0) {
+            if (out_i + 1 >= out_len) {
+                return -1;
+            }
+            out[out_i++] = '.';
+        }
+
+        label_end = cursor + label_len;
+        while (cursor < label_end) {
+            if (out_i + 1 >= out_len) {
+                return -1;
+            }
+            out[out_i++] = (char) raw[cursor++];
+        }
+    }
+
+    if (out_i == 0) {
+        if (out_len < 2) {
+            return -1;
+        }
+        out[out_i++] = '.';
+    }
+
+    out[out_i] = '\0';
+
+    if (cursor + 4 > raw_len) {
+        return -1;
+    }
+
+    if (query_type) {
+        *query_type = (uint16_t) ((raw[cursor] << 8) | raw[cursor + 1]);
+    }
+
+    return 0;
+}
+
+/*
+ * Encode a DNS event as a log record.
+ *
+ * Returns 0 on success, -1 on encoder failure (the record is rolled back)
+ * and -2 when a query event cannot be decoded and must be skipped.
+ */
+int encode_dns_event(struct flb_input_instance *ins,
+                     struct flb_log_event_encoder *log_encoder,
+                     const struct event *ev)
+{
+    char query_name[DNS_NAME_MAX];
+    uint16_t query_type;
+    int ret;
+    (void) ins;
+
+    query_name[0] = '\0';
+    query_type = ev->details.dns.query_type;
+
+    if (ev->details.dns.query_raw_len > 0 &&
+        ev->details.dns.query_raw_len <= DNS_QUERY_RAW_MAX) {
+        if (decode_dns_query_name(ev->details.dns.query_raw,
+                                  ev->details.dns.query_raw_len,
+                                  query_name,
+                                  sizeof(query_name),
+                                  &query_type) != 0) {
+            if (ev->details.dns.response == 0) {
+                return -2;
+            }
+
+            memcpy(query_name, DNS_UNKNOWN_NAME, sizeof(DNS_UNKNOWN_NAME));
+        }
+    }
+    else {
+        if (ev->details.dns.response == 0) {
+            return -2;
+        }
+
+        memcpy(query_name, DNS_UNKNOWN_NAME, sizeof(DNS_UNKNOWN_NAME));
+    }
+
+    if (ev->details.dns.response == 0 && query_type == 0) {
+        return -2;
+    }
+
+    ret = flb_log_event_encoder_begin_record(log_encoder);
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        return -1;
+    }
+
+    ret = encode_common_fields(log_encoder, ev);
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        flb_log_event_encoder_rollback_record(log_encoder);
+        return -1;
+    }
+
+    ret = flb_log_event_encoder_append_body_cstring(log_encoder, "query");
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, query_name);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "query_type");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint16(log_encoder, query_type);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "txid");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint16(log_encoder, ev->details.dns.txid);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "response");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint8(log_encoder, ev->details.dns.response);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "rcode");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint8(log_encoder, ev->details.dns.rcode);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "latency_ns");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint64(log_encoder, ev->details.dns.latency_ns);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "error_raw");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_int32(log_encoder, ev->details.dns.error_raw);
+    }
+
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        flb_log_event_encoder_rollback_record(log_encoder);
+        return -1;
+    }
+
+    ret = flb_log_event_encoder_commit_record(log_encoder);
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Ring buffer callback: encode one DNS event and append it to the input. */
+int trace_dns_handler(void *ctx, void *data, size_t data_sz)
+{
+    struct trace_event_context *event_ctx;
+    struct event *ev;
+    struct flb_log_event_encoder *encoder;
+    int ret;
+
+    event_ctx = (struct trace_event_context *) ctx;
+    ev = (struct event *) data;
+    encoder = event_ctx->log_encoder;
+
+    if (data_sz < sizeof(struct event) || ev->type != EVENT_TYPE_DNS) {
+        return -1;
+    }
+
+    ret = encode_dns_event(event_ctx->ins, encoder, ev);
+    if (ret == -2) {
+        return 0;
+    }
+    if (ret != 0) {
+        flb_log_event_encoder_reset(encoder);
+        return -1;
+    }
+
+    ret = flb_input_log_append(event_ctx->ins, NULL, 0,
+                               encoder->output_buffer,
+                               encoder->output_length);
+    flb_log_event_encoder_reset(encoder);
+    if (ret == -1) {
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/plugins/in_ebpf/traces/dns/handler.h b/plugins/in_ebpf/traces/dns/handler.h
new file mode 100644
index 00000000000..df9c834e6fb
--- /dev/null
+++ b/plugins/in_ebpf/traces/dns/handler.h
@@ -0,0 +1,14 @@
+#ifndef TRACE_DNS_HANDLER_H
+#define TRACE_DNS_HANDLER_H
+
+#include
+#include
+
+#include "common/events.h"
+
+int trace_dns_handler(void *ctx, void *data, size_t data_sz);
+int encode_dns_event(struct flb_input_instance *ins,
+                     struct flb_log_event_encoder *log_encoder,
+                     const struct event *ev);
+
+#endif
diff --git a/plugins/in_ebpf/traces/includes/common/encoder.h b/plugins/in_ebpf/traces/includes/common/encoder.h
index 91d7b4379e2..bb89b0810ba 100644
--- a/plugins/in_ebpf/traces/includes/common/encoder.h
+++ b/plugins/in_ebpf/traces/includes/common/encoder.h
@@ -25,6 +25,10 @@ static inline char *event_type_to_string(enum event_type type) {
         return "accept";
     case EVENT_TYPE_CONNECT:
         return "connect";
+    case EVENT_TYPE_DNS:
+        return "dns";
+    case EVENT_TYPE_SCHED:
+        return "sched";
     default:
         return "unknown";
     }
@@ -87,4 +91,4 @@ static inline int encode_common_fields(struct flb_log_event_encoder *log_encoder
     return FLB_EVENT_ENCODER_SUCCESS;
 }
 
-#endif // ENCODER_HELPERS_H
\ No newline at end of file
+#endif // ENCODER_HELPERS_H
diff --git a/plugins/in_ebpf/traces/includes/common/events.h b/plugins/in_ebpf/traces/includes/common/events.h
index fcf66f2c129..6202525d64e 100644
--- a/plugins/in_ebpf/traces/includes/common/events.h
+++ b/plugins/in_ebpf/traces/includes/common/events.h
@@ -8,6 +8,8 @@
 #define VFS_PATH_MAX 256
 #define EXECVE_ARG_MAX 3
 #define EXECVE_ARG_LEN 256
+#define DNS_NAME_MAX 128
+#define DNS_QUERY_RAW_MAX 96
 
 enum event_type {
     EVENT_TYPE_EXECVE,
@@ -18,6 +20,8 @@ enum event_type {
     EVENT_TYPE_LISTEN,
     EVENT_TYPE_ACCEPT,
     EVENT_TYPE_CONNECT,
+    EVENT_TYPE_DNS,
+    EVENT_TYPE_SCHED,
 };
 
 enum vfs_op {
@@ -130,6 +134,29 @@ struct connect_event {
     int error_raw;
 };
 
+struct dns_event {
+    __u16 txid;
+    __u16 query_type;
+    __u8 rcode;
+    __u8 response;
+    __u16 query_raw_len;
+    __u64 latency_ns;
+    int error_raw;
+    char query[DNS_NAME_MAX];
+    __u8 query_raw[DNS_QUERY_RAW_MAX];
+};
+
+struct sched_event {
+    __u32 prev_pid;
+    int prev_prio;
+    __s64 prev_state; /* fixed width: layout is shared by BPF and user space */
+    __u32 next_pid;
+    int next_prio;
+    __u32 cpu;
+    __u64 runq_latency_ns;
+    __u8 wakeup_tracked;
+};
+
 struct event {
     enum event_type type; // Type of event (execve, signal, mem, bind)
     struct event_common common; // Common fields for all events
@@ -142,6 +169,8 @@ struct event {
         struct listen_event listen;
         struct accept_event accept;
         struct connect_event connect;
+        struct dns_event dns;
+        struct sched_event sched;
     } details;
 };
 
diff --git a/plugins/in_ebpf/traces/sched/bpf.c b/plugins/in_ebpf/traces/sched/bpf.c
new file mode 100644
index 00000000000..85c28e9896e
--- /dev/null
+++ b/plugins/in_ebpf/traces/sched/bpf.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+#define __TARGET_ARCH_x86_64
+
+#include
+
+#define _LINUX_TYPES_H
+#define _LINUX_POSIX_TYPES_H
+
+#include
+#include
+
+#include
+#include
+
+#include "common/events.h"
+
+/* Timestamp of the most recent wakeup of a task. */
+struct wakeup_info {
+    __u64 wakeup_ns;
+};
+
+/*
+ * Pending wakeups keyed by pid. LRU so that entries of tasks that never
+ * reach sched_switch cannot fill the map and stop the tracking.
+ */
+struct {
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
+    __uint(max_entries, 65536);
+    __type(key, __u32);
+    __type(value, struct wakeup_info);
+} wakeup_by_pid SEC(".maps");
+
+/* Ring buffer the sched events are submitted to. */
+struct {
+    __uint(type, BPF_MAP_TYPE_RINGBUF);
+    __uint(max_entries, 1 << 20);
+} events SEC(".maps");
+
+/* Record the current time as the wakeup time of pid (pid 0 is ignored). */
+static __always_inline int track_wakeup(__u32 pid)
+{
+    struct wakeup_info info = {0};
+
+    if (pid == 0) {
+        return 0;
+    }
+
+    info.wakeup_ns = bpf_ktime_get_ns();
+    bpf_map_update_elem(&wakeup_by_pid, &pid, &info, BPF_ANY);
+
+    return 0;
+}
+
+/* Track the wakeup time on the sched_wakeup tracepoint. */
+SEC("tracepoint/sched/sched_wakeup")
+int trace_sched_wakeup(struct trace_event_raw_sched_wakeup_template *ctx)
+{
+    return track_wakeup((__u32) ctx->pid);
+}
+
+/* Track the wakeup time on the sched_wakeup_new tracepoint. */
+SEC("tracepoint/sched/sched_wakeup_new")
+int trace_sched_wakeup_new(struct trace_event_raw_sched_wakeup_template *ctx)
+{
+    return track_wakeup((__u32) ctx->pid);
+}
+
+/*
+ * Emit one event per context switch describing the task being switched
+ * in, with its run-queue latency when a wakeup was tracked for it.
+ *
+ * NOTE(review): uid, gid and mntns_id come from the current-task helpers,
+ * which at sched_switch presumably describe the previous task, while pid
+ * and comm describe the next one - confirm this attribution is intended.
+ */
+SEC("tracepoint/sched/sched_switch")
+int trace_sched_switch(struct trace_event_raw_sched_switch *ctx)
+{
+    struct event *event;
+    struct wakeup_info *wakeup;
+    __u64 now_ns;
+    __u64 runq_latency_ns = 0;
+    __u8 wakeup_tracked = 0;
+    __u32 next_pid = 0;
+    __u32 prev_pid = 0;
+    int prev_prio = 0;
+    long prev_state = 0;
+    int next_prio = 0;
+    __u64 uid_gid;
+    __u64 mntns_id;
+
+    bpf_core_read(&next_pid, sizeof(next_pid), &ctx->next_pid);
+    bpf_core_read(&prev_pid, sizeof(prev_pid), &ctx->prev_pid);
+    bpf_core_read(&prev_prio, sizeof(prev_prio), &ctx->prev_prio);
+    bpf_core_read(&prev_state, sizeof(prev_state), &ctx->prev_state);
+    bpf_core_read(&next_prio, sizeof(next_prio), &ctx->next_prio);
+
+    if (next_pid == 0) {
+        return 0;
+    }
+
+    /*
+     * Consume the wakeup entry before any filtering so entries of
+     * filtered-out tasks do not accumulate in the map.
+     */
+    now_ns = bpf_ktime_get_ns();
+    wakeup = bpf_map_lookup_elem(&wakeup_by_pid, &next_pid);
+    if (wakeup) {
+        wakeup_tracked = 1;
+        if (now_ns > wakeup->wakeup_ns) {
+            runq_latency_ns = now_ns - wakeup->wakeup_ns;
+        }
+        bpf_map_delete_elem(&wakeup_by_pid, &next_pid);
+    }
+
+    mntns_id = gadget_get_mntns_id();
+    if (gadget_should_discard_mntns_id(mntns_id)) {
+        return 0;
+    }
+
+    event = gadget_reserve_buf(&events, sizeof(*event));
+    if (!event) {
+        return 0;
+    }
+
+    uid_gid = bpf_get_current_uid_gid();
+
+    event->type = EVENT_TYPE_SCHED;
+    event->common.timestamp_raw = bpf_ktime_get_boot_ns();
+    event->common.pid = next_pid;
+    event->common.tid = next_pid;
+    event->common.uid = (__u32) uid_gid;
+    event->common.gid = (__u32) (uid_gid >> 32);
+    event->common.mntns_id = mntns_id;
+    bpf_core_read(event->common.comm, sizeof(event->common.comm), &ctx->next_comm);
+
+    event->details.sched.prev_pid = prev_pid;
+    event->details.sched.prev_prio = prev_prio;
+    event->details.sched.prev_state = prev_state;
+    event->details.sched.next_pid = next_pid;
+    event->details.sched.next_prio = next_prio;
+    event->details.sched.cpu = bpf_get_smp_processor_id();
+    event->details.sched.wakeup_tracked = wakeup_tracked;
+    event->details.sched.runq_latency_ns = runq_latency_ns;
+
+    gadget_submit_buf(ctx, &events, event, sizeof(*event));
+
+    return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/plugins/in_ebpf/traces/sched/handler.c b/plugins/in_ebpf/traces/sched/handler.c
new file mode 100644
index 00000000000..b02993c7b4a
--- /dev/null
+++ b/plugins/in_ebpf/traces/sched/handler.c
@@ -0,0 +1,136 @@
+#include
+#include
+
+#include "common/events.h"
+#include "common/event_context.h"
+#include "common/encoder.h"
+
+#include "handler.h"
+
+/*
+ * Encode a sched_switch event as a log record.
+ *
+ * Returns 0 on success and -1 on encoder failure, in which case the
+ * partially built record is rolled back.
+ */
+int encode_sched_event(struct flb_log_event_encoder *log_encoder,
+                       const struct event *e)
+{
+    int ret;
+
+    ret = flb_log_event_encoder_begin_record(log_encoder);
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        return -1;
+    }
+
+    ret = encode_common_fields(log_encoder, e);
+
+    /* Each append runs only while every previous step succeeded. */
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "cpu");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.cpu);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_pid");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.prev_pid);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_prio");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_int32(log_encoder, e->details.sched.prev_prio);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "prev_state");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_int64(log_encoder, e->details.sched.prev_state);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "next_pid");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint32(log_encoder, e->details.sched.next_pid);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "next_prio");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_int32(log_encoder, e->details.sched.next_prio);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "runq_latency_ns");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_uint64(log_encoder, e->details.sched.runq_latency_ns);
+    }
+
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_cstring(log_encoder, "wakeup_tracked");
+    }
+    if (ret == FLB_EVENT_ENCODER_SUCCESS) {
+        ret = flb_log_event_encoder_append_body_boolean(log_encoder, e->details.sched.wakeup_tracked);
+    }
+
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        flb_log_event_encoder_rollback_record(log_encoder);
+        return -1;
+    }
+
+    ret = flb_log_event_encoder_commit_record(log_encoder);
+    if (ret != FLB_EVENT_ENCODER_SUCCESS) {
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Ring buffer callback: encode one sched event and append it to the input.
+ * The encoder is reset on every path after encoding starts, so a failed
+ * event cannot leak into the next record.
+ */
+int trace_sched_handler(void *ctx, void *data, size_t data_sz)
+{
+    struct trace_event_context *event_ctx;
+    struct flb_log_event_encoder *encoder;
+    struct event *e;
+    int ret;
+
+    event_ctx = (struct trace_event_context *) ctx;
+    e = (struct event *) data;
+
+    if (data_sz < sizeof(struct event) || e->type != EVENT_TYPE_SCHED) {
+        return -1;
+    }
+
+    encoder = event_ctx->log_encoder;
+
+    ret = encode_sched_event(encoder, e);
+    if (ret != 0) {
+        flb_log_event_encoder_reset(encoder);
+        return -1;
+    }
+
+    ret = flb_input_log_append(event_ctx->ins,
+                               NULL,
+                               0,
+                               encoder->output_buffer,
+                               encoder->output_length);
+    flb_log_event_encoder_reset(encoder);
+    if (ret == -1) {
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/plugins/in_ebpf/traces/sched/handler.h b/plugins/in_ebpf/traces/sched/handler.h
new file mode 100644
index 00000000000..22a6f793def
--- /dev/null
+++ b/plugins/in_ebpf/traces/sched/handler.h
@@ -0,0 +1,13 @@
+#ifndef SCHED_HANDLER_H
+#define SCHED_HANDLER_H
+
+#include
+#include
+
+#include "common/events.h"
+
+int encode_sched_event(struct flb_log_event_encoder *log_encoder,
+                       const struct event *e);
+int trace_sched_handler(void *ctx, void *data, size_t data_sz);
+
+#endif
diff --git a/plugins/in_ebpf/traces/traces.h b/plugins/in_ebpf/traces/traces.h
index 3d27fb946b7..a32edcb061f 100644
--- a/plugins/in_ebpf/traces/traces.h
+++ b/plugins/in_ebpf/traces/traces.h
@@ -9,6 +9,8 @@
 #include "generated/trace_vfs.skel.h"
 #include "generated/trace_tcp.skel.h"
 #include "generated/trace_exec.skel.h"
+#include "generated/trace_dns.skel.h"
+#include "generated/trace_sched.skel.h"
 
 #include "bind/handler.h"
 #include "signal/handler.h" // Include signal handler
@@ -16,6 +18,8 @@
 #include "vfs/handler.h"
 #include "tcp/handler.h"
 #include "exec/handler.h"
+#include "dns/handler.h"
+#include "sched/handler.h"
 
 /* Skeleton function pointer types */
 typedef void *(*trace_skel_open_func_t)(void);
@@ -69,6 +73,8 @@ DEFINE_GET_BPF_OBJECT(trace_bind)
 DEFINE_GET_BPF_OBJECT(trace_vfs)
 DEFINE_GET_BPF_OBJECT(trace_tcp)
 DEFINE_GET_BPF_OBJECT(trace_exec)
+DEFINE_GET_BPF_OBJECT(trace_dns)
+DEFINE_GET_BPF_OBJECT(trace_sched)
 
 static struct trace_registration trace_table[] = {
     REGISTER_TRACE(trace_signal, trace_signal_handler),
@@ -77,6 +83,8 @@ static struct trace_registration trace_table[] = {
     REGISTER_TRACE(trace_vfs, trace_vfs_handler),
     REGISTER_TRACE(trace_tcp, trace_tcp_handler),
     REGISTER_TRACE(trace_exec, trace_exec_handler),
+    REGISTER_TRACE(trace_dns, trace_dns_handler),
+    REGISTER_TRACE(trace_sched, trace_sched_handler),
 };
 
 #endif // TRACE_TRACES_H
diff --git a/tests/runtime/CMakeLists.txt b/tests/runtime/CMakeLists.txt
index ddc3ca71934..54e5501cbd5 100644
--- a/tests/runtime/CMakeLists.txt
+++ b/tests/runtime/CMakeLists.txt
@@ -177,11 +177,18 @@ if(FLB_IN_EBPF)
     "../../plugins/in_ebpf/traces/exec/handler.c"
   )
 
+  add_ebpf_handler_test(
+    "sched"
+    "in_ebpf_sched_handler.c"
+    "../../plugins/in_ebpf/traces/sched/handler.c"
+  )
+
add_dependencies(flb-rt-in_ebpf_bind_handler fluent-bit-static) add_dependencies(flb-rt-in_ebpf_signal_handler fluent-bit-static) add_dependencies(flb-rt-in_ebpf_malloc_handler fluent-bit-static) add_dependencies(flb-rt-in_ebpf_tcp_handler fluent-bit-static) add_dependencies(flb-rt-in_ebpf_exec_handler fluent-bit-static) + add_dependencies(flb-rt-in_ebpf_sched_handler fluent-bit-static) endif() diff --git a/tests/runtime/in_ebpf_sched_handler.c b/tests/runtime/in_ebpf_sched_handler.c new file mode 100644 index 00000000000..5b9f0736375 --- /dev/null +++ b/tests/runtime/in_ebpf_sched_handler.c @@ -0,0 +1,164 @@ +#include +#include +#include + +#include +#include +#include + +#include "flb_tests_runtime.h" + +#include "../../plugins/in_ebpf/traces/includes/common/event_context.h" +#include "../../plugins/in_ebpf/traces/includes/common/events.h" +#include "../../plugins/in_ebpf/traces/sched/handler.h" + +struct test_context { + struct trace_event_context event_ctx; + struct flb_input_instance *ins; + struct flb_log_event_decoder *decoder; +}; + +static struct test_context *init_test_context() +{ + struct test_context *ctx; + + ctx = flb_calloc(1, sizeof(struct test_context)); + if (!ctx) { + return NULL; + } + + ctx->ins = flb_calloc(1, sizeof(struct flb_input_instance)); + if (!ctx->ins) { + flb_free(ctx); + return NULL; + } + + ctx->event_ctx.log_encoder = flb_log_event_encoder_create(FLB_LOG_EVENT_FORMAT_DEFAULT); + if (!ctx->event_ctx.log_encoder) { + flb_free(ctx->ins); + flb_free(ctx); + return NULL; + } + + ctx->decoder = flb_log_event_decoder_create(NULL, 0); + if (!ctx->decoder) { + flb_log_event_encoder_destroy(ctx->event_ctx.log_encoder); + flb_free(ctx->ins); + flb_free(ctx); + return NULL; + } + + ctx->ins->context = &ctx->event_ctx; + ctx->event_ctx.ins = ctx->ins; + + return ctx; +} + +static void cleanup_test_context(struct test_context *ctx) +{ + if (!ctx) { + return; + } + + if (ctx->decoder) { + flb_log_event_decoder_destroy(ctx->decoder); + } + + if 
(ctx->event_ctx.log_encoder) { + flb_log_event_encoder_destroy(ctx->event_ctx.log_encoder); + } + + if (ctx->ins) { + flb_free(ctx->ins); + } + + flb_free(ctx); +} + +static int key_matches(msgpack_object key, const char *str) +{ + if (key.type != MSGPACK_OBJECT_STR) { + return 0; + } + + return strncmp(key.via.str.ptr, str, key.via.str.size) == 0 && + strlen(str) == key.via.str.size; +} + +static void verify_decoded_values(struct flb_log_event *event, + const struct event *original) +{ + msgpack_object_kv *kv; + int i; + + TEST_CHECK(event != NULL); + TEST_CHECK(event->body->type == MSGPACK_OBJECT_MAP); + + for (i = 0; i < event->body->via.map.size; i++) { + kv = &event->body->via.map.ptr[i]; + + if (key_matches(kv->key, "event_type")) { + TEST_CHECK(kv->val.type == MSGPACK_OBJECT_STR); + TEST_CHECK(strncmp(kv->val.via.str.ptr, "sched", kv->val.via.str.size) == 0); + } + else if (key_matches(kv->key, "cpu")) { + TEST_CHECK(kv->val.via.u64 == original->details.sched.cpu); + } + else if (key_matches(kv->key, "prev_pid")) { + TEST_CHECK(kv->val.via.u64 == original->details.sched.prev_pid); + } + else if (key_matches(kv->key, "next_pid")) { + TEST_CHECK(kv->val.via.u64 == original->details.sched.next_pid); + } + else if (key_matches(kv->key, "runq_latency_ns")) { + TEST_CHECK(kv->val.via.u64 == original->details.sched.runq_latency_ns); + } + } +} + +void test_sched_event_encoding() +{ + struct test_context *ctx; + struct flb_log_event log_event; + struct event test_event; + int ret; + + ctx = init_test_context(); + TEST_CHECK(ctx != NULL); + + memset(&test_event, 0, sizeof(struct event)); + test_event.type = EVENT_TYPE_SCHED; + test_event.common.pid = 4321; + test_event.common.tid = 4321; + memcpy(test_event.common.comm, "sched_test", strlen("sched_test")); + + test_event.details.sched.prev_pid = 1111; + test_event.details.sched.prev_prio = 120; + test_event.details.sched.prev_state = 1; + test_event.details.sched.next_pid = 4321; + test_event.details.sched.next_prio 
= 90; + test_event.details.sched.cpu = 3; + test_event.details.sched.runq_latency_ns = 125000; + test_event.details.sched.wakeup_tracked = 1; + + ret = encode_sched_event(ctx->event_ctx.log_encoder, &test_event); + TEST_CHECK(ret == 0); + TEST_CHECK(ctx->event_ctx.log_encoder->output_length > 0); + + ret = flb_log_event_decoder_init(ctx->decoder, + ctx->event_ctx.log_encoder->output_buffer, + ctx->event_ctx.log_encoder->output_length); + TEST_CHECK(ret == 0); + + ret = flb_log_event_decoder_next(ctx->decoder, &log_event); + TEST_CHECK(ret == FLB_EVENT_DECODER_SUCCESS); + + verify_decoded_values(&log_event, &test_event); + + cleanup_test_context(ctx); +} + +TEST_LIST = { + {"sched_event_encoding", test_sched_event_encoding}, + {NULL, NULL} +};