// lock.bpf.c (C source file, from Katyusha's blog)
// SPDX-License-Identifier: GPL-2.0
//
// 场景④:锁竞争导致的性能退化 —— 基于 off-CPU 阻塞分析 + 唤醒链。
//
// 思路(赛题关键证据点:锁等待时间、调度阻塞时间、线程堆栈聚集、futex 热点):
// 1. sched_switch 中 prev_state != 0(非 RUNNING,即"被阻塞切出"而非"被抢占")时,
// 记录该线程的 off-CPU 起点,并抓取其内核调用栈(阻塞点,如 futex_wait/__mutex_lock)。
// 2. 该线程重新上 CPU 时,结算阻塞时长,按线程累计 off-CPU 阻塞时间/次数/最大值,并记下阻塞栈 id。
// 3. sched_wakeup 中记录"唤醒者"——构成 waker→wakee 唤醒链,定位是谁持锁阻塞了它。
// 内核态仅聚合,栈符号化在用户态用 /proc/kallsyms 完成,开销低。
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
// License string placed in the "license" ELF section.
char LICENSE[] SEC("license") = "GPL";
// Size of the comm (task name) buffers used by the structs in this file.
#define TASK_COMM_LEN 16
// Number of 64-bit frame slots that sizes each stackmap value.
#define MAX_STACK_DEPTH 32
// Per-thread aggregate stored in the lock_stats map (keyed by thread id).
// Field names and order are the map's value layout; presumably a user-space
// reader depends on them -- verify before changing.
struct lock_stat {
__u64 offcpu_ns; // accumulated blocking off-CPU time, in nanoseconds
__u64 offcpu_count; // number of blocking episodes settled so far
__u64 max_offcpu_ns; // longest single blocking episode, in nanoseconds
__u32 last_waker; // tid of the most recent waker
__s32 stackid; // kernel stack id of the most recent block (as returned by bpf_get_stackid)
char comm[TASK_COMM_LEN]; // thread name, copied from the sched_switch record
};
// In-flight record for a thread that was switched out; stored in offcpu_start
// until the thread is scheduled back in.
struct offcpu_info {
__u64 ts; // bpf_ktime_get_ns() timestamp taken at switch-out
__s32 stackid; // result of bpf_get_stackid() at switch-out
__u32 pad; // explicit padding; never written apart from zero-initialisation
};
// tid -> off-CPU start record. Written by handle_switch when a thread is
// switched out, consumed and deleted when that thread is switched back in.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 16384);
__type(key, __u32);
__type(value, struct offcpu_info);
} offcpu_start SEC(".maps");
// tid -> accumulated blocking statistics. Entries are created on demand by
// get_stat(); nothing in this file deletes them.
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 16384);
__type(key, __u32);
__type(value, struct lock_stat);
} lock_stats SEC(".maps");
// Stack-trace storage used by bpf_get_stackid() in handle_switch. Each value
// holds up to MAX_STACK_DEPTH 64-bit frame addresses; the ids returned by the
// helper are what lock_stat.stackid / offcpu_info.stackid refer to.
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 4096);
__uint(key_size, sizeof(__u32));
__uint(value_size, MAX_STACK_DEPTH * sizeof(__u64));
} stackmap SEC(".maps");
// Context layout for the sched/sched_switch tracepoint program.
// NOTE(review): presumably mirrors the record described by
// /sys/kernel/tracing/events/sched/sched_switch/format -- verify the offsets
// against the target kernel.
struct sched_switch_tp {
__u64 pad; // presumably the 8-byte common tracepoint header; not read here
char prev_comm[TASK_COMM_LEN]; // name of the task being switched out
__s32 prev_pid; // id of the task being switched out (used as the map key)
__s32 prev_prio;
__s64 prev_state; // state of the outgoing task as reported by the tracepoint
char next_comm[TASK_COMM_LEN]; // name of the task being switched in
__s32 next_pid; // id of the task being switched in (used as the map key)
__s32 next_prio;
};
// Context layout for the sched/sched_wakeup tracepoint program.
// NOTE(review): presumably mirrors the record described by
// /sys/kernel/tracing/events/sched/sched_wakeup/format -- verify the offsets
// against the target kernel.
struct sched_wakeup_tp {
__u64 pad; // presumably the 8-byte common tracepoint header; not read here
char comm[TASK_COMM_LEN]; // name of the task being woken
__s32 pid; // id of the task being woken (the wakee)
__s32 prio;
__s32 target_cpu;
};
// Fetch the lock_stats entry for `tid`, creating a zeroed one if it does not
// exist yet. Returns NULL when the entry could not be found even after the
// insertion attempt (e.g. the insert failed).
static __always_inline struct lock_stat *get_stat(__u32 tid)
{
    struct lock_stat *entry = bpf_map_lookup_elem(&lock_stats, &tid);

    if (!entry) {
        // First sighting of this thread: seed an all-zero record.
        // BPF_NOEXIST keeps a concurrently inserted record intact.
        struct lock_stat blank = {};

        bpf_map_update_elem(&lock_stats, &tid, &blank, BPF_NOEXIST);
        entry = bpf_map_lookup_elem(&lock_stats, &tid);
    }
    return entry;
}
// sched_switch handler: blocking off-CPU accounting.
//
// Switch-out side: if the outgoing thread went to sleep (S or D state), store
// the current timestamp and its kernel stack id in offcpu_start.
// Switch-in side: if the incoming thread has a pending start record, add the
// elapsed time to its lock_stat (total, count, max), remember the blocking
// stack id and the thread name, then drop the start record.
//
// Always returns 0.
SEC("tracepoint/sched/sched_switch")
int handle_switch(struct sched_switch_tp *ctx)
{
    // prev_state as reported by the tracepoint is NOT simply "0 = running":
    // a preempted task is reported with a non-zero preemption marker
    // (TASK_REPORT_MAX, printed as "R+"), and an exiting task is reported
    // as X/Z. Testing `prev_state != 0` therefore counts preempted threads
    // as blocked and leaves start records behind for threads that exit and
    // never get scheduled again. Only the two sleep states are relevant to
    // lock / futex waits, and their bit values are stable across kernels.
    enum {
        SWITCH_STATE_INTERRUPTIBLE = 0x1,   // "S", e.g. futex wait
        SWITCH_STATE_UNINTERRUPTIBLE = 0x2, // "D", e.g. mutex slow path
        SWITCH_STATE_SLEEPING = SWITCH_STATE_INTERRUPTIBLE |
                                SWITCH_STATE_UNINTERRUPTIBLE,
    };

    __u64 now = bpf_ktime_get_ns();
    __u32 prev = (__u32)ctx->prev_pid;
    __u32 next = (__u32)ctx->next_pid;

    // Outgoing thread went to sleep: open an off-CPU interval for it.
    // tid 0 (the idle task) is never tracked.
    if (prev != 0 && (ctx->prev_state & SWITCH_STATE_SLEEPING) != 0) {
        struct offcpu_info info = {};

        info.ts = now;
        // flags == 0: kernel stack of the current (outgoing) task. A negative
        // value means the capture failed; it is stored as-is.
        info.stackid = bpf_get_stackid(ctx, &stackmap, 0);
        bpf_map_update_elem(&offcpu_start, &prev, &info, BPF_ANY);
    }

    // Incoming thread: close its off-CPU interval, if one is open.
    if (next == 0)
        return 0;

    struct offcpu_info *info = bpf_map_lookup_elem(&offcpu_start, &next);
    if (!info)
        return 0;

    // Guard against a non-positive interval so the unsigned subtraction
    // cannot wrap.
    if (now > info->ts) {
        __u64 dur = now - info->ts;
        struct lock_stat *st = get_stat(next);

        if (st) {
            st->offcpu_ns += dur;
            st->offcpu_count += 1;
            if (dur > st->max_offcpu_ns)
                st->max_offcpu_ns = dur;
            st->stackid = info->stackid;
            bpf_probe_read_kernel_str(&st->comm, sizeof(st->comm),
                                      ctx->next_comm);
        }
    }

    // The thread is back on CPU, so the start record is obsolete whether or
    // not it was accounted; keeping it would let a later switch-in settle
    // against a stale timestamp.
    bpf_map_delete_elem(&offcpu_start, &next);
    return 0;
}
// sched_wakeup handler: remember who woke each thread.
//
// Stores the id of the currently running thread as last_waker in the wakee's
// lock_stat entry (creating the entry if needed). Always returns 0.
SEC("tracepoint/sched/sched_wakeup")
int handle_wakeup(struct sched_wakeup_tp *ctx)
{
    __u32 woken_tid = (__u32)ctx->pid;
    struct lock_stat *entry;

    // tid 0 is not tracked.
    if (!woken_tid)
        return 0;

    // The low 32 bits of pid_tgid are the current thread id; the handler
    // treats the current thread as the waker.
    // NOTE(review): wakeups issued from interrupt context would attribute
    // the wakeup to whatever thread happened to be running -- confirm this
    // is acceptable for the analysis.
    __u32 waker_tid = (__u32)bpf_get_current_pid_tgid();

    entry = get_stat(woken_tid);
    if (!entry)
        return 0;

    entry->last_waker = waker_tid;
    return 0;
}

// (end of lock.bpf.c)