lock.go - Katyusha's blog
mobile wallpaper 1mobile wallpaper 2mobile wallpaper 3mobile wallpaper 4
lock.go
Go Source File · 105 lines
lock.go
package rca
import (
"fmt"
"strings"
"time"
"github.com/os2026/ebpf-rca/internal/detector"
"github.com/os2026/ebpf-rca/internal/schema"
)
// lockSymHints lists the lowercase symbol fragments that mark a blocking stack
// as a lock/synchronization wait rather than a pure I/O sleep. stackHasLock
// lowercases each frame and reports a match when the frame contains any one of
// these fragments as a substring.
var lockSymHints = []string{
	"futex",
	"mutex",
	"rwsem",
	"rt_mutex",
	"down_",
	"__lock",
	"semaphore",
	"rwlock",
}
// BuildLockReport converts one lock-contention / long-blocking signal into a
// structured diagnostic report.
//
// stack is the already-symbolized blocking kernel stack (top-N frames); per the
// original note it is supplied by collector.ResolveStack. Every frame is
// appended to the evidence chain as a "stack" entry, after the four fixed
// metric/event entries taken from sig.Sample.
//
// When no frame matches a lock hint (see stackHasLock) the anomaly is labelled
// as a long blocking wait instead of lock contention. Root cause, suggestion
// and confidence come from classifyLock. The time window is rendered in UTC
// using the RFC 3339 layout.
func BuildLockReport(sig detector.LockSignal, stack []string) schema.AnomalyReport {
	sample := sig.Sample
	lockInStack := stackHasLock(stack)
	rootCause, advice, score := classifyLock(lockInStack, sample.LastWaker)

	// Fixed metric/event evidence comes first; room is reserved for the frames.
	chain := make([]schema.Evidence, 0, 4+len(stack))
	chain = append(chain,
		schema.Evidence{
			Type:      "metric",
			Name:      "offcpu_ratio",
			Value:     round2(sample.OffcpuRatio),
			Threshold: 0.30,
			Desc:      "阻塞型 off-CPU 时间占墙钟比例",
		},
		schema.Evidence{
			Type:  "metric",
			Name:  "max_offcpu_ms",
			Value: round2(sample.MaxOffcpuMs),
			Desc:  "单次最长阻塞时长(毫秒)",
		},
		schema.Evidence{
			Type:  "event",
			Name:  "block_count",
			Value: sample.BlockCount,
			Desc:  "窗口内阻塞切出次数",
		},
		schema.Evidence{
			Type:  "event",
			Name:  "last_waker_tid",
			Value: sample.LastWaker,
			Desc:  "最近唤醒该线程者(唤醒链上游,疑似持锁方)",
		},
	)

	// The blocking stack is added frame by frame as the "thread stack
	// clustering" evidence, named frame_0, frame_1, ... in stack order.
	for idx := range stack {
		chain = append(chain, schema.Evidence{
			Type: "stack",
			Name: fmt.Sprintf("frame_%d", idx),
			Func: stack[idx],
			Desc: "阻塞点内核调用栈",
		})
	}

	// Default to the generic label and upgrade it only when a lock hint matched.
	label := "长时间阻塞等待"
	if lockInStack {
		label = "锁竞争"
	}

	subject := schema.RelatedObject{
		Pid: sample.Pid,
		// NOTE(review): Tid is filled from the sample's Pid, same as the Pid
		// field above — confirm whether the sample carries a separate thread
		// id that belongs here.
		Tid:  sample.Pid,
		Comm: sample.Comm,
	}

	metrics := map[string]interface{}{
		"offcpu_ratio":   round2(sample.OffcpuRatio),
		"max_offcpu_ms":  round2(sample.MaxOffcpuMs),
		"block_count":    sample.BlockCount,
		"last_waker_tid": sample.LastWaker,
	}

	window := schema.TimeWindow{
		Start: sig.WindowStart.UTC().Format(time.RFC3339),
		End:   sig.WindowEnd.UTC().Format(time.RFC3339),
	}

	return schema.AnomalyReport{
		AnomalyType:        label,
		RelatedObject:      subject,
		KeyMetrics:         metrics,
		TimeWindow:         window,
		SuspectedRootCause: rootCause,
		Confidence:         score,
		EvidenceChain:      chain,
		Suggestion:         advice,
	}
}
// stackHasLock reports whether any frame of stack contains one of the
// lockSymHints fragments as a substring. Each frame is lowercased before the
// comparison; the hints are compared exactly as they are written. A nil or
// empty stack yields false.
func stackHasLock(stack []string) bool {
	// hinted checks a single, already-lowercased frame against every hint.
	hinted := func(frame string) bool {
		for i := range lockSymHints {
			if strings.Contains(frame, lockSymHints[i]) {
				return true
			}
		}
		return false
	}

	for i := range stack {
		if hinted(strings.ToLower(stack[i])) {
			return true
		}
	}
	return false
}
// classifyLock maps the lock/non-lock verdict to a suspected root cause, a
// remediation suggestion and a confidence score.
//
// When isLock is true the suggestion embeds waker as the tid of the upstream
// thread to inspect and the confidence is 0.9. Otherwise a generic
// blocking-wait diagnosis is returned with confidence 0.7 and waker is not
// used.
func classifyLock(isLock bool, waker uint32) (root, suggestion string, confidence float64) {
	// Handle the non-lock verdict up front so the lock case reads straight through.
	if !isLock {
		return "线程长时间阻塞在内核等待(非计算型),疑似 I/O 或条件变量同步等待",
			"结合 I/O 场景进一步确认阻塞来源;若为同步等待,检查唤醒时机与生产消费速率",
			0.7
	}

	return "多线程争用同一锁资源导致阻塞(off-CPU 等锁,疑似临界区过大或锁粒度过粗)",
		fmt.Sprintf("缩小临界区/细化锁粒度,排查唤醒链上游持锁线程 tid=%d;可改用读写锁或无锁结构", waker),
		0.9
}

目录