diff --git a/bpf/kprobe_pwru.c b/bpf/kprobe_pwru.c
index 73dbec0c..d6fb8a61 100644
--- a/bpf/kprobe_pwru.c
+++ b/bpf/kprobe_pwru.c
@@ -30,8 +30,6 @@
 const static bool TRUE = true;
 const static u32 ZERO = 0;
 
-volatile const static __u64 BPF_PROG_ADDR = 0;
-
 enum {
 	TRACKED_BY_FILTER = (1 << 0),
 	TRACKED_BY_SKB = (1 << 1),
@@ -401,6 +399,24 @@ get_tracing_fp(void)
 	return fp;
 }
 
+#ifdef bpf_target_arm64
+static __always_inline u64 detect_tramp_fp(void);
+#endif
+
+static __always_inline u64
+get_tramp_fp(void) {
+	u64 fp_tramp = 0;
+
+#ifdef bpf_target_x86
+	u64 fp = get_tracing_fp();
+	bpf_probe_read_kernel(&fp_tramp, sizeof(fp_tramp), (void *) fp);
+#elif defined(bpf_target_arm64)
+	fp_tramp = detect_tramp_fp();
+#endif
+
+	return fp_tramp;
+}
+
 static __always_inline u64
 get_kprobe_fp(struct pt_regs *ctx)
 {
@@ -410,7 +426,7 @@ get_kprobe_fp(struct pt_regs *ctx)
 static __always_inline u64
 get_stackid(void *ctx, const bool is_kprobe) {
 	u64 caller_fp;
-	u64 fp = is_kprobe ? get_kprobe_fp(ctx) : get_tracing_fp();
+	u64 fp = is_kprobe ? get_kprobe_fp(ctx) : get_tramp_fp();
 	for (int depth = 0; depth < MAX_STACK_DEPTH; depth++) {
 		if (bpf_probe_read_kernel(&caller_fp, sizeof(caller_fp), (void *)fp) < 0)
 			break;
@@ -513,6 +529,60 @@ handle_everything(struct sk_buff *skb, void *ctx, struct event_t *event, u64 *_s
 	return true;
 }
 
+#ifdef bpf_target_x86
+/* The following gen_endbr_poison() and is_endbr() functions are taken from the
+ * kernel's arch/x86/include/asm/ibt.h file.
+ */
+
+static __always_inline u32
+gen_endbr_poison(void) {
+	/*
+	 * 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
+	 * will be unique to (former) ENDBR sites.
+	 */
+	return 0x001f0f66; /* osp nopl (%rax) */
+}
+
+static __always_inline bool
+is_endbr(u32 val) {
+	static const u32 endbr64 = 0xfa1e0ff3;
+	static const u32 endbr32 = 0xfb1e0ff3;
+
+	if (val == gen_endbr_poison())
+		return true;
+
+	val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
+	return val == endbr64;
+}
+
+static __always_inline u64
+correct_func_ip(u64 ip) {
+	u32 endbr;
+
+	bpf_probe_read_kernel(&endbr, sizeof(endbr), (void *) (ip - 4));
+	return is_endbr(endbr) ? (ip - 4) : ip;
+}
+#endif
+
+static __always_inline u64 get_func_ip(void);
+
+static __always_inline u64
+get_addr(void *ctx, const bool is_kprobe, const bool has_get_func_ip) {
+	u64 ip;
+
+	if (has_get_func_ip) {
+		ip = bpf_get_func_ip(ctx); /* endbr has been handled in helper. */
+	} else {
+		ip = is_kprobe ? PT_REGS_IP((struct pt_regs *) ctx) : get_func_ip();
+#ifdef bpf_target_x86
+		ip = correct_func_ip(ip);
+		ip -= is_kprobe; /* -1 always on x86 if kprobe. */
+#endif
+	}
+
+	return ip;
+}
+
 static __always_inline int
 kprobe_skb(struct sk_buff *skb, struct pt_regs *ctx, const bool has_get_func_ip, u64 *_stackid) {
 	struct event_t event = {};
@@ -521,7 +591,7 @@ kprobe_skb(struct sk_buff *skb, struct pt_regs *ctx, const bool has_get_func_ip,
 		return BPF_OK;
 
 	event.skb_addr = (u64) skb;
-	event.addr = has_get_func_ip ? bpf_get_func_ip(ctx) : PT_REGS_IP(ctx);
+	event.addr = get_addr(ctx, true, has_get_func_ip);
 	event.param_second = PT_REGS_PARM2(ctx);
 	event.param_third = PT_REGS_PARM3(ctx);
 	if (CFG.output_caller)
@@ -606,6 +676,90 @@ int BPF_PROG(fexit_skb_copy, struct sk_buff *old, gfp_t mask, struct sk_buff *ne
 	return BPF_OK;
 }
 
+#ifdef bpf_target_arm64
+/* As R10 of bpf is not A64_FP, we need to detect the FP of trampoline
+ * by scanning the stacks of current bpf prog and the trampoline.
+ *
+ * Since commit 5d4fa9ec5643 ("bpf, arm64: Avoid blindly saving/restoring all callee-saved registers"),
+ * the number of callee-saved registers saved in the bpf prog prologue is
+ * dynamic, not fixed anymore.
+ */
+static __always_inline u64
+detect_tramp_fp(void) {
+	static const int range_of_detection = 256;
+	u64 fp, r10;
+
+	r10 = get_tracing_fp(); /* R10 of current bpf prog */
+	for (int i = 6; i >= 0; i--) {
+		bpf_probe_read_kernel(&fp, sizeof(fp), (void *) (r10 + i * 16));
+		if (r10 < fp && fp < r10 + range_of_detection)
+			return fp;
+	}
+
+	return r10;
+}
+#endif
+
+static __always_inline u64
+get_func_ip(void) {
+	u64 fp_tramp, ip;
+
+#if defined(bpf_target_x86)
+	static const int ip_offset = 5/* sizeof callq insn */;
+	u64 fp;
+#elif defined(bpf_target_arm64)
+	/* Ref: commit b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") */
+	static const int ip_offset = 12/* sizeof 3 insns */;
+#else
+#error Unsupported architecture
+#endif
+
+	/* Stack layout on x86:
+	 * +-----+ FP of tracee's caller
+	 * | ... |
+	 * | rip | IP of tracee's caller
+	 * | rip | IP of tracee
+	 * | rbp | FP of tracee's caller
+	 * +-----+ FP of trampoline
+	 * | ... |
+	 * | rip | IP of trampoline
+	 * | rbp | FP of trampoline
+	 * +-----+ FP of current prog
+	 * | ... |
+	 * +-----+ SP of current prog
+	 *
+	 * Stack layout on arm64:
+	 * |  r9  |
+	 * |  fp  | FP of tracee's caller
+	 * |  lr  | IP of tracee
+	 * |  fp  | FP of tracee
+	 * +------+ FP of trampoline <-------+
+	 * |  ..  | padding                  |
+	 * |  ..  | callee saved regs        |
+	 * | retv | retval of tracee         |
+	 * | regs | regs of tracee           |
+	 * | nreg | number of regs           |
+	 * |  ip  | IP of tracee if needed   | possible range of
+	 * | rctx | bpf_tramp_run_ctx        | detection
+	 * |  lr  | IP of trampoline         |
+	 * |  fp  | FP of trampoline <--------- detect it
+	 * +------+ FP of current prog       |
+	 * | regs | callee saved regs        |
+	 * +------+ R10 of bpf prog <--------+
+	 * |  ..  |
| + * +------+ SP of current prog + */ + +#if defined(bpf_target_x86) + fp = get_tracing_fp(); /* FP of current prog */ + bpf_probe_read_kernel(&fp_tramp, sizeof(fp_tramp), (void *)fp); /* FP of trampoline */ +#elif defined(bpf_target_arm64) + fp_tramp = detect_tramp_fp(); /* FP of trampoline */ +#endif + bpf_probe_read_kernel(&ip, sizeof(ip), (void *)(fp_tramp + 8)); /* IP of tracee */ + return ip - ip_offset; +} + SEC("fentry/tc") int BPF_PROG(fentry_tc, struct sk_buff *skb) { struct event_t event = {}; @@ -614,7 +768,7 @@ int BPF_PROG(fentry_tc, struct sk_buff *skb) { return BPF_OK; event.skb_addr = (u64) skb; - event.addr = BPF_PROG_ADDR; + event.addr = get_addr(ctx, false, false); event.type = EVENT_TYPE_TC; bpf_map_push_elem(&events, &event, BPF_EXIST); @@ -722,7 +876,7 @@ int BPF_PROG(fentry_xdp, struct xdp_buff *xdp) { event.ts = bpf_ktime_get_ns(); event.cpu_id = bpf_get_smp_processor_id(); event.skb_addr = (u64) &xdp; - event.addr = BPF_PROG_ADDR; + event.addr = get_addr(ctx, false, false); event.type = EVENT_TYPE_XDP; bpf_map_push_elem(&events, &event, BPF_EXIST); diff --git a/internal/pwru/bpf_prog.go b/internal/pwru/bpf_prog.go index 32503d6a..7f1b45f0 100644 --- a/internal/pwru/bpf_prog.go +++ b/internal/pwru/bpf_prog.go @@ -13,8 +13,6 @@ import ( var errNotFound = errors.New("not found") -type BpfProgName2Addr map[string]uint64 - func listBpfProgs(typ ebpf.ProgramType) ([]*ebpf.Program, error) { var ( id ebpf.ProgramID @@ -42,7 +40,7 @@ func listBpfProgs(typ ebpf.ProgramType) ([]*ebpf.Program, error) { return progs, nil } -func getBpfProgInfo(prog *ebpf.Program) (entryFuncName, progName, tag string, err error) { +func getBpfProgInfo(prog *ebpf.Program) (entryFuncName string, err error) { info, err := prog.Info() if err != nil { err = fmt.Errorf("failed to get program info: %w", err) @@ -67,7 +65,7 @@ func getBpfProgInfo(prog *ebpf.Program) (entryFuncName, progName, tag string, er for _, insn := range insns { sym := insn.Symbol() if sym != "" { - return sym, info.Name, info.Tag, nil + return sym, nil } } diff --git a/internal/pwru/ksym.go b/internal/pwru/ksym.go index 3395c80c..22eed61f 100644 --- a/internal/pwru/ksym.go +++ b/internal/pwru/ksym.go @@ -42,16 +42,15 @@ func (a *Addr2Name) findNearestSym(ip uint64) string { return strings.Replace(a.Addr2NameSlice[i-1].name, "\t", "", -1) } -func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) { +func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, error) { a2n := Addr2Name{ Addr2NameMap: make(map[uint64]*ksym), Name2AddrMap: make(map[string][]uintptr), } - n2a := BpfProgName2Addr{} file, err := os.Open("/proc/kallsyms") if err != nil { - return a2n, n2a, err + return a2n, err } defer file.Close() @@ -62,7 +61,7 @@ func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) { if all || (funcs[name] > 0) { addr, err := strconv.ParseUint(line[0], 16, 64) if err != nil { - return a2n, n2a, err + return a2n, err } sym := &ksym{ addr: addr, @@ -73,18 +72,15 @@ func ParseKallsyms(funcs Funcs, all bool) (Addr2Name, BpfProgName2Addr, error) { if all { a2n.Addr2NameSlice = append(a2n.Addr2NameSlice, sym) } - if isBpfProg := strings.HasSuffix(name, "[bpf]"); isBpfProg { - n2a[name] = addr - } } } if err := scanner.Err(); err != nil { - return a2n, n2a, err + return a2n, err } if all { sort.Sort(byAddr(a2n.Addr2NameSlice)) } - return a2n, n2a, nil + return a2n, nil } diff --git a/internal/pwru/output.go b/internal/pwru/output.go index 877053f3..c1e288b4 100644 --- a/internal/pwru/output.go +++ 
@@ -257,19 +257,6 @@ func getExecName(pid int) string {
 	return execName
 }
 
-func getAddrByArch(event *Event, o *output) (addr uint64) {
-	switch runtime.GOARCH {
-	case "amd64":
-		addr = event.Addr
-		if !o.kprobeMulti && event.Type == eventTypeKprobe {
-			addr -= 1
-		}
-	case "arm64":
-		addr = event.Addr
-	}
-	return addr
-}
-
 func getTupleData(event *Event) (tupleData string) {
 	tupleData = fmt.Sprintf("%s:%d->%s:%d(%s)",
 		addrToStr(event.Tuple.L3Proto, event.Tuple.Saddr), byteorder.NetworkToHost16(event.Tuple.Sport),
@@ -350,11 +337,6 @@
 func getOutFuncName(o *output, event *Event, addr uint64) string {
 	if ksym, ok := o.addr2name.Addr2NameMap[addr]; ok {
 		funcName = ksym.name
-	} else if ksym, ok := o.addr2name.Addr2NameMap[addr-4]; runtime.GOARCH == "amd64" && ok {
-		// Assume that function has ENDBR in its prelude (enabled by CONFIG_X86_KERNEL_IBT).
-		// See https://lore.kernel.org/bpf/20220811091526.172610-5-jolsa@kernel.org/
-		// for more ctx.
-		funcName = ksym.name
 	} else {
 		funcName = fmt.Sprintf("0x%x", addr)
 	}
@@ -411,10 +393,7 @@ func (o *output) Print(event *Event) {
 		ts = getRelativeTs(event, o)
 	}
 
-	// XXX: not sure why the -1 offset is needed on x86 but not on arm64
-	addr := getAddrByArch(event, o)
-
-	outFuncName := getOutFuncName(o, event, addr)
+	outFuncName := getOutFuncName(o, event, event.Addr)
 	fmt.Fprintf(o.writer, "%-18s %-3s %-16s", fmt.Sprintf("%#x", event.SkbAddr),
 		fmt.Sprintf("%d", event.CPU), fmt.Sprintf("%s", execName))
 
diff --git a/internal/pwru/tracing.go b/internal/pwru/tracing.go
index 0ebdd1cc..c02c12fa 100644
--- a/internal/pwru/tracing.go
+++ b/internal/pwru/tracing.go
@@ -62,10 +62,9 @@ func (t *tracing) addLink(l link.Link) {
 }
 
 func (t *tracing) traceProg(spec *ebpf.CollectionSpec,
-	opts *ebpf.CollectionOptions, prog *ebpf.Program, n2a BpfProgName2Addr,
-	tracingName string,
+	opts *ebpf.CollectionOptions, prog *ebpf.Program, tracingName string,
 ) error {
-	entryFn, progName, tag, err := getBpfProgInfo(prog)
+	entryFn, err := getBpfProgInfo(prog)
 	if err != nil {
 		if errors.Is(err, errNotFound) {
 			log.Printf("Skip tracing bpf prog %s because cannot find its entry function name", prog)
@@ -74,30 +73,7 @@ func (t *tracing) traceProg(spec *ebpf.CollectionSpec,
 		return fmt.Errorf("failed to get entry function name: %w", err)
 	}
 
-	// The addr may hold the wrong rip value, because two addresses could
-	// have one same symbol. As discussed before, that doesn't affect the
-	// symbol resolution because even a "wrong" rip can be matched to the
-	// right symbol. However, this could make a difference when we want to
-	// distinguish which exact bpf prog is called.
-	// -- @jschwinger233
-
-	progKsym := fmt.Sprintf("bpf_prog_%s_%s[bpf]", tag, entryFn)
-	addr, ok := n2a[progKsym]
-	if !ok {
-		progKsym = fmt.Sprintf("bpf_prog_%s_%s[bpf]", tag, progName)
-		addr, ok = n2a[progKsym]
-		if !ok {
-			return fmt.Errorf("failed to find address for function %s of bpf prog %v", progName, prog)
-		}
-	}
-
 	spec = spec.Copy()
-	if err := spec.RewriteConstants(map[string]any{
-		"BPF_PROG_ADDR": addr,
-	}); err != nil {
-		return fmt.Errorf("failed to rewrite bpf prog addr: %w", err)
-	}
-
 	spec.Programs[tracingName].AttachTarget = prog
 	spec.Programs[tracingName].AttachTo = entryFn
 	coll, err := ebpf.NewCollectionWithOptions(spec, *opts)
@@ -127,8 +103,7 @@ func (t *tracing) traceProg(spec *ebpf.CollectionSpec,
 }
 
 func (t *tracing) trace(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
-	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
-	n2a BpfProgName2Addr, progs []*ebpf.Program, tracingName string,
+	opts *ebpf.CollectionOptions, progs []*ebpf.Program, tracingName string,
 ) error {
 	// Reusing maps from previous collection is to handle the events together
 	// with the kprobes.
@@ -141,7 +116,7 @@ func (t *tracing) trace(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
 	for _, prog := range progs {
 		prog := prog
 		errg.Go(func() error {
-			return t.traceProg(spec, opts, prog, n2a, tracingName)
+			return t.traceProg(spec, opts, prog, tracingName)
 		})
 	}
 
@@ -153,10 +128,7 @@
 	return nil
 }
 
-func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
-	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
-	n2a BpfProgName2Addr,
-) *tracing {
+func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec, opts *ebpf.CollectionOptions) *tracing {
 	log.Printf("Attaching tc-bpf progs...\n")
 
 	progs, err := listBpfProgs(ebpf.SchedCLS)
@@ -168,17 +140,14 @@ func TraceTC(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
 	t.progs = progs
 	t.links = make([]link.Link, 0, len(progs))
 
-	if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, progs, "fentry_tc"); err != nil {
+	if err := t.trace(coll, spec, opts, progs, "fentry_tc"); err != nil {
 		log.Fatalf("failed to trace TC progs: %v", err)
 	}
 
 	return &t
 }
 
-func TraceXDP(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
-	opts *ebpf.CollectionOptions, outputSkb bool, outputShinfo bool,
-	n2a BpfProgName2Addr,
-) *tracing {
+func TraceXDP(coll *ebpf.Collection, spec *ebpf.CollectionSpec, opts *ebpf.CollectionOptions) *tracing {
 	log.Printf("Attaching xdp progs...\n")
 
 	progs, err := listBpfProgs(ebpf.XDP)
@@ -193,7 +162,7 @@ func TraceXDP(coll *ebpf.Collection, spec *ebpf.CollectionSpec,
 	{
 		spec := spec.Copy()
 		delete(spec.Programs, "fexit_xdp")
-		if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, progs, "fentry_xdp"); err != nil {
+		if err := t.trace(coll, spec, opts, progs, "fentry_xdp"); err != nil {
 			log.Fatalf("failed to trace XDP progs: %v", err)
 		}
 	}
@@ -201,7 +170,7 @@
 	{
 		spec := spec.Copy()
 		delete(spec.Programs, "fentry_xdp")
-		if err := t.trace(coll, spec, opts, outputSkb, outputShinfo, n2a, progs, "fexit_xdp"); err != nil {
+		if err := t.trace(coll, spec, opts, progs, "fexit_xdp"); err != nil {
 			log.Fatalf("failed to trace XDP progs: %v", err)
 		}
 	}
diff --git a/main.go b/main.go
index fde15017..a5a2b86d 100644
--- a/main.go
+++ b/main.go
@@ -107,7 +107,7 @@ func main() {
 	}
 	// If --filter-trace-tc/--filter-trace-xdp, it's to retrieve and print bpf
 	// prog's name.
-	addr2name, name2addr, err := pwru.ParseKallsyms(funcs, flags.OutputStack ||
+	addr2name, err := pwru.ParseKallsyms(funcs, flags.OutputStack ||
 		len(flags.KMods) != 0 || flags.FilterTraceTc || flags.FilterTraceXdp ||
 		len(flags.FilterNonSkbFuncs) > 0 || flags.OutputCaller || flags.FilterTrackBpfHelpers)
 	if err != nil {
@@ -235,14 +235,14 @@ func main() {
 
 	traceTc := false
 	if flags.FilterTraceTc {
-		t := pwru.TraceTC(coll, bpfSpecFentryTc, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr)
+		t := pwru.TraceTC(coll, bpfSpecFentryTc, &opts)
 		defer t.Detach()
 		traceTc = t.HaveTracing()
 	}
 
 	traceXdp := false
 	if flags.FilterTraceXdp {
-		t := pwru.TraceXDP(coll, bpfSpecFentryXdp, &opts, flags.OutputSkb, flags.OutputShinfo, name2addr)
+		t := pwru.TraceXDP(coll, bpfSpecFentryXdp, &opts)
 		defer t.Detach()
 		traceXdp = t.HaveTracing()
 	}
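
Note (not part of the patch): gen_endbr_poison() and is_endbr() above are copied from the kernel's arch/x86/include/asm/ibt.h, and correct_func_ip() uses them to step back over the 4-byte ENDBR that CONFIG_X86_KERNEL_IBT places at function entry, replacing the old addr-4 guess that this patch removes from getOutFuncName(). The constants can be exercised in a minimal userspace harness; the file name and test values below are invented for illustration.

/* endbr_check.c: standalone sketch of the ENDBR checks behind
 * correct_func_ip(); build with `cc endbr_check.c -o endbr_check`. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t gen_endbr_poison(void)
{
	return 0x001f0f66; /* osp nopl (%rax) */
}

static int is_endbr(uint32_t val)
{
	const uint32_t endbr64 = 0xfa1e0ff3;

	if (val == gen_endbr_poison())
		return 1;

	val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
	return val == endbr64;
}

int main(void)
{
	assert(is_endbr(0xfa1e0ff3));  /* endbr64 */
	assert(is_endbr(0xfb1e0ff3));  /* endbr32, folded onto endbr64 */
	assert(is_endbr(0x001f0f66));  /* poisoned (former) ENDBR site */
	assert(!is_endbr(0x90909090)); /* plain NOPs are not ENDBR */
	puts("ENDBR checks OK");
	return 0;
}

A hit at ip - 4 therefore means the reported IP landed just past an ENDBR and the real function entry is 4 bytes earlier, which is exactly the ternary in correct_func_ip().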
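Note (not part of the patch): once the trampoline's FP is known (one bpf_probe_read_kernel() of [fp] on x86, the detect_tramp_fp() scan on arm64), get_func_ip() reads the tracee's IP from fp_tramp + 8 and subtracts ip_offset (5 bytes of callq on x86, 12 bytes / 3 insns on arm64) to reach the prog entry. The sketch below replays that pointer chasing on a fabricated x86-style stack; every address in it is invented for illustration.

/* frame_walk.c: userspace replay of get_func_ip()'s two dependent loads;
 * only the [fp] and [fp_tramp + 8] accesses mirror the BPF code. */
#include <stdint.h>
#include <stdio.h>

#define IP_OFFSET 5 /* sizeof callq insn on x86, per the patch */

int main(void)
{
	uint64_t stack[8] = {0};
	uint64_t base = (uint64_t)(uintptr_t)stack;

	/* trampoline frame: slot 4 = its saved FP (end of chain here),
	 * slot 5 = IP of tracee, i.e. prog entry + IP_OFFSET */
	stack[5] = 0xffffffffc0001000ULL + IP_OFFSET;

	/* current prog's frame: saved FP points at the trampoline frame */
	stack[0] = base + 4 * sizeof(uint64_t);

	uint64_t fp = base; /* what get_tracing_fp() would return */
	uint64_t fp_tramp = *(uint64_t *)(uintptr_t)fp;       /* FP of trampoline */
	uint64_t ip = *(uint64_t *)(uintptr_t)(fp_tramp + 8); /* IP of tracee */

	printf("prog entry: %#llx\n",
	       (unsigned long long)(ip - IP_OFFSET)); /* 0xffffffffc0001000 */
	return 0;
}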