以libbpf-bootstrap中的一个测试程序minimal.bpf.c为例。
下面是minimal.bpf.c
的源码:
minimal.bpf.c
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2020 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
/* License string emitted into the "license" section of the BPF object. */
char LICENSE[] SEC("license") = "Dual BSD/GPL";
/* PID that handle_tp() compares against; starts as 0 and is overwritten by user space through the skeleton's bss pointer. */
int my_pid = 0;
/*
 * BPF program placed in the "tp/syscalls/sys_enter_write" section.
 *
 * Emits one trace message when the caller's ID matches my_pid and does
 * nothing otherwise. ctx is not read. Always returns 0.
 */
SEC("tp/syscalls/sys_enter_write")
int handle_tp(void *ctx)
{
/* keep only the upper 32 bits of the helper's 64-bit result (the tgid half, per the helper's documentation) */
int pid = bpf_get_current_pid_tgid() >> 32;
/* events raised by any other process are ignored */
if (pid != my_pid)
return 0;
bpf_printk("BPF triggered from PID %d.\n", pid);
return 0;
}
这个函数在sys_enter_write
这个tracepoint上挂一个钩子,这个钩子会检查执行到这个tracepoint的进程的pid,如果等于全局变量my_pid中保存的pid(由用户态程序设置为它自己的pid),那么执行输出。对应的用户态程序是minimal.c
,源码如下:
minimal.c
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
/* Copyright (c) 2020 Facebook */
#include <stdio.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include "minimal.skel.h"
/*
 * Print callback handed to libbpf_set_print(): forwards every message to
 * stderr regardless of its level and returns vfprintf()'s result.
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
{
	int written;

	(void)level; /* all levels are printed */
	written = vfprintf(stderr, format, args);
	return written;
}
/*
 * Open, configure, load and attach the "minimal" BPF skeleton, then loop
 * forever writing a dot to stderr once per second.
 *
 * Returns 1 when the skeleton cannot be opened; when load or attach fails
 * the skeleton is destroyed and -err is returned.
 */
int main(int argc, char **argv)
{
	struct minimal_bpf *skel;
	int err;

	/* Route libbpf errors and debug info through our callback */
	libbpf_set_print(libbpf_print_fn);

	/* Open the BPF application */
	skel = minimal_bpf__open();
	if (skel == NULL) {
		fprintf(stderr, "Failed to open BPF skeleton\n");
		return 1;
	}

	/* Make the BPF program react only to write() syscalls from this process */
	skel->bss->my_pid = getpid();

	/* Load and verify the BPF programs */
	err = minimal_bpf__load(skel);
	if (err != 0) {
		fprintf(stderr, "Failed to load and verify BPF skeleton\n");
		goto cleanup;
	}

	/* Attach the tracepoint handler */
	err = minimal_bpf__attach(skel);
	if (err != 0) {
		fprintf(stderr, "Failed to attach BPF skeleton\n");
		goto cleanup;
	}

	printf("Successfully started! Please run `sudo cat /sys/kernel/debug/tracing/trace_pipe` "
	       "to see output of the BPF programs.\n");

	while (1) {
		/* each write to stderr triggers our BPF program */
		fprintf(stderr, ".");
		sleep(1);
	}

cleanup:
	minimal_bpf__destroy(skel);
	return -err;
}
其中通过getpid获取当前进程的pid,然后设置给my_pid。因为my_pid是一个初始化为0的全局变量,所以被放在bss段,在minimal_bpf__open()
返回后,通过skel->bss->my_pid
可以访问这个变量。
下面是编译命令:
clang -g -O2 -target bpf -D__TARGET_ARCH_x86 \
-I.output -I../../libbpf/include/uapi -I../../vmlinux/x86/ -I/mnt/libbpf-bootstrap_compiled/blazesym/include -idirafter /usr/lib/llvm-17/lib/clang/17/include -idirafter /usr/local/include -idirafter /usr/include/x86_64-linux-gnu -idirafter /include -idirafter /usr/include \
-c minimal.bpf.c -o .output/minimal.tmp.bpf.o
/mnt/libbpf-bootstrap_compiled/examples/c/.output/bpftool/bootstrap/bpftool gen object .output/minimal.bpf.o .output/minimal.tmp.bpf.o
/mnt/libbpf-bootstrap_compiled/examples/c/.output/bpftool/bootstrap/bpftool gen skeleton .output/minimal.bpf.o > .output/minimal.skel.h
cc -g -Wall -I.output -I../../libbpf/include/uapi -I../../vmlinux/x86/ -I/mnt/libbpf-bootstrap_compiled/blazesym/include -c minimal.c -o .output/minimal.o
cc -g -Wall .output/minimal.o /mnt/libbpf-bootstrap_compiled/examples/c/.output/libbpf.a -lelf -lz -o minimal
上面编译时生成的minimal.skel.h
文件的内容:
minimal.skel.h
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* THIS FILE IS AUTOGENERATED BY BPFTOOL! */
#ifndef __MINIMAL_BPF_SKEL_H__
#define __MINIMAL_BPF_SKEL_H__
#include <errno.h>
#include <stdlib.h>
#include <bpf/libbpf.h>
struct minimal_bpf {
struct bpf_object_skeleton *skeleton;
struct bpf_object *obj;
struct {
struct bpf_map *bss;
struct bpf_map *rodata;
} maps;
struct {
struct bpf_program *handle_tp;
} progs;
struct {
struct bpf_link *handle_tp;
} links;
struct minimal_bpf__bss {
int my_pid;
} *bss;
#ifdef __cplusplus
static inline struct minimal_bpf *open(const struct bpf_object_open_opts *opts = nullptr);
static inline struct minimal_bpf *open_and_load();
static inline int load(struct minimal_bpf *skel);
static inline int attach(struct minimal_bpf *skel);
static inline void detach(struct minimal_bpf *skel);
static inline void destroy(struct minimal_bpf *skel);
static inline const void *elf_bytes(size_t *sz);
#endif /* __cplusplus */
};
/*
 * Tear down a skeleton handle: destroy the embedded skeleton descriptor
 * (when one was created) and free the handle itself. A NULL handle is a
 * no-op.
 */
static void
minimal_bpf__destroy(struct minimal_bpf *obj)
{
	if (obj) {
		if (obj->skeleton)
			bpf_object__destroy_skeleton(obj->skeleton);
		free(obj);
	}
}
static inline int
minimal_bpf__create_skeleton(struct minimal_bpf *obj);
/*
 * Allocate a skeleton handle, build its skeleton descriptor and open the
 * BPF object with the given options.
 *
 * Returns the new handle, or NULL with errno set (ENOMEM when the handle
 * cannot be allocated, otherwise the negated error code of the failing
 * step). On failure everything allocated so far is destroyed.
 */
static inline struct minimal_bpf *
minimal_bpf__open_opts(const struct bpf_object_open_opts *opts)
{
	struct minimal_bpf *handle;
	int rc;

	handle = (struct minimal_bpf *)calloc(1, sizeof(*handle));
	if (handle == NULL) {
		errno = ENOMEM;
		return NULL;
	}

	rc = minimal_bpf__create_skeleton(handle);
	if (rc == 0)
		rc = bpf_object__open_skeleton(handle->skeleton, opts);
	if (rc == 0)
		return handle;

	/* either step failed: undo the allocation and report through errno */
	minimal_bpf__destroy(handle);
	errno = -rc;
	return NULL;
}
/* Open the skeleton without options: same as minimal_bpf__open_opts(NULL). */
static inline struct minimal_bpf *
minimal_bpf__open(void)
{
	struct minimal_bpf *handle = minimal_bpf__open_opts(NULL);

	return handle;
}
/* Load the opened skeleton; returns whatever bpf_object__load_skeleton() returns. */
static inline int
minimal_bpf__load(struct minimal_bpf *obj)
{
	struct bpf_object_skeleton *skeleton = obj->skeleton;

	return bpf_object__load_skeleton(skeleton);
}
/*
 * Convenience wrapper: open the skeleton with default options, then load it.
 *
 * Returns the loaded handle. Returns NULL when opening fails, or when
 * loading fails, in which case the handle is destroyed and errno is set
 * to the negated load error.
 */
static inline struct minimal_bpf *
minimal_bpf__open_and_load(void)
{
	struct minimal_bpf *handle = minimal_bpf__open();
	int rc;

	if (handle == NULL)
		return NULL;

	rc = minimal_bpf__load(handle);
	if (rc == 0)
		return handle;

	minimal_bpf__destroy(handle);
	errno = -rc;
	return NULL;
}
/* Attach the skeleton; returns whatever bpf_object__attach_skeleton() returns. */
static inline int
minimal_bpf__attach(struct minimal_bpf *obj)
{
	struct bpf_object_skeleton *skeleton = obj->skeleton;

	return bpf_object__attach_skeleton(skeleton);
}
/* Detach the skeleton by handing its descriptor to bpf_object__detach_skeleton(). */
static inline void
minimal_bpf__detach(struct minimal_bpf *obj)
{
	struct bpf_object_skeleton *skeleton = obj->skeleton;

	bpf_object__detach_skeleton(skeleton);
}
static inline const void *minimal_bpf__elf_bytes(size_t *sz);
static inline int
minimal_bpf__create_skeleton(struct minimal_bpf *obj)
{
struct bpf_object_skeleton *s;
int err;
s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));
if (!s) {
err = -ENOMEM;
goto err;
}
s->sz = sizeof(*s);
s->name = "minimal_bpf";
s->obj = &obj->obj;
/* maps */
s->map_cnt = 2;
s->map_skel_sz = sizeof(*s->maps);
s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);
if (!s->maps) {
err = -ENOMEM;
goto err;
}
s->maps[0].name = "minimal_.bss";
s->maps[0].map = &obj->maps.bss;
s->maps[0].mmaped = (void **)&obj->bss;
s->maps[1].name = "minimal_.rodata";
s->maps[1].map = &obj->maps.rodata;
/* programs */
s->prog_cnt = 1;
s->prog_skel_sz = sizeof(*s->progs);
s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);
if (!s->progs) {
err = -ENOMEM;
goto err;
}
s->progs[0].name = "handle_tp";
s->progs[0].prog = &obj->progs.handle_tp;
s->progs[0].link = &obj->links.handle_tp;
s->data = minimal_bpf__elf_bytes(&s->data_sz);
obj->skeleton = s;
return 0;
err:
bpf_object__destroy_skeleton(s);
return err;
}
static inline const void *minimal_bpf__elf_bytes(size_t *sz)
{
static const char data[] __attribute__((__aligned__(8))) = "\
\x7f\x45\x4c\x46\x02\x01\x01\0\0\0\0\0\0\0\0\0\x01\0\xf7\0\x01\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\x70\x05\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\x40\0\x0a\0\
\x01\0\0\x2e\x73\x74\x72\x74\x61\x62\0\x2e\x73\x79\x6d\x74\x61\x62\0\x74\x70\
\x2f\x73\x79\x73\x63\x61\x6c\x6c\x73\x2f\x73\x79\x73\x5f\x65\x6e\x74\x65\x72\
\x5f\x77\x72\x69\x74\x65\0\x6c\x69\x63\x65\x6e\x73\x65\0\x2e\x62\x73\x73\0\x2e\
\x72\x6f\x64\x61\x74\x61\0\x6d\x69\x6e\x69\x6d\x61\x6c\x2e\x62\x70\x66\x2e\x63\
\0\x4c\x42\x42\x30\x5f\x32\0\x68\x61\x6e\x64\x6c\x65\x5f\x74\x70\x2e\x5f\x5f\
\x5f\x5f\x66\x6d\x74\0\x68\x61\x6e\x64\x6c\x65\x5f\x74\x70\0\x6d\x79\x5f\x70\
\x69\x64\0\x4c\x49\x43\x45\x4e\x53\x45\0\x2e\x72\x65\x6c\x74\x70\x2f\x73\x79\
\x73\x63\x61\x6c\x6c\x73\x2f\x73\x79\x73\x5f\x65\x6e\x74\x65\x72\x5f\x77\x72\
\x69\x74\x65\0\x2e\x42\x54\x46\0\x2e\x42\x54\x46\x2e\x65\x78\x74\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x42\0\0\0\x04\0\xf1\xff\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\x03\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x50\0\0\0\0\
\0\x03\0\x58\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x57\0\0\0\x01\0\x06\0\0\0\0\0\0\0\0\
\0\x1c\0\0\0\0\0\0\0\0\0\0\0\x03\0\x06\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x69\0\
\0\0\x12\0\x03\0\0\0\0\0\0\0\0\0\x68\0\0\0\0\0\0\0\x73\0\0\0\x11\0\x05\0\0\0\0\
\0\0\0\0\0\x04\0\0\0\0\0\0\0\x7a\0\0\0\x11\0\x04\0\0\0\0\0\0\0\0\0\x0d\0\0\0\0\
\0\0\0\x85\0\0\0\x0e\0\0\0\x77\0\0\0\x20\0\0\0\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\x61\x11\0\0\0\0\0\0\x5d\x01\x05\0\0\0\0\0\x18\x01\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\xb7\x02\0\0\x1c\0\0\0\xbf\x03\0\0\0\0\0\0\x85\0\0\0\x06\0\0\0\xb7\0\0\0\0\
\0\0\0\x95\0\0\0\0\0\0\0\x44\x75\x61\x6c\x20\x42\x53\x44\x2f\x47\x50\x4c\0\0\0\
\0\x42\x50\x46\x20\x74\x72\x69\x67\x67\x65\x72\x65\x64\x20\x66\x72\x6f\x6d\x20\
\x50\x49\x44\x20\x25\x64\x2e\x0a\0\0\0\0\0\x10\0\0\0\0\0\0\0\x01\0\0\0\x07\0\0\
\0\x30\0\0\0\0\0\0\0\x01\0\0\0\x05\0\0\0\x9f\xeb\x01\0\x18\0\0\0\0\0\0\0\x10\
\x01\0\0\x10\x01\0\0\x22\x01\0\0\0\0\0\0\0\0\0\x02\0\0\0\0\0\0\0\0\x01\0\0\x0d\
\x03\0\0\0\x01\0\0\0\x01\0\0\0\x05\0\0\0\0\0\0\x01\x04\0\0\0\x20\0\0\x01\x09\0\
\0\0\x01\0\0\x0c\x02\0\0\0\x13\0\0\0\0\0\0\x01\x01\0\0\0\x08\0\0\x01\0\0\0\0\0\
\0\0\x03\0\0\0\0\x05\0\0\0\x07\0\0\0\x0d\0\0\0\x18\0\0\0\0\0\0\x01\x04\0\0\0\
\x20\0\0\0\x2c\0\0\0\0\0\0\x0e\x06\0\0\0\x01\0\0\0\x34\0\0\0\0\0\0\x0e\x03\0\0\
\0\x01\0\0\0\0\0\0\0\0\0\0\x0a\x05\0\0\0\0\0\0\0\0\0\0\x03\0\0\0\0\x0a\0\0\0\
\x07\0\0\0\x1c\0\0\0\x3b\0\0\0\0\0\0\x0e\x0b\0\0\0\0\0\0\0\xf1\0\0\0\x01\0\0\
\x0f\x0d\0\0\0\x08\0\0\0\0\0\0\0\x0d\0\0\0\xf9\0\0\0\x01\0\0\x0f\x04\0\0\0\x09\
\0\0\0\0\0\0\0\x04\0\0\0\xfe\0\0\0\x01\0\0\x0f\x1c\0\0\0\x0c\0\0\0\0\0\0\0\x1c\
\0\0\0\0\x63\x74\x78\0\x69\x6e\x74\0\x68\x61\x6e\x64\x6c\x65\x5f\x74\x70\0\x63\
\x68\x61\x72\0\x5f\x5f\x41\x52\x52\x41\x59\x5f\x53\x49\x5a\x45\x5f\x54\x59\x50\
\x45\x5f\x5f\0\x4c\x49\x43\x45\x4e\x53\x45\0\x6d\x79\x5f\x70\x69\x64\0\x68\x61\
\x6e\x64\x6c\x65\x5f\x74\x70\x2e\x5f\x5f\x5f\x5f\x66\x6d\x74\0\x2f\x6d\x6e\x74\
\x2f\x6c\x69\x62\x62\x70\x66\x2d\x62\x6f\x6f\x74\x73\x74\x72\x61\x70\x2f\x65\
\x78\x61\x6d\x70\x6c\x65\x73\x2f\x63\x2f\x6d\x69\x6e\x69\x6d\x61\x6c\x2e\x62\
\x70\x66\x2e\x63\0\x09\x69\x6e\x74\x20\x70\x69\x64\x20\x3d\x20\x62\x70\x66\x5f\
\x67\x65\x74\x5f\x63\x75\x72\x72\x65\x6e\x74\x5f\x70\x69\x64\x5f\x74\x67\x69\
\x64\x28\x29\x20\x3e\x3e\x20\x33\x32\x3b\0\x09\x69\x66\x20\x28\x70\x69\x64\x20\
\x21\x3d\x20\x6d\x79\x5f\x70\x69\x64\x29\0\x09\x62\x70\x66\x5f\x70\x72\x69\x6e\
\x74\x6b\x28\x22\x42\x50\x46\x20\x74\x72\x69\x67\x67\x65\x72\x65\x64\x20\x66\
\x72\x6f\x6d\x20\x50\x49\x44\x20\x25\x64\x2e\x5c\x6e\x22\x2c\x20\x70\x69\x64\
\x29\x3b\0\x7d\0\x6c\x69\x63\x65\x6e\x73\x65\0\x2e\x62\x73\x73\0\x2e\x72\x6f\
\x64\x61\x74\x61\0\x74\x70\x2f\x73\x79\x73\x63\x61\x6c\x6c\x73\x2f\x73\x79\x73\
\x5f\x65\x6e\x74\x65\x72\x5f\x77\x72\x69\x74\x65\0\0\0\0\0\0\0\x9f\xeb\x01\0\
\x20\0\0\0\0\0\0\0\x14\0\0\0\x14\0\0\0\x6c\0\0\0\x80\0\0\0\0\0\0\0\x08\0\0\0\
\x06\x01\0\0\x01\0\0\0\0\0\0\0\x04\0\0\0\x10\0\0\0\x06\x01\0\0\x06\0\0\0\0\0\0\
\0\x4d\0\0\0\x7c\0\0\0\x0c\x34\0\0\x08\0\0\0\x4d\0\0\0\x7c\0\0\0\x27\x34\0\0\
\x10\0\0\0\x4d\0\0\0\xa9\0\0\0\x0d\x3c\0\0\x28\0\0\0\x4d\0\0\0\xa9\0\0\0\x06\
\x3c\0\0\x30\0\0\0\x4d\0\0\0\xbd\0\0\0\x02\x48\0\0\x58\0\0\0\x4d\0\0\0\xef\0\0\
\0\x01\x54\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\x03\
\0\0\0\x20\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\0\0\0\0\0\0\0\xb0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x09\0\0\0\x02\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\xf0\0\0\0\0\0\0\0\xd8\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\
\x08\0\0\0\0\0\0\0\x18\0\0\0\0\0\0\0\x11\0\0\0\x01\0\0\0\x06\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\xc8\x01\0\0\0\0\0\0\x68\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\x2d\0\0\0\x01\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\
\x30\x02\0\0\0\0\0\0\x0d\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\x35\0\0\0\x08\0\0\0\x03\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x02\0\0\0\
\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x3a\
\0\0\0\x01\0\0\0\x02\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x40\x02\0\0\0\0\0\0\x1c\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x82\0\0\0\x09\0\0\
\0\x40\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x60\x02\0\0\0\0\0\0\x20\0\0\0\0\0\0\0\x02\
\0\0\0\x03\0\0\0\x08\0\0\0\0\0\0\0\x10\0\0\0\0\0\0\0\xa2\0\0\0\x01\0\0\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0\0\0\x80\x02\0\0\0\0\0\0\x4a\x02\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\x08\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\xa7\0\0\0\x01\0\0\0\0\0\0\0\0\0\0\0\0\0\
\0\0\0\0\0\0\xd0\x04\0\0\0\0\0\0\xa0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\
\0\0\0\0\0\0\0\0\0\0\0";
*sz = sizeof(data) - 1;
return (const void *)data;
}
#ifdef __cplusplus
/* C++ static member definitions: each one simply forwards to the matching minimal_bpf__*() function. */
struct minimal_bpf *minimal_bpf::open(const struct bpf_object_open_opts *opts) { return minimal_bpf__open_opts(opts); }
struct minimal_bpf *minimal_bpf::open_and_load() { return minimal_bpf__open_and_load(); }
int minimal_bpf::load(struct minimal_bpf *skel) { return minimal_bpf__load(skel); }
int minimal_bpf::attach(struct minimal_bpf *skel) { return minimal_bpf__attach(skel); }
void minimal_bpf::detach(struct minimal_bpf *skel) { minimal_bpf__detach(skel); }
void minimal_bpf::destroy(struct minimal_bpf *skel) { minimal_bpf__destroy(skel); }
const void *minimal_bpf::elf_bytes(size_t *sz) { return minimal_bpf__elf_bytes(sz); }
#endif /* __cplusplus */
/*
 * Compile-time size check for the generated bss struct. The body holds
 * only a static assertion, so the function has no run-time effect; both
 * the function and its parameter are marked unused.
 */
__attribute__((unused)) static void
minimal_bpf__assert(struct minimal_bpf *s __attribute__((unused)))
{
/* when compiled as C++, temporarily map _Static_assert onto static_assert */
#ifdef __cplusplus
#define _Static_assert static_assert
#endif
/* the build fails if my_pid is not exactly 4 bytes wide */
_Static_assert(sizeof(s->bss->my_pid) == 4, "unexpected size of 'my_pid'");
#ifdef __cplusplus
#undef _Static_assert
#endif
}
#endif /* __MINIMAL_BPF_SKEL_H__ */
对中间文件.output/minimal.bpf.o
进行反汇编得到其bpf字节码:
# llvm-objdump -S .output/minimal.bpf.o
.output/minimal.bpf.o: file format ELF64-BPF
Disassembly of section tp/syscalls/sys_enter_write:
0000000000000000 handle_tp:
0: 85 00 00 00 0e 00 00 00 call 14
1: 77 00 00 00 20 00 00 00 r0 >>= 32
2: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
4: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
5: 5d 01 05 00 00 00 00 00 if r1 != r0 goto +5 <LBB0_2>
6: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
8: b7 02 00 00 1c 00 00 00 r2 = 28
9: bf 03 00 00 00 00 00 00 r3 = r0
10: 85 00 00 00 06 00 00 00 call 6
0000000000000058 LBB0_2:
11: b7 00 00 00 00 00 00 00 r0 = 0
12: 95 00 00 00 00 00 00 00 exit
可以看到,上面调用bpf_get_current_pid_tgid
和bpf_printk(这是一个宏,展开后调用bpf_trace_printk)
函数的地方反汇编后分别变成了call 14
和call 6
,这里的数字是哪里来的呢?
这里的数字来自.output/bpf/bpf_helper_defs.h
:
bpf_helper_defs.h部分源码
/*
* bpf_get_current_pid_tgid
*
* Get the current pid and tgid.
*
* Returns
* A 64-bit integer containing the current tgid and pid, and
* created as such:
* *current_task*\ **->tgid << 32 \|**
* *current_task*\ **->pid**.
*/
static __u64 (*bpf_get_current_pid_tgid)(void) = (void *) 14;
/*
* bpf_trace_printk
*
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
* to file *\/sys/kernel/tracing/trace* from TraceFS, if
* available. It can take up to three additional **u64**
* arguments (as an eBPF helpers, the total number of arguments is
* limited to five).
*
* Each time the helper is called, it appends a line to the trace.
* Lines are discarded while *\/sys/kernel/tracing/trace* is
* open, use *\/sys/kernel/tracing/trace_pipe* to avoid this.
* The format of the trace is customizable, and the exact output
* one will get depends on the options set in
* *\/sys/kernel/tracing/trace_options* (see also the
* *README* file under the same directory). However, it usually
* defaults to something like:
*
* ::
*
* telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
*
* In the above:
*
* * ``telnet`` is the name of the current task.
* * ``470`` is the PID of the current task.
* * ``001`` is the CPU number on which the task is
* running.
* * In ``.N..``, each character refers to a set of
* options (whether irqs are enabled, scheduling
* options, whether hard/softirqs are running, level of
* preempt_disabled respectively). **N** means that
* **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
* are set.
* * ``419421.045894`` is a timestamp.
* * ``0x00000001`` is a fake value used by BPF for the
* instruction pointer register.
* * ``<formatted msg>`` is the message formatted with
* *fmt*.
*
* The conversion specifiers supported by *fmt* are similar, but
* more limited than for printk(). They are **%d**, **%i**,
* **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
* **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
* of field, padding with zeroes, etc.) is available, and the
* helper will return **-EINVAL** (but print nothing) if it
* encounters an unknown specifier.
*
* Also, note that **bpf_trace_printk**\ () is slow, and should
* only be used for debugging purposes. For this reason, a notice
* block (spanning several lines) is printed to kernel logs and
* states that the helper should not be used "for production use"
* the first time this helper is used (or more precisely, when
* **trace_printk**\ () buffers are allocated). For passing values
* to user space, perf events should be preferred.
*
* Returns
* The number of bytes written to the buffer, or a negative error
* in case of failure.
*/
static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) = (void *) 6;
在libbpf项目中的sync-kernel.sh提供了这个文件的生成命令:
# Generate bpf_helper_defs.h and commit, if anything changed
# restore Linux tip to use bpf_doc.py
cd_to ${LINUX_REPO}
git checkout ${TIP_TAG}
# re-generate bpf_helper_defs.h
cd_to ${LIBBPF_REPO}
"${LINUX_ABS_DIR}/scripts/bpf_doc.py" --header \
--file include/uapi/linux/bpf.h > src/bpf_helper_defs.h
# if anything changed, commit it
helpers_changes=$(git status --porcelain src/bpf_helper_defs.h | wc -l)
if ((${helpers_changes} == 1)); then
git add src/bpf_helper_defs.h
git commit -s -m "sync: auto-generate latest BPF helpers
Latest changes to BPF helper definitions.
" -- src/bpf_helper_defs.h
fi
当开始运行后,可以使用bpftool把实际运行的字节码打印出来:
# bpftool prog
...
22: tracepoint name handle_tp tag 6a5dcef153b1001e gpl
loaded_at 2023-11-01T22:26:31+0800 uid 0
xlated 104B jited 73B memlock 4096B map_ids 8,9
btf_id 19
# bpftool prog dump xlated id 22
int handle_tp(void * ctx):
; int pid = bpf_get_current_pid_tgid() >> 32;
0: (85) call bpf_get_current_pid_tgid#200752
; int pid = bpf_get_current_pid_tgid() >> 32;
1: (77) r0 >>= 32
; if (pid != my_pid)
2: (18) r1 = map[id:8][0]+0
4: (61) r1 = *(u32 *)(r1 +0)
; if (pid != my_pid)
5: (5d) if r1 != r0 goto pc+5
; bpf_printk("BPF triggered from PID %d.\n", pid);
6: (18) r1 = map[id:9][0]+0
8: (b7) r2 = 28
9: (bf) r3 = r0
10: (85) call bpf_trace_printk#-83056
; }
11: (b7) r0 = 0
12: (95) exit
在加载时,内核会把上面的id换成实际的函数调用。
下面是跟踪minimal运行时得到的日志(用文本编辑器打开): trace.xml
这里把bpf程序可以调用的内核函数转换成一个唯一的编号,然后内置到bpf字节码中,在加载到内核时,再根据实际的内核函数地址对bpf字节码进行修改。
下面是bpf_func_id在内核中定义方式的一些历史:
最开始的时候,这个id是通过手动枚举实现,参考下面的patch:
commit ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89
Author: Alexei Starovoitov <[email protected]>
Date: Fri Jun 12 19:39:12 2015 -0700
bpf: introduce current->pid, tgid, uid, gid, comm accessors
eBPF programs attached to kprobes need to filter based on
current->pid, uid and other fields, so introduce helper functions:
u64 bpf_get_current_pid_tgid(void)
Return: current->tgid << 32 | current->pid
u64 bpf_get_current_uid_gid(void)
Return: current_gid << 32 | current_uid
bpf_get_current_comm(char *buf, int size_of_buf)
stores current->comm into buf
They can be used from the programs attached to TC as well to classify packets
based on current task fields.
Update tracex2 example to print histogram of write syscalls for each process
instead of aggregated for all.
Signed-off-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
后来为了在反汇编字节码时可以显示实际的函数名,添加了__BPF_FUNC_MAPPER
,这样可以方便地根据枚举值得到函数名:
commit ebb676daa1a340ccef25eb769aefc09b79c01f8a
Author: Thomas Graf <[email protected]>
Date: Thu Oct 27 11:23:51 2016 +0200
bpf: Print function name in addition to function id
The verifier currently prints raw function ids when printing CALL
instructions or when complaining:
5: (85) call 23
unknown func 23
print a meaningful function name instead:
5: (85) call bpf_redirect#23
unknown func bpf_redirect#23
Moves the function documentation to a single comment and renames all
helpers names in the list to conform to the bpf_ prefix notation so
they can be greped in the kernel source.
Signed-off-by: Thomas Graf <[email protected]>
Acked-by: Daniel Borkmann <[email protected]>
Acked-by: Alexei Starovoitov <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
再后来为了方便地看到内核函数跟id的对应关系,又对宏进行了重新设计:
commit 8a76145a2ec2a81dfe34d7ac42e8c242f095e8c8
Refs: v6.0-2740-g8a76145a2ec2
Author: Andrii Nakryiko <[email protected]>
AuthorDate: Wed Oct 5 21:24:51 2022 -0700
Commit: Alexei Starovoitov <[email protected]>
CommitDate: Thu Oct 6 08:19:30 2022 -0700
bpf: explicitly define BPF_FUNC_xxx integer values
Historically enum bpf_func_id's BPF_FUNC_xxx enumerators relied on
implicit sequential values being assigned by compiler. This is
convenient, as new BPF helpers are always added at the very end, but it
also has its downsides, some of them being:
- with over 200 helpers now it's very hard to know what's each helper's ID,
which is often important to know when working with BPF assembly (e.g.,
by dumping raw bpf assembly instructions with llvm-objdump -d
command). it's possible to work around this by looking into vmlinux.h,
dumping /sys/btf/kernel/vmlinux, looking at libbpf-provided
bpf_helper_defs.h, etc. But it always feels like an unnecessary step
and one should be able to quickly figure this out from UAPI header.
- when backporting and cherry-picking only some BPF helpers onto older
kernels it's important to be able to skip some enum values for helpers
that weren't backported, but preserve absolute integer IDs to keep BPF
helper IDs stable so that BPF programs stay portable across upstream
and backported kernels.
While neither problem is insurmountable, they come up frequently enough
and are annoying enough to warrant improving the situation. And for the
backporting the problem can easily go unnoticed for a while, especially
if backport is done with people not very familiar with BPF subsystem overall.
Anyways, it's easy to fix this by making sure that __BPF_FUNC_MAPPER
macro provides explicit helper IDs. Unfortunately that would potentially
break existing users that use UAPI-exposed __BPF_FUNC_MAPPER and are
expected to pass macro that accepts only symbolic helper identifier
(e.g., map_lookup_elem for bpf_map_lookup_elem() helper).
As such, we need to introduce a new macro (___BPF_FUNC_MAPPER) which
would specify both identifier and integer ID, but in such a way as to
allow existing __BPF_FUNC_MAPPER be expressed in terms of new
___BPF_FUNC_MAPPER macro. And that's what this patch is doing. To avoid
duplication and allow __BPF_FUNC_MAPPER stay *exactly* the same,
___BPF_FUNC_MAPPER accepts arbitrary "context" arguments, which can be
used to pass any extra macros, arguments, and whatnot. In our case we
use this to pass original user-provided macro that expects single
argument and __BPF_FUNC_MAPPER is using it's own three-argument
__BPF_FUNC_MAPPER_APPLY intermediate macro to impedance-match new and
old "callback" macros.
Once we resolve this, we use new ___BPF_FUNC_MAPPER to define enum
bpf_func_id with explicit values. The other users of __BPF_FUNC_MAPPER
in kernel (namely in kernel/bpf/disasm.c) are kept exactly the same both
as demonstration that backwards compat works, but also to avoid
unnecessary code churn.
Note that new ___BPF_FUNC_MAPPER() doesn't forcefully insert comma
between values, as that might not be appropriate in all possible cases
where ___BPF_FUNC_MAPPER might be used by users. This doesn't reduce
usability, as it's trivial to insert that comma inside "callback" macro.
To validate all the manually specified IDs are exactly right, we used
BTF to compare before and after values:
$ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > after.txt
$ git stash # stach UAPI changes
$ make -j90
... re-building kernel without UAPI changes ...
$ bpftool btf dump file ~/linux-build/default/vmlinux | rg bpf_func_id -A 211 > before.txt
$ diff -u before.txt after.txt
--- before.txt 2022-10-05 10:48:18.119195916 -0700
+++ after.txt 2022-10-05 10:46:49.446615025 -0700
@@ -1,4 +1,4 @@
-[14576] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211
+[9560] ENUM 'bpf_func_id' encoding=UNSIGNED size=4 vlen=211
'BPF_FUNC_unspec' val=0
'BPF_FUNC_map_lookup_elem' val=1
'BPF_FUNC_map_update_elem' val=2
As can be seen from diff above, the only thing that changed was resulting BTF
type ID of ENUM bpf_func_id, not any of the enumerators, their names or integer
values.
The only other place that needed fixing was scripts/bpf_doc.py used to generate
man pages and bpf_helper_defs.h header for libbpf and selftests. That script is
tightly-coupled to exact shape of ___BPF_FUNC_MAPPER macro definition, so had
to be trivially adapted.
Cc: Quentin Monnet <[email protected]>
Reported-by: Andrea Terzolo <[email protected]>
Signed-off-by: Andrii Nakryiko <[email protected]>
Reviewed-by: Quentin Monnet <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Acked-by: Toke Høiland-Jørgensen <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
完。
标签:__,00,struct,bpf,func,x01,id,minimal From: https://www.cnblogs.com/pengdonglin137/p/17803044.html