最近在分析内核问题时用了function_graph,用它来分析为什么应用的某个系统调用会返回错误。在分析的时候,根据
function_graph的输出确定代码执行流程,但是有时又需要知道函数的返回值来进一步定位出错位置。目前获取函数返回
值的方法可以用kprobe_events的kretprobe指令,或者使用bpftrace。如果让function_graph在函数返回时直接输出
返回值,那么会更加方便。
下面是在x86_64上测试过的方法:
目前这种方式的缺点是不会判断函数是否有返回值,即使函数返回void,也会输出一个值,所以返回值的具体含义需要
结合源码来分析。
From 24fdd2fbfd28c56a3396b77cbe748ea48195cfb5 Mon Sep 17 00:00:00 2001
From: Donglin Peng <[email protected]>
Date: Wed, 15 Feb 2023 22:35:01 -0800
Subject: [PATCH] Support print function return value when using function_graph
tracer
Usage:
1. Enable
echo 1 > /sys/kernel/debug/tracing/options/funcgraph-retval
2. Disable
echo 0 > /sys/kernel/debug/tracing/options/funcgraph-retval
For example:
3) | finish_task_switch() {
3) | rcu_read_lock_sched_held() {
3) | rcu_read_lock_held_common() {
3) 0.301 us | rcu_lockdep_current_cpu_online();
3) 0.972 us | } /* 0 */
3) 1.663 us | } /* 1 */
3) | __balance_callbacks() {
3) 0.321 us | do_balance_callbacks();
3) 0.841 us | } /* 1 */
3) | raw_spin_rq_unlock() {
3) | _raw_spin_unlock() {
3) | rcu_read_lock_sched_held() {
3) | rcu_read_lock_held_common() {
3) 0.271 us | rcu_lockdep_current_cpu_online();
3) 0.792 us | } /* 0 */
3) 1.302 us | } /* 1 */
3) 0.271 us | do_raw_spin_unlock();
3) 0.271 us | preempt_count_sub();
3) 2.985 us | } /* 0 */
3) 3.506 us | } /* 0 */
3) 8.706 us | } /* ffff88813dbfde80 */
Signed-off-by: Donglin Peng <[email protected]>
---
arch/arm64/kernel/entry-ftrace.S | 1 +
arch/x86/kernel/ftrace_64.S | 1 +
include/linux/ftrace.h | 1 +
kernel/trace/fgraph.c | 3 ++-
kernel/trace/trace.h | 1 +
kernel/trace/trace_functions_graph.c | 16 ++++++++++++----
7 files changed, 18 insertions(+), 5 deletions(-)
create mode 100644 kernel/trace/trac.c
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index bd5df50e4..9982bdf94 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -314,6 +314,7 @@ SYM_CODE_START(return_to_handler)
stp x6, x7, [sp, #48]
mov x0, x29 // parent's fp
+ mov x1, sp // stack pointer to get return value
bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
mov x30, x0 // restore the original return address
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index dfeb227de..36871edc2 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -327,6 +327,7 @@ SYM_CODE_START(return_to_handler)
movq %rax, (%rsp)
movq %rdx, 8(%rsp)
movq %rbp, %rdi
+ leaq (%rsp), %rsi
call ftrace_return_to_handler
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 979f6bfa2..badc0924c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -952,6 +952,7 @@ struct ftrace_graph_ent {
*/
struct ftrace_graph_ret {
unsigned long func; /* Current function */
+ unsigned long retval;
int depth;
/* Number of functions that overran the depth limit for current task */
unsigned int overrun;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 218cd95bf..86bf99161 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -240,13 +240,14 @@ static struct notifier_block ftrace_suspend_notifier = {
* Send the trace to the ring-buffer.
* @return the original return address.
*/
-unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
+unsigned long ftrace_return_to_handler(unsigned long frame_pointer, unsigned long *stack)
{
struct ftrace_graph_ret trace;
unsigned long ret;
ftrace_pop_return_trace(&trace, &ret, frame_pointer);
trace.rettime = trace_clock_local();
+ trace.retval = stack[0];
ftrace_graph_return(&trace);
/*
* The ftrace_graph_return() may still access the current
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ff816fb41..9838433aa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -828,6 +828,7 @@ static __always_inline bool ftrace_hash_empty(struct ftrace_hash *hash)
#define TRACE_GRAPH_PRINT_TAIL 0x100
#define TRACE_GRAPH_SLEEP_TIME 0x200
#define TRACE_GRAPH_GRAPH_TIME 0x400
+#define TRACE_GRAPH_PRINT_RETVAL 0x800
#define TRACE_GRAPH_PRINT_FILL_SHIFT 28
#define TRACE_GRAPH_PRINT_FILL_MASK (0x3 << TRACE_GRAPH_PRINT_FILL_SHIFT)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 203204cad..22061f29d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -60,6 +60,7 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-tail, TRACE_GRAPH_PRINT_TAIL) },
/* Include sleep time (scheduled out) between entry and return */
{ TRACER_OPT(sleep-time, TRACE_GRAPH_SLEEP_TIME) },
+ { TRACER_OPT(funcgraph-retval, TRACE_GRAPH_PRINT_RETVAL) },
#ifdef CONFIG_FUNCTION_PROFILER
/* Include time within nested functions */
@@ -948,10 +949,17 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
* belongs to, write out the function name. Always do
* that if the funcgraph-tail option is enabled.
*/
- if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL))
- trace_seq_puts(s, "}\n");
- else
- trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
+ if (func_match && !(flags & TRACE_GRAPH_PRINT_TAIL)) {
+ if (flags & TRACE_GRAPH_PRINT_RETVAL)
+ trace_seq_printf(s, "} /* %lx */\n", trace->retval);
+ else
+ trace_seq_puts(s, "}\n");
+ } else {
+ if (flags & TRACE_GRAPH_PRINT_RETVAL)
+ trace_seq_printf(s, "} /* %ps %lx */\n", (void *)trace->func, trace->retval);
+ else
+ trace_seq_printf(s, "} /* %ps */\n", (void *)trace->func);
+ }
/* Overrun */
if (flags & TRACE_GRAPH_PRINT_OVERRUN)
--
2.25.1
标签:function,kernel,ftrace,return,trace,graph,us,返回值
From: https://www.cnblogs.com/pengdonglin137/p/17126952.html