前言
本篇文章主要是为了对ltp内irq模块的测试用例之一的irqbalance进行源码分析,作为对内核中断子系统测试项之一,其蕴含的技术知识,还是很值得学习一下的。
irqbalance是什么?项目主页上有以下描述:
Irqbalance is a daemon to help balance the cpu load generated by interrupts across all of a systems cpus. Irqbalance identifies the highest volume interrupt sources, and isolates each of them to a single unique cpu, so that load is spread as much as possible over an entire processor set, while minimizing cache miss rates for irq handlers.
简单来说,irqbalance这一服务可以帮助平衡所有系统cpu上的中断所产生的cpu负载。Irqbalance可以筛选出中断量最大的中断源,并将它们中的每一个隔离到一个单独的cpu,以便负载尽可能多地分布在整个处理器集上,同时最大限度地减少irq处理程序的缓存未命中率,这对多核心系统的性能有很大的提升。
1.源码分析
1.1函数调用关系图
1.2源码分析
1.2.1setup()
根据函数调用关系图,我们可以了解到,作为bin入口的setup()调用了如下两个主要函数:
/*
 * Test setup: take a first snapshot of the IRQ tables, print it, and
 * make sure the snapshot contains something to test.
 */
static void setup(void)
{
	/* Gather the interrupt information of the system */
	collect_irq_info();

	/* Print what was gathered */
	print_irq_info();

	/*
	 * The test only makes sense when at least one CPU column and at
	 * least one IRQ row were found.
	 */
	if (nr_cpus == 0) {
		tst_brk(TBROK, "No CPUs found in /proc/interrupts?");
	}

	if (nr_irqs == 0) {
		tst_brk(TBROK, "No IRQs found in /proc/interrupts?");
	}
}
1.2.2run()
接下来,我们对irqbalance01.c 的源码主要的测试函数run()进行分析:
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright (c) 2021 SUSE LLC <rpalethorpe@suse.com> */
/*\
* [Description]
*
* Check that something (e.g. irqbalance daemon) is performing IRQ
* load balancing.
*
* On many systems userland needs to set /proc/irq/$IRQ/smp_affinity
* to prevent many IRQs being delivered to the same CPU.
*
* Note some drivers and IRQ controllers will distribute IRQs
* evenly. Some systems will have housekeeping CPUs configured. Some
* IRQs can not be masked etc. So this test is not appropriate for all
* scenarios.
*
* Furthermore, exactly how IRQs should be distributed is a
* performance and/or security issue. This is only a generic smoke
* test. It will hopefully detect misconfigured systems and total
* balancing failures which are often silent errors.
*
* Heuristic: Evidence of Change
*
* 1. Find IRQs with a non-zero count
* 2. Check if they are now disallowed
*
* There are two sources of information we need to parse:
*
* 1. /proc/interrupts
* 2. /proc/irq/$IRQ/smp_affinity
*
* We get the active IRQs and CPUs from /proc/interrupts. It also
* contains the per-CPU IRQ counts and info we do not care about.
*
* We get the IRQ masks from each active IRQ's smp_affinity file. This
* is a bitmask written out in hexadecimal format. It shows which CPUs
* an IRQ may be received by.
*/
#include <stdlib.h>
#include "tst_test.h"
#include "tst_safe_stdio.h"
#include "tst_safe_file_at.h"
/*
 * Whether one CPU may receive one IRQ. The enumerators are character
 * constants so a value can be printed directly with "%c".
 */
enum affinity {
	/* The CPU's bit is clear in the IRQ's affinity mask */
	DENY = '-',
	/* The CPU's bit is set in the IRQ's affinity mask */
	ALLOW = '+',
};
/* Table dimensions: CPU columns and numeric IRQ rows of /proc/interrupts */
static unsigned int nr_cpus;
static unsigned int nr_irqs;

/* IRQ number of each table row (nr_irqs entries) */
static unsigned int *irq_ids;

/*
 * Row-major interrupt counts, indexed [row * nr_cpus + col]. One extra row
 * after the last IRQ holds the per-CPU totals.
 */
static unsigned int *irq_stats;

/* Row-major ALLOW/DENY flags, indexed like irq_stats (no totals row) */
static enum affinity *irq_affinity;
/*
 * Read a whole file into a NUL-terminated buffer.
 *
 * path:    file to read
 * len_out: if not NULL, receives the number of bytes read (terminator
 *          excluded)
 *
 * Returns a pointer to a function-static buffer. The buffer is reused and
 * grown by later calls, so the contents are only valid until the next call
 * and the caller must not free it. An empty file is reported with
 * tst_brk(TBROK, ...).
 */
static char *read_proc_file(const char *const path, size_t *const len_out)
{
	static char *storage;
	static size_t capacity;
	const size_t page_sz = SAFE_SYSCONF(_SC_PAGESIZE);
	int fd = SAFE_OPEN(path, O_RDONLY);
	size_t filled = 0;
	size_t nread;

	for (;;) {
		/* Always keep one spare byte for the terminator */
		if (capacity <= filled + 1) {
			capacity += page_sz;
			storage = SAFE_REALLOC(storage, capacity);
		}

		nread = SAFE_READ(0, fd,
				  storage + filled,
				  capacity - filled - 1);
		if (nread == 0)
			break;

		filled += nread;
	}

	if (filled == 0)
		tst_brk(TBROK, "Empty %s?", path);

	storage[filled] = '\0';

	SAFE_CLOSE(fd);

	if (len_out != NULL)
		*len_out = filled;

	return storage;
}
/*
 * Parse /proc/interrupts and the smp_affinity mask of every listed IRQ
 * into the file-scope tables.
 *
 * On return:
 *   nr_cpus       number of "CPU" columns in the header line
 *   nr_irqs       number of rows whose label is numeric ("<digits>:")
 *   irq_ids       IRQ number of each such row
 *   irq_stats     nr_irqs x nr_cpus counts, followed by one row of
 *                 per-CPU totals
 *   irq_affinity  nr_irqs x nr_cpus ALLOW/DENY flags
 *
 * The tables are resized with SAFE_REALLOC on every call, so the function
 * may be called repeatedly. Input that does not match the expected layout
 * is reported with tst_brk(TBROK, ...).
 */
static void collect_irq_info(void)
{
	char *buf, *c, *first_row;
	char path[PATH_MAX];
	size_t row, col, len;
	long acc;
	unsigned int cpu_total, bit;
	int irq_row;

	nr_cpus = 0;
	nr_irqs = 0;

	/*
	 * /proc/interrupts holds the interrupt counters, for example:
	 *
	 *        CPU0  CPU1  ...
	 *   0:     14     0  ...  IR-IO-APIC  2-edge     timer
	 *   8:      0     0  ...  IR-IO-APIC  8-edge     rtc0
	 *   9:      0   132  ...  IR-IO-APIC  9-fasteoi  acpi
	 *
	 * From left to right: the IRQ number, one count per CPU, then
	 * descriptive columns. Only the number and the counts are parsed.
	 */
	buf = read_proc_file("/proc/interrupts", NULL);

	/* Count CPUs, header columns are like /CPU[0-9]+/ */
	for (c = buf; *c != '\0' && *c != '\n'; c++) {
		if (!strncmp(c, "CPU", 3))
			nr_cpus++;
	}

	/* Step over the header's newline, but never past the terminator */
	if (*c == '\n')
		c++;

	/* Remember where the data rows start for the second pass */
	first_row = c;

	/* Count IRQs, real IRQs start with /[0-9]+:/ */
	while (*c != '\0') {
		switch (*c) {
		case ' ':
		case '\t':
		case '\n':
		case '0' ... '9':
			c++;
			break;
		case ':':
			nr_irqs++;
			/* fall-through */
		default:
			/* Nothing else on this line matters for counting */
			while (*c != '\n' && *c != '\0')
				c++;
		}
	}

	tst_res(TINFO, "Found %u CPUS, %u IRQs", nr_cpus, nr_irqs);

	irq_ids = SAFE_REALLOC(irq_ids, nr_irqs * sizeof(*irq_ids));
	/* One extra row for the per-CPU totals */
	irq_stats = SAFE_REALLOC(irq_stats,
				 nr_cpus * (nr_irqs + 1) * sizeof(*irq_stats));
	irq_affinity = SAFE_REALLOC(irq_affinity,
				    nr_cpus * nr_irqs * sizeof(*irq_affinity));

	c = first_row;
	acc = -1;
	row = col = 0;
	irq_row = 0;

	/* Parse columns containing IRQ counts and IRQ IDs into acc. Ignore
	 * everything else.
	 *
	 * acc is -1 while no number is being read. row only advances after
	 * a line that carried a numeric label, so lines with any other
	 * label never shift or overrun the table index.
	 */
	while (*c != '\0') {
		switch (*c) {
		case ' ':
		case '\t':
			if (acc >= 0) {
				/* Store only inside the allocated row */
				if (irq_row && col < nr_cpus)
					irq_stats[row * nr_cpus + col] = acc;
				acc = -1;
				col++;
			}
			break;
		case '\n':
			if (acc != -1)
				tst_brk(TBROK, "Unexpected EOL");
			col = 0;
			if (irq_row)
				row++;
			irq_row = 0;
			break;
		case '0' ... '9':
			if (acc == -1)
				acc = 0;

			acc *= 10;
			acc += *c - '0';
			break;
		case ':':
			if (acc == -1 || col != 0)
				tst_brk(TBROK, "Unexpected ':'");
			/* Both passes must agree on the number of IRQ rows */
			if (row >= nr_irqs)
				tst_brk(TBROK, "More IRQ rows than counted");
			irq_ids[row] = acc;
			irq_row = 1;
			acc = -1;
			break;
		default:
			/* Start of the descriptive columns, or a named row */
			acc = -1;
			while (*c != '\n' && *c != '\0')
				c++;
			continue;
		}

		c++;
	}

	/* Sum each column into the extra row; row == nr_irqs after the loop */
	for (col = 0; col < nr_cpus; col++) {
		cpu_total = 0;

		for (row = 0; row < nr_irqs; row++)
			cpu_total += irq_stats[row * nr_cpus + col];

		irq_stats[row * nr_cpus + col] = cpu_total;
	}

	/* Read the CPU affinity masks for each IRQ. The first CPU is in the
	 * right most (least significant) bit. See bitmap_string() in the kernel
	 * (%*pb)
	 */
	for (row = 0; row < nr_irqs; row++) {
		/*
		 * smp_affinity is a hexadecimal bitmask of the CPUs that may
		 * receive this IRQ.
		 */
		snprintf(path, sizeof(path), "/proc/irq/%u/smp_affinity",
			 irq_ids[row]);
		buf = read_proc_file(path, &len);
		c = buf + len;
		col = 0;

		/*
		 * Walk the mask from its last character to its first, so
		 * that CPU0 is decoded first. Each hex digit covers four
		 * CPUs.
		 */
		while (c > buf) {
			c--;

			/*
			 * NOTE(review): the bit loop below stops at nr_cpus,
			 * so col cannot exceed it and surplus mask digits are
			 * ignored rather than reported here.
			 */
			if (col > nr_cpus) {
				tst_res(TINFO, "%u/smp_affnity: %s",
					irq_ids[row], buf);
				tst_brk(TBROK, "More mask char bits than cpus");
			}

			switch (*c) {
			case '\n':
			case ' ':
			case ',':
				/* Separators carry no mask bits */
				continue;
			case '0' ... '9':
				acc = *c - '0';
				break;
			case 'a' ... 'f':
				acc = 10 + *c - 'a';
				break;
			default:
				tst_res(TINFO, "%u/smp_affnity: %s",
					irq_ids[row], buf);
				tst_brk(TBROK, "Wasn't expecting 0x%02x", *c);
			}

			for (bit = 0; bit < 4 && col < nr_cpus; bit++) {
				irq_affinity[row * nr_cpus + col++] =
					(acc & (1 << bit)) ? ALLOW : DENY;
			}
		}

		if (col < nr_cpus) {
			tst_res(TINFO, "%u/smp_affnity: %s", irq_ids[row], buf);
			tst_brk(TBROK, "Only found %zu cpus", col);
		}
	}
}
static void print_irq_info(void)
{
size_t row, col;
unsigned int count;
enum affinity aff;
// 绘出表头
tst_printf(" IRQ ");
for (col = 0; col < nr_cpus; col++)
tst_printf("CPU%-8zu", col);
tst_printf("\n");
for (row = 0; row < nr_irqs; row++) {
tst_printf("%5u:", irq_ids[row]); // 打印中断源
for (col = 0; col < nr_cpus; col++) {
count = irq_stats[row * nr_cpus + col];
aff = irq_affinity[row * nr_cpus + col];
tst_printf("%10u%c", count, aff); // 打印中断次数 && CPU是否可以调度中断
}
tst_printf("\n");
}
tst_printf("Total:");
for (col = 0; col < nr_cpus; col++)
tst_printf("%10u ", irq_stats[row * nr_cpus + col]);
tst_printf("\n");
}
/*
 * Heuristic check: count the IRQ/CPU pairs that have a non-zero interrupt
 * count although the CPU is not allowed by the IRQ's affinity mask.
 * Reports TPASS when at least one such pair exists, TFAIL otherwise.
 */
static void evidence_of_change(void)
{
	/* Both tables are row-major with nr_cpus columns per IRQ */
	const size_t cells = (size_t)nr_irqs * nr_cpus;
	size_t changed = 0;
	size_t i;

	for (i = 0; i < cells; i++) {
		/* Interrupts were seen here, yet the CPU is not allowed */
		if (irq_stats[i] && irq_affinity[i] != ALLOW)
			changed++;
	}

	tst_res(changed ? TPASS : TFAIL,
		"Heuristic: Detected %zu irq-cpu pairs have been dissallowed", changed);
}
/*
 * Test setup: collect and print a first snapshot of the IRQ tables, then
 * stop with TBROK if the snapshot has no CPU columns or no IRQ rows.
 */
static void setup(void)
{
	collect_irq_info();
	print_irq_info();

	if (!nr_cpus) {
		tst_brk(TBROK, "No CPUs found in /proc/interrupts?");
	}

	if (!nr_irqs) {
		tst_brk(TBROK, "No IRQs found in /proc/interrupts?");
	}
}
/*
 * Test body: refresh the IRQ tables, then apply the heuristic to the
 * fresh data.
 */
static void run(void)
{
collect_irq_info();
evidence_of_change();
}
/*
 * Test descriptor. The meaning of each field is defined by the LTP test
 * library and is not visible in this file.
 */
static struct tst_test test = {
	/* Presumably the minimum CPU count the test requires; verify in tst_test.h */
	.min_cpus = 2,
	.setup = setup,
	.test_all = run,
};
1.3测试实例
如下是irqbalance01测试的打印之一,可以对照如下的打印,理解上一小节我对程序的一些注释。此外,大家也可以尝试阅读一下irqbalance01.c作者对其的description,以便于更好的了解本测试的核心思想。总体来说,本测试还要对照irqbalance service的源码进行解读,irqbalance service通过修改各中断源的smp_affinity,达到了irq在系统上的负载均衡,当然,它的实现肯定会比我在此描述的复杂很多,不过在此就不展开了,大家有兴趣可以去了解一下它的源码!
irqbalance01.c:129: TINFO: Found 2 CPUS, 46 IRQs
IRQ CPU0 CPU1
......
37: 5970+ 0+
38: 0+ 0+
39: 0+ 0+
尾言
温故知新,岁岁常新!:) 因博主水平能力有限,如果有大佬在阅读过程中发现其中的谬误,希望可以不吝赐教,3Q。
标签:acc,IRQ,irqbalance01,len,源码,affinity,irq,nr,row From: https://blog.51cto.com/u_15635173/6388743