什么是 e820?
e820 是 BIOS 用来报告物理内存分布的一个基础设施。因此,e820 是一个很重要的东西,它报告了哪些物理内存范围是可用的,哪些范围是预留的。
e820 流程分析
Linux 获取内存分布从 machine_specific_memory_setup 函数开始,这个函数在多个文件里都有定义,此处我们只看 include/asm-i386/mach-default/setup_arch_post.h 文件中的定义,函数的定义如下:
/*
 * Build the kernel memory map from what the BIOS reported and return a
 * string naming the source that ended up being used.
 *
 * The BIOS e820 map is sanitized and copied first.  When copy_e820_map()
 * rejects it, a two-region map is faked instead: one region from 0 up to
 * LOWMEMSIZE(), and one starting at HIGH_MEMORY whose size is the greater
 * of ALT_MEM_K ("BIOS-e801") and EXT_MEM_K ("BIOS-88"), shifted from K
 * to bytes.
 */
static char * __init machine_specific_memory_setup(void)
{
	unsigned long mem_size;
	char *who;

	sanitize_e820_map(E820_MAP, &E820_MAP_NR);
	if (copy_e820_map(E820_MAP, E820_MAP_NR) >= 0)
		return "BIOS-e820";

	/* e820 map unusable: compare the other methods, take the greater */
	if (EXT_MEM_K > ALT_MEM_K) {
		mem_size = EXT_MEM_K;
		who = "BIOS-88";
	} else {
		mem_size = ALT_MEM_K;
		who = "BIOS-e801";
	}

	/* discard anything copy_e820_map() added before it bailed out */
	e820.nr_map = 0;
	add_memory_region(0, LOWMEMSIZE(), E820_RAM);
	add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);

	return who;
}
函数内部调用了 sanitize_e820_map 这个函数,这个函数很重要,它对 BIOS 报告的内存分布进行了一定的处理。调用函数时传递了 E820_MAP 和 E820_MAP_NR 这两个参数,这两个参数定义在 include/asm-i386/setup.h,定义如下:
/* Number of e820 entries: a single char read at byte offset E820NR of PARAM. */
#define E820_MAP_NR (*(char*) (PARAM+E820NR))
/* The e820 entry array: byte offset E820MAP of PARAM, viewed as struct e820entry *. */
#define E820_MAP ((struct e820entry *) (PARAM+E820MAP))
其中 PARAM 的定义如下:
/* Declaration only: a byte array of PARAM_SIZE elements, defined elsewhere. */
extern unsigned char boot_params[PARAM_SIZE];
/* Shorthand for the boot_params array, used as the base for the offset macros. */
#define PARAM (boot_params)
可以看到,其实 PARAM 就是一个字节数组,这个数组的大小为 PARAM_SIZE ,它的值为4096。
但是 boot_params 本身也是一个声明,它的定义在哪里呢?它定义在 arch/i386/kernel/setup.c 中,定义如下:
/* Storage for the boot parameters: PARAM_SIZE bytes, placed in __initdata. */
unsigned char __initdata boot_params[PARAM_SIZE];
其实,这里已经很明显了,E820NR 和 E820MAP 就是对应参数的偏移量,一个是 e820 映射的数量在这个数组中的偏移量,另一个就是 e820entry 开始的偏移量,它们的定义如下:
#define E820MAP 0x2d0 /* our map: byte offset of the e820 entry array */
#define E820MAX 128 /* number of entries in E820MAP (capacity of the array) */
#define E820NR 0x1e8 /* # entries in E820MAP: byte offset of the entry count */
这样,传递给 sanitize_e820_map 函数的两个实参分别是 e820entry 数组的起始地址和存有这个数组的元素数量的内存地址。
另外,boot_params 这个数组的填充在 arch/i386/kernel/head.S 中,初始化代码如下:
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 *
 * The copy is a "rep movsl": %esi is the source, %edi the destination
 * and %ecx the number of 4-byte words to move.
 */
movl $boot_params,%edi /* destination: address of boot_params */
movl $(PARAM_SIZE/4),%ecx /* count: PARAM_SIZE bytes as 4-byte words */
cld /* clear the direction flag so the copy runs toward higher addresses */
rep
movsl /* repeated by the "rep" prefix above until %ecx reaches 0 */
/* reload %esi with the 32-bit value stored at boot_params+NEW_CL_POINTER */
/* NOTE(review): presumably the new-style command line pointer; confirm against the definition of NEW_CL_POINTER */
movl boot_params+NEW_CL_POINTER,%esi
处理 BIOS 报告的内存区域可能存在重叠的问题
sanitize_e820_map 函数定义在 arch/i386/kernel/setup.c 中,定义如下:
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries. The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
/* One change point: an address at which a BIOS entry starts or ends. */
struct change_member {
struct e820entry *pbios; /* pointer to original bios entry */
unsigned long long addr; /* address for this change point */
};
/* Backing storage for change points; every entry yields a start and an end, hence 2*E820MAX. */
static struct change_member change_point_list[2*E820MAX] __initdata;
/* Pointers into change_point_list; these pointers are what gets sorted by address. */
static struct change_member *change_point[2*E820MAX] __initdata;
/* BIOS entries that cover the address currently being processed. */
static struct e820entry *overlap_list[E820MAX] __initdata;
/* The overlap-free map being built, copied back over the BIOS map at the end. */
static struct e820entry new_bios[E820MAX] __initdata;
/*
 * Rewrite a BIOS e820 map in place so that no two entries overlap.
 *
 * biosmap: array of BIOS entries; overwritten with the sanitized map
 * pnr_map: on entry, the number of entries in biosmap; on success,
 *          updated to the number of entries in the sanitized map
 *
 * Returns 0 once the map has been rewritten.  Returns -1, with biosmap
 * and *pnr_map left untouched, when there are fewer than two entries or
 * when some entry's addr + size wraps around.
 *
 * Where entries overlap, the numerically largest type wins.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
struct change_member *change_tmp;
unsigned long current_type, last_type;
unsigned long long last_addr;
int chgidx, still_changing;
int overlap_entries;
int new_bios_entry;
int old_nr, new_nr, chg_nr;
int i;
/*
Visually we're performing the following (1,2,3,4 = memory types)...
Sample memory map (w/overlaps):
____22__________________
______________________4_
____1111________________
_44_____________________
11111111________________
____________________33__
___________44___________
__________33333_________
______________22________
___________________2222_
_________111111111______
_____________________11_
_________________4______
Sanitized equivalent (no overlap):
1_______________________
_44_____________________
___1____________________
____22__________________
______11________________
_________1______________
__________3_____________
___________44___________
_____________33_________
_______________2________
________________1_______
_________________4______
___________________2____
____________________33__
______________________4_
*/
/* if there's only one memory region, don't bother */
if (*pnr_map < 2)
return -1;
old_nr = *pnr_map;
/* bail out if we find any unreasonable addresses in bios map */
/* (an entry is unreasonable when addr + size wraps past the top) */
for (i=0; i<old_nr; i++)
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
return -1;
/* create pointers for initial change-point information (for sorting) */
for (i=0; i < 2*old_nr; i++)
change_point[i] = &change_point_list[i];
/* record all known change-points (starting and ending addresses),
omitting those that are for empty memory regions */
chgidx = 0;
for (i=0; i < old_nr; i++) {
if (biosmap[i].size != 0) {
/* first the start point, then the end point, both tagged with the entry */
change_point[chgidx]->addr = biosmap[i].addr;
change_point[chgidx++]->pbios = &biosmap[i];
change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
change_point[chgidx++]->pbios = &biosmap[i];
}
}
chg_nr = chgidx; /* true number of change-points */
/* sort change-point list by memory addresses (low -> high) */
/* (passes of adjacent swaps, repeated until a pass makes no swap) */
still_changing = 1;
while (still_changing) {
still_changing = 0;
for (i=1; i < chg_nr; i++) {
/* if <current_addr> > <last_addr>, swap */
/* or, if current=<start_addr> & last=<end_addr>, swap */
/* i.e. at equal addresses a start point is moved ahead of an end point; */
/* a point is a start point when its addr equals its entry's addr */
if ((change_point[i]->addr < change_point[i-1]->addr) ||
((change_point[i]->addr == change_point[i-1]->addr) &&
(change_point[i]->addr == change_point[i]->pbios->addr) &&
(change_point[i-1]->addr != change_point[i-1]->pbios->addr))
)
{
change_tmp = change_point[i];
change_point[i] = change_point[i-1];
change_point[i-1] = change_tmp;
still_changing=1;
}
}
}
/* create a new bios memory map, removing overlaps */
overlap_entries=0; /* number of entries in the overlap table */
new_bios_entry=0; /* index for creating new bios map entries */
last_type = 0; /* start with undefined memory type */
last_addr = 0; /* start with 0 as last starting address */
/* loop through change-points, determining effect on the new bios map */
for (chgidx=0; chgidx < chg_nr; chgidx++)
{
/* keep track of all overlapping bios entries */
if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
{
/* start point: this entry begins covering the current address */
/* add map entry to overlap list (> 1 entry implies an overlap) */
overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
}
else
{
/* end point: this entry stops covering the current address */
/* remove entry from list (order independent, so swap with last) */
for (i=0; i<overlap_entries; i++)
{
if (overlap_list[i] == change_point[chgidx]->pbios)
overlap_list[i] = overlap_list[overlap_entries-1];
}
overlap_entries--;
}
/* if there are overlapping entries, decide which "type" to use */
/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
/* current_type stays 0 when no entry covers this address */
current_type = 0;
for (i=0; i<overlap_entries; i++)
if (overlap_list[i]->type > current_type)
current_type = overlap_list[i]->type;
/* continue building up new bios map based on this information */
if (current_type != last_type) {
/* the effective type changes here: close the entry in progress, if any */
if (last_type != 0) {
new_bios[new_bios_entry].size =
change_point[chgidx]->addr - last_addr;
/* move forward only if the new size was non-zero */
if (new_bios[new_bios_entry].size != 0)
if (++new_bios_entry >= E820MAX)
break; /* no more space left for new bios entries */
}
/* ...and open a new entry at this address unless nothing covers it */
if (current_type != 0) {
new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
new_bios[new_bios_entry].type = current_type;
last_addr=change_point[chgidx]->addr;
}
last_type = current_type;
}
}
new_nr = new_bios_entry; /* retain count for new bios entries */
/* copy new bios mapping into original location */
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
*pnr_map = new_nr;
return 0;
}
sanitize_e820_map 函数的作用就是解决 BIOS 报告的内存区域存在重叠的问题,得到一个新的内存区域分布,然后分别更新分布和数量。
拷贝内存区域分布
处理完分布之后,就开始调用 copy_e820_map 函数,从这个函数的名称就可以看出来,这个函数的作用是拷贝内存区域分布,这个函数的定义如下:
/*
 * Hand the BIOS e820 entries over to add_memory_region(), checking
 * them for sanity on the way.
 *
 * biosmap: first entry of the BIOS e820 array
 * nr_map:  number of entries in that array
 *
 * Returns 0 when every entry has been processed.  Returns -1 when the
 * map is rejected, in which case the caller fakes a memory map instead:
 *   - fewer than two entries: the detection code in setup.S may not be
 *     perfect, and practically every PC has at least two regions (0 to
 *     640k and 1mb upward), so a shorter map is not trusted.  The IBM
 *     thinkpad 560x is one machine that does not cooperate with the
 *     memory detection code.
 *   - an entry whose start + size overflows 64 bits.  Entries that
 *     precede it have already been added when this is detected.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	struct e820entry *entry;
	struct e820entry *limit;

	if (nr_map < 2)
		return -1;

	limit = biosmap + nr_map;
	for (entry = biosmap; entry != limit; entry++) {
		unsigned long long start = entry->addr;
		unsigned long long size = entry->size;
		unsigned long long end = start + size;
		unsigned long type = entry->type;

		/* start + size wrapped around: do not trust this map */
		if (end < start)
			return -1;

		/*
		 * RAM reported inside 640k - 1M is bogus.  Keep whatever
		 * lies below 0xA0000 and at or above 0x100000, and drop
		 * the part in between.
		 */
		if (type == E820_RAM &&
		    start < 0x100000ULL && end > 0xA0000ULL) {
			if (start < 0xA0000ULL)
				add_memory_region(start, 0xA0000ULL - start,
						  type);
			if (end <= 0x100000ULL)
				continue;
			start = 0x100000ULL;
			size = end - start;
		}

		add_memory_region(start, size, type);
	}

	return 0;
}
这个函数在拷贝之前会对区域类型为 E820_RAM 的分布做一个特殊的处理:如果起始地址小于 0x100000 且结束地址大于 0xA0000,换句话说,如果内存区域和 640k - 1M 这部分区域存在重叠,那么必须得做一些特殊的处理。如果起始地址小于 0xA0000,那么先添加 start 到 0xA0000 这段区域。如果结束地址小于等于 0x100000,那么直接跳过此次循环,因为不需要再添加区域了(如果此时起始地址也不小于 0xA0000,则整个区域都会被丢弃);如果没有跳过此次循环,则说明 end 大于 0x100000,那么就添加 0x100000 到 end 这段内存区域。
添加内存区域
函数内部调用了 add_memory_region 函数,这个函数也定义在 arch/i386/kernel/setup.c 中,它的定义如下:
/*
 * Append one region to the global e820 map.
 *
 * start: first address of the region
 * size:  length of the region
 * type:  region type, stored as given
 *
 * Does nothing when efi_enabled is set.  When the map already holds
 * E820MAX entries the region is dropped and an error is logged.
 */
static void __init add_memory_region(unsigned long long start,
				     unsigned long long size, int type)
{
	int x;

	if (efi_enabled)
		return;

	x = e820.nr_map;
	if (x == E820MAX) {
		printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
		return;
	}

	e820.map[x].type = type;
	e820.map[x].size = size;
	e820.map[x].addr = start;
	e820.nr_map++;
} /* add_memory_region */
这个函数很简单,只是简单的将数据填充到 e820 的 map 数组中。
如果拷贝失败了,那么将会设置默认的内存区域。
结尾
至此,e820 的整体流程已经全部分析完了,但是里面还存在一些细节,没有详细展开说明。不过本文的重点是讲解 e820 的整个流程,所以就不再详细说明了。
标签:map,addr,获取,point,e820,内存,new,change From: https://www.cnblogs.com/riasartemis/p/17210236.html