在前面介绍的mm_init()中会调用函数mem_init(),该函数会把bootmem中的空闲内存释放到伙伴系统。下面我们来看bootmem中负责释放内存的函数free_all_bootmem():
/*
 * Release the bootmem pages of every node found on bdata_list.
 *
 * Calls reset_all_zones_managed_pages() first, then sums the page counts
 * reported by free_all_bootmem_core() for each list entry.  The sum is
 * added to totalram_pages and also returned to the caller.
 */
unsigned long __init free_all_bootmem(void)
{
	bootmem_data_t *node;
	unsigned long released = 0;

	reset_all_zones_managed_pages();

	list_for_each_entry(node, &bdata_list, list) {
		released += free_all_bootmem_core(node);
	}

	totalram_pages += released;

	return released;
}
可以看到,该函数遍历链表bdata_list,对系统内每个node节点调用函数free_all_bootmem_core()释放其内存,并且把释放的总页数累加到totalram_pages中。内核中很多函数会使用totalram_pages变量来了解系统一共有多少页物理内存交给了页分配器管理(注意它记录的是内存总量,而不是当前的空闲内存量)。
函数free_all_bootmem_core(bootmem_data_t *bdata)是bootmem内存管理释放内存的核心函数,函数实现如下:
/*
 * Free every unreserved page tracked by one node's bootmem bitmap, then
 * free the pages that held the bitmap itself.
 *
 * @bdata: bootmem descriptor of the node to release.
 *
 * Returns the number of pages passed to __free_pages_bootmem(), bitmap
 * pages included, or 0 when the node has no bitmap.  On return
 * bdata->node_bootmem_map is NULL.
 */
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
	struct page *page;
	unsigned long *map, start, end, pages, cur, count = 0;

	/* No bitmap attached to this node: there is nothing to release. */
	if (!bdata->node_bootmem_map)
		return 0;

	/*
	 * The bitmap uses one bit per page frame.  Sizing example (assuming
	 * 4 KiB pages): 4 GiB is 2^20 pages, which needs 2^17 bytes of
	 * bitmap, i.e. 2^5 = 32 pages just to track that memory.
	 */
	map = bdata->node_bootmem_map;
	start = bdata->node_min_pfn;	/* first page frame number */
	end = bdata->node_low_pfn;	/* end page frame number (exclusive) */

	bdebug("nid=%td start=%lx end=%lx\n",
		bdata - bootmem_node_data, start, end);

	while (start < end) {
		unsigned long idx, vec;
		unsigned shift;

		idx = start - bdata->node_min_pfn;	/* bit index into the bitmap */
		shift = idx & (BITS_PER_LONG - 1);	/* bit offset inside its word */
		/*
		 * vec holds at most BITS_PER_LONG map bits,
		 * bit 0 corresponds to start.
		 *
		 * The map word is inverted, so a set bit in vec marks a page
		 * that gets freed below.
		 */
		vec = ~map[idx / BITS_PER_LONG];

		if (shift) {
			/* Unaligned start: pull the missing high bits from the next word. */
			vec >>= shift;
			if (end - start >= BITS_PER_LONG)
				vec |= ~map[idx / BITS_PER_LONG + 1] <<
					(BITS_PER_LONG - shift);
		}
		/*
		 * If we have a properly aligned and fully unreserved
		 * BITS_PER_LONG block of pages in front of us, free
		 * it in one go.
		 */
		if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
			int order = ilog2(BITS_PER_LONG);

			/* Hand the whole block over (towards the buddy system). */
			__free_pages_bootmem(pfn_to_page(start), start, order);
			count += BITS_PER_LONG;
			start += BITS_PER_LONG;
		} else {
			/* Otherwise walk the bits and free page by page. */
			cur = start;

			start = ALIGN(start + 1, BITS_PER_LONG);
			while (vec && cur != start) {
				if (vec & 1) {
					page = pfn_to_page(cur);
					__free_pages_bootmem(page, cur, 0);
					count++;
				}
				vec >>= 1;
				++cur;
			}
		}
	}

	/* Finally release the pages that backed the bitmap itself. */
	cur = bdata->node_min_pfn;
	page = virt_to_page(bdata->node_bootmem_map);
	pages = bdata->node_low_pfn - bdata->node_min_pfn;
	pages = bootmem_bootmap_pages(pages);
	count += pages;
	while (pages--)
		__free_pages_bootmem(page++, cur++, 0);
	bdata->node_bootmem_map = NULL;

	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);

	return count;
}
可以看到,最后把map数组占用的内存也释放了。
/*
 * Release a block of 1 << order boot pages starting at @page.
 *
 * @page:  first struct page of the block.
 * @pfn:   page frame number passed to early_page_uninitialised().
 * @order: log2 of the number of pages in the block.
 *
 * Nothing is freed when early_page_uninitialised(@pfn) is true.
 */
void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
				 unsigned int order)
{
	if (early_page_uninitialised(pfn))
		return;

	/*
	 * Plain call: ISO C does not allow "return <expression>;" in a
	 * function returning void, even when the expression is void.
	 */
	__free_pages_boot_core(page, order);
}
/*
 * Prepare the 1 << order pages starting at @page and release them as one
 * block: each page has its reserved flag cleared and its count set to 0,
 * the zone's managed_pages grows by the block size, and the head page is
 * made refcounted before being passed to __free_pages().
 */
static void __init __free_pages_boot_core(struct page *page, unsigned int order)
{
	unsigned int nr_pages = 1 << order;
	unsigned int last = nr_pages - 1;
	unsigned int i;

	prefetchw(page);
	for (i = 0; i <= last; i++) {
		struct page *cur = page + i;

		/* Prefetch the following struct page, except after the last one. */
		if (i != last)
			prefetchw(cur + 1);
		__ClearPageReserved(cur);
		set_page_count(cur, 0);
	}

	page_zone(page)->managed_pages += nr_pages;
	set_page_refcounted(page);
	__free_pages(page, order);
}
在所有的页的相关属性设置后,最后通过__free_pages(page, order)把页释放到伙伴系统。
/*
 * Free the block of 1 << order pages headed by @page once
 * put_page_testzero() succeeds on it (presumably: the reference count
 * dropped to zero -- confirm against its definition).
 *
 * Order-0 pages take the per-cpu list path, larger blocks go through
 * __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)
			/* single page: queued on the per-cpu page list (hot) */
			free_hot_cold_page(page, false);
		else
			/* multi-page block: released to the buddy system */
			__free_pages_ok(page, order);
	}
}
/*
 * Free a block of 1 << order pages directly to its zone through
 * free_one_page().  Nothing happens when free_pages_prepare() rejects
 * the block.  The PGFREE event count and the actual free are done with
 * local interrupts disabled.
 */
static void __free_pages_ok(struct page *page, unsigned int order)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long irq_flags;
	int mt;

	if (free_pages_prepare(page, order, true)) {
		mt = get_pfnblock_migratetype(page, pfn);

		local_irq_save(irq_flags);
		__count_vm_events(PGFREE, 1 << order);
		free_one_page(page_zone(page), page, pfn, order, mt);
		local_irq_restore(irq_flags);
	}
}
函数free_hot_cold_page()把空闲页释放到per-cpu链表。这个链表位于zone的pageset成员中(即每个CPU各自的pcp对象里的lists数组)。
/*
 * Free a 0-order page
 * cold == true ? free a cold page : free a hot page
 *
 * The page is queued on the current CPU's pcp list of its zone: hot pages
 * at the head of the list, cold pages at the tail.  Once pcp->count reaches
 * pcp->high, pcp->batch pages are handed to free_pcppages_bulk().
 * Pages whose pageblock is isolated bypass the pcp lists and go straight
 * to free_one_page().  Nothing is done if free_pcp_prepare() rejects the page.
 */
void free_hot_cold_page(struct page *page, bool cold)
{
	struct zone *zone = page_zone(page);
	struct per_cpu_pages *pcp;
	unsigned long flags;
	unsigned long pfn = page_to_pfn(page);
	int migratetype;

	if (!free_pcp_prepare(page))
		return;

	migratetype = get_pfnblock_migratetype(page, pfn);
	set_pcppage_migratetype(page, migratetype);
	local_irq_save(flags);
	__count_vm_event(PGFREE);

	/*
	 * We only track unmovable, reclaimable and movable on pcp lists.
	 * Free ISOLATE pages back to the allocator because they are being
	 * offlined but treat RESERVE as movable pages so we can get those
	 * areas back if necessary. Otherwise, we may have to free
	 * excessively into the page allocator
	 */
	if (migratetype >= MIGRATE_PCPTYPES) {
		if (unlikely(is_migrate_isolate(migratetype))) {
			free_one_page(zone, page, pfn, 0, migratetype);
			goto out;
		}
		migratetype = MIGRATE_MOVABLE;
	}

	pcp = &this_cpu_ptr(zone->pageset)->pcp;
	if (!cold)
		list_add(&page->lru, &pcp->lists[migratetype]);
	else
		list_add_tail(&page->lru, &pcp->lists[migratetype]);
	pcp->count++;
	/*
	 * Too many pages are cached on this CPU: release one batch of them
	 * from the per-cpu lists back to the buddy system.
	 */
	if (pcp->count >= pcp->high) {
		unsigned long batch = READ_ONCE(pcp->batch);

		free_pcppages_bulk(zone, batch, pcp);
		pcp->count -= batch;
	}

out:
	local_irq_restore(flags);
}
/*
* Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
* If the zone was previously in an "all pages pinned" state then look to
* see if this freeing clears that state.
*
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
从per-cpu链表上把内存释放到伙伴系统
static void free_pcppages_bulk(struct zone *zone, int count,
struct per_cpu_pages *pcp)
{
int migratetype = 0;
int batch_free = 0;
unsigned long nr_scanned;
bool isolated_pageblocks;
spin_lock(&zone->lock);
isolated_pageblocks = has_isolate_pageblock(zone);
nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
if (nr_scanned)
__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
while (count) {
struct page *page;
struct list_head *list;
/*
* Remove pages from lists in a round-robin fashion. A
* batch_free count is maintained that is incremented when an
* empty list is encountered. This is so more pages are freed
* off fuller lists instead of spinning excessively around empty
* lists
*/
do {
batch_free++;
if (++migratetype == MIGRATE_PCPTYPES)
migratetype = 0;
list = &pcp->lists[migratetype];
} while (list_empty(list));
/* This is the only non-empty list. Free them all. */
if (batch_free == MIGRATE_PCPTYPES)
batch_free = count;
do {
int mt; /* migratetype of the to-be-freed page */
page = list_last_entry(list, struct page, lru);
/* must delete as __free_one_page list manipulates */
list_del(&page->lru);
mt = get_pcppage_migratetype(page);
/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
/* Pageblock could have been isolated meanwhile */
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
if (bulkfree_pcp_prepare(page))
continue;
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
} while (--count && --batch_free && !list_empty(list));
}
spin_unlock(&zone->lock);
}
标签:释放,bootmem,bdata,list,free,伙伴,pages,page
From: https://blog.51cto.com/u_11860992/6410368