vmemmap是内核中存放struct page数组的虚拟地址区域,用于sparse内存模型(SPARSEMEM_VMEMMAP)。内核申请page时得到的struct page地址都位于该区域内。
start_kernel:自上而下
setup_arch
arm64_memblock_init
paging_init
map_kernel
map_mem //线性映射物理内存
bootmem_init
sparse_init
//内存模型,sparse等。建立所有page并映射 vmemmap_start。供后续buddy使用。
zone_sizes_init
free_area_init //初始化zone下面的free_area[MAX_ORDER]
free_area_init_node
build_all_zonelists //建立zonelist
mm_init
mem_init
memblock_free_all //memblock将权利交给buddy
build_all_zonelists //建立buddy
SPARSEMEM原理:
- section的概念:
SPARSEMEM内存模型引入了section的概念,可以简单将它理解为struct page的集合(数组)。内核使用struct mem_section去描述section,定义如下:
/*
 * Per-section descriptor used by the SPARSEMEM memory model.
 */
struct mem_section {
/*
 * Locates this section's struct page array. sparse_init_one_section()
 * stores an encoded mem_map value here, OR'ed with flag bits such as
 * SECTION_HAS_MEM_MAP, so it is not a plain pointer.
 */
unsigned long section_mem_map;
/* See declaration of similar field in struct zone */
unsigned long *pageblock_flags;
};
其中的section_mem_map成员存放的是struct page数组的地址,每个section可容纳PAGES_PER_SECTION(即1 << PFN_SECTION_SHIFT)个struct page,arm64地址位宽为48bit时定义了每个section可囊括的地址范围是1GB。
- 全局变量**mem_section
内核中用了一个二级指针struct mem_section **mem_section去管理section,我们可以简单理解为一个动态的二维数组。所谓二维即内核又将SECTIONS_PER_ROOT个section划分为一个ROOT,ROOT的个数不是固定的,根据系统实际的物理地址大小来分配。
- 物理页帧号PFN
SPARSEMEM将PFN拆分成了三个level,每个level分别对应:ROOT编号、ROOT内的section偏移、section内的page偏移。(可以类比多级页表来理解)
- vmemmap区域
vmemmap区域是一块起始地址是VMEMMAP_START,范围是2TB的虚拟地址区域,位于kernel space。以section为单位来存放struct page结构的虚拟地址空间,然后线性映射到物理内存。
- PFN和struct page的转换:
SPARSEMEM中__pfn_to_page和__page_to_pfn的实现如下:
/* PFN -> struct page: index into the virtually contiguous array based at vmemmap. */
#define __pfn_to_page(pfn) (vmemmap + (pfn))
/* struct page -> PFN: pointer difference from vmemmap, the inverse of __pfn_to_page(). */
#define __page_to_pfn(page) (unsigned long)((page) - vmemmap)
/*
 * Base of the struct page array, biased by (memstart_addr >> PAGE_SHIFT)
 * entries so that the struct page for that PFN sits exactly at VMEMMAP_START.
 */
#define vmemmap ((struct page *)VMEMMAP_START - (memstart_addr >> PAGE_SHIFT))
其中vmemmap指针指向VMEMMAP_START减去(memstart_addr >> PAGE_SHIFT)个struct page之后的地址,因此vmemmap + pfn可以直接得到该pfn对应的struct page;memstart_addr则是根据物理起始地址PHYS_OFFSET算出来的偏移。
arm64:setup_arch(arch/arm64/kernel/setup.c)
->bootmem_init->sparse_init
/*
 * arm64 boot-time memory setup. Derives the DRAM PFN range from memblock,
 * records it in max_pfn/max_low_pfn, then calls, in this order:
 * arm64_numa_init(), arm64_memory_present(), sparse_init(),
 * zone_sizes_init() and memblock_dump_all().
 */
void __init bootmem_init(void)
{
unsigned long min, max;
/* PFN range of DRAM as reported by memblock (start via PFN_UP, end via PFN_DOWN). */
min = PFN_UP(memblock_start_of_DRAM());
max = PFN_DOWN(memblock_end_of_DRAM());
/* The PFNs are shifted back by PAGE_SHIFT before being handed to early_memtest(). */
early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
max_pfn = max_low_pfn = max;
arm64_numa_init();
/*
 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 * done after the fixed reservations.
 */
arm64_memory_present();
/* Builds the per-section memory maps; must come after the sections are marked present. */
sparse_init();
zone_sizes_init(min, max);
memblock_dump_all();
}
/*
 * sparse_init() with debug printk()s added to trace how present sections are
 * grouped per NUMA node.
 *
 * Walks every present section, counts consecutive sections that belong to the
 * same node in map_count, and calls sparse_init_nid() once per node for the
 * section range [pnum_begin, pnum_end).
 */
void __init sparse_init(void)
{
	unsigned long pnum_begin = first_present_section_nr();
	int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin));
	/*
	 * pnum_end only gets a meaningful value once the loop below starts.
	 * It is initialised to 0 so that the first debug line prints a defined
	 * value instead of reading an uninitialised local.
	 */
	unsigned long pnum_end = 0, map_count = 1;

	/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
	set_pageblock_order();

	/* pnum_begin/pnum_end/map_count are unsigned long, hence %lu. */
	printk("===sparse_init nid_begin %d pnum_begin %lu pnum_end %lu \n",
	       nid_begin, pnum_begin, pnum_end);

	for_each_present_section_nr(pnum_begin + 1, pnum_end) {
		int nid = sparse_early_nid(__nr_to_section(pnum_end));

		/* Still on the same node: just account for one more section. */
		if (nid == nid_begin) {
			map_count++;
			continue;
		}
		/* Init node with sections in range [pnum_begin, pnum_end) */
		printk("===sparse_init::sparse_init_nid 0 nid_begin %d pnum_begin %lu pnum_end %lu map_count %lu\n",
		       nid_begin, pnum_begin, pnum_end, map_count);
		sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);

		/* Start accumulating the next node from the current section. */
		nid_begin = nid;
		pnum_begin = pnum_end;
		map_count = 1;
	}
	/* cover the last node */
	printk("===sparse_init::sparse_init_nid 1 nid_begin %d pnum_begin %lu pnum_end %lu map_count %lu\n",
	       nid_begin, pnum_begin, pnum_end, map_count);
	sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
	vmemmap_populate_print_last();
}
sparse_init 参考:https://zhuanlan.zhihu.com/p/555478708
先找到物理地址对应的pfn,再根据vmemmap就可以计算出其struct page所在的虚拟地址。
sparse_buffer_init 通过memblock申请实际物理内存。然后通过vmemmap_pxx_populate 映射页表,完成struct page 虚拟地址 到物理内存的映射。 建立pfn,page,pa的关系。原文引用:
/*
 * Allocate the accumulated non-linear sections, allocate a mem_map
 * for each and record the physical to section mapping.
 */
void __init sparse_init(void)
{
unsigned long pnum_begin = first_present_section_nr(); //first present section and its node id; per the original note
int nid_begin = sparse_early_nid(__nr_to_section(pnum_begin)); //these were set up in the previous step by the memory_present function
unsigned long pnum_end, map_count = 1;
/* Setup pageblock_order for HUGETLB_PAGE_SIZE_VARIABLE */
set_pageblock_order();
for_each_present_section_nr(pnum_begin + 1, pnum_end) { //walk all present sections
int nid = sparse_early_nid(__nr_to_section(pnum_end));
/* Same node as the current run: count the section and keep going. */
if (nid == nid_begin) {
map_count++;
continue;
}
/* Init node with sections in range [pnum_begin, pnum_end) */
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count); //allocate the mem_map for these sections
/* Begin a new run for the node that the current section belongs to. */
nid_begin = nid;
pnum_begin = pnum_end;
map_count = 1;
}
/* cover the last node */
sparse_init_nid(nid_begin, pnum_begin, pnum_end, map_count);
vmemmap_populate_print_last();
}
/*
 * Initialize sparse on a specific node. The node spans [pnum_begin, pnum_end)
 * And number of present sections in this node is map_count.
 *
 * On an allocation failure the remaining present sections of the range get
 * their section_mem_map cleared (see the "failed" label).
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
unsigned long pnum_end,
unsigned long map_count)
{
struct mem_section_usage *usage;
unsigned long pnum;
struct page *map;
usage = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid), //records the subsection bitmap (original note)
mem_section_usage_size() * map_count);
if (!usage) {
pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
goto failed;
}
//--pre-allocates memory for the mem_map arrays.
//--NOTE(review): the original note said this buffer is only used without VMEMMAP; not verifiable from this excerpt -- confirm.
sparse_buffer_init(map_count * section_map_size(), nid);
for_each_present_section_nr(pnum_begin, pnum) { //--walk the present sections
unsigned long pfn = section_nr_to_pfn(pnum);
/* The iterator does not stop at pnum_end by itself. */
if (pnum >= pnum_end)
break;
map = __populate_section_memmap(pfn, PAGES_PER_SECTION, //--allocate memory for the mem_map; with VMEMMAP it is
nid, NULL); //mapped into the virtual memory map region of kernel space
if (!map) {
pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
__func__, nid);
/* Only sections from this one onwards are marked not present below. */
pnum_begin = pnum;
goto failed;
}
check_usemap_section_nr(nid, usage);
sparse_init_one_section(__nr_to_section(pnum), pnum, map, usage,
SECTION_IS_EARLY); //store the mem_map array in the matching section descriptor
/* Advance to the next section's slice of the usage allocation. */
usage = (void *) usage + mem_section_usage_size();
}
sparse_buffer_fini();
return;
failed:
/* We failed to allocate, mark all the following pnums as not present */
for_each_present_section_nr(pnum_begin, pnum) {
struct mem_section *ms;
if (pnum >= pnum_end)
break;
ms = __nr_to_section(pnum);
ms->section_mem_map = 0;
}
}
/*
 * Backs the struct page array covering nr_pages pages starting at pfn.
 *
 * Returns the struct page for the (possibly adjusted) pfn, or NULL when
 * vmemmap_populate() reports an error.
 */
struct page * __meminit __populate_section_memmap(unsigned long pfn,
unsigned long nr_pages, int nid, struct vmem_altmap *altmap)
{
unsigned long start;
unsigned long end;
/*
 * The minimum granularity of memmap extensions is
 * PAGES_PER_SUBSECTION as allocations are tracked in the
 * 'subsection_map' bitmap of the section.
 */
end = ALIGN(pfn + nr_pages, PAGES_PER_SUBSECTION); //end is aligned up; pfn is masked (rounded down) on the next line
pfn &= PAGE_SUBSECTION_MASK;
nr_pages = end - pfn;
start = (unsigned long) pfn_to_page(pfn); //address range of the struct page array inside the virtual memory map
end = start + nr_pages * sizeof(struct page);
if (vmemmap_populate(start, end, nid, altmap)) //build page tables for the range and back it with physical page frames
return NULL;
return pfn_to_page(pfn);
}
/*
 * Populates page tables for the vmemmap range [start, end) on the given node.
 *
 * NOTE(review): this copy tests X86_FEATURE_PSE, so it is the x86 variant of
 * vmemmap_populate(), not the arm64 one the surrounding article is about.
 *
 * Returns 0 on success or a negative errno from the chosen helper
 * (-ENOMEM when an altmap is passed but huge pages are unavailable).
 */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{
int err;
/* Ranges smaller than one section's worth of struct pages use base pages. */
if (end - start < PAGES_PER_SECTION * sizeof(struct page))
err = vmemmap_populate_basepages(start, end, node);
else if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, altmap);
else if (altmap) {
pr_err_once("%s: no cpu support for altmap allocations\n",
__func__);
err = -ENOMEM;
} else
err = vmemmap_populate_basepages(start, end, node); //the generic branch that the walkthrough below follows
if (!err)
sync_global_pgds(start, end - 1);
return err;
}
/*
 * Populates the page tables for [start, end) one PAGE_SIZE step at a time,
 * going through the pgd, p4d, pud, pmd and pte levels for every address.
 *
 * Returns 0 on success, -ENOMEM as soon as any level cannot be populated.
 */
int __meminit vmemmap_populate_basepages(unsigned long start,
unsigned long end, int node)
{
unsigned long addr = start;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
//--Initialises the page tables that cover the virtual memory map range of the kernel address space; the loop handles [addr, end).
//--For a page table entry a page of memory is allocated and its address is used to fill the entry; note that the entries
//--at every level hold the physical address of a page frame.
//--Why do page table entries not use virtual addresses? (The original author's own reasoning follows.) The MMU turns a
//virtual address into a physical one by walking the entries level by level. If the entries held virtual addresses,
//finding the page an entry points to would itself need a virtual-to-physical translation, which would seem to need
//yet another MMU, and so on. So virtual addresses are the input in front of the MMU, and everything the MMU looks up
//behind it is physical, which works well. The MMU is also hardware, which suits physical addressing better.
for (; addr < end; addr += PAGE_SIZE) {
pgd = vmemmap_pgd_populate(addr, node);
if (!pgd)
return -ENOMEM;
p4d = vmemmap_p4d_populate(pgd, addr, node);
if (!p4d)
return -ENOMEM;
pud = vmemmap_pud_populate(p4d, addr, node);
if (!pud)
return -ENOMEM;
pmd = vmemmap_pmd_populate(pud, addr, node);
if (!pmd)
return -ENOMEM;
pte = vmemmap_pte_populate(pmd, addr, node);
if (!pte)
return -ENOMEM;
vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
}
return 0;
}
/*
 * Returns the kernel pgd entry for addr, first allocating and installing a
 * zeroed page for it when the entry is still empty. Returns NULL if that
 * allocation fails.
 */
pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
pgd_t *pgd = pgd_offset_k(addr);
if (pgd_none(*pgd)) {
void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); //allocate one page of memory and get back its virtual address
if (!p) //author's open question: buddy is not initialised yet, so why would this come from buddy? //presumably it is memblock, not buddy
return NULL;
pgd_populate(&init_mm, pgd, p); //initialise the page table entry with the address of that page
}
return pgd;
}
/*
 * Records a section's memory map in its mem_section descriptor: clears the
 * bits selected by SECTION_MAP_MASK, then ORs in the encoded mem_map together
 * with SECTION_HAS_MEM_MAP and the caller's flags, and stores the usage pointer.
 */
static void __meminit sparse_init_one_section(struct mem_section *ms,
unsigned long pnum, struct page *mem_map,
struct mem_section_usage *usage, unsigned long flags)
{
ms->section_mem_map &= ~SECTION_MAP_MASK;
ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum) //encode the struct page array address and store it in section_mem_map
| SECTION_HAS_MEM_MAP | flags;
ms->usage = usage;
}
* Subtle, we encode the real pfn into the mem_map such that
* the identity pfn - section_mem_map will return the actual
* physical page frame number.
*/
static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long pnum)
{
unsigned long coded_mem_map =
(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
BUILD_BUG_ON(SECTION_MAP_LAST_BIT > (1UL<<PFN_SECTION_SHIFT));
BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
return coded_mem_map;
}
这里对 mem_map 编码做一下计算,从 struct page *p 得到real_pfn:
real_pfn = p - section_mem_map = p - mem_map + section_nr_to_pfn(pnum) = section_offset + pfn_section_start
在S2500服务器上,16个NODE节点的情况下,日志如下,可以看出每个node的map_count为32,也就是每个NODE内存32G,因为每个mem_section代表1G内存。
[ 0.000000] ===sparse_init nid_begin 0 pnum_begin 2 pnum_end 0
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 0 pnum_begin 2 pnum_end 1088 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 1 pnum_begin 1088 pnum_end 1152 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 2 pnum_begin 1152 pnum_end 1216 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 3 pnum_begin 1216 pnum_end 1280 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 4 pnum_begin 1280 pnum_end 1344 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 5 pnum_begin 1344 pnum_end 1408 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 6 pnum_begin 1408 pnum_end 1472 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 7 pnum_begin 1472 pnum_end 2050 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 8 pnum_begin 2050 pnum_end 3136 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 9 pnum_begin 3136 pnum_end 3200 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 10 pnum_begin 3200 pnum_end 3264 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 11 pnum_begin 3264 pnum_end 3328 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 12 pnum_begin 3328 pnum_end 3392 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 13 pnum_begin 3392 pnum_end 3456 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 0 nid_begin 14 pnum_begin 3456 pnum_end 3520 map_count 32
[ 0.000000] ===sparse_init::sparse_init_nid 1 nid_begin 15 pnum_begin 3520 pnum_end 18446744073709551615 map_count 32//最后的pnum_end是上面循环的原因导致到了最后一个section。不过其中大多数是不存在实际物理内存。通过for_each_present_section_nr 可以循环其中存在的section。确定section的内容参考函数arm64_memory_present。
sparse_init_nid解析一个node节点的物理内存:
/*
 * Instrumented copy of sparse_init_nid() (usemap-based variant) that logs the
 * section range of the node and every present section it processes.
 *
 * The node spans [pnum_begin, pnum_end) and contains map_count present
 * sections. On an allocation failure the remaining present sections of the
 * range get their section_mem_map cleared.
 */
static void __init sparse_init_nid(int nid, unsigned long pnum_begin,
				   unsigned long pnum_end,
				   unsigned long map_count)
{
	unsigned long pnum, usemap_longs, *usemap;
	struct page *map;

	usemap_longs = BITS_TO_LONGS(SECTION_BLOCKFLAGS_BITS);
	usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nid),
							  usemap_size() *
							  map_count);
	if (!usemap) {
		pr_err("%s: node[%d] usemap allocation failed", __func__, nid);
		goto failed;
	}
	/* The section numbers are unsigned long, hence %lu rather than %llu. */
	printk("===sparse_init_nid nid %d pnum_begin %lu pnum_end %lu \n",
	       nid, pnum_begin, pnum_end);
	sparse_buffer_init(map_count * section_map_size(), nid);
	for_each_present_section_nr(pnum_begin, pnum) {
		/* The iterator does not stop at pnum_end by itself. */
		if (pnum >= pnum_end)
			break;

		printk("===sparse_init_nid loop nid %d pnum %lu \n", nid, pnum);
		map = sparse_mem_map_populate(pnum, nid, NULL);
		if (!map) {
			pr_err("%s: node[%d] memory map backing failed. Some memory will not be available.",
			       __func__, nid);
			/* Only sections from this one onwards are cleared below. */
			pnum_begin = pnum;
			goto failed;
		}
		check_usemap_section_nr(nid, usemap);
		sparse_init_one_section(__nr_to_section(pnum), pnum, map, usemap);
		/* Advance to the next section's slice of the usemap allocation. */
		usemap += usemap_longs;
	}
	sparse_buffer_fini();
	return;
failed:
	/* We failed to allocate, mark all the following pnums as not present */
	for_each_present_section_nr(pnum_begin, pnum) {
		struct mem_section *ms;

		if (pnum >= pnum_end)
			break;
		ms = __nr_to_section(pnum);
		ms->section_mem_map = 0;
	}
}
[ 0.000000] ===sparse_init_nid nid 0 pnum_begin 2 pnum_end 1088
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 2
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 3
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1024
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1025
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1028
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1029
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1030
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1031
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1032
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1033
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1034
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1035
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1036
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1037
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1038
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1039
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1056
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1057
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1058
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1059
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1060
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1061
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1062
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1063
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1064
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1065
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1066
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1067
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1068
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1069
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1070
[ 0.000000] ===sparse_init_nid loop nid 0 pnum 1071
[ 0.000000] ===sparse_init_nid nid 1 pnum_begin 1088 pnum_end 1152
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1088
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1089
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1090
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1091
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1092
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1093
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1094
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1095
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1096
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1097
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1098
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1099
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1100
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1101
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1102
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1103
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1120
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1121
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1122
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1123
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1124
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1125
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1126
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1127
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1128
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1129
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1130
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1131
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1132
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1133
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1134
[ 0.000000] ===sparse_init_nid loop nid 1 pnum 1135
[ 0.000000] ===sparse_init_nid nid 2 pnum_begin 1152 pnum_end 1216
[ 0.000000] ===sparse_init_nid loop nid 2 pnum 1152
。。。
省略
。。。
[ 0.000000] ===sparse_init_nid nid 3 pnum_begin 1216 pnum_end 1280
。。。
[ 0.000000] ===sparse_init_nid nid 4 pnum_begin 1280 pnum_end 1344。。。
[ 0.000000] ===sparse_init_nid nid 5 pnum_begin 1344 pnum_end 1408。。。
[ 0.000000] ===sparse_init_nid nid 6 pnum_begin 1408 pnum_end 1472。。。
[ 0.000000] ===sparse_init_nid nid 7 pnum_begin 1472 pnum_end 2050。。。
[ 0.000000] ===sparse_init_nid nid 8 pnum_begin 2050 pnum_end 3136。。。
[ 0.000000] ===sparse_init_nid nid 9 pnum_begin 3136 pnum_end 3200。。。
[ 0.000000] ===sparse_init_nid nid 10 pnum_begin 3200 pnum_end 3264。。。
[ 0.000000] ===sparse_init_nid nid 11 pnum_begin 3264 pnum_end 3328。。。
[ 0.000000] ===sparse_init_nid nid 12 pnum_begin 3328 pnum_end 3392。。。
[ 0.000000] ===sparse_init_nid nid 13 pnum_begin 3392 pnum_end 3456。。。
[ 0.000000] ===sparse_init_nid nid 14 pnum_begin 3456 pnum_end 3520。。。
[ 0.000000] ===sparse_init_nid nid 15 pnum_begin 3520 pnum_end 18446744073709551615
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3520
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3521
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3522
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3523
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3524
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3525
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3526
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3527
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3528
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3529
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3530
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3531
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3532
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3533
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3534
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3535
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3552
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3553
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3554
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3555
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3556
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3557
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3558
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3559
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3560
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3561
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3562
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3563
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3564
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3565
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3566
[ 0.000000] ===sparse_init_nid loop nid 15 pnum 3567
上述日志表明:在一个确定的nid中,循环遍历其中存在的section,可以看到循环的次数是32。
前面已经得出每个node下的物理内存,且已经完成**mem_section 的创建。虽然此时page的地址已经确定,但具体的内容还是空的。mem_section对应一个G的物理内存,所有page的内容要根据实际物理内存来初始化。
bootmem_init
zone_sizes_init
free_area_init_nodes
free_area_init_node
calculate_node_totalpages
free_area_init_core
memmap_init(memmap_init_zone)
__init_single_page
在memmap_init_zone函数中,会根据实际的物理内存进行page的初始化,调用__init_single_page函数。下面我在该函数中加入打印,统计每个node实际初始化的page数量:
/*
 * Instrumented excerpt of memmap_init_zone(): initialises the struct page of
 * the PFNs in [start_pfn, start_pfn + size) through __init_single_page() and
 * prints how many pages were actually initialised (realcount).
 *
 * For MEMMAP_EARLY, PFNs that are not valid or do not belong to @nid are
 * skipped, so realcount can be smaller than @size. Part of the original
 * function body is omitted in this excerpt (marked below).
 *
 * @size:      number of PFNs in the range
 * @nid:       node the range belongs to
 * @zone:      zone index passed on to __init_single_page()
 * @start_pfn: first PFN of the range
 * @context:   MEMMAP_EARLY at boot, MEMMAP_HOTPLUG for hotplugged memory
 * @altmap:    optional altmap; its reserve is skipped at the start of the range
 */
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
		unsigned long start_pfn, enum memmap_context context,
		struct vmem_altmap *altmap)
{
	unsigned long realcount = 0;
	unsigned long end_pfn = start_pfn + size;
	pg_data_t *pgdat = NODE_DATA(nid);
	unsigned long pfn;
	unsigned long nr_initialised = 0;
	struct page *page;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
	struct memblock_region *r = NULL, *tmp;
#endif

	if (highest_memmap_pfn < end_pfn - 1)
		highest_memmap_pfn = end_pfn - 1;

	/*
	 * Honor reservation requested by the driver for this ZONE_DEVICE
	 * memory
	 */
	if (altmap && start_pfn == altmap->base_pfn)
		start_pfn += altmap->reserve;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/*
		 * There can be holes in boot-time mem_map[]s handed to this
		 * function. They do not exist on hotplugged memory.
		 */
		if (context != MEMMAP_EARLY)
			goto not_early;

		if (!early_pfn_valid(pfn))
			continue;
		if (!early_pfn_in_nid(pfn, nid))
			continue;
		if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
			break;

		/* ... code omitted in this excerpt ... */

not_early:
		/* Debug counter: one more page really initialised. */
		realcount++;
		page = pfn_to_page(pfn);
		__init_single_page(page, pfn, zone, nid);
		if (context == MEMMAP_HOTPLUG)
			SetPageReserved(page);

		/* Once per pageblock: set the migratetype and allow a reschedule. */
		if (!(pfn & (pageblock_nr_pages - 1))) {
			set_pageblock_migratetype(page, MIGRATE_MOVABLE);
			cond_resched();
		}
	}
	/*
	 * All values except nid are unsigned long, hence %lu. start_pfn is
	 * printed after the altmap adjustment above. The "readlcount" spelling
	 * is kept so the output matches the logs recorded with this patch.
	 */
	printk("===memmap_init_zone nid %d size %lu zone %lu start_pfn %lu readlcount %lu\n",
	       nid, size, zone, start_pfn, realcount);
}
下面打印出了每个node下实际的page数量,这些page的结构内容也在此时完成初始化。
[ 0.000000] ===memmap_init_zone nid 0 size 32768 zone 0 start_pfn 32768 readlcount 31661
[ 0.000000] ===memmap_init_zone nid 0 size 17498112 zone 1 start_pfn 65536 readlcount 490496
[ 0.000000] ===memmap_init_zone nid 1 size 786432 zone 1 start_pfn 17825792 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 2 size 786432 zone 1 start_pfn 18874368 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 3 size 786432 zone 1 start_pfn 19922944 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 4 size 786432 zone 1 start_pfn 20971520 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 5 size 786432 zone 1 start_pfn 22020096 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 6 size 786432 zone 1 start_pfn 23068672 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 7 size 786432 zone 1 start_pfn 24117248 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 8 size 17530880 zone 1 start_pfn 33587200 readlcount 523264
[ 0.000000] ===memmap_init_zone nid 9 size 786432 zone 1 start_pfn 51380224 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 10 size 786432 zone 1 start_pfn 52428800 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 11 size 786432 zone 1 start_pfn 53477376 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 12 size 786432 zone 1 start_pfn 54525952 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 13 size 786432 zone 1 start_pfn 55574528 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 14 size 786432 zone 1 start_pfn 56623104 readlcount 524288
[ 0.000000] ===memmap_init_zone nid 15 size 786432 zone 1 start_pfn 57671680 readlcount 524288
测试page地址是否在VMEMMAP定义的地址范围:
#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/device.h>
#include <linux/io.h>
/*
 * Module init: allocates a single page and prints where its struct page lives
 * relative to VMEMMAP_START/vmemmap, together with its PFN, physical address
 * and page_address() virtual address. The page is released again before
 * returning.
 *
 * Return: 0 on success, -ENOMEM if the page could not be allocated.
 */
static int __init test_init(void)
{
	struct page *page;
	unsigned long vaddr;

	/* Cast explicitly so every argument matches its %lx conversion. */
	printk("VMEMMAP_START %lx vmemmap %lx \n",
	       (unsigned long)VMEMMAP_START, (unsigned long)vmemmap);

	page = alloc_pages(GFP_ATOMIC & ~__GFP_HIGHMEM, 0);
	if (!page)
		return -ENOMEM;	/* report the failure instead of loading silently */

	printk("page_to_pfn(%lx):%lu pa %llx \n",
	       (unsigned long)page, page_to_pfn(page),
	       (unsigned long long)PFN_PHYS(page_to_pfn(page)));

	/* page_address() returns a pointer; convert it for printing. */
	vaddr = (unsigned long)page_address(page);
	printk("page %lx vaddr %lx \n", (unsigned long)page, vaddr);

	/* The page was only needed for its addresses; do not leak it. */
	__free_pages(page, 0);

	printk(KERN_INFO "test_init\n");
	return 0;
}
/* Module exit: nothing to tear down, only logs that the module is unloaded. */
static void __exit test_exit(void)
{
	pr_info("test_exit\n");
}
/* Register test_init()/test_exit() as the module's load and unload entry points. */
module_init(test_init);
module_exit(test_exit);
MODULE_LICENSE("GPL");
输出:
[239935.492355] VMEMMAP_START ffff7fe000000000 vmemmap ffff7fdfffe00000
[239935.492359] page_to_pfn(ffff7fe008b6f100):2317252 pa 235bc40000
[239935.518223] page ffff7fe008b6f100 vaddr ffff8022dbc40000
page地址在VMEMMAP_START 开始。最终的虚拟地址是通过pfn->pa->va。 虚拟地址自然是内核之前映射的线性地址区间。
文章来源:https://www.toymoban.com/news/detail-611016.html
物理地址获取numa id 函数:文章来源地址https://www.toymoban.com/news/detail-611016.html
pfn_to_nid page_to_nid
到了这里,关于arm64内核内存布局-之vmemmap(page初始化)的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!