	/*
	 * If know now we are going to need KPTI then use non-global
	 * mappings from the start, avoiding the cost of rewriting
	 * everything later.
	 */
	arm64_use_ng_mappings = kaslr_requires_kpti();
	/*
	 * Initialise the static keys early as they may be enabled by the
	 * cpufeature code and early parameters.
	 */
	jump_label_init();
	/// Parse boot_command_line and set the done flag
	parse_early_param();
	/*
	 * Unmask asynchronous aborts and fiq after bringing up possible
	 * earlycon. (Report possible System Errors once we can report this
	 * occurred).
	 */
	local_daif_restore(DAIF_PROCCTX_NOIRQ);
	/*
	 * TTBR0 is only used for the identity mapping at this stage. Make it
	 * point to zero page to avoid speculatively fetching new entries.
	 */
	cpu_uninstall_idmap();
	/* Init percpu seeds for random tags after cpus are set up. */
	kasan_init_sw_tags();
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
	/*
	 * Make sure init_thread_info.ttbr0 always generates translation
	 * faults in case uaccess_enable() is inadvertently called by the init
	 * thread.
	 */
	init_task.thread_info.ttbr0 = phys_to_ttbr(__pa_symbol(reserved_pg_dir));
#endif
	if (boot_args[1] || boot_args[2] || boot_args[3]) {
		pr_err("WARNING: x1-x3 nonzero in violation of boot protocol:\n"
			"\tx1: %016llx\n\tx2: %016llx\n\tx3: %016llx\n"
			"This indicates a broken bootloader or old kernel\n",
			boot_args[1], boot_args[2], boot_args[3]);
	}
}
1. Where the fixmap mapping comes from:
At this point only the identity mapping and the coarse-grained kernel-image page tables have been set up, which merely guarantees that the kernel image itself is accessible. The memory-management subsystem is not up yet, so the dtb passed in by the bootloader, or other I/O devices, still cannot be reached. To solve this, Linux introduces the fixmap.
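As a minimal sketch of how a fixmap slot is consumed once early_fixmap_init() has wired up the statically allocated bm_pud/bm_pmd/bm_pte tables (the helper name and physical address below are hypothetical, only FIX_EARLYCON_MEM_BASE, __fix_to_virt() and __set_fixmap() are real kernel symbols): every index in enum fixed_addresses owns a fixed, compile-time virtual address, and mapping a device only requires writing one pre-allocated PTE.

#include <linux/types.h>
#include <asm/fixmap.h>

/* Hypothetical helper: map an early UART through the FIX_EARLYCON_MEM_BASE slot. */
static void __iomem *early_map_uart(phys_addr_t uart_phys)
{
	/* The virtual address is fixed at compile time by the fixmap index. */
	unsigned long vaddr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);

	/* Only the statically allocated bm_pte entry is written; nothing is allocated. */
	__set_fixmap(FIX_EARLYCON_MEM_BASE, uart_phys & PAGE_MASK, FIXMAP_PAGE_IO);

	return (void __iomem *)(vaddr + (uart_phys & ~PAGE_MASK));
}

Because the page tables backing the fixmap region live inside the kernel image itself, such mappings work long before the buddy allocator exists, which is exactly what the early console and FDT mapping code relies on.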
	pgdp = pgd_offset_k(addr);		/// get the pgd entry
	p4dp = p4d_offset(pgdp, addr);
	p4d = READ_ONCE(*p4dp);
	if (CONFIG_PGTABLE_LEVELS > 3 &&
	    !(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
		/*
		 * We only end up here if the kernel mapping and the fixmap
		 * share the top level pgd entry, which should only happen on
		 * 16k/4 levels configurations.
		 */
		BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
		pudp = pud_offset_kimg(p4dp, addr);
	} else {
		if (p4d_none(p4d))
			__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);	/// populate the p4d entry
		pudp = fixmap_pud(addr);	/// get the pud entry
	}
	if (pud_none(READ_ONCE(*pudp)))
		__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);	/// populate the pud entry
	pmdp = fixmap_pmd(addr);
	__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);		/// populate the pmd entry
	/*
	 * The boot-ioremap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
	/// Add the memory occupied by the dtb to memblock's reserved regions so that
	/// later allocations will not touch it; it is released with memblock_free()
	/// once the dtb has been consumed.
	if (dt_virt)
		memblock_reserve(dt_phys, size);
	/// Scan and parse the dtb, feeding the memory layout into memblock
	if (!dt_virt || !early_init_dt_scan(dt_virt)) {
		pr_crit("\n"
			"Error: invalid device tree blob at physical address %pa (virtual address 0x%p)\n"
			"The dtb must be 8-byte aligned and must not exceed 2 MB in size\n"
			"\nPlease check your bootloader.",
			&dt_phys, dt_virt);
		while (true)
			cpu_relax();
	}
	/* Early fixups are done, map the FDT as read-only now */
	fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
	name = of_flat_dt_get_machine_name();
	if (!name)
		return;
/// Fill in the pte entries for the fdt and return its virtual address;
/// the virtual address is a fixmap slot reserved at compile time.
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
{
	const u64 dt_virt_base = __fix_to_virt(FIX_FDT);	/// get the fdt's fixed virtual address
	int offset;
	void *dt_virt;
	/*
	 * Check whether the physical FDT address is set and meets the minimum
	 * alignment requirement. Since we are relying on MIN_FDT_ALIGN to be
	 * at least 8 bytes so that we can always access the magic and size
	 * fields of the FDT header after mapping the first chunk, double check
	 * here if that is indeed the case.
	 */
	BUILD_BUG_ON(MIN_FDT_ALIGN < 8);
	if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
		return NULL;
	/*
	 * Make sure that the FDT region can be mapped without the need to
	 * allocate additional translation table pages, so that it is safe
	 * to call create_mapping_noalloc() this early.
	 *
	 * On 64k pages, the FDT will be mapped using PTEs, so we need to
	 * be in the same PMD as the rest of the fixmap.
	 * On 4k pages, we'll use section mappings for the FDT so we only
	 * have to be in the same PUD.
	 */
	BUILD_BUG_ON(dt_virt_base % SZ_2M);
	/* map the first chunk so we can read the size from the header */
	/// Set up the page-table entries for the given physical/virtual addresses.
	/// The mapping must use pre-allocated page tables; nothing can be allocated
	/// on the fly here because the buddy allocator is not up yet.
	create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
			       dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
	/// Read the fdt contents through the new virtual mapping and check that
	/// the magic number in the dtb header is correct.
	if (fdt_magic(dt_virt) != FDT_MAGIC)
		return NULL;
	*size = fdt_totalsize(dt_virt);
	if (*size > MAX_FDT_SIZE)
		return NULL;
	/* Retrieve command line */
	// Fetch the "bootargs" property from the device tree
	p = of_get_flat_dt_prop(node, "bootargs", &l);
	if (p != NULL && l > 0)
		strlcpy(data, p, min(l, COMMAND_LINE_SIZE));
	/*
	 * CONFIG_CMDLINE is meant to be a default in case nothing else
	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
	 * is set in which case we override whatever was found earlier.
	 */
#ifdef CONFIG_CMDLINE
#if defined(CONFIG_CMDLINE_EXTEND)
	strlcat(data, " ", COMMAND_LINE_SIZE);
	strlcat(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#elif defined(CONFIG_CMDLINE_FORCE)
	strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#else
	/* No arguments from boot loader, use kernel's cmdline */
	if (!((char *)data)[0])
		strlcpy(data, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
#endif
#endif /* CONFIG_CMDLINE */
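To make the three variants concrete, here is a small userspace sketch, not kernel code; the command-line strings and the 256-byte buffer are made up, and snprintf stands in for the kernel's strlcat/strlcpy:

#include <stdio.h>
#include <string.h>

#define COMMAND_LINE_SIZE 256
#define CONFIG_CMDLINE "root=/dev/vda rw"		/* built-in default (example) */

int main(void)
{
	char data[COMMAND_LINE_SIZE] = "console=ttyAMA0";	/* from the DT bootargs property */
	char extend[COMMAND_LINE_SIZE];

	/* CONFIG_CMDLINE_EXTEND: append the built-in string to the DT one */
	snprintf(extend, sizeof(extend), "%s %s", data, CONFIG_CMDLINE);

	/* CONFIG_CMDLINE_FORCE: ignore whatever the bootloader/DT provided */
	const char *force = CONFIG_CMDLINE;

	/* default: fall back to CONFIG_CMDLINE only if the DT gave us nothing */
	const char *fallback = data[0] ? data : CONFIG_CMDLINE;

	printf("EXTEND : %s\nFORCE  : %s\ndefault: %s\n", extend, force, fallback);
	return 0;
}

With these inputs, EXTEND yields "console=ttyAMA0 root=/dev/vda rw", FORCE yields "root=/dev/vda rw", and the default keeps "console=ttyAMA0" because the device tree already supplied a command line.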
pr_debug("Command line is: %s\n", (char *)data);
	rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
	if (rng_seed && l > 0) {
		add_bootloader_randomness(rng_seed, l);

		/* try to clear seed so it won't be found. */
		fdt_nop_property(initial_boot_params, node, "rng-seed");
	/*
	 * Corner case: 52-bit VA capable systems running KVM in nVHE mode may
	 * be limited in their ability to support a linear map that exceeds 51
	 * bits of VA space, depending on the placement of the ID map. Given
	 * that the placement of the ID map may be randomized, let's simply
	 * limit the kernel's linear map to 51 bits as well if we detect this
	 * configuration.
	 */
	if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 &&
	    is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
		pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n");
		linear_region_size = min_t(u64, linear_region_size, BIT(51));
	}
	/*
	 * Select a suitable value for the base of physical memory.
	 */
	/// Base of physical memory: the DRAM regions were already added to memblock
	/// while scanning the dtb, so round the start of DRAM down to ARM64_MEMSTART_ALIGN.
	memstart_addr = round_down(memblock_start_of_DRAM(),
				   ARM64_MEMSTART_ALIGN);
	if ((memblock_end_of_DRAM() - memstart_addr) > linear_region_size)
		pr_warn("Memory doesn't fit in the linear mapping, VA_BITS too small\n");
	/*
	 * Remove the memory that we will not be able to cover with the
	 * linear mapping. Take care not to clip the kernel which may be
	 * high in memory.
	 */
	/// Remove memory that lies beyond the linear mapping.
	memblock_remove(max_t(u64, memstart_addr + linear_region_size,
			__pa_symbol(_end)), ULLONG_MAX);
	/// If physical memory exceeds the linear region, drop the range that the
	/// virtual address space cannot cover.
	if (memstart_addr + linear_region_size < memblock_end_of_DRAM()) {
		/* ensure that memstart_addr remains sufficiently aligned */
		memstart_addr = round_up(memblock_end_of_DRAM() - linear_region_size,
					 ARM64_MEMSTART_ALIGN);
		memblock_remove(0, memstart_addr);
	}
	/*
	 * If we are running with a 52-bit kernel VA config on a system that
	 * does not support it, we have to place the available physical
	 * memory in the 48-bit addressable part of the linear region, i.e.,
	 * we have to move it upward. Since memstart_addr represents the
	 * physical address of PAGE_OFFSET, we have to *subtract* from it.
	 */
	if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52) && (vabits_actual != 52))
		memstart_addr -= _PAGE_OFFSET(48) - _PAGE_OFFSET(52);
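A small standalone sketch of the arithmetic (the DRAM base is an arbitrary example, and _PAGE_OFFSET here mirrors the kernel macro -(1UL << va)): subtracting _PAGE_OFFSET(48) - _PAGE_OFFSET(52) = 2^52 - 2^48 from memstart_addr pushes the linear addresses of physical memory up into the 48-bit-addressable tail of the 52-bit linear region.

#include <stdint.h>
#include <stdio.h>

#define _PAGE_OFFSET(va)	(-(UINT64_C(1) << (va)))	/* mirrors the kernel macro */

int main(void)
{
	uint64_t page_offset_52 = _PAGE_OFFSET(52);	/* 0xfff0000000000000 */
	uint64_t page_offset_48 = _PAGE_OFFSET(48);	/* 0xffff000000000000 */
	uint64_t dram_base = 0x80000000;		/* example DRAM base */
	uint64_t memstart = dram_base;

	/* the same subtraction performed above (wraps modulo 2^64, as in the kernel) */
	memstart -= page_offset_48 - page_offset_52;

	/* linear VA = PAGE_OFFSET + (phys - memstart_addr) */
	uint64_t virt = page_offset_52 + (dram_base - memstart);
	printf("linear VA of DRAM base: 0x%016llx\n", (unsigned long long)virt);
	return 0;
}

The program prints 0xffff000000000000, i.e. the first byte of DRAM lands at _PAGE_OFFSET(48), the lowest linear address a CPU limited to 48 VA bits can actually reach.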
	/*
	 * Apply the memory limit if it was set. Since the kernel may be loaded
	 * high up in memory, add back the kernel region that must be accessible
	 * via the linear mapping.
	 */
	/// If a memory limit was set (usually it is not), trim memblock to it and
	/// add back the kernel image, which must stay reachable through the linear map.
	if (memory_limit != PHYS_ADDR_MAX) {
		memblock_mem_limit_remove_map(memory_limit);
		memblock_add(__pa_symbol(_text), (u64)(_end - _text));
	}
	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && phys_initrd_size) {
		/*
		 * Add back the memory we just removed if it results in the
		 * initrd to become inaccessible via the linear mapping.
		 * Otherwise, this is a no-op
		 */
		u64 base = phys_initrd_start & PAGE_MASK;
		u64 size = PAGE_ALIGN(phys_initrd_start + phys_initrd_size) - base;
		/*
		 * We can only add back the initrd memory if we don't end up
		 * with more memory than we can address via the linear mapping.
		 * It is up to the bootloader to position the kernel and the
		 * initrd reasonably close to each other (i.e., within 32 GB of
		 * each other) so that all granule/#levels combinations can
		 * always access both.
		 */
		if (WARN(base < memblock_start_of_DRAM() ||
			 base + size > memblock_start_of_DRAM() + linear_region_size,
			"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
			phys_initrd_size = 0;
		} else {
			/// If the initrd range is acceptable, re-add it to memblock.memory
			/// (clearing any flags) and also reserve it.
			memblock_remove(base, size); /* clear MEMBLOCK_ flags */
			memblock_add(base, size);
			memblock_reserve(base, size);
		}
	}
	if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
		extern u16 memstart_offset_seed;
		u64 mmfr0 = read_cpuid(ID_AA64MMFR0_EL1);
		int parange = cpuid_feature_extract_unsigned_field(
					mmfr0, ID_AA64MMFR0_PARANGE_SHIFT);
		s64 range = linear_region_size -
			    BIT(id_aa64mmfr0_parange_to_phys_shift(parange));
		/*
		 * If the size of the linear region exceeds, by a sufficient
		 * margin, the size of the region that the physical memory can
		 * span, randomize the linear region as well.
		 */
		if (memstart_offset_seed > 0 && range >= (s64)ARM64_MEMSTART_ALIGN) {
			range /= ARM64_MEMSTART_ALIGN;
			memstart_addr -= ARM64_MEMSTART_ALIGN *
					 ((range * memstart_offset_seed) >> 16);
		}
	}
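The amount subtracted is effectively a 16-bit fraction of the slack between the linear region and the largest physical span: memstart_offset_seed / 65536 of range, rounded down to whole ARM64_MEMSTART_ALIGN units. A standalone sketch with made-up numbers (1 GiB alignment, a 47-bit linear region, a 40-bit physical range, seed 0x8000) illustrates the scaling:

#include <stdint.h>
#include <stdio.h>

#define ARM64_MEMSTART_ALIGN	(1ULL << 30)	/* assumed 1 GiB for this sketch */

int main(void)
{
	uint64_t linear_region_size = 1ULL << 47;	/* example: half of a 48-bit VA space */
	uint64_t phys_span = 1ULL << 40;		/* example: 1 TiB of addressable PA */
	uint16_t seed = 0x8000;				/* example memstart_offset_seed */

	int64_t range = linear_region_size - phys_span;	/* slack available for sliding */
	range /= ARM64_MEMSTART_ALIGN;			/* count it in alignment units */

	/* seed/65536 of the slack: with seed 0x8000 we slide by about half of it */
	uint64_t offset = ARM64_MEMSTART_ALIGN * ((range * seed) >> 16);
	printf("memstart_addr shifted down by %llu GiB\n",
	       (unsigned long long)(offset >> 30));	/* prints 65024 */
	return 0;
}

Because memstart_addr moves down while the virtual window stays put, the net effect is that physical memory appears at a randomized offset inside the linear map.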
	/*
	 * Register the kernel text, kernel data, initrd, and initial
	 * pagetables with memblock.
	 */
	/// Mark the kernel image as reserved in memblock;
	/// the initial page tables are released again after paging_init().
	memblock_reserve(__pa_symbol(_stext), _end - _stext);