[Android稳定性] 第011篇 [原理篇] minidump的原理介绍补充

一、什么是minidump?

各个 subsystem 都会把自己的内存区域注册到 memory 映射表中;当 system 发生 crash 时,boot subsystem 会对注册过的 memory 信息进行加密,并保存到 RAM EMMC 分区。

1.1 MINIDUMP流程图

二、MINIDUMP代码流程

2.1 HLOS侧流程

2.1.1 defconfig配置

1
2
3
4
5
6
7
8
9
10
CONFIG_QCOM_MINIDUMP=m
# CONFIG_QCOM_VA_MINIDUMP is not set
# CONFIG_QCOM_DYN_MINIDUMP_STACK is not set
CONFIG_QCOM_MINIDUMP_FTRACE=y
CONFIG_QCOM_MINIDUMP_PANIC_DUMP=y
# CONFIG_QCOM_MINIDUMP_PANIC_CPU_CONTEXT is not set
CONFIG_QCOM_MINIDUMP_PSTORE=y
CONFIG_MINIDUMP_MAX_ENTRIES=200
CONFIG_IPC_LOG_MINIDUMP_BUFFERS=16
CONFIG_QCOM_IRQ_STAT=y

2.1.2 相关代码

1
2
3
4
msm-kernel/drivers/soc/qcom/msm_minidump.c
msm-kernel/drivers/soc/qcom/minidump_log.c
msm-kernel/drivers/soc/qcom/minidump_memory.c
msm-kernel/drivers/soc/qcom/qcom_va_minidump.c

2.1.3 msm_minidump_add_region

msm_minidump_add_region是AP侧增加minidump region的核心API

1
2
3
4
5
6
7
8
9
/*
 * msm_minidump_add_region() - entry point for adding a minidump region.
 * @entry: descriptor of the region to register.
 *
 * Abridged excerpt: every "//..." marks code omitted by the article, so the
 * declaration of ret, the fallback branch and the return statement are not
 * shown here.
 */
int msm_minidump_add_region(const struct md_region *entry)
{
//...
/* When an ops table is installed, delegate to its add_region hook. */
if (md_core.ops) {
ret = md_core.ops->add_region(entry);
} else {
//...
}
}

调用 md_core.ops->add_region函数

1
2
3
4
5
6
7
8
9
10
11
/*
 * Operations table that binds every md_ops hook to its md_smem_* function.
 * The .add_region hook is md_smem_add_region().
 * NOTE(review): presumably this is the SMEM-backed backend that ends up in
 * md_core.ops - confirm where the table is installed.
 */
static const struct md_ops md_smem_ops = {
.init_md_table = md_smem_init_md_table,
.add_pending_entry = md_smem_add_pending_entry,
.add_header = md_smem_add_header,
.remove_region = md_smem_remove_region,
.add_region = md_smem_add_region,
.update_region = md_smem_update_region,
.get_available_region = md_smem_get_available_region,
.md_enable = md_smem_md_enable,
.get_region = md_smem_get_region,
};

也就是 md_smem_add_region函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/*
 * md_smem_add_region() - account for a new region and, when the subsystem
 * TOC is enabled, publish it.
 * @entry: descriptor of the region to add.
 *
 * The whole operation runs with mdt_lock held (spin_lock_irqsave()).
 * If minidump_table.md_ss_toc is set and its enable status is MD_SS_ENABLED,
 * the entry is written through md_add_ss_toc() and md_add_elf_header();
 * otherwise only the md_num_regions counter is advanced in this excerpt.
 *
 * Return: the value md_num_regions had before being incremented on success,
 * -ENOMEM when md_num_regions or the TOC's ss_region_count has reached
 * MAX_NUM_ENTRIES, -EEXIST when md_entry_num() already finds the entry.
 */
static int md_smem_add_region(const struct md_region *entry)
{
	unsigned long irq_flags;
	u32 toc_ready;
	int rc;

	spin_lock_irqsave(&mdt_lock, irq_flags);

	if (md_num_regions >= MAX_NUM_ENTRIES) {
		printk_deferred("Maximum entries reached\n");
		rc = -ENOMEM;
		goto unlock;
	}

	toc_ready = minidump_table.md_ss_toc &&
		    minidump_table.md_ss_toc->md_ss_enable_status == MD_SS_ENABLED;

	if (toc_ready) {
		/* The shared table itself must still have a free slot. */
		if (minidump_table.md_ss_toc->ss_region_count >= MAX_NUM_ENTRIES) {
			printk_deferred("Maximum regions in minidump table reached\n");
			rc = -ENOMEM;
			goto unlock;
		}

		/* A non-negative index means the entry is already present. */
		if (md_entry_num(entry) >= 0) {
			printk_deferred("Entry name already exist\n");
			rc = -EEXIST;
			goto unlock;
		}

		md_add_ss_toc(entry, false);
		md_add_elf_header(entry);
	}

	/* Report the slot just taken, then advance the counter. */
	rc = md_num_regions++;

unlock:
	spin_unlock_irqrestore(&mdt_lock, irq_flags);
	return rc;
}

这个函数实际上就是填充了一个全局的结构体变量:minidump_table

而这个变量的初始化在 md_smem_init_md_table

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
/*
 * md_smem_init_md_table() - bind the local minidump table to the global TOC
 * found in shared memory.
 *
 * Looks up the SBL_MINIDUMP_SMEM_ID item, checks that it is large enough and
 * flagged as initialised, records the global TOC and the MD_SS_HLOS_ID
 * subsystem TOC in minidump_table, allocates the array of MAX_NUM_ENTRIES
 * regions and stores its physical address in the subsystem TOC.
 *
 * Return: 0 on success; the error encoded by qcom_smem_get(), or -ENODEV if
 * it handed back NULL; -ENODEV when the TOC is too small or not initialised;
 * -ENOMEM when the region array cannot be allocated.
 */
static int md_smem_init_md_table(void)
{
	size_t size;

	/* Get Minidump table */
	md_global_toc = qcom_smem_get(QCOM_SMEM_HOST_ANY,
				      SBL_MINIDUMP_SMEM_ID, &size);
	if (IS_ERR_OR_NULL(md_global_toc)) {
		pr_err("SMEM is not initialized\n");
		/*
		 * PTR_ERR(NULL) evaluates to 0, which the caller would take
		 * for success; map the NULL case to a real error code.
		 */
		return md_global_toc ? PTR_ERR(md_global_toc) : -ENODEV;
	}

	/* Check global minidump support initialization */
	if (size < sizeof(*md_global_toc) || !md_global_toc->md_toc_init) {
		pr_err("System Minidump TOC not initialized\n");
		return -ENODEV;
	}

	minidump_table.md_gbl_toc = md_global_toc;
	minidump_table.revision = md_global_toc->md_revision;
	md_ss_toc = &md_global_toc->md_ss_toc[MD_SS_HLOS_ID];

	/*
	 * NOTE(review): status is set to MD_SS_ENCR_DONE while encryption is
	 * marked MD_SS_ENCR_NOTREQ - confirm this pairing is intended.
	 */
	md_ss_toc->encryption_status = MD_SS_ENCR_DONE;
	md_ss_toc->encryption_required = MD_SS_ENCR_NOTREQ;

	minidump_table.md_ss_toc = md_ss_toc;

	/* kcalloc() zeroes the array and checks the count * size product. */
	minidump_table.md_regions = kcalloc(MAX_NUM_ENTRIES,
					    sizeof(struct md_ss_region),
					    GFP_KERNEL);
	if (!minidump_table.md_regions)
		return -ENOMEM;

	/* The region array is published by physical address. */
	md_ss_toc->md_ss_smem_regions_baseptr =
		virt_to_phys(minidump_table.md_regions);

	/*
	 * NOTE(review): the count starts at 1, presumably reserving the first
	 * slot for a header entry - confirm against md_smem_add_header().
	 */
	md_ss_toc->ss_region_count = 1;

	return 0;
}

可以看出HLOS侧增加minidump的region,实际上就是通过 SBL_MINIDUMP_SMEM_ID这个share memory。而在xbl阶段会根据share memory内的内容增加regions。

2.2 NON-HLOS侧流程

2.2.1 add_minidump_regions

1
2
3
4
5
6
7
8
/*
 * add_minidump_regions() - abridged excerpt; every "//..." marks code
 * omitted by the article (including how region_info is obtained).
 * The two calls kept here fetch the shared-memory base of the minidump TOC
 * and then register one region for dumping.
 */
void add_minidump_regions(void)
{
//...
md_get_smem_base_address();
//...
add_one_region_to_dump(region_info);
//...
}

这个函数有两个重点函数:

  • md_get_smem_base_address : 读取 SBL_MINIDUMP_SMEM_ID的共享内存

    1
    2
    3
    4
    5
    6
    7
    /*
     * Initialise SMEM, look up the SBL_MINIDUMP_SMEM_ID item and cache its
     * address in md_global_toc_smem_base. The lookup result is not checked.
     */
    static void md_get_smem_base_address(void)
    {
        /* Size handed to the lookup: one md_global_toc structure. */
        uint32 toc_size = sizeof(md_global_toc);

        smem_init();

        md_global_toc_smem_base =
            (md_global_toc *)smem_get_addr(SBL_MINIDUMP_SMEM_ID, &toc_size);
    }
  • add_one_region_to_dump(region_info) :从内存中读取数据存起来

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    /*
     * add_one_region_to_dump() - register one minidump region with the
     * download-debug region list.
     * r: region descriptor; region_name, seq_num, region_base_address and
     *    region_size are read.
     *
     * The region is named "md_" + region_name, followed by seq_num when it is
     * in 1..9999; the dump file name is that string plus ".BIN".
     */
    static void add_one_region_to_dump(md_ss_region *r)
    {
    char dump_region_name[DLOAD_DEBUG_STRLEN_BYTES] = "md_";
    char dump_region_bin_name[DLOAD_DEBUG_STRLEN_BYTES];
    /* Room for 4 characters plus the terminator, matching the < 10000 guard. */
    char seq_num[5] = {0};

    /*
     * NOTE(review): the bound is MD_REGION_NAME_LENGTH, not the destination
     * size DLOAD_DEBUG_STRLEN_BYTES. strlcat() treats this argument as the
     * total size of the destination, so the combined name is capped at
     * MD_REGION_NAME_LENGTH - 1 characters. Confirm that
     * MD_REGION_NAME_LENGTH <= DLOAD_DEBUG_STRLEN_BYTES and that the
     * truncation is intended (it may leave room for ".BIN").
     */
    strlcat(dump_region_name, (const char *)r->region_name, MD_REGION_NAME_LENGTH);

    if((r->seq_num > 0) && (r->seq_num < 10000))
    {
    /* NOTE(review): assumes this itoa(buf, value) writes a decimal string - confirm. */
    itoa(seq_num, r->seq_num);
    strlcat(dump_region_name, (const char *)seq_num, MD_REGION_NAME_LENGTH);
    }

    /* The file name is the region name with a ".BIN" suffix. */
    strlcpy(dump_region_bin_name, dump_region_name, DLOAD_DEBUG_STRLEN_BYTES);
    strlcat(dump_region_bin_name, ".BIN", DLOAD_DEBUG_STRLEN_BYTES);

    dload_debug_add_region(OPTIONAL_DEF_SAVE,
    (r->region_base_address), (r->region_size),
    dump_region_name, dump_region_bin_name);
    }

2.2.2 boot_ram_dump_to_raw_parition

这个函数是将minidump保存到分区中,比如minidump分区(老项目),在现在的小米项目设计中会保存到rawdump中或者blackbox分区中,这里不详细展开,在blackbox设计原理篇再详细赘述。

三、小米增加的regions

小米项目在minidump中增加了几个regions,分别是 md_kmsg/md_pmsg/tz_log

这里也解释一下设计原理:

3.1 定义日志内存地址

1
2
3
4
5
/*
 * Log regions added to the minidump.
 * NOTE(review): the fields look like {base address, size, description,
 * dump file name} - confirm against struct log_memory_region.
 * The table ends with an all-zero / NULL entry.
 */
struct log_memory_region log_dump_regions_md[] = {
/* 0xD0000000 + 2 * 0x40000 (two records); size equals console-size. */
{0xD0080000, 0x80000, "console region", "md_kmsg"},
/* Directly after the console area; size equals pmsg-size. */
{0xD0100000, 0x100000, "logcat region", "md_pmsg"},
{0x0, 0x0, NULL, NULL}
};

这个地址其实就是ramoops的地址:kernel启动后,pstore驱动会把log保存到ramoops的这段地址上。

1
2
3
4
5
6
7
/*
 * Reserved ramoops area: base 0xd0000000, size 0x200000 (assuming two
 * address cells and two size cells in reg).
 * 0x200000 - console-size (0x80000) - pmsg-size (0x100000) leaves 0x80000,
 * i.e. two records of record-size 0x40000.
 */
ramoops_mem: ramoops@d0000000 {
compatible = "ramoops";
reg = <0x0 0xd0000000 0x0 0x200000>;
record-size = <0x40000>;
pmsg-size = <0x100000>;
console-size = <0x80000>;
};

详细可查看:[Android稳定性] 第003篇 mtdoops的原理介绍

关于这里的地址,还是值得说明一下ramoops的内存布局:

1
2
3
4
5
6
7
8
9
10
11
12
/*
Ramoops address maps
|----------------| <------Ramoops start address 0xD0000000
| record1 | Recordsize *2 = 0x40000 * 2
|----------------|
| record2 |
|----------------| <------Last kmsg start address 0xD0080000
| console | console size = 0x80000;
|----------------| <------Last pmsg start address 0xD0100000
| pmsg | pmsg size = 0x100000;
|________________|
*/

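所以md_kmsg的地址和设备树中ramoops的起始地址不一样,要加上两个record的偏移:0xD0000000 + 0x40000 × 2 = 0xD0080000;md_pmsg再加上console size:0xD0080000 + 0x80000 = 0xD0100000。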

3.2 dload_debug_add_region

dload_add_last_kmsg中调用 dload_debug_add_region增加regions。

四、如何验证?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
1. set the mini dump to emmc
adb root
adb wait-for-device
adb shell "echo mini > /sys/kernel/dload/dload_mode"
adb shell "cat /sys/kernel/dload/dload_mode"
adb shell "echo 1 > /sys/module/subsystem_restart/parameters/enable_ramdumps"
adb shell "echo 1 > /sys/module/subsystem_restart/parameters/enable_mini_ramdumps"
adb shell "echo 1 > /sys/module/subsystem_restart/parameters/enable_debug"
adb shell "echo 'file ramdump.c +p' > /sys/kernel/debug/dynamic_debug/control"
adb shell "echo 1 >/sys/kernel/dload/emmc_dload"
adb shell "cat /sys/kernel/dload/emmc_dload"

2. adb shell "echo c > /proc/sysrq-trigger" to get a dump
in the uart, you could see log as below to save the minidump to emmc
B - 3299490 - RawDump Free space:0x4c2fa20, Dump start address:0x858c1000, size 0x2000
B - 3310348 - RawDump Free space:0x4c2da20, Dump start address:0x86007210, size 0xf8
B - 3320565 - RawDump Free space:0x4c2d928, Dump start address:0x86001030, size 0x1000
B - 3331423 - RawDump Free space:0x4c2c928, Dump start address:0x85eac400, size 0x8
B - 3340787 - RawDump Free space:0x4c2c920, Dump start address:0x85e97000, size 0xcc
B - 3350150 - RawDump Free space:0x4c2c854, Dump start address:0x85eac408, size 0x4
B - 3359483 - RawDump Free space:0x4c2c850, Dump start address:0x146aa000, size 0x1000
B - 3370311 - RawDump Free space:0x4c2b850, Dump start address:0x85ebad04, size 0x1c08
B - 3381931 - RawDump successfully, Reset the device

3. pull the minidump from device
adb wait-for-device
adb root
adb wait-for-device
adb shell "dd if=/dev/block/bootdevice/by-name/minidump of=/sdcard/minidump.bin"
adb pull /sdcard/minidump.bin .

串口日志输出:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
MINIDUMP: dload_add_last_kmsg() log_dump_regions[0].region_base = 0x5d100000
MINIDUMP: dload_add_last_kmsg() rb->data = 0x5d10000c
MINIDUMP: dload_add_last_kmsg() rb->start = 0x4fb1a
MINIDUMP: dload_add_last_kmsg() rb->size = 0x4fb1a
MINIDUMP: dload_add_last_kmsg() log_dump_regions[0].region_size = 0x100000
MINIDUMP: dload_add_last_kmsg() sizeof(ram_buffer) = 0xc
MINIDUMP: memory region old mem_base: 0x5d10000c
==================================================
MINIDUMP: add memory region
MINIDUMP: memory region mem_base: 0x5d10000c
MINIDUMP: memory region length: 4fb1a
MINIDUMP: memory region desc : console region
MINIDUMP: memory region filename : md_kmsg
==================================================
MINIDUMP: dload_add_last_kmsg() log_dump_regions[1].region_base = 0x5d200000
MINIDUMP: dload_add_last_kmsg() rb->data = 0x5d20000c
MINIDUMP: dload_add_last_kmsg() rb->start = 0x2a7b5
MINIDUMP: dload_add_last_kmsg() rb->size = 0x1ffff4
MINIDUMP: dload_add_last_kmsg() log_dump_regions[1].region_size = 0x200000
MINIDUMP: dload_add_last_kmsg() sizeof(ram_buffer) = 0xc
MINIDUMP: memory region old mem_base: 0x5d22a7c1
==================================================
MINIDUMP: add memory region
MINIDUMP: memory region mem_base: 0x5d22a7c1
MINIDUMP: memory region length: 1d584b
MINIDUMP: memory region desc : logcat region
MINIDUMP: memory region filename : md_pmsg
==================================================
MINIDUMP: memory region old mem_base: 0x5d20000c
==================================================
MINIDUMP: add memory region
MINIDUMP: memory region mem_base: 0x5d20000c
MINIDUMP: memory region length: 2a7b5
MINIDUMP: memory region desc : logcat region
MINIDUMP: memory region filename : md_pmsg
==================================================
MINIDUMP: memory region old mem_base: 0x45ebb104
==================================================
MINIDUMP: add memory region
MINIDUMP: memory region mem_base: 0x45ebb104
MINIDUMP: memory region length: ed
MINIDUMP: memory region desc : CMM Script
MINIDUMP: memory region filename : load.cmm
==================================================
RawDump Free space:0x3fff08, Dump start address:0x5d10000c, size 0x4fb18
RawDump Free space:0x3b03f0, Dump start address:0x5d22a7c0, size 0x1d5848
RawDump Free space:0x1daba8, Dump start address:0x5d20000c, size 0x2a7b4
RawDump successfully, Reset the device

五、minidump.gz

在小米项目中,blackbox的需求实现时,会将rawdump中的minidump内容保存一份存到/data/vendor/diag/minidump.gz中。
我们可以通过脚本解析minidump.gz转化成可读的日志文件。下面介绍一下minidump.gz的解析方法: