0. watchdog的概念 Watchdog主要应用于嵌入式系统,用于系统出现严重故障(如内核死锁,进入死循环,CPU跑飞等)不能恢复时,在无人为介入的情况下可以自动重新启动系统。 在传统Linux 内核下, watchdog的基本工作原理是:当watchdog启动后(即/dev/watchdog 设备被打开后),如果在某一设定的时间间隔内/dev/watchdog没有被执行写操作, 硬件watchdog电路或软件定时器就会重新启动系统。 Watchdog根据实现方式又可以分为硬件watchdog和软件watchdog。硬件watchdog必须有硬件电路支持,设备节点/dev/watchdog对应真实的物理设备。软件watchdog通过内核定时器来实现,/dev/watchdog并不对应真实的物理设备。 硬件watchdog比软件watchdog有更好的可靠性。软件watchdog最大的优势是成本低,因此在可靠性要求不是很高的一般民用产品中被广泛使用。硬件watchdog的优势是可靠性高,因此在对可靠性要求严格的工业产品中被广泛使用。 但是在高通平台Android系统中,watchdog的实现有所不同,稍后我们会分析,这里只需知道其并没有提供/dev/watchdog。 当然在系统出现严重故障不能恢复时触发Watchdog,重启系统,仅仅是一个补救措施,虽然有效,但是过于简单粗暴,用户体验不佳。 解决问题的最好方法是不让问题发生,因此我们需要针对watchdog进行分析,尽量不让问题发生。 注意Android系统中还有一套watchdog实现,也是使用软件实现的,用于检测SystemServer中各Service是否正常运行。大家不要搞混了。 如没有特别说明,本文后续提到的watchdog都特指高通平台Android系统kernel中的watchdog。
1. 高通watchdog的种类 看门狗(WD 或 WDOG)是一种固定长度计数器,使系统能够从意外的硬件或软件灾难中恢复。 除非系统定期重置看门狗定时器,否则看门狗定时器会假设发生灾难,并根据哪个看门狗触发来重置子系统或整个系统。 一般来说,看门狗的实现有多种类型,硬件看门狗、软件看门狗、吠叫、咬合等。
| 看门狗类型 | 超时时间 | Owner | Expires during | Result |
| --- | --- | --- | --- | --- |
| Nonsecure (NS) WD bark | 11s | HLOS | IRQ to HLOS | HLOS falls to Panic |
| Nonsecure (NS) WD bite | 12s | HLOS | FIQ to TZ | TZ asserts PS_HOLD |
| Secure WD bark | 6s | TZ | FIQ to TZ | TZ just pets secure WD |
| Secure WD bite | 22s | TZ | asserting PS_HOLD | PMIC resets the system |
| AOP hardware WD bark | 10ms | AOP | IRQ to AOP | AOP falls to error fatal |
| AOP hardware WD bite | 30ms | AOP | IRQ to application processor | HLOS falls to Panic |
| Software WD timeout | 10s | User tasks on SS | calls Error fatal | SS Error fatal [1][2] |
| (SS) hardware WD bark | 2.25s | Dog task on SS | FIQ to error handler | SS Error fatal/pet WD [1][2] |
| (SS) Nonmaskable interrupt (NMI) due to HW WD | 2.4s | Dog task on SS | NMI to SS | NMI on SS [1] |
| (SS) Hardware WD bite | 2.5s | Dog task on SS | IRQ to HLOS | SS hardware reset [1] |
注意:本文不涉及子系统的watchdog类型以及hardware watchdog,且专注于在开发过程中遇到的最多的watchdog,也就是Watchdog for APPS CPU。
2. 高通watchdog的实现
2.1 devicetree中watchdog的定义
wdog: qcom,wdt@f410000 {
	compatible = "qcom,msm-watchdog";
	reg = <0xf410000 0x1000>;
	reg-names = "wdt-base";
	interrupts = <0 0 IRQ_TYPE_LEVEL_HIGH>,
		     <0 1 IRQ_TYPE_LEVEL_HIGH>;
	/* No pet for more than 11 s: the dog barks (then bites) and the system restarts. */
	qcom,bark-time = <11000>;
	/* Pet the watchdog roughly every 9 s (9360 ms). */
	qcom,pet-time = <9360>;
	/* When petting, also ping the other CPUs to confirm every CPU is still responsive. */
	qcom,ipi-ping;
	/*
	 * The watchdog is able to wake the system. Without this capability the
	 * watchdog would have to be disabled on suspend and re-enabled on resume.
	 */
	qcom,wakeup-enable;
};
注意:qcom,bark-time 和 qcom,pet-time 可能已失效,需要在defconfig中通过 CONFIG_QCOM_WATCHDOG_BARK_TIME 和 CONFIG_QCOM_WATCHDOG_PET_TIME 来设置。
2.2 核心数据结构struct msm_watchdog_data
/*
 * Per-instance state of the application-processor watchdog driver.
 *
 * Field order is kept exactly as in the driver: the whole structure is
 * exported as the "KWDOGDATA" minidump region by qcom_wdt_register().
 */
struct msm_watchdog_data {
	void __iomem *base;		/* mapped "wdt-base" register window */
	struct device *dev;		/* &pdev->dev of the watchdog device */
	struct qcom_wdt_ops *ops;	/* hardware access callbacks */
	unsigned int pet_time;		/* pet period, converted with msecs_to_jiffies() */
	unsigned int bark_time;		/* bark timeout handed to ops->set_bark_time() */
	unsigned int bark_irq;		/* result of platform_get_irq(pdev, 0) */
	bool do_ipi_ping;		/* ping the other CPUs on every pet cycle */
	bool in_panic;			/* set by the panic notifier, read on restart */
	bool wakeup_irq_enable;		/* adds BIT(UNMASKED_INT_EN) when enabling */
	bool irq_ppi;			/* bark IRQ is a per-CPU interrupt */
	unsigned long long last_pet;	/* sched_clock() of the most recent pet */
	cpumask_t alive_mask;		/* cleared at registration time */
	struct mutex disable_lock;
	struct msm_watchdog_data * __percpu *wdog_cpu_dd; /* used only when irq_ppi */
	struct notifier_block panic_blk;
	struct notifier_block die_blk;
	struct notifier_block wdog_cpu_pm_nb;
	struct notifier_block restart_blk;
	bool enabled;			/* hardware watchdog has been enabled */
	bool user_pet_enabled;
	struct task_struct *watchdog_task; /* kthread running qcom_wdt_kthread() */
	struct timer_list pet_timer;	/* periodic timer that triggers a pet cycle */
	wait_queue_head_t pet_complete;	/* the pet kthread sleeps here */
	bool timer_expired;		/* wake condition #1 of the pet kthread */
	bool user_pet_complete;		/* wake condition #2 of the pet kthread */
	unsigned long long timer_fired;
	unsigned long long thread_start; /* sched_clock() when a pet cycle starts */
	unsigned long long ping_start[NR_CPUS];	/* zeroed at the start of each cycle */
	unsigned long long ping_end[NR_CPUS];	/* zeroed at the start of each cycle */
	int cpu_idle_pc_state[NR_CPUS];
	bool freeze_in_progress;	/* when set, the pet timer is not re-armed */
	spinlock_t freeze_lock;		/* guards freeze_in_progress vs. timer re-arm */
	struct timer_list user_pet_timer; /* callback: qcom_wdt_user_pet_bite() */
	bool hibernate;
};
2.3 watchdog核心操作api 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 struct qcom_wdt_ops { int (*set_bark_time)(u32 time, struct msm_watchdog_data *wdog_dd); int (*set_bite_time)(u32 time, struct msm_watchdog_data *wdog_dd); int (*reset_wdt)(struct msm_watchdog_data *wdog_dd); int (*enable_wdt)(u32 val, struct msm_watchdog_data *wdog_dd); int (*disable_wdt)(struct msm_watchdog_data *wdog_dd); int (*show_wdt_status)(struct msm_watchdog_data *wdog_dd); }; static struct qcom_wdt_ops qcom_soc_wdt_ops = { .set_bark_time = qcom_soc_set_wdt_bark, .set_bite_time = qcom_soc_set_wdt_bite, .reset_wdt = qcom_soc_reset_wdt, .enable_wdt = qcom_soc_enable_wdt, .disable_wdt = qcom_soc_disable_wdt, .show_wdt_status = qcom_soc_show_wdt_status };
1 2 3 4 5 6 7 8 9 #define WDT0_BARK_TIME 0x10 static inline int qcom_soc_set_wdt_bark (u32 time, struct msm_watchdog_data *wdog_dd) { __raw_writel((time * WDT_HZ)/1000 , wdog_dd->base + WDT0_BARK_TIME); mb(); return 0 ; }
将bark time(单位为毫秒)按 (time * WDT_HZ)/1000 换算成计数值后,写入到base+WDT0_BARK_TIME(即base+0x10)的地址,对应的物理地址就是0xf410000+0x10。
1 2 3 4 5 6 7 8 9 #define WDT0_BITE_TIME 0x14 static inline int qcom_soc_set_wdt_bite (u32 time, struct msm_watchdog_data *wdog_dd) { __raw_writel((time * WDT_HZ)/1000 , wdog_dd->base + WDT0_BITE_TIME); mb(); return 0 ; }
将bite time(单位为毫秒)按 (time * WDT_HZ)/1000 换算成计数值后,写入到base+WDT0_BITE_TIME(即base+0x14)的地址,对应的物理地址就是0xf410000+0x14。
1 2 3 4 5 6 7 8 #define WDT0_RST 0x04 static inline int qcom_soc_reset_wdt (struct msm_watchdog_data *wdog_dd) { __raw_writel(1 , wdog_dd->base + WDT0_RST); mb(); return 0 ; }
1 2 3 4 5 6 7 8 #define WDT0_EN 0x08 static inline int qcom_soc_disable_wdt (struct msm_watchdog_data *wdog_dd) { __raw_writel(0 , wdog_dd->base + WDT0_EN); mb(); return 0 ; }
1 2 3 4 5 6 7 8 #define WDT0_EN 0x08 static inline int qcom_soc_disable_wdt (struct msm_watchdog_data *wdog_dd) { __raw_writel(0 , wdog_dd->base + WDT0_EN); mb(); return 0 ; }
1 2 3 4 5 6 7 8 9 static inline int qcom_soc_show_wdt_status (struct msm_watchdog_data *wdog_dd) { dev_err(wdog_dd->dev, "Wdog - STS: 0x%x, CTL: 0x%x, BARK TIME: 0x%x, BITE TIME: 0x%x\n" , __raw_readl(wdog_dd->base + WDT0_STS), __raw_readl(wdog_dd->base + WDT0_EN), __raw_readl(wdog_dd->base + WDT0_BARK_TIME), __raw_readl(wdog_dd->base + WDT0_BITE_TIME)); return 0 ; }
2.4 watchdog的初始化入口
/*
 * Platform-driver probe for the SoC watchdog.
 *
 * Allocates the driver state (device-managed), maps the "wdt-base"
 * register resource, installs the SoC register callbacks and hands the
 * instance to the common registration code.
 *
 * Returns 0 on success or a negative errno:
 *   -ENOMEM  state allocation failed
 *   -ENODEV  the "wdt-base" memory resource is missing
 *   other    error reported by devm_ioremap_resource() or
 *            qcom_wdt_register()
 */
static int qcom_soc_wdt_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct msm_watchdog_data *wdog_dd;

	wdog_dd = devm_kzalloc(&pdev->dev, sizeof(*wdog_dd), GFP_KERNEL);
	if (!wdog_dd)
		return -ENOMEM;

	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "wdt-base");
	if (!res)
		return -ENODEV;

	/*
	 * devm_ioremap_resource() signals failure with an ERR_PTR-encoded
	 * errno, not NULL, so a NULL test would never catch a mapping error.
	 */
	wdog_dd->base = devm_ioremap_resource(&pdev->dev, res);
	if (IS_ERR(wdog_dd->base)) {
		dev_err(&pdev->dev, "%s cannot map wdog register space\n",
			__func__);
		return PTR_ERR(wdog_dd->base);
	}

	wdog_dd->ops = &qcom_soc_wdt_ops;

	return qcom_wdt_register(pdev, wdog_dd, "msm-watchdog");
}
2.5 注册watchdog
/*
 * Register a watchdog instance with the common watchdog code.
 *
 * @pdev:         platform device that owns the watchdog
 * @wdog_dd:      driver state; base and ops must already be filled in
 * @wdog_dd_name: name given to the pet kthread
 *
 * Fills the platform data, publishes the instance through the file-level
 * wdog_data pointer and the device's drvdata, creates (but does not yet
 * start) the pet kthread, runs qcom_wdt_init() and finally exports the
 * driver state as the "KWDOGDATA" minidump region. A minidump failure is
 * only logged; it does not fail registration.
 *
 * Returns 0 on success, -EINVAL on a NULL argument, or the error from
 * kthread_create() / qcom_wdt_init().
 */
int qcom_wdt_register(struct platform_device *pdev,
		      struct msm_watchdog_data *wdog_dd,
		      char *wdog_dd_name)
{
	struct md_region md_entry;
	int ret;

	if (!pdev || !wdog_dd || !wdog_dd_name) {
		pr_err("wdt_register input incorrect\n");
		return -EINVAL;
	}

	qcom_wdt_dt_to_pdata(pdev, wdog_dd);
	wdog_data = wdog_dd;
	wdog_dd->dev = &pdev->dev;
	platform_set_drvdata(pdev, wdog_dd);
	cpumask_clear(&wdog_dd->alive_mask);

	wdog_dd->watchdog_task = kthread_create(qcom_wdt_kthread, wdog_dd,
						wdog_dd_name);
	if (IS_ERR(wdog_dd->watchdog_task)) {
		ret = PTR_ERR(wdog_dd->watchdog_task);
		goto err;
	}

	ret = qcom_wdt_init(wdog_dd, pdev);
	if (ret) {
		kthread_stop(wdog_dd->watchdog_task);
		goto err;
	}

	/*
	 * Only some fields of md_entry are assigned below; clear the whole
	 * structure first so any remaining field is zero rather than
	 * whatever happened to be on the stack.
	 */
	memset(&md_entry, 0, sizeof(md_entry));
	strlcpy(md_entry.name, "KWDOGDATA", sizeof(md_entry.name));
	md_entry.virt_addr = (uintptr_t)wdog_dd;
	md_entry.phys_addr = virt_to_phys(wdog_dd);
	md_entry.size = sizeof(*wdog_dd);
	if (msm_minidump_add_region(&md_entry) < 0)
		dev_err(wdog_dd->dev, "Failed to add Wdt data in Minidump\n");

	return 0;
err:
	return ret;
}
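qcom_wdt_dt_to_pdata 函数用于填充平台数据:bark中断号通过platform_get_irq获取,而bark_time、pet_time、do_ipi_ping、wakeup_irq_enable则直接取自QCOM_WATCHDOG_*宏,并不从设备树中解析。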
1 2 3 4 5 6 7 8 9 10 11 static void qcom_wdt_dt_to_pdata (struct platform_device *pdev, struct msm_watchdog_data *pdata) { pdata->bark_irq = platform_get_irq(pdev, 0 ); pdata->irq_ppi = irq_is_percpu(pdata->bark_irq); pdata->bark_time = QCOM_WATCHDOG_BARK_TIME; pdata->pet_time = QCOM_WATCHDOG_PET_TIME; pdata->do_ipi_ping = QCOM_WATCHDOG_IPI_PING; pdata->wakeup_irq_enable = QCOM_WATCHDOG_WAKEUP_ENABLE; qcom_wdt_dump_pdata(pdata); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 static int qcom_wdt_init (struct msm_watchdog_data *wdog_dd, struct platform_device *pdev) { unsigned long delay_time; uint32_t val; int ret; void *wdog_cpu_dd_v; if (wdog_dd->irq_ppi) { wdog_dd->wdog_cpu_dd = alloc_percpu(struct msm_watchdog_data *); if (!wdog_dd->wdog_cpu_dd) { dev_err(wdog_dd->dev, "failed to allocate cpu data\n" ); return -ENOMEM; } wdog_cpu_dd_v = raw_cpu_ptr((void __percpu *)wdog_dd->wdog_cpu_dd); *((struct msm_watchdog_data **)wdog_cpu_dd_v) = wdog_dd; ret = request_percpu_irq(wdog_dd->bark_irq, qcom_wdt_ppi_bark, "apps_wdog_bark" , (void __percpu *)wdog_dd->wdog_cpu_dd); if (ret) { dev_err(wdog_dd->dev, "failed to request bark irq\n" ); free_percpu((void __percpu *)wdog_dd->wdog_cpu_dd); return ret; } } else { ret = devm_request_irq(wdog_dd->dev, wdog_dd->bark_irq, qcom_wdt_bark_handler, IRQF_TRIGGER_RISING | IRQF_NO_SUSPEND, "apps_wdog_bark" , wdog_dd); if (ret) { dev_err(wdog_dd->dev, "failed to request bark irq: %d\n" , ret); return -EINVAL; } } wdog_data->hibernate = false ; ret = register_pm_notifier(&qcom_wdt_notif_block); if (ret) return ret; delay_time = msecs_to_jiffies(wdog_dd->pet_time); wdog_dd->ops->set_bark_time(wdog_dd->bark_time, wdog_dd); wdog_dd->ops->set_bite_time(wdog_dd->bark_time + 10 * 1000 , wdog_dd); wdog_dd->panic_blk.priority = INT_MAX - 1 ; wdog_dd->panic_blk.notifier_call = qcom_wdt_panic_handler; atomic_notifier_chain_register(&panic_notifier_list, &wdog_dd->panic_blk); qcom_wdt_register_die_notifier(wdog_dd); wdog_dd->restart_blk.priority = 255 ; wdog_dd->restart_blk.notifier_call = restart_wdog_handler; register_restart_handler(&wdog_dd->restart_blk); mutex_init(&wdog_dd->disable_lock); 
init_waitqueue_head(&wdog_dd->pet_complete); wdog_dd->timer_expired = false ; wdog_dd->user_pet_complete = true ; wdog_dd->user_pet_enabled = false ; spin_lock_init(&wdog_dd->freeze_lock); wdog_dd->freeze_in_progress = false ; wake_up_process(wdog_dd->watchdog_task); timer_setup(&wdog_dd->pet_timer, qcom_wdt_pet_task_wakeup, 0 ); wdog_dd->pet_timer.expires = jiffies + delay_time; add_timer(&wdog_dd->pet_timer); timer_setup(&wdog_dd->user_pet_timer, qcom_wdt_user_pet_bite, 0 ); val = BIT(EN); if (wdog_dd->wakeup_irq_enable) val |= BIT(UNMASKED_INT_EN); ret = wdog_dd->ops->enable_wdt(val, wdog_dd); if (ret) { atomic_notifier_chain_unregister(&panic_notifier_list, &wdog_dd->panic_blk); qcom_wdt_unregister_die_notifier(wdog_dd); unregister_restart_handler(&wdog_dd->restart_blk); if (wdog_dd->irq_ppi) { free_percpu_irq(wdog_dd->bark_irq, (void __percpu *)wdog_dd->wdog_cpu_dd); free_percpu((void __percpu *)wdog_dd->wdog_cpu_dd); } del_timer_sync(&wdog_dd->pet_timer); dev_err(wdog_dd->dev, "Failed Initializing QCOM Apps Watchdog\n" ); return ret; } wdog_dd->ops->reset_wdt(wdog_dd); wdog_dd->last_pet = sched_clock(); wdog_dd->enabled = true ; qcom_wdt_init_sysfs(wdog_dd); if (wdog_dd->irq_ppi) enable_percpu_irq(wdog_dd->bark_irq, 0 ); if (!IPI_CORES_IN_LPM) { wdog_dd->wdog_cpu_pm_nb.notifier_call = qcom_wdt_cpu_pm_notify; cpu_pm_register_notifier(&wdog_dd->wdog_cpu_pm_nb); } dev_info(wdog_dd->dev, "QCOM Apps Watchdog Initialized\n" ); return 0 ; }
2.6 panic/die/restart通知链 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 #ifdef CONFIG_QCOM_WDOG_BITE_EARLY_PANIC #define WDOG_BITE_EARLY_PANIC 1 #else #define WDOG_BITE_EARLY_PANIC 0 #endif static void qcom_wdt_reset_on_oops (struct msm_watchdog_data *wdog_dd, int timeout) { wdog_dd->ops->reset_wdt(wdog_dd); wdog_dd->ops->set_bark_time((timeout + 10 ) * 1000 , wdog_dd); wdog_dd->ops->set_bite_time((timeout + 10 ) * 1000 , wdog_dd); } static int qcom_wdt_panic_handler (struct notifier_block *this, unsigned long event, void *ptr) { struct msm_watchdog_data *wdog_dd = container_of(this, struct msm_watchdog_data, panic_blk); wdog_dd->in_panic = true ; if (WDOG_BITE_EARLY_PANIC) { pr_info("Triggering early bite\n" ); qcom_wdt_trigger_bite(); } if (panic_timeout == 0 ) { wdog_dd->ops->disable_wdt(wdog_dd); } else { qcom_wdt_reset_on_oops(wdog_dd, panic_timeout); } return NOTIFY_DONE; }
1 2 3 4 5 6 7 8 9 static int qcom_wdt_die_handler (struct notifier_block *this, unsigned long val, void *data) { struct msm_watchdog_data *wdog_dd = container_of(this, struct msm_watchdog_data, die_blk); qcom_wdt_reset_on_oops(wdog_dd, 5 ); return NOTIFY_DONE; }
2.6.3 restart_wdog_handler
#ifdef CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC
#define WDOG_BITE_ON_PANIC 1
#else
#define WDOG_BITE_ON_PANIC 0
#endif

/*
 * Restart handler: when CONFIG_QCOM_FORCE_WDOG_BITE_ON_PANIC is set and
 * the panic notifier has already run for this instance, trigger a
 * watchdog bite; otherwise do nothing.
 *
 * Always returns NOTIFY_DONE.
 */
static int restart_wdog_handler(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	struct msm_watchdog_data *wdog_dd =
		container_of(this, struct msm_watchdog_data, restart_blk);

	if (!WDOG_BITE_ON_PANIC || !wdog_dd->in_panic)
		return NOTIFY_DONE;

	pr_info("Triggering late bite\n");
	qcom_wdt_trigger_bite();

	return NOTIFY_DONE;
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 static __ref int qcom_wdt_kthread (void *arg) { struct msm_watchdog_data *wdog_dd = arg; unsigned long delay_time = 0 ; struct sched_param param = {.sched_priority = MAX_RT_PRIO-1 }; int ret, cpu; sched_setscheduler(current, SCHED_FIFO, ¶m); while (!kthread_should_stop()) { do { ret = wait_event_interruptible(wdog_dd->pet_complete, wdog_dd->timer_expired); } while (ret != 0 ); wdog_dd->thread_start = sched_clock(); for_each_cpu(cpu, cpu_present_mask) wdog_dd->ping_start[cpu] = wdog_dd->ping_end[cpu] = 0 ; if (wdog_dd->do_ipi_ping) qcom_wdt_ping_other_cpus(wdog_dd); do { ret = wait_event_interruptible(wdog_dd->pet_complete, wdog_dd->user_pet_complete); } while (ret != 0 ); wdog_dd->timer_expired = false ; wdog_dd->user_pet_complete = !wdog_dd->user_pet_enabled; if (wdog_dd->enabled) { delay_time = msecs_to_jiffies(wdog_dd->pet_time); wdog_dd->ops->reset_wdt(wdog_dd); wdog_dd->last_pet = sched_clock(); } if (!kthread_should_stop()) { spin_lock(&wdog_dd->freeze_lock); if (!wdog_dd->freeze_in_progress) mod_timer(&wdog_dd->pet_timer, jiffies + delay_time); spin_unlock(&wdog_dd->freeze_lock); } record_irq_count(); } return 0 ; }