[Android稳定性] 第009篇 [问题篇] 数组越界导致的内核panic

0. 问题现象

收到研发提供的反馈,部分机器插着usb后出现死机。

1. 问题分析

1.1 dmesg_TZ.txt

关键日志如下(dmesg_TZ.txt节选):
[  111.851460][ T2674] CPU: 6 PID: 2674 Comm: android.hardwar Tainted: G        W  OE      6.1.90-android14-11-maybe-dirty-qki-consolidate #1
[ 111.851463][ T2674] Hardware name: Qualcomm Technologies, Inc. Blair QRD (DT)
[ 111.851465][ T2674] pstate: a0400005 (NzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[ 111.851467][ T2674] pc : power_operation_mode_show+0x48/0x50
[ 111.851476][ T2674] lr : dev_attr_show+0x38/0x74
[ 111.851481][ T2674] sp : ffffffc01ee3bbe0
[ 111.851482][ T2674] x29: ffffffc01ee3bbe0 x28: ffffff80af158040 x27: 000000007ffff001
[ 111.851486][ T2674] x26: 0000000000000001 x25: 0000000000000000 x24: ffffff80bce5d818
[ 111.851489][ T2674] x23: ffffffc0096db730 x22: ffffff80c4c1ac00 x21: ffffff80cabd1000
[ 111.851492][ T2674] x20: ffffffc00a63c200 x19: ffffff80bce5d7f0 x18: ffffffc01ef95070
[ 111.851495][ T2674] x17: 00000000df43c25c x16: 00000000df43c25c x15: 00000000dfc6ce6f
[ 111.851498][ T2674] x14: 000000000b9ee1fe x13: 000000009483dc41 x12: ffffff80af158c00
[ 111.851501][ T2674] x11: ffffff80cabd1000 x10: 0000000000000000 x9 : ffffffc008982c70
[ 111.851504][ T2674] x8 : 00000000fffffffd x7 : 0000000000000000 x6 : 000000000000003f
[ 111.851507][ T2674] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004
[ 111.851510][ T2674] x2 : ffffff80cabd1000 x1 : ffffffc00a63c200 x0 : ffffff8084faa008
[ 111.851513][ T2674] Call trace:
[ 111.851514][ T2674] power_operation_mode_show+0x48/0x50
[ 111.851518][ T2674] dev_attr_show+0x38/0x74
[ 111.851521][ T2674] sysfs_kf_seq_show+0xd8/0x160
[ 111.851526][ T2674] kernfs_seq_show+0x4c/0x60
[ 111.851528][ T2674] seq_read_iter+0x15c/0x4f0
[ 111.851532][ T2674] kernfs_fop_read_iter+0x70/0x1f8
[ 111.851535][ T2674] vfs_read+0x1dc/0x2b8
[ 111.851539][ T2674] ksys_read+0x78/0xe8
[ 111.851542][ T2674] __arm64_sys_read+0x1c/0x2c
[ 111.851545][ T2674] invoke_syscall+0x58/0x114
[ 111.851548][ T2674] el0_svc_common+0xc4/0x118
[ 111.851551][ T2674] do_el0_svc+0x2c/0xb8
[ 111.851553][ T2674] el0_svc+0x30/0x9c
[ 111.851556][ T2674] el0t_64_sync_handler+0x68/0xb4
[ 111.851559][ T2674] el0t_64_sync+0x1a4/0x1a8
[ 111.851563][ T2674] Code: 93407c00 a8c17bfd f85f8e5e d65f03c0 (d42aa240)
[ 111.858365][ T2674] ---[ end trace 0000000000000000 ]---
[ 111.863691][ T2674] Kernel panic - not syncing: BRK handler: Fatal exception

基本定位为power_operation_mode_show函数的问题,接下去使用trace32恢复现场

1.2 trace32分析

导入cpu寄存器信息/symbols/源码后,查看堆栈信息

这个函数就两行代码,逐行分析一下,我们可以知道参数struct device *dev的地址为0xffffff8084faa008

struct typec_port *port的地址是通过to_typec_port函数转换得来,查看函数定义:

这个是通过container_of函数来获取struct typec_port的地址的,所以查看struct typec_port的定义

可以看到dev是结构体的第二位成员,第一位是一个unsigned int类型,在64位系统中占4字节,这里涉及到字节对齐的概念,id只占用4字节,但是编译器会补上4字节来进行8字节对齐。所以dev的偏移为0x8

这时候我们就能得到typec_port的地址了,也就是0xffffff8084faa000

typec_port地址 = dev地址 - 0x8
= 0xffffff8084faa008 - 0x8 = 0xffffff8084faa000

使用trace32查询0xffffff8084faa000

我们发现pwr_opmode颜色标红,数值也很明显不对,这是一个补码!!!!

那这个补码对应的原码是多少呢?我们来计算一下

4294967293

= 1111_1111_1111_1111_1111_1111_1111_1101

对于负数,其绝对值 = 补码按位取反 + 1

= -(0000_0000_0000_0000_0000_0000_0000_0010 + 1)

= -0b11

=-3

这是一个负值,而它被直接当作数组下标使用,造成了数组越界,这就是出现问题的根本原因

2. 解决方案

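charger模块修正对pwr_opmode的错误计算:原代码直接把 pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT 传给typec_set_pwr_opmode,当typec_mode小于QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT时结果为负值(本例中为-3);修改后按typec_mode所在区间分别处理,传入的值不再为负。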

修改内容如下(diff):
diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c
index f03ca27..db116d3 100644
--- a/drivers/power/supply/qcom/smb5-lib.c
+++ b/drivers/power/supply/qcom/smb5-lib.c
@@ -2139,6 +2139,7 @@
bool usb_online, dc_online;
u8 stat;
int rc, suspend = 0, input_present = 0;
+ int soc = 0;

if (chg->fake_chg_status_on_debug_batt) {
rc = smblib_get_prop_from_bms(chg, SMB5_QG_DEBUG_BATTERY,
@@ -2152,6 +2153,7 @@
}
}

+#if 0
rc = smblib_get_prop_batt_health(chg, &pval);
if (rc < 0) {
smblib_err(chg, "Couldn't get batt health rc=%d\n", rc);
@@ -2167,6 +2169,7 @@
val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
return 0;
}
+#endif

/*
* If SOC = 0 and we are discharging with input connected, report
@@ -2174,6 +2177,7 @@
*/
smblib_is_input_present(chg, &input_present);
rc = smblib_get_prop_from_bms(chg, SMB5_QG_CAPACITY, &pval.intval);
+ soc = pval.intval;
if (!rc && pval.intval == 0 && input_present) {
rc = smblib_get_prop_from_bms(chg, SMB5_QG_CURRENT_NOW,
&pval.intval);
@@ -2240,6 +2244,9 @@
stat = stat & BATTERY_CHARGER_STATUS_MASK;

if (!usb_online && !dc_online) {
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ return rc;
+#if 0
switch (stat) {
case TERMINATE_CHARGE:
fallthrough;
@@ -2251,6 +2258,7 @@
break;
}
return rc;
+#endif
}

switch (stat) {
@@ -2266,12 +2274,24 @@
case TERMINATE_CHARGE:
fallthrough;
case INHIBIT_CHARGE:
- val->intval = POWER_SUPPLY_STATUS_FULL;
+ if (usb_online) {
+ if (soc > 99) {
+ val->intval = POWER_SUPPLY_STATUS_FULL;
+ } else {
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ }
+ } else {
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ }
break;
case DISABLE_CHARGE:
fallthrough;
case PAUSE_CHARGE:
- val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ if (!usb_online) {
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+ } else {
+ val->intval = POWER_SUPPLY_STATUS_CHARGING;
+ }
break;
default:
val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
@@ -2302,6 +2322,12 @@
return 0;
}

+ if (usb_online && (chg->fake_batt_status == POWER_SUPPLY_STATUS_DISCHARGING
+ || chg->fake_batt_status == POWER_SUPPLY_STATUS_CHARGING)) {
+ val->intval = chg->fake_batt_status;
+ return 0;
+ }
+
rc = smblib_read(chg, BATTERY_CHARGER_STATUS_5_REG, &stat);
if (rc < 0) {
smblib_err(chg, "Couldn't read BATTERY_CHARGER_STATUS_2 rc=%d\n",
@@ -2312,8 +2338,8 @@
stat &= ENABLE_TRICKLE_BIT | ENABLE_PRE_CHARGING_BIT |
ENABLE_FULLON_MODE_BIT;

- if (!stat)
- val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+ if (!stat && !usb_online)
+ val->intval = POWER_SUPPLY_STATUS_DISCHARGING;

return 0;
}
@@ -2389,22 +2415,26 @@
}
}

- rc = smblib_read(chg, BATTERY_CHARGER_STATUS_7_REG, &stat);
+ rc = smblib_get_prop_from_bms(chg, SMB5_QG_TEMP, &pval.intval);
if (rc < 0) {
- smblib_err(chg, "Couldn't read BATTERY_CHARGER_STATUS_2 rc=%d\n",
- rc);
- return rc;
+ pr_err("Couldn't read batt temp prop rc=%d\n", rc);
+ return -EINVAL;
}
- if (stat & BAT_TEMP_STATUS_TOO_COLD_BIT)
- val->intval = POWER_SUPPLY_HEALTH_COLD;
- else if (stat & BAT_TEMP_STATUS_TOO_HOT_BIT)
+
+ pval.intval = pval.intval / 10;
+ if (pval.intval >= 60) {
val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
- else if (stat & BAT_TEMP_STATUS_COLD_SOFT_BIT)
- val->intval = POWER_SUPPLY_HEALTH_COOL;
- else if (stat & BAT_TEMP_STATUS_HOT_SOFT_BIT)
+ } else if (pval.intval >= 58 && pval.intval < 60) {
+ val->intval = POWER_SUPPLY_HEALTH_HOT;
+ } else if (pval.intval >= 45 && pval.intval < 58) {
val->intval = POWER_SUPPLY_HEALTH_WARM;
- else
+ } else if (pval.intval >= 15 && pval.intval < 45) {
val->intval = POWER_SUPPLY_HEALTH_GOOD;
+ } else if (pval.intval >= 0 && pval.intval < 15) {
+ val->intval = POWER_SUPPLY_HEALTH_COOL;
+ } else if (pval.intval < 0) {
+ val->intval = POWER_SUPPLY_HEALTH_COLD;
+ }

done:
return rc;
@@ -2591,13 +2621,20 @@
int smblib_set_prop_batt_status(struct smb_charger *chg,
const union power_supply_propval *val)
{
+#if 0
/* Faking battery full */
if (val->intval == POWER_SUPPLY_STATUS_FULL)
chg->fake_batt_status = val->intval;
else
chg->fake_batt_status = -EINVAL;
+#endif
+ if (val->intval < 0) {
+ chg->fake_batt_status = -EINVAL;
+ } else {
+ chg->fake_batt_status = val->intval;
+ }

- power_supply_changed(chg->batt_psy);
+ //power_supply_changed(chg->batt_psy);

return 0;
}
@@ -6017,6 +6054,7 @@
vote(chg->awake_votable, PL_DELAY_VOTER, true, 0);
schedule_delayed_work(&chg->pl_enable_work,
msecs_to_jiffies(PL_DELAY_MS));
+ chg->float_retry_flag = 1;
} else {
/* Disable SW Thermal Regulation */
rc = smblib_set_sw_thermal_regulation(chg, false);
@@ -6074,6 +6112,7 @@
smblib_err(chg, "Couldn't disable DPDM rc=%d\n", rc);

smblib_update_usb_type(chg);
+ chg->float_retry_flag = 0;
}

if (chg->connector_type == QTI_POWER_SUPPLY_CONNECTOR_MICRO_USB)
@@ -6100,9 +6139,32 @@
return IRQ_HANDLED;
}

+static void smblib_float_retry_work(struct work_struct *work)
+{
+ int rc = 0;
+ struct smb_charger *chg = container_of(work, struct smb_charger,
+ float_retry_work.work);
+
+ rc = smblib_request_dpdm(chg, false);
+ if (rc < 0)
+ smblib_err(chg, "Couldn't to disable DPDM rc=%d\n", rc);
+
+ msleep(100);
+ smblib_rerun_apsd_if_required(chg);
+
+ chg->float_retry_flag = 0;
+ smblib_dbg(chg, PR_PARALLEL, "float detected due to slow plug \n");
+}
+
+#define FLOAT_RETRY_DELAY 8000 /*8s*/
static void smblib_handle_slow_plugin_timeout(struct smb_charger *chg,
bool rising)
{
+ if (rising && chg->float_retry_flag)
+ schedule_delayed_work(&chg->float_retry_work, msecs_to_jiffies(FLOAT_RETRY_DELAY));
+ else if (!rising)
+ cancel_delayed_work_sync(&chg->float_retry_work);
+
smblib_dbg(chg, PR_INTERRUPT, "IRQ: slow-plugin-timeout %s\n",
rising ? "rising" : "falling");
}
@@ -8833,6 +8895,7 @@
INIT_DELAYED_WORK(&chg->lpd_ra_open_work, smblib_lpd_ra_open_work);
INIT_DELAYED_WORK(&chg->lpd_detach_work, smblib_lpd_detach_work);
INIT_DELAYED_WORK(&chg->raise_qc3_vbus_work, smblib_raise_qc3_vbus_work);
+ INIT_DELAYED_WORK(&chg->float_retry_work, smblib_float_retry_work);
INIT_DELAYED_WORK(&chg->thermal_regulation_work,
smblib_thermal_regulation_work);
INIT_DELAYED_WORK(&chg->usbov_dbc_work, smblib_usbov_dbc_work);
@@ -8892,6 +8955,7 @@
chg->dr_mode = TYPEC_PORT_DRP;
chg->raise_vbus_to_detect = false;
chg->qc2_unsupported = false;
+ chg->float_retry_flag = 1;
apsd_result = smblib_update_usb_type(chg);

switch (chg->mode) {
@@ -9016,6 +9080,7 @@
cancel_delayed_work_sync(&chg->lpd_ra_open_work);
cancel_delayed_work_sync(&chg->lpd_detach_work);
cancel_delayed_work_sync(&chg->raise_qc3_vbus_work);
+ cancel_delayed_work_sync(&chg->float_retry_work);
cancel_delayed_work_sync(&chg->thermal_regulation_work);
cancel_delayed_work_sync(&chg->usbov_dbc_work);
cancel_delayed_work_sync(&chg->role_reversal_check);
diff --git a/drivers/power/supply/qcom/smb5-lib.h b/drivers/power/supply/qcom/smb5-lib.h
index cb194cb..066eb8e 100644
--- a/drivers/power/supply/qcom/smb5-lib.h
+++ b/drivers/power/supply/qcom/smb5-lib.h
@@ -522,6 +522,7 @@
struct delayed_work pr_lock_clear_work;
struct delayed_work role_reversal_check;
struct delayed_work raise_qc3_vbus_work;
+ struct delayed_work float_retry_work;

struct alarm lpd_recheck_timer;
struct alarm moisture_protection_alarm;
@@ -690,6 +691,7 @@
bool support_ffc;
bool qc2_unsupported;
int dpdm_qc3p0_flag;
+ int float_retry_flag;
};

int smblib_read(struct smb_charger *chg, u16 addr, u8 *val);
diff --git a/drivers/usb/pd/policy_engine.c b/drivers/usb/pd/policy_engine.c
index 065c0e1..8241b07 100644
--- a/drivers/usb/pd/policy_engine.c
+++ b/drivers/usb/pd/policy_engine.c
@@ -671,8 +671,13 @@
start_usb_peripheral(pd);
typec_set_data_role(pd->typec_port, TYPEC_DEVICE);
typec_set_pwr_role(pd->typec_port, TYPEC_SINK);
- typec_set_pwr_opmode(pd->typec_port,
- pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+ if (pd->typec_mode <= QTI_POWER_SUPPLY_TYPEC_NONE) {
+ typec_set_pwr_opmode(pd->typec_port, TYPEC_PWR_MODE_USB);
+ } else if (pd->typec_mode < QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT) {
+ typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_NONE);
+ } else {
+ typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+ }
if (!pd->partner) {
memset(&pd->partner_identity, 0, sizeof(pd->partner_identity));
pd->partner_desc.usb_pd = false;
@@ -2833,8 +2838,13 @@

typec_set_pwr_role(pd->typec_port, TYPEC_SINK);
if (!pd->partner) {
- typec_set_pwr_opmode(pd->typec_port,
- pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+ if (pd->typec_mode <= QTI_POWER_SUPPLY_TYPEC_NONE) {
+ typec_set_pwr_opmode(pd->typec_port, TYPEC_PWR_MODE_USB);
+ } else if (pd->typec_mode < QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT) {
+ typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_NONE);
+ } else {
+ typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+ }
memset(&pd->partner_identity, 0, sizeof(pd->partner_identity));
pd->partner_desc.usb_pd = false;
pd->partner_desc.accessory = TYPEC_ACCESSORY_NONE;
@@ -4011,8 +4021,7 @@
}

if (val.intval == POWER_SUPPLY_TYPE_USB ||
- val.intval == POWER_SUPPLY_TYPE_USB_CDP ||
- val.intval == QTI_POWER_SUPPLY_TYPE_USB_FLOAT) {
+ val.intval == POWER_SUPPLY_TYPE_USB_CDP) {
usbpd_dbg(&pd->dev, "typec mode:%d type:%d\n",
typec_mode, val.intval);
pd->typec_mode = typec_mode;
@@ -4066,8 +4075,7 @@
}

if (val.intval == POWER_SUPPLY_TYPE_USB ||
- val.intval == POWER_SUPPLY_TYPE_USB_CDP ||
- val.intval == QTI_POWER_SUPPLY_TYPE_USB_FLOAT)
+ val.intval == POWER_SUPPLY_TYPE_USB_CDP)
queue_work(pd->wq, &pd->start_periph_work);
}
return;

4. 总结