0. 问题现象

收到研发提供的反馈,部分机器插着usb后出现死机。

1. 问题分析

1.1 dmesg_TZ.txt

[  111.851460][ T2674] CPU: 6 PID: 2674 Comm: android.hardwar Tainted: G        W  OE      6.1.90-android14-11-maybe-dirty-qki-consolidate #1
[  111.851463][ T2674] Hardware name: Qualcomm Technologies, Inc. Blair QRD (DT)
[  111.851465][ T2674] pstate: a0400005 (NzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[  111.851467][ T2674] pc : power_operation_mode_show+0x48/0x50
[  111.851476][ T2674] lr : dev_attr_show+0x38/0x74
[  111.851481][ T2674] sp : ffffffc01ee3bbe0
[  111.851482][ T2674] x29: ffffffc01ee3bbe0 x28: ffffff80af158040 x27: 000000007ffff001
[  111.851486][ T2674] x26: 0000000000000001 x25: 0000000000000000 x24: ffffff80bce5d818
[  111.851489][ T2674] x23: ffffffc0096db730 x22: ffffff80c4c1ac00 x21: ffffff80cabd1000
[  111.851492][ T2674] x20: ffffffc00a63c200 x19: ffffff80bce5d7f0 x18: ffffffc01ef95070
[  111.851495][ T2674] x17: 00000000df43c25c x16: 00000000df43c25c x15: 00000000dfc6ce6f
[  111.851498][ T2674] x14: 000000000b9ee1fe x13: 000000009483dc41 x12: ffffff80af158c00
[  111.851501][ T2674] x11: ffffff80cabd1000 x10: 0000000000000000 x9 : ffffffc008982c70
[  111.851504][ T2674] x8 : 00000000fffffffd x7 : 0000000000000000 x6 : 000000000000003f
[  111.851507][ T2674] x5 : 0000000000000040 x4 : 0000000000000000 x3 : 0000000000000004
[  111.851510][ T2674] x2 : ffffff80cabd1000 x1 : ffffffc00a63c200 x0 : ffffff8084faa008
[  111.851513][ T2674] Call trace:
[  111.851514][ T2674]  power_operation_mode_show+0x48/0x50
[  111.851518][ T2674]  dev_attr_show+0x38/0x74
[  111.851521][ T2674]  sysfs_kf_seq_show+0xd8/0x160
[  111.851526][ T2674]  kernfs_seq_show+0x4c/0x60
[  111.851528][ T2674]  seq_read_iter+0x15c/0x4f0
[  111.851532][ T2674]  kernfs_fop_read_iter+0x70/0x1f8
[  111.851535][ T2674]  vfs_read+0x1dc/0x2b8
[  111.851539][ T2674]  ksys_read+0x78/0xe8
[  111.851542][ T2674]  __arm64_sys_read+0x1c/0x2c
[  111.851545][ T2674]  invoke_syscall+0x58/0x114
[  111.851548][ T2674]  el0_svc_common+0xc4/0x118
[  111.851551][ T2674]  do_el0_svc+0x2c/0xb8
[  111.851553][ T2674]  el0_svc+0x30/0x9c
[  111.851556][ T2674]  el0t_64_sync_handler+0x68/0xb4
[  111.851559][ T2674]  el0t_64_sync+0x1a4/0x1a8
[  111.851563][ T2674] Code: 93407c00 a8c17bfd f85f8e5e d65f03c0 (d42aa240) 
[  111.858365][ T2674] ---[ end trace 0000000000000000 ]---
[  111.863691][ T2674] Kernel panic - not syncing: BRK handler: Fatal exception

基本定位为power_operation_mode_show函数的问题,接下来使用trace32恢复现场

1.2 trace32分析

导入cpu寄存器信息/symbols/源码后,查看堆栈信息

这个函数就两行代码,逐行分析一下,我们可以知道参数struct device *dev的地址为0xffffff8084faa008(与上面dmesg中崩溃现场寄存器x0的值一致)

struct typec_port *port的地址是通过to_typec_port函数转换得来,查看函数定义:

这个是通过container_of函数来获取struct typec_port的地址的,所以查看struct typec_port的定义

可以看到dev是结构体的第二个成员,第一个成员id是unsigned int类型,在64位系统中占4字节。这里涉及到字节对齐的概念:id只占用4字节,但是编译器会在它后面补上4字节来进行8字节对齐,所以dev的偏移为0x8

这时候我们就能得到typec_port的地址了,也就是0xffffff8084faa000

typec_port地址 = dev地址 - 0x8
= 0xffffff8084faa008 - 0x8 = 0xffffff8084faa000

使用trace32查询0xffffff8084faa000

我们发现pwr_opmode颜色标红,数值也很明显不对,这是一个补码!!!!

那这个补码对应的原码是多少呢?我们来计算一下

4294967293

= 1111_1111_1111_1111_1111_1111_1111_1101

负数的绝对值=补码按位取反+1(符号用前面的负号表示)

= -(0000_0000_0000_0000_0000_0000_0000_0010 + 1)

= -0b11

=-3

这是一个负值,它被直接当作数组下标使用,造成了数组的越界,这也是出现问题的根本原因

2. 解决方案

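charger模块修正对这个pwr_opmode的误判:在policy_engine.c中调用typec_set_pwr_opmode之前,先判断pd->typec_mode的取值范围,避免pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT相减得到负值。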

diff --git a/drivers/power/supply/qcom/smb5-lib.c b/drivers/power/supply/qcom/smb5-lib.c
index f03ca27..db116d3 100644
--- a/drivers/power/supply/qcom/smb5-lib.c
+++ b/drivers/power/supply/qcom/smb5-lib.c
@@ -2139,6 +2139,7 @@
 	bool usb_online, dc_online;
 	u8 stat;
 	int rc, suspend = 0, input_present = 0;
+	int soc = 0;
 
 	if (chg->fake_chg_status_on_debug_batt) {
 		rc = smblib_get_prop_from_bms(chg, SMB5_QG_DEBUG_BATTERY,
@@ -2152,6 +2153,7 @@
 		}
 	}
 
+#if 0
 	rc = smblib_get_prop_batt_health(chg, &pval);
 	if (rc < 0) {
 		smblib_err(chg, "Couldn't get batt health rc=%d\n", rc);
@@ -2167,6 +2169,7 @@
 		val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
 		return 0;
 	}
+#endif
 
 	/*
 	 * If SOC = 0 and we are discharging with input connected, report
@@ -2174,6 +2177,7 @@
 	 */
 	smblib_is_input_present(chg, &input_present);
 	rc = smblib_get_prop_from_bms(chg, SMB5_QG_CAPACITY, &pval.intval);
+	soc = pval.intval;
 	if (!rc && pval.intval == 0 && input_present) {
 		rc = smblib_get_prop_from_bms(chg, SMB5_QG_CURRENT_NOW,
 				&pval.intval);
@@ -2240,6 +2244,9 @@
 	stat = stat & BATTERY_CHARGER_STATUS_MASK;
 
 	if (!usb_online && !dc_online) {
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		return rc;
+#if 0
 		switch (stat) {
 		case TERMINATE_CHARGE:
 			fallthrough;
@@ -2251,6 +2258,7 @@
 			break;
 		}
 		return rc;
+#endif
 	}
 
 	switch (stat) {
@@ -2266,12 +2274,24 @@
 	case TERMINATE_CHARGE:
 		fallthrough;
 	case INHIBIT_CHARGE:
-		val->intval = POWER_SUPPLY_STATUS_FULL;
+		if (usb_online) {
+			if (soc > 99) {
+				val->intval = POWER_SUPPLY_STATUS_FULL;
+			} else {
+				val->intval = POWER_SUPPLY_STATUS_CHARGING;
+			}
+		} else {
+	 		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		}
 		break;
 	case DISABLE_CHARGE:
 		fallthrough;
 	case PAUSE_CHARGE:
-		val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+		if (!usb_online) {
+			val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
+		} else {
+			val->intval = POWER_SUPPLY_STATUS_CHARGING;
+		}
 		break;
 	default:
 		val->intval = POWER_SUPPLY_STATUS_UNKNOWN;
@@ -2302,6 +2322,12 @@
 		return 0;
 	}
 
+	if (usb_online && (chg->fake_batt_status == POWER_SUPPLY_STATUS_DISCHARGING
+		|| chg->fake_batt_status == POWER_SUPPLY_STATUS_CHARGING)) {
+		val->intval = chg->fake_batt_status;
+		return 0;
+	}
+
 	rc = smblib_read(chg, BATTERY_CHARGER_STATUS_5_REG, &stat);
 	if (rc < 0) {
 		smblib_err(chg, "Couldn't read BATTERY_CHARGER_STATUS_2 rc=%d\n",
@@ -2312,8 +2338,8 @@
 	stat &= ENABLE_TRICKLE_BIT | ENABLE_PRE_CHARGING_BIT |
 						ENABLE_FULLON_MODE_BIT;
 
-	if (!stat)
-		val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING;
+	if (!stat && !usb_online)
+		val->intval = POWER_SUPPLY_STATUS_DISCHARGING;
 
 	return 0;
 }
@@ -2389,22 +2415,26 @@
 		}
 	}
 
-	rc = smblib_read(chg, BATTERY_CHARGER_STATUS_7_REG, &stat);
+	rc = smblib_get_prop_from_bms(chg, SMB5_QG_TEMP, &pval.intval);
 	if (rc < 0) {
-		smblib_err(chg, "Couldn't read BATTERY_CHARGER_STATUS_2 rc=%d\n",
-			rc);
-		return rc;
+		pr_err("Couldn't read batt temp prop rc=%d\n", rc);
+		return -EINVAL;
 	}
-	if (stat & BAT_TEMP_STATUS_TOO_COLD_BIT)
-		val->intval = POWER_SUPPLY_HEALTH_COLD;
-	else if (stat & BAT_TEMP_STATUS_TOO_HOT_BIT)
+
+	pval.intval = pval.intval / 10;
+	if (pval.intval >= 60) {
 		val->intval = POWER_SUPPLY_HEALTH_OVERHEAT;
-	else if (stat & BAT_TEMP_STATUS_COLD_SOFT_BIT)
-		val->intval = POWER_SUPPLY_HEALTH_COOL;
-	else if (stat & BAT_TEMP_STATUS_HOT_SOFT_BIT)
+	} else if (pval.intval >= 58 && pval.intval < 60) {
+		val->intval = POWER_SUPPLY_HEALTH_HOT;
+	} else if (pval.intval >= 45 && pval.intval < 58) {
 		val->intval = POWER_SUPPLY_HEALTH_WARM;
-	else
+	} else if (pval.intval >= 15 && pval.intval < 45) {
 		val->intval = POWER_SUPPLY_HEALTH_GOOD;
+	} else if (pval.intval >= 0 && pval.intval < 15) {
+		val->intval = POWER_SUPPLY_HEALTH_COOL;
+	} else if (pval.intval < 0) {
+		val->intval = POWER_SUPPLY_HEALTH_COLD;
+	}
 
 done:
 	return rc;
@@ -2591,13 +2621,20 @@
 int smblib_set_prop_batt_status(struct smb_charger *chg,
 				  const union power_supply_propval *val)
 {
+#if 0
 	/* Faking battery full */
 	if (val->intval == POWER_SUPPLY_STATUS_FULL)
 		chg->fake_batt_status = val->intval;
 	else
 		chg->fake_batt_status = -EINVAL;
+#endif
+	if (val->intval < 0) {
+		chg->fake_batt_status = -EINVAL;
+	} else {
+		chg->fake_batt_status = val->intval;
+	}
 
-	power_supply_changed(chg->batt_psy);
+	//power_supply_changed(chg->batt_psy);
 
 	return 0;
 }
@@ -6017,6 +6054,7 @@
 		vote(chg->awake_votable, PL_DELAY_VOTER, true, 0);
 		schedule_delayed_work(&chg->pl_enable_work,
 					msecs_to_jiffies(PL_DELAY_MS));
+		chg->float_retry_flag = 1;
 	} else {
 		/* Disable SW Thermal Regulation */
 		rc = smblib_set_sw_thermal_regulation(chg, false);
@@ -6074,6 +6112,7 @@
 			smblib_err(chg, "Couldn't disable DPDM rc=%d\n", rc);
 
 		smblib_update_usb_type(chg);
+		chg->float_retry_flag = 0;
 	}
 
 	if (chg->connector_type == QTI_POWER_SUPPLY_CONNECTOR_MICRO_USB)
@@ -6100,9 +6139,32 @@
 	return IRQ_HANDLED;
 }
 
+static void smblib_float_retry_work(struct work_struct *work)
+{
+	int rc = 0;
+	struct smb_charger *chg = container_of(work, struct smb_charger,
+							float_retry_work.work);
+
+	rc = smblib_request_dpdm(chg, false);
+	if (rc < 0)
+		smblib_err(chg, "Couldn't to disable DPDM rc=%d\n", rc);
+
+	msleep(100);
+	smblib_rerun_apsd_if_required(chg);
+
+	chg->float_retry_flag = 0;
+	smblib_dbg(chg, PR_PARALLEL, "float detected due to slow plug \n");
+}
+
+#define FLOAT_RETRY_DELAY 8000 /*8s*/
 static void smblib_handle_slow_plugin_timeout(struct smb_charger *chg,
 					      bool rising)
 {
+	if (rising && chg->float_retry_flag)
+		schedule_delayed_work(&chg->float_retry_work, msecs_to_jiffies(FLOAT_RETRY_DELAY));
+	else if (!rising)
+		cancel_delayed_work_sync(&chg->float_retry_work);
+
 	smblib_dbg(chg, PR_INTERRUPT, "IRQ: slow-plugin-timeout %s\n",
 		   rising ? "rising" : "falling");
 }
@@ -8833,6 +8895,7 @@
 	INIT_DELAYED_WORK(&chg->lpd_ra_open_work, smblib_lpd_ra_open_work);
 	INIT_DELAYED_WORK(&chg->lpd_detach_work, smblib_lpd_detach_work);
 	INIT_DELAYED_WORK(&chg->raise_qc3_vbus_work, smblib_raise_qc3_vbus_work);
+	INIT_DELAYED_WORK(&chg->float_retry_work, smblib_float_retry_work);
 	INIT_DELAYED_WORK(&chg->thermal_regulation_work,
 					smblib_thermal_regulation_work);
 	INIT_DELAYED_WORK(&chg->usbov_dbc_work, smblib_usbov_dbc_work);
@@ -8892,6 +8955,7 @@
 	chg->dr_mode = TYPEC_PORT_DRP;
 	chg->raise_vbus_to_detect = false;
 	chg->qc2_unsupported = false;
+	chg->float_retry_flag = 1;
 	apsd_result = smblib_update_usb_type(chg);
 
 	switch (chg->mode) {
@@ -9016,6 +9080,7 @@
 		cancel_delayed_work_sync(&chg->lpd_ra_open_work);
 		cancel_delayed_work_sync(&chg->lpd_detach_work);
 		cancel_delayed_work_sync(&chg->raise_qc3_vbus_work);
+		cancel_delayed_work_sync(&chg->float_retry_work);
 		cancel_delayed_work_sync(&chg->thermal_regulation_work);
 		cancel_delayed_work_sync(&chg->usbov_dbc_work);
 		cancel_delayed_work_sync(&chg->role_reversal_check);
diff --git a/drivers/power/supply/qcom/smb5-lib.h b/drivers/power/supply/qcom/smb5-lib.h
index cb194cb..066eb8e 100644
--- a/drivers/power/supply/qcom/smb5-lib.h
+++ b/drivers/power/supply/qcom/smb5-lib.h
@@ -522,6 +522,7 @@
 	struct delayed_work	pr_lock_clear_work;
 	struct delayed_work	role_reversal_check;
 	struct delayed_work	raise_qc3_vbus_work;
+	struct delayed_work float_retry_work;
 
 	struct alarm		lpd_recheck_timer;
 	struct alarm		moisture_protection_alarm;
@@ -690,6 +691,7 @@
 	bool support_ffc;
 	bool qc2_unsupported;
 	int dpdm_qc3p0_flag;
+	int float_retry_flag;
 };
 
 int smblib_read(struct smb_charger *chg, u16 addr, u8 *val);
diff --git a/drivers/usb/pd/policy_engine.c b/drivers/usb/pd/policy_engine.c
index 065c0e1..8241b07 100644
--- a/drivers/usb/pd/policy_engine.c
+++ b/drivers/usb/pd/policy_engine.c
@@ -671,8 +671,13 @@
 	start_usb_peripheral(pd);
 	typec_set_data_role(pd->typec_port, TYPEC_DEVICE);
 	typec_set_pwr_role(pd->typec_port, TYPEC_SINK);
-	typec_set_pwr_opmode(pd->typec_port,
-			pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+	if (pd->typec_mode <= QTI_POWER_SUPPLY_TYPEC_NONE) {
+		typec_set_pwr_opmode(pd->typec_port, TYPEC_PWR_MODE_USB);
+	} else if (pd->typec_mode < QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT) {
+		typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_NONE);
+	} else {
+		typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+	}
 	if (!pd->partner) {
 		memset(&pd->partner_identity, 0, sizeof(pd->partner_identity));
 		pd->partner_desc.usb_pd = false;
@@ -2833,8 +2838,13 @@
 
 	typec_set_pwr_role(pd->typec_port, TYPEC_SINK);
 	if (!pd->partner) {
-		typec_set_pwr_opmode(pd->typec_port,
-			pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+		if (pd->typec_mode <= QTI_POWER_SUPPLY_TYPEC_NONE) {
+			typec_set_pwr_opmode(pd->typec_port, TYPEC_PWR_MODE_USB);
+		} else if (pd->typec_mode < QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT) {
+			typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_NONE);
+		} else {
+			typec_set_pwr_opmode(pd->typec_port, pd->typec_mode - QTI_POWER_SUPPLY_TYPEC_SOURCE_DEFAULT);
+		}
 		memset(&pd->partner_identity, 0, sizeof(pd->partner_identity));
 		pd->partner_desc.usb_pd = false;
 		pd->partner_desc.accessory = TYPEC_ACCESSORY_NONE;
@@ -4011,8 +4021,7 @@
 		}
 
 		if (val.intval == POWER_SUPPLY_TYPE_USB ||
-			val.intval == POWER_SUPPLY_TYPE_USB_CDP ||
-			val.intval == QTI_POWER_SUPPLY_TYPE_USB_FLOAT) {
+			val.intval == POWER_SUPPLY_TYPE_USB_CDP) {
 			usbpd_dbg(&pd->dev, "typec mode:%d type:%d\n",
 				typec_mode, val.intval);
 			pd->typec_mode = typec_mode;
@@ -4066,8 +4075,7 @@
 			}
 
 			if (val.intval == POWER_SUPPLY_TYPE_USB ||
-					val.intval == POWER_SUPPLY_TYPE_USB_CDP ||
-					val.intval == QTI_POWER_SUPPLY_TYPE_USB_FLOAT)
+					val.intval == POWER_SUPPLY_TYPE_USB_CDP)
 				queue_work(pd->wq, &pd->start_periph_work);
 		}
 		return;

3. 总结