一、问题背景
测试DDR TT和Reboot测试专项过程中出现机器进dump,dump根因都相同
二、问题分析
2.1 panic现场
[ 8.619016][ T138] ufshcd-qcom 4804000.ufshc: ufshcd_complete_dev_init fDeviceInit was not cleared by the device
[ 18.838491][ T1] init: Wait for partitions returned after 10010ms
[ 18.844889][ T1] init: bool android::init::BlockDevInitializer::InitDevices(std::set<std::string>): partition(s) not found after polling timeout: boot_a, dtbo_a, metadata, recovery_a, super, vbmeta_a, vbmeta_system_a
[ 18.864489][ T1] init: Failed to mount required partitions early ...
[ 18.871136][ T1] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00007f00
[ 18.879490][ T1] CPU: 6 PID: 1 Comm: init Tainted: G WC OE 5.15.167-android13-8-00009-g705f06af57bc-ab13257286 #1
[ 18.890966][ T1] Hardware name: Qualcomm Technologies, Inc. KHAJE IDP nopmi creek (DT)
[ 18.899138][ T1] Call trace:
[ 18.902278][ T1] dump_backtrace.cfi_jt+0x0/0x8
[ 18.907072][ T1] dump_stack_lvl+0x80/0xb8
[ 18.911430][ T1] panic+0x190/0x4b4
[ 18.915179][ T1] do_exit+0xb94/0x1054
[ 18.919185][ T1] do_group_exit+0x144/0x174
[ 18.923628][ T1] do_group_exit+0x0/0x174
[ 18.927898][ T1] invoke_syscall+0x60/0x150
[ 18.932339][ T1] el0_svc_common+0xb8/0xf8
[ 18.936694][ T1] do_el0_svc+0x28/0x98
[ 18.940698][ T1] el0_svc+0x24/0x84
[ 18.944449][ T1] el0t_64_sync_handler+0x88/0xec
[ 18.949326][ T1] el0t_64_sync+0x1b8/0x1bc
[ 18.953681][ T1] SMP: stopping secondary CPUs
从这个地方来看,死机的直接原因是发生了panic,init进程被杀掉。
/dev/block/...
设备没创建(UFS 初始化失败),导致mount 超时。
所以问题点在ufs没有初始化成功,函数ufshcd_complete_dev_init
2.2 ufshcd_complete_dev_init 加debug patch
diff --git a/drivers/scsi/mi_ufs/mi_ufshcd.c b/drivers/scsi/mi_ufs/mi_ufshcd.c
index c704ef3..fe57a2b 100644
--- a/drivers/scsi/mi_ufs/mi_ufshcd.c
+++ b/drivers/scsi/mi_ufs/mi_ufshcd.c
@@ -94,7 +94,7 @@
#define UFSHCD_REF_CLK_GATING_WAIT_US 0xFF /* microsecs */
/* Polling time to wait for fDeviceInit */
-#define FDEVICEINIT_COMPL_TIMEOUT 1500 /* millisecs */
+#define FDEVICEINIT_COMPL_TIMEOUT 5000 /* millisecs */
#define wlun_dev_to_hba(dv) shost_priv(to_scsi_device(dv)->host)
@@ -4599,6 +4599,12 @@
int err;
bool flag_res = true;
ktime_t timeout;
+ u64 time_before, time_after, current_time, before_send, before_usleep, after_usleep;
+ u64 fDeviceInit_time = 0;
+ u64 fDeviceInit_threshold = 3500; /* 3500ms */
+ unsigned long usleep_min = 5*1000*1000; /* 5s */
+ unsigned long usleep_max = 6*1000*1000; /* 6s */
+ int count = 0;
err = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG,
QUERY_FLAG_IDN_FDEVICEINIT, 0, NULL);
@@ -4611,13 +4617,26 @@
/* Poll fDeviceInit flag to be cleared */
timeout = ktime_add_ms(ktime_get(), FDEVICEINIT_COMPL_TIMEOUT);
+ time_before = ktime_get_ns();
do {
+ count++;
+ before_send = ktime_get_ns();
err = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG,
QUERY_FLAG_IDN_FDEVICEINIT, 0, &flag_res);
+ current_time = ktime_get_ns();
+ pr_err("fDeviceInit value %d before_send %llu current_time %lluns time %lluns count %d\n", flag_res, before_send, current_time, current_time - before_send, count);
if (!flag_res)
break;
+
+ before_usleep = ktime_get_ns();
usleep_range(500, 1000);
+ after_usleep = ktime_get_ns();
+ pr_err("fDeviceInit before_usleep %lluns after_usleep %lluns usleep %lluns count %d\n", before_usleep, after_usleep, after_usleep - before_usleep, count);
} while (ktime_before(ktime_get(), timeout));
+ time_after = ktime_get_ns();
+
+ fDeviceInit_time = current_time - time_before;
+ pr_err("fDeviceInit time %lluns, timeout limit %dms, threshold %llums time_before %lluns time_after %lluns time %lluns\n", fDeviceInit_time, FDEVICEINIT_COMPL_TIMEOUT, fDeviceInit_threshold, time_before, time_after, time_after - time_before);
if (err) {
dev_err(hba->dev,
@@ -4629,6 +4648,13 @@
__func__);
err = -EBUSY;
}
+
+ if (fDeviceInit_time/1000/1000 > fDeviceInit_threshold) {
+ err = -EBUSY;
+ usleep_range(usleep_min, usleep_max);
+ pr_err("fDeviceInit time exceeds %llums\n", fDeviceInit_threshold);
+ BUG();
+ }
out:
return err;
}
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 86c10ed..b62eef2 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -832,7 +832,7 @@
blist_flags_t *bflags, int async)
{
int ret;
-
+ struct task_struct *task = get_current();
/*
* XXX do not save the inquiry, since it can change underneath us,
* save just vendor/model/rev.
@@ -932,10 +932,10 @@
sdev->sdtr = 1;
sdev_printk(KERN_NOTICE, sdev, "%s %.8s %.16s %.4s PQ: %d "
- "ANSI: %d%s\n", scsi_device_type(sdev->type),
+ "ANSI: %d%s comm: %s pid: %d tgid: %d\n", scsi_device_type(sdev->type),
sdev->vendor, sdev->model, sdev->rev,
sdev->inq_periph_qual, inq_result[2] & 0x07,
- (inq_result[3] & 0x0f) == 1 ? " CCS" : "");
+ (inq_result[3] & 0x0f) == 1 ? " CCS" : "", task->comm, task->pid, task->tgid);
if ((sdev->scsi_level >= SCSI_2) && (inq_result[7] & 2) &&
!(*bflags & BLIST_NOTQ)) {
在ufshcd_complete_dev_init函数所涉及的各处函数跳转上都加上时间戳计算,来判定耗时出现在哪些地方;并且当耗时超过阈值时,主动执行BUG()触发进入dump,抓取fulldump。
2.3 新dump分析
Line 4775: [ 2.247957][ T8] fDeviceInit before_usleep 2244560207ns after_usleep 2245576093ns usleep 1015886ns count 12
Line 4777: [ 2.248998][ T8] fDeviceInit value 1 before_send 2245580936 current_time 2246616770ns time 1035834ns count 13
Line 4778: [ 2.250016][ T8] fDeviceInit before_usleep 2246620780ns after_usleep 2247634999ns usleep 1014219ns count 13
Line 4779: [ 2.251193][ T8] fDeviceInit value 1 before_send 2247638488 current_time 2248812395ns time 1173907ns count 14
Line 4780: [ 2.252214][ T8] fDeviceInit before_usleep 2248817655ns after_usleep 2249832916ns usleep 1015261ns count 14
Line 4781: [ 2.253251][ T8] fDeviceInit value 1 before_send 2249836874 current_time 2250869999ns time 1033125ns count 15
Line 4782: [ 2.254268][ T8] fDeviceInit before_usleep 2250873749ns after_usleep 2251887603ns usleep 1013854ns count 15
Line 4789: [ 7.139614][ T8] fDeviceInit value 0 before_send 7136052914 current_time 7137233070ns time 1180156ns count 16
Line 4791: [ 7.150431][ T8] fDeviceInit time 4917530779ns, timeout limit 5000ms, threshold 3500ms time_before 2219702291ns time_after 7148050153ns time 4928347862ns
Line 4795: [ 12.766765][ T8] fDeviceInit time exceeds 3500ms
在循环15次后,第16次发生了异常,我们可以看到2.25s直接跳到了7.15s,大约5s中的时间没有出现过日志输出。
第二份日志
Line 4815: [ 2.166157][ T8] fDeviceInit value 1 before_send 2159485520 current_time 2160663749ns time 1178229ns count 19
Line 4816: [ 2.167178][ T8] fDeviceInit before_usleep 2160669166ns after_usleep 2161684999ns usleep 1015833ns count 19
Line 4817: [ 2.168354][ T8] fDeviceInit value 1 before_send 2161689113 current_time 2162861301ns time 1172188ns count 20
Line 4818: [ 2.169375][ T8] fDeviceInit before_usleep 2162866301ns after_usleep 2163882655ns usleep 1016354ns count 20
Line 4819: [ 2.170549][ T8] fDeviceInit value 1 before_send 2163886666 current_time 2165056509ns time 1169843ns count 21
Line 4825: [ 7.158296][ T8] fDeviceInit before_usleep 7151788382ns after_usleep 7152803643ns usleep 1015261ns count 21
Line 4827: [ 7.172870][ T8] fDeviceInit time 45096093ns, timeout limit 5000ms, threshold 3500ms time_before 2119960416ns time_after 7167376924ns time 5047416508ns
Line 4829: [ 7.184458][ T8] ufshcd-qcom 4804000.ufshc: ufshcd_complete_dev_init fDeviceInit was not cleared by the device
在循环21次的时候出现问题,本应该成对出现的第21次日志打印,前者出现在2.17s,后者出现在7.15s
但是看这个usleep(也就是内部函数执行的耗时)都是1ms左右,那说明此时并不是函数内部执行导致的阻塞!
2.4 trace32分析
通过查看各个cpu的tasks_sched_stats,发现了如下一个线程出现了很明显的异常,执行时间大约4386ms。
kworker/6:1 89 7.138401166 0.000000000 4.391132095 92 120 D 2.751619695 7.138425541 4386.805846000
查询这个栈
__switch_to(prev = 0xFFFFFF80010B0000, next = 0xFFFFFF8000208000)
prev = 0xFFFFFF80010B0000
next = 0xFFFFFF8000208000
last = 0x1B1507D9
context_switch(inline)
rq = 0xFFFFFF81791D7100
prev = 0xFFFFFF80010B0000
next = 0xFFFFFF8000208000
rf = 0xFFFFFFC008463D68
__schedule(sched_mode = ?)
rf = (flags = 0x0, cookie = (), clock_update_flags = 0x4)
switch_count = 0xFFFFFF80010B0708
next = 0xFFFFFF8000208000
prev = 0xFFFFFF80010B0000
rq = 0xFFFFFF81791D7100
schedule()
tsk = 0xFFFFFF80010B0000
worker_thread(__worker = 0xFFFFFF800012D500)
__worker = 0xFFFFFF800012D500
worker = 0x0
pool = 0xFFFFFF81791D6A00
kthread(_create = 0xFFFFFF8001066100)
_create = 0xFFFFFF8001066100
create = 0x0
data = 0xFFFFFF800012D500
threadfn = 0x0
ret = 0x0012D500
self = 0xFFFFFF800014D900
ret_from_fork(asm)
当前这个 kworker
正在执行一个工作项(work item),并在某个阶段进入了 schedule()
阻塞,具体任务在 __worker
中
struct worker *worker = 0xFFFFFF800012D500
重点字段分析:
current_work = 0x0
current_func = 0x0
last_func = 0xFFFFFFEBF6352CA8
含义如下:
查看是哪个函数?
(work_func_t) [ND:0x0::0xFFFFFF800012D598] (*(((struct worker *)0xFFFFFF800012D500))).last_func = 0xFFFFFFEBF6352CA8 = kfree_rcu_monitor.cfi_jt
这说明:
当前
kworker/6:1
的最后一次有效执行函数是kfree_rcu_monitor()
,它是kfree_rcu()
或call_rcu()
用于回收内存的后台回调。
但是系统在 [2.7s → 7.1s] 期间卡住,不太可能是单纯一个 RCU 的 kfree 回调导致所有线程死锁。这提示我们:
last_func = kfree_rcu_monitor()
很可能是:栈帧/worker 数据被破坏后残留的假地址;
此时我们查看一下寄存器信息:
关键寄存器分析
当前栈指针
SP = X29 = 0xFFFFFFC008463DE0
表示当前任务栈顶位于此处,前面已经确认该任务正在 schedule() 中。
X30 = 0x0
这是 Link Register(返回地址)为空,说明调用者已经退栈,或因为调度进入 schedule()
,LR 未再写入。
程序计数器 PC
PC = 0xFFFFFFEBF6356334 → schedule+0x150
说明当前任务正卡在 schedule()
ELR = 0xFFFFFFEBF1FC5D50
这是 Exception Link Register,也就是中断/异常返回地址。
极可能是:
调度器中断切换上下文时保存的 被中断线程的返回地址。
-> 反查 ELR 指向的函数
而mi_ufshcd.c +8279行就是我们加debug patch的函数。
到这里我们可以证实一点:那就是ufshcd_complete_dev_init函数在执行过程中被调度出去了,因为ELR里保存了该函数内的返回地址!
2.5 查看CPU栈
0xffffffc00803b718:('vsnprintf', 160)
0xffffffc00803b798:('sprintf', 96)
0xffffffc00803b858:('number', 312)
0xffffffc00803b8c8:('vsnprintf', 160)
0xffffffc00803b8f8:('vsnprintf', 188)
0xffffffc00803b938:('vsnprintf', 160)
0xffffffc00803b968:('vsnprintf', 188)
0xffffffc00803b998:('persistent_ram_write', 564)
0xffffffc00803b9a8:('sprintf', 96)
0xffffffc00803b9f8:('ramoops_pstore_write', 540)
0xffffffc00803ba00:('dummy_clock_read[jt]', 0)
0xffffffc00803ba28:('ramoops_pstore_write[jt]', 0)
0xffffffc00803ba48:('qcom_geni_serial_poll_tx_done', 184)
0xffffffc00803ba78:('pstore_console_write', 372)
0xffffffc00803ba98:('ramoops_pstore_write[jt]', 0)
0xffffffc00803bab8:('qcom_geni_serial_console_write', 1080)
0xffffffc00803bb28:('console_unlock', 1436)
0xffffffc00803bb40:('pstore_console_write[jt]', 0)
0xffffffc00803bc28:('vprintk_emit', 196)
0xffffffc00803bc68:('vprintk_emit', 232)
0xffffffc00803bcb8:('vprintk[jt]', 224)
0xffffffc00803bd68:('_printk', 88)
0xffffffc00803bd98:('__secondary_switched', 156)
0xffffffc00803be18:('dump_backtrace', 392)
0xffffffc00803be28:('__secondary_switched', 156)
0xffffffc00803be58:('show_regs', 48)
0xffffffc00803bec8:('do_handle_IPI', 712)
0xffffffc00803bef8:('ipi_handler', 32)
0xffffffc00803bf00:('timer_irq_ack[jt]', 0)
0xffffffc00803bf38:('handle_percpu_devid_irq', 192)
0xffffffc00803bf48:('handle_domain_irq', 176)
0xffffffc00803bf58:('handle_domain_irq', 216)
0xffffffc00803bf90:('handle_percpu_devid_irq[jt]', 0)
0xffffffc00803bfa8:('gic_handle_irq.34242', 88)
0xffffffc00803bfc0:('cpuidle_enter_state', 948)
0xffffffc00803bfd0:('gic_handle_irq.34242[jt]', 0)
0xffffffc00803bfe8:('call_on_irq_stack', 60)
该CPU在处理console输出,查看所有CPU后发现,8个CPU有7个CPU在调度串口输出
Line 5832: Core_0 call stack :
Line 5846: 0xffffffc008003a48:('qcom_geni_serial_poll_tx_done', 184)
Line 5869: Core_1 call stack :
Line 5883: 0xffffffc00800ba48:('qcom_geni_serial_poll_tx_done', 184)
Line 5909: Core_2 call stack :
Line 5923: 0xffffffc008013a48:('qcom_geni_serial_poll_tx_done', 184)
Line 5948: Core_3 call stack :
Line 5962: 0xffffffc00801ba48:('qcom_geni_serial_poll_tx_done', 184)
Line 5987: Core_4 call stack :
Line 6001: 0xffffffc008023a48:('qcom_geni_serial_poll_tx_done', 184)
Line 6026: Core_5 call stack :
Line 6040: 0xffffffc00802ba48:('qcom_geni_serial_poll_tx_done', 184)
Line 6065: Core_6 call stack :
Line 6122: Core_7 call stack :
Line 6136: 0xffffffc00803ba48:('qcom_geni_serial_poll_tx_done', 184)
所以问题分析到现在,已经很明了了。
三、根本原因
ufs函数执行的时候被串口输出调度出去
串口输出 调度优先级很高,输出时,会执行console_unlock(),在持锁状态下运行,不允许调度,一直执行串口输出,直到结束;
阻塞式串口写函数(qcom_geni_serial_poll_tx_done)会主动 spin 等待硬件完成,造成长时间占用 CPU。