Linux kernel crash and analysis example (3)

BUG:

Randomly, ARMv7 mobile crashed when USB OTG operations are executed.


Context:

[   30.648925] C0 [m.android.phone] Unable to handle kernel paging request at virtual address 6b6b6b9f
[   30.657958] C0 [m.android.phone] pgd = ec58c000
[   30.662506] C0 [m.android.phone] [6b6b6b9f] *pgd=00000000
[   30.667938] C0 [m.android.phone] Internal error: Oops: 5 [#1] PREEMPT SMP ARM
[   30.675079] C0 [m.android.phone] kona_fb: die notifier invoked
[   30.680908] C0 [m.android.phone] kona_display_crash_image:531 image_idx=4
[   30.688690] C0 [m.android.phone] post_async:626 Likely tearing on screen posted:0x80000000 curr= 0x80000000 nxt=0x80000000
[   30.699737] C0 [m.android.phone] axipv_dump_buff_status:141 0x80000000 state=2
[   30.706970] C0 [m.android.phone] axipv_dump_buff_status:141 0x80384000 state=1
[   30.714263] C0 [m.android.phone] Modules linked in:
[   30.719177] C0 [m.android.phone] CPU: 0    Tainted: G        W     (3.4.5 #1)
[   30.726318] C0 [m.android.phone] PC is at handle_hc_nak_intr+0x3c/0x110
[   30.732940] C0 [m.android.phone] LR is at dwc_otg_hcd_handle_hc_n_intr+0x468/0x5c8
[   30.740509] C0 [m.android.phone] pc : [<c0326ca0>]    lr : [<c032747c>]    psr: 60000193
[   30.740539] C0 [m.android.phone] sp : ec58be88  ip : 00000000  fp : 00000000
[   30.755615] C0 [m.android.phone] r10: 00000012  r9 : 00000012  r8 : 00000001
[   30.762664] C0 [m.android.phone] r7 : ed8e2f00  r6 : fe020500  r5 : ee2e63c0  r4 : ebf5a900
[   30.770996] C0 [m.android.phone] r3 : ebf5a900  r2 : 6b6b6b6b  r1 : ee2e63c0  r0 : ed8e2f00
[   30.779327] C0 [m.android.phone] Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment user
[   30.787414] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: controller timed out
[   30.787414] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: failed to send the start command
[   30.787445] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: start command failed
[   30.810150] C0 [m.android.phone] Control: 10c53c7d  Table: ae58c06a  DAC: 00000015
[   30.817718] C0 [m.android.phone] 
[   30.817718] C0 [m.android.phone] PC: 0xc0326c20:
[   30.825653] C0 [m.android.phone] 6c20 
[   30.827545] C2 [    usb-storage] usb 1-1: reset high-speed USB device number 2 using dwc_otg
[   30.837799] C0 [m.android.phone]  e1a00006 e1a01005 e1a02004 e1a03007 eaffff65 e1a00006 e1a01005 e1a02004
[   30.847564] C0 [m.android.phone] 6c40  e1a03007 ebfffcc8 e288000c e3a01001 e3a02000 eb001749 e3a00001 e8bd8ffe
[   30.857727] C0 [m.android.phone] 6c60  fffff05b e92d40f7 e1a07000 e5d1c024 e1a05001 e1a06002 e1a04003 e35c0000
[   30.867919] C0 [m.android.phone] 6c80  0a000005 e5d13025 e3530000 e3a03000 15c43010 e5c43008 ea000028 e5932018
[   30.878173] C0 [m.android.phone] 6ca0  e5d23034 e3530002 0a000003 e3530003 0a000025 e3530000 1a000027 e5973000
[   30.888427] C0 [m.android.phone] 6cc0  e5d33066 e3530000 0a000006 e5d53002 e2033008 e6ef3073 e3530000 13a03000
[   30.898651] C0 [m.android.phone] 6ce0  15c43010 1a00001c e3a03000 e5c43010 e595302c e5d33006 e3530000 1a00000f
[   30.908874] C0 [m.android.phone] 6d00  e3a03004 e1a00005 e58d3000 e1a01006 e1a03004 ebfffc77 e1a02004 e1a00005
[   30.919067] C0 [m.android.phone] 
[   30.919067] C0 [m.android.phone] LR: 0xc03273fc:
[   30.927001] C0 [m.android.phone] 73fc  e3888001 e7c5a29f e6ef907a e2093004 e6ef3073 e3530000 0a000005 e1a00005
[   30.937255] C0 [m.android.phone] 741c  e1a01004 e1a02006 e1a03007 ebfff8ee e1888000 e2093008 e6ef3073 e3530000
[   30.947479] C0 [m.android.phone] 743c  0a000005 e1a00005 e1a01004 e1a02006 e1a03007 ebfffb1b e1888000 e2093010
[   30.957489] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: controller timed out
[   30.957519] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: failed to send the start command
[   30.957550] C2 [    kworker/2:1] bsc-i2c 3e018000.i2c: start command failed
[   30.979461] C0 [m.android.phone] 745c  e6ef3073 e3530000 0a000005 e1a00005 e1a01004 e1a02006 e1a03007 ebfffdf9
[   30.989593] C0 [m.android.phone] 747c  e1888000 e2093020 e6ef3073 e3530000 0a000005 e1a00005 e1a01004 e1a02006
[   30.999755] C0 [m.android.phone] 749c  e1a03007 ebfffb36 e1888000 e2093040 e6ef3073 e3530000 0a000005 e1a00005
[   31.009918] C0 [m.android.phone] 74bc  e1a01004 e1a02006 e1a03007 ebfffe29 e1888000 e2099080 e6ef9079 e3590000
[   31.020050] C0 [m.android.phone] 74dc  0a000005 e1a00005 e1a01004 e1a02006 e1a03007 ebfffe7b e1888000 e7e7a45a

Analysis:

1) Find offending function

[   30.726318] C0 [m.android.phone] PC is at handle_hc_nak_intr+0x3c/0x110

/**
 * Handles a host channel NAK interrupt. This handler may be called in either
 * DMA mode or Slave mode.
 */
static int32_t handle_hc_nak_intr(dwc_otg_hcd_t *hcd,
				  dwc_hc_t *hc,
				  dwc_otg_hc_regs_t *hc_regs,
				  dwc_otg_qtd_t *qtd)
{
	DWC_DEBUGPL(DBG_HCD, "--Host Channel %d Interrupt: "
		    "NAK Received--\n", hc->hc_num);

	/*
	 * Handle NAK for IN/OUT SSPLIT/CSPLIT transfers, bulk, control, and
	 * interrupt.  Re-start the SSPLIT transfer.
	 */
	if (hc->do_split) {
		if (hc->complete_split)
			qtd->error_count = 0;

		qtd->complete_split = 0;
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
		goto handle_nak_done;
	}

	switch (dwc_otg_hcd_get_pipe_type(&qtd->urb->pipe_info)) {
	case UE_CONTROL:
	case UE_BULK:
		if (hcd->core_if->dma_enable && hc->ep_is_in) {
			/*
			 * NAK interrupts are enabled on bulk/control IN
			 * transfers in DMA mode for the sole purpose of
			 * resetting the error count after a transaction error
			 * occurs. The core will continue transferring data.
			 */
			qtd->error_count = 0;
			goto handle_nak_done;
		}

		/*
		 * NAK interrupts normally occur during OUT transfers in DMA
		 * or Slave mode. For IN transfers, more requests will be
		 * queued as request queue space is available.
		 */
		qtd->error_count = 0;

		if (!hc->qh->ping_state) {
			update_urb_state_xfer_intr(hc, hc_regs,
						   qtd->urb, qtd,
						   DWC_OTG_HC_XFER_NAK);
			dwc_otg_hcd_save_data_toggle(hc, hc_regs, qtd);

			if (hc->speed == DWC_OTG_EP_SPEED_HIGH)
				hc->qh->ping_state = 1;
		}

		/*
		 * Halt the channel so the transfer can be re-started from
		 * the appropriate point or the PING protocol will
		 * start/continue.
		 */
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
		break;
	case UE_INTERRUPT:
		qtd->error_count = 0;
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
		break;
	case UE_ISOCHRONOUS:
		/* Should never get called for isochronous transfers. */
		DWC_ASSERT(1, "NACK interrupt for ISOC transfer\n");
		break;
	}

handle_nak_done:
	disable_hc_int(hc_regs, nak);

	return 1;
}

2) assembly code

00001630 <handle_hc_nak_intr>:
 */
static int32_t handle_hc_nak_intr(dwc_otg_hcd_t *hcd,
				  dwc_hc_t *hc,
				  dwc_otg_hc_regs_t *hc_regs,
				  dwc_otg_qtd_t *qtd)
{
    1630:	e92d40f7 	push	{r0, r1, r2, r4, r5, r6, r7, lr}
    1634:	e1a07000 	mov	r7, r0

	/*
	 * Handle NAK for IN/OUT SSPLIT/CSPLIT transfers, bulk, control, and
	 * interrupt.  Re-start the SSPLIT transfer.
	 */
	if (hc->do_split) {
    1638:	e5d1c024 	ldrb	ip, [r1, #36]	; 0x24
 */
static int32_t handle_hc_nak_intr(dwc_otg_hcd_t *hcd,
				  dwc_hc_t *hc,
				  dwc_otg_hc_regs_t *hc_regs,
				  dwc_otg_qtd_t *qtd)
{
    163c:	e1a05001 	mov	r5, r1
    1640:	e1a06002 	mov	r6, r2
    1644:	e1a04003 	mov	r4, r3

	/*
	 * Handle NAK for IN/OUT SSPLIT/CSPLIT transfers, bulk, control, and
	 * interrupt.  Re-start the SSPLIT transfer.
	 */
	if (hc->do_split) {
    1648:	e35c0000 	cmp	ip, #0
    164c:	0a000005 	beq	1668 <handle_hc_nak_intr+0x38>
		if (hc->complete_split)
    1650:	e5d13025 	ldrb	r3, [r1, #37]	; 0x25
    1654:	e3530000 	cmp	r3, #0
    1658:	e3a03000 	mov	r3, #0
			qtd->error_count = 0;
    165c:	15c43010 	strbne	r3, [r4, #16]

		qtd->complete_split = 0;
    1660:	e5c43008 	strb	r3, [r4, #8]
    1664:	ea000028 	b	170c <handle_hc_nak_intr+0xdc>
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
		goto handle_nak_done;
	}

	switch (dwc_otg_hcd_get_pipe_type(&qtd->urb->pipe_info)) {
    1668:	e5932018 	ldr	r2, [r3, #24]
    166c:	e5d23034 	ldrb	r3, [r2, #52]	; 0x34
    1670:	e3530002 	cmp	r3, #2
    1674:	0a000003 	beq	1688 <handle_hc_nak_intr+0x58>
    1678:	e3530003 	cmp	r3, #3
    167c:	0a000025 	beq	1718 <handle_hc_nak_intr+0xe8>
    1680:	e3530000 	cmp	r3, #0
    1684:	1a000027 	bne	1728 <handle_hc_nak_intr+0xf8>
	case UE_CONTROL:
	case UE_BULK:
		if (hcd->core_if->dma_enable && hc->ep_is_in) {
    1688:	e5973000 	ldr	r3, [r7]
    168c:	e5d33066 	ldrb	r3, [r3, #102]	; 0x66
    1690:	e3530000 	cmp	r3, #0
    1694:	0a000006 	beq	16b4 <handle_hc_nak_intr+0x84>
    1698:	e5d53002 	ldrb	r3, [r5, #2]
    169c:	e2033008 	and	r3, r3, #8
    16a0:	e6ef3073 	uxtb	r3, r3
    16a4:	e3530000 	cmp	r3, #0
			 * NAK interrupts are enabled on bulk/control IN
			 * transfers in DMA mode for the sole purpose of
			 * resetting the error count after a transaction error
			 * occurs. The core will continue transferring data.
			 */
			qtd->error_count = 0;
    16a8:	13a03000 	movne	r3, #0
    16ac:	15c43010 	strbne	r3, [r4, #16]
	}

	switch (dwc_otg_hcd_get_pipe_type(&qtd->urb->pipe_info)) {
	case UE_CONTROL:
	case UE_BULK:
		if (hcd->core_if->dma_enable && hc->ep_is_in) {
    16b0:	1a00001c 	bne	1728 <handle_hc_nak_intr+0xf8>
		/*
		 * NAK interrupts normally occur during OUT transfers in DMA
		 * or Slave mode. For IN transfers, more requests will be
		 * queued as request queue space is available.
		 */
		qtd->error_count = 0;
    16b4:	e3a03000 	mov	r3, #0
    16b8:	e5c43010 	strb	r3, [r4, #16]

		if (!hc->qh->ping_state) {
    16bc:	e595302c 	ldr	r3, [r5, #44]	; 0x2c
    16c0:	e5d33006 	ldrb	r3, [r3, #6]
    16c4:	e3530000 	cmp	r3, #0
    16c8:	1a00000f 	bne	170c <handle_hc_nak_intr+0xdc>
			update_urb_state_xfer_intr(hc, hc_regs,
    16cc:	e3a03004 	mov	r3, #4
    16d0:	e1a00005 	mov	r0, r5
    16d4:	e58d3000 	str	r3, [sp]
    16d8:	e1a01006 	mov	r1, r6
    16dc:	e1a03004 	mov	r3, r4
    16e0:	ebfffc77 	bl	8c4 <update_urb_state_xfer_intr>
						   qtd->urb, qtd,
						   DWC_OTG_HC_XFER_NAK);
			dwc_otg_hcd_save_data_toggle(hc, hc_regs, qtd);
    16e4:	e1a02004 	mov	r2, r4
    16e8:	e1a00005 	mov	r0, r5
    16ec:	e1a01006 	mov	r1, r6
    16f0:	ebfffffe 	bl	11d4 <dwc_otg_hcd_save_data_toggle>

			if (hc->speed == DWC_OTG_EP_SPEED_HIGH)
    16f4:	e5d53002 	ldrb	r3, [r5, #2]
    16f8:	e2033030 	and	r3, r3, #48	; 0x30
    16fc:	e3530020 	cmp	r3, #32
				hc->qh->ping_state = 1;
    1700:	0595302c 	ldreq	r3, [r5, #44]	; 0x2c
    1704:	03a02001 	moveq	r2, #1
    1708:	05c32006 	strbeq	r2, [r3, #6]
		/*
		 * Halt the channel so the transfer can be re-started from
		 * the appropriate point or the PING protocol will
		 * start/continue.
		 */
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
    170c:	e1a00007 	mov	r0, r7
    1710:	e1a01005 	mov	r1, r5
    1714:	ea000000 	b	171c <handle_hc_nak_intr+0xec>
		break;
	case UE_INTERRUPT:
		qtd->error_count = 0;
    1718:	e5c4c010 	strb	ip, [r4, #16]
		halt_channel(hcd, hc, qtd, DWC_OTG_HC_XFER_NAK);
    171c:	e1a02004 	mov	r2, r4
    1720:	e3a03004 	mov	r3, #4
    1724:	ebfffc83 	bl	938 <halt_channel>
		DWC_ASSERT(1, "NACK interrupt for ISOC transfer\n");
		break;
	}

handle_nak_done:
	disable_hc_int(hc_regs, nak);
    1728:	e286000c 	add	r0, r6, #12
    172c:	e3a01010 	mov	r1, #16
    1730:	e3a02000 	mov	r2, #0
    1734:	ebfffffe 	bl	0 <DWC_MODIFY_REG32>

	return 1;
}
    1738:	e3a00001 	mov	r0, #1
    173c:	e8bd80fe 	pop	{r1, r2, r3, r4, r5, r6, r7, pc}

3) offending code

    1668:	e5932018 	ldr	r2, [r3, #24]
    166c:	e5d23034 	ldrb	r3, [r2, #52]	; 0x34         // [r2, #52] is 0

4) Must check null pointer before usage

dwc_otg_hcd_get_pipe_type(&qtd->urb->pipe_info)



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章