Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091(CAM_MctBus) 解決方法總結

--------- beginning of crash
01-02 09:14:39.508   775  5091 F libc    : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)
01-02 09:14:39.762 28465 28465 F DEBUG   : *** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***
01-02 09:14:39.762 28465 28465 F DEBUG   : Build fingerprint: 'Android/Aiolos/msm8953_64:7.1.2/Aiolos/20180811_1106:userdebug/test-keys'
01-02 09:14:39.762 28465 28465 F DEBUG   : Revision: '0'
01-02 09:14:39.762 28465 28465 F DEBUG   : ABI: 'arm'
01-02 09:14:39.763 28465 28465 F DEBUG   : pid: 775, tid: 5091, name: CAM_MctBus  >>> /system/bin/mm-qcamera-daemon <<<
01-02 09:14:39.763 28465 28465 F DEBUG   : signal 11 (SIGSEGV), code 1 (SEGV_MAPERR), fault addr 0xf4f17000
01-02 09:14:39.763 28465 28465 F DEBUG   :     r0 f4f17000  r1 00000003  r2 00008318  r3 ebdedac0
01-02 09:14:39.763 28465 28465 F DEBUG   :     r4 f08de1e0  r5 00000001  r6 ebdf5dd8  r7 f08de224
01-02 09:14:39.763 28465 28465 F DEBUG   :     r8 00000001  r9 e992d540  sl f1548800  fp 00000003
01-02 09:14:39.763 28465 28465 F DEBUG   :     ip f4255df4  sp ea8037e8  lr f423ed8f  pc f423c166  cpsr a0070030
01-02 09:14:39.781 28465 28465 F DEBUG   : 
01-02 09:14:39.781 28465 28465 F DEBUG   : backtrace:
01-02 09:14:39.782 28465 28465 F DEBUG   :     #00 pc 00006166  /system/vendor/lib/libmmcamera2_mct.so
01-02 09:14:39.782 28465 28465 F DEBUG   :     #01 pc 00046fe3  /system/lib/libc.so (_ZL15__pthread_startPv+22)
01-02 09:14:39.782 28465 28465 F DEBUG   :     #02 pc 00019ced  /system/lib/libc.so (__start_thread+6)

根據kernel log

[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003

以及反彙編

00006166                 LDR             R0, [R0]

我們可以得知,是在對 0xf4f17000 這個指針地址取值(解引用)時出現了錯誤。

根據反彙編我們可以定位到函數的位置
代碼路徑
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/mct/controller/mct_controller.c

/*
 * Excerpt of the SOF handler: only the log statement relevant to the crash
 * is reproduced here. The declarations of current_sof_delay and
 * isp_sof_bus_msg, and the function's return statement, are not part of
 * this excerpt.
 */
static boolean mct_controller_handle_SOF_proc(
  mct_pipeline_t *pipeline, mct_bus_msg_t *bus_msg)
{

 /*
  * The last log argument loads an int from the address stored in
  * isp_sof_bus_msg->kernel_sof_ptr. No check of that pointer is visible in
  * this excerpt, so the load faults if the address is no longer mapped.
  */
 CLOGI(CAM_MCT_MODULE, "(sofdelay, curr_sofdelay) = (%d, %d) kptr %u",
    pipeline->sof_delay, current_sof_delay, *(int *)isp_sof_bus_msg->kernel_sof_ptr);
}

我們可以通過打印信息驗證這一點

01-02 09:14:39.301   775 28447 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.301   775 28447 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.301   775  5091 E mm-camera: <MCT   ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.301   775  5091 I mm-camera: <MCT   >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-2, -3) kptr 0xf4f17000

這個地址 0xf4f17000 就是 isp_sof_bus_msg->kernel_sof_ptr 的值,而 Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus) 中也是這個地址。
下面我們分析這個地址的來源。相關的三個函數 iface_axi_open、iface_thread_start、iface_axi_handle_sof_event 列出如下,從中可以得知
isp_sof_bus_msg->kernel_sof_ptr 的創建和傳遞過程:
代碼路徑
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c

/*
 * Open the AXI subdev node and start its two worker threads.
 *
 *   axi_hw:   hardware context; fd and hw_state are updated here
 *   dev_name: path of the device node to open
 *
 * Returns 0 on success, -EAGAIN if the hardware is not in the INVALID
 * (never opened) state, -1 if the node cannot be opened, or the error
 * returned by whichever thread-start helper failed.
 */
static int iface_axi_open(iface_axi_hw_t *axi_hw, char *dev_name)
{
  int status;

  /* Only a hardware instance that was never opened may be opened. */
  if (axi_hw->hw_state != IFACE_AXI_HW_STATE_INVALID) {
    return -EAGAIN;
  }

  axi_hw->fd = open(dev_name, O_RDWR | O_NONBLOCK);
  if (axi_hw->fd <= 0) {
    CDBG_ERROR("%s: cannot open '%s'\n", __func__, dev_name);
    axi_hw->fd = 0;
    return -1;
  }

  /* Node is open: record the new state before starting the threads. */
  axi_hw->hw_state = IFACE_AXI_HW_STATE_DEV_OPEN;

  /* Thread that polls the vfe subdev and the command pipe. */
  status = iface_thread_start(&axi_hw->thread_poll, axi_hw, axi_hw->fd);
  if (status != 0) {
    return status;
  }

  /* Thread that services stream on/off requests. */
  return iface_sem_thread_start(&axi_hw->thread_stream, axi_hw);
}

代碼路徑
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c

/*
 * Excerpt of the poll-thread start routine: only the mmap call that
 * produces axi_hw->mmap_addr is reproduced here. The declaration of axi_hw
 * (presumably derived from hw_ptr -- confirm against the full source) and
 * the rest of the function body are not part of this excerpt.
 */
int iface_thread_start(iface_thread_t *thread_data, void *hw_ptr, int poll_fd)
{

 /*
  * Map one PAGE_SIZE region of poll_fd at offset 0, shared and read/write,
  * and keep the resulting address in axi_hw->mmap_addr. The result is not
  * checked in this excerpt; mmap() reports failure by returning MAP_FAILED.
  */
 axi_hw->mmap_addr =
    mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
      MAP_SHARED, poll_fd, 0);
}

以及
代碼路徑
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi.c

/** iface_axi_handle_sof_event:
 *
 *  Description:
 *  this function is called when SOF event is received from
 *  kernel
 *
 *    @axi_hw: axi hw; its mmap_addr is read here
 *    @sof: isp event data of the SOF event
 *    @input_src: vfe input source of the event
 *
 *  Returns 0 for success and negative error on failure
 *
 *  NOTE: this is an excerpt. The declarations of isp_sof and ksofid and
 *  the return statement are not reproduced here.
 **/
static int iface_axi_handle_sof_event(
    iface_axi_hw_t *axi_hw,
    struct msm_isp_event_data *sof,
    enum msm_vfe_input_src input_src)
{
  /*
   * When the mapping exists, hand its raw address out through
   * isp_sof.kernel_sof_ptr and read a uint32_t from it into ksofid.
   * Otherwise publish NULL; ksofid is not assigned on that path.
   */
  if (axi_hw->mmap_addr != MAP_FAILED) {
    isp_sof.kernel_sof_ptr = axi_hw->mmap_addr;
    ksofid = *(uint32_t *)axi_hw->mmap_addr;
  } else {
    isp_sof.kernel_sof_ptr = NULL;
  }
}

這裏的 axi_hw->fd = open("/dev/v4l-subdev13") 所對應的驅動代碼路徑如下
==kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp.c==
大致我們可以分析狀況如下:
vendor 還在處理數據,但是mmap的地址已經被釋放了,現在我們來驗證這一想法。

下面我們摘錄出log的異常地方便於分析

01-02 09:14:39.463   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 2 ksofid: 0
01-02 09:14:39.463   775  5091 E mm-camera: <MCT   ><ERROR> 1026: mct_controller_handle_SOF_proc: Metadata stream not present
01-02 09:14:39.464   775  5091 I mm-camera: <MCT   >< INFO> 1040: mct_controller_handle_SOF_proc: (sofdelay, curr_sofdelay) = (-1, -2) kptr 0xf4f17000
01-02 09:14:39.464   775  5091 E mm-camera-iface2: iface_post_control_sof_to_thread: E, session id = 3, user stream id = 15
01-02 09:14:39.471   775  5089 I mm-camera: <MCT   >< INFO> 3674: mct_pipeline_process_set: command=8000009
01-02 09:14:39.471   775  5089 I mm-camera: <MCT   >< INFO> 3812: mct_pipeline_process_set:  STREAM-OFF on stream 0x30001 stream type=8
01-02 09:14:39.479   775  5089 E mm-camera: <ISP   ><ERROR> 1006: isp_handler_control_streamoff: stream_off 2 2 ide 30001
01-02 09:14:39.480   775  5089 E mm-camera: <ISP   ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.480   775  5089 E mm-camera: <ISP   ><ERROR> 1729: isp_util_forward_event_to_internal_pipeline: isp pipeline not support this stream identity 30001
01-02 09:14:39.481   775  5089 I mm-camera: <IFACE >< INFO> 1622: iface_streamoff_to_thread: iface_streamoff_to_thread: E, session id = 3, user stream id = 1
01-02 09:14:39.481   775  5089 E mm-camera-iface2: iface_session_sem_thread_execute: E
01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface:streamoff
01-02 09:14:39.481   775  5072 I mm-camera: <IFACE >< INFO> 2216: iface_streamoff: E, session_id 3 mct_stream_id = 1
01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface_decide_session_hw_stream: no Session based hw streams to streamon, *num_hw_streams = 0
01-02 09:14:39.482   775 28463 E mm-camera-iface2: iface_axi_cfg_stream: E, start_flag = 0, sessionid = 3
01-02 09:14:39.482   775 28463 E mm-camera-iface2: iface_axi_cfg_stream:1617: Cmd (0) axi stream 0x205 on vfe 0
01-02 09:14:39.483   775 28462 E mm-camera-iface2: isp_sof.frame_id=3, f_src=1, mmap_addr=0xf4f17000
01-02 09:14:39.483   775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Revmoe bufq in kernel with hw stream 0x1, use stream 1
01-02 09:14:39.483   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_unreg_buf: Unregister buf session_id = 3, stream_id = 1, buf_handle = 0x340a0000
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: stats_cmd: enable  = 0, num of stats stream = 0
01-02 09:14:39.484   775 28463 E mm-camera-iface2: iface_axi_cfg_stats_stream_stop: no stats stream to START/STOP, stats cmd.num_streams = 0
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_streamoff: hw_stream id 1 mct_stream_id 1 user_stream_id 1
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 2406: iface_streamoff: session_id = 3, active_streams = 0
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 2425: iface_streamoff: session id = 0x3,Delta between vfe irqs 0 sec 0 usec
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_streamoff: hw_stream 0xf106777c type 0 axi_path 5 id 1
01-02 09:14:39.486   775  5072 I mm-camera: <IFACE >< INFO> 10046: iface_util_release_resource: camif_cnt= 0, rdi_cnt= 0, used mask 2
01-02 09:14:39.486   775  5072 E mm-camera-iface2: iface_destroy_hw: E, hw_idx 0
01-02 09:14:39.487   775  5072 E mm-camera-iface2: iface_destroy_hw: Destroy axi hw on VFE :0
--------- beginning of crash
01-02 09:14:39.508   775  5091 F libc    : Fatal signal 11 (SIGSEGV), code 1, fault addr 0xf4f17000 in tid 5091 (CAM_MctBus)

由log可以看出 ==iface_axi_handle_sof_event== 這個事件還在處理中,根據函數註釋我們可以得知
this function is called when SOF event is received from kernel

01-02 09:14:39.483   775 28462 E mm-camera-iface2: Event SOF session 3 VFE0, src 1 with frame_id 3 ksofid: 0

但是在此之前,根據時間戳發現卻已經執行了 streamoff

01-02 09:14:39.481   775  5072 E mm-camera-iface2: iface:streamoff

現在我們把整個函數整理如下
代碼路徑
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/iface_axi_thread.c

/** iface_thread_main_loop
 *
 * DESCRIPTION: thread entry point. Signals the creator, then repeatedly
 * polls the registered fds: a readable command pipe (poll_fds[0]) takes
 * priority and is handled alone in that iteration; otherwise a pending
 * subdev event (poll_fds[1]) is processed. When a command sets the exit
 * flag, both pipe ends are closed and the thread returns NULL.
 *
 **/
static void *iface_thread_main_loop(void *data)
{
  iface_thread_t *ctx = (iface_thread_t *)data;
  iface_axi_hw_t *hw = (iface_axi_hw_t *)ctx->hw_ptr;
  int wait_ms = ctx->poll_timeoutms;
  int stop = 0;
  int nready;
  int idx;

  /* Let the thread that created us continue. */
  sem_post(&ctx->sig_sem);

  while (!stop) {
    short pipe_ev;
    short subdev_ev;

    for (idx = 0; idx < ctx->num_fds; idx++) {
      ctx->poll_fds[idx].events = POLLIN|POLLRDNORM|POLLPRI;
    }

    nready = poll(ctx->poll_fds, ctx->num_fds, wait_ms);
    if (nready <= 0) {
      CDBG_ERROR("error: poll() failed error =%s %d",
        strerror(errno), errno);
      continue;
    }

    /* Data on the pipe: handle only the command in this iteration. */
    pipe_ev = ctx->poll_fds[0].revents;
    if ((pipe_ev & POLLIN) && (pipe_ev & POLLRDNORM)) {
      (void)iface_thread_proc_cmd(hw, ctx, &stop, TRUE);
      continue;
    }

    /* No command pending: check the subdev for any readable event. */
    subdev_ev = ctx->poll_fds[1].revents;
    if (subdev_ev & (POLLPRI | POLLIN | POLLRDNORM)) {
      iface_axi_proc_subdev_event(hw, ctx);
    }
  }

  /* Close whichever pipe ends are still open and mark them unused. */
  for (idx = 0; idx < 2; idx++) {
    if (ctx->pipe_fds[idx] > 0) {
      close(ctx->pipe_fds[idx]);
      ctx->pipe_fds[idx] = 0;
    }
  }

  return NULL;
}

iface_axi_proc_subdev_event --> iface_axi_handle_sof_event(axi_hw, sof, input_src);
--> ISP_EVENT_SOF --> iface_axi_handle_sof_event
而 ISP_EVENT_SOF 的事件來源是 msm_isp40.c ,代表着 kernel 接收到 sensor 的幀。

代碼路徑
:kernel/msm-3.18/drivers/media/platform/msm/camera_v2/isp/msm_isp40.c

            case VFE_RAW_0:
            case VFE_RAW_1:
            case VFE_RAW_2:
                msm_isp_notify(vfe_dev, ISP_EVENT_SOF, i, ts);

或者

    msm_isp_notify(vfe_dev, ISP_EVENT_SOF, VFE_PIX_0, ts);

這裏可以驗證我們的猜想,kernel的事件確實還通過v4l2上傳到vendor去處理。
現在我們回到kernel的log

[ 2801.881016] msm_isp_open_node open_cnt 0
[ 2801.886397] msm_isp_open_node: HW Version: 0x10090000
[ 2801.892568] msm_isp_open_node open_cnt 1 exit 111111
[ 2801.963456] msm_isp_close_node E open_cnt 1
[ 2801.966810] msm_isp_close_node open_cnt 1 unregister page fault handler
[ 2801.976748] msm_isp_close_node X open_cnt 0 EXIT 1111111
[ 2804.384904] pgd = ffffffc059f28000
[ 2804.389559] [f4f17000] *pgd=00000000d9dda003, *pud=00000000d9dda003

也可以佐證我們的想法,已經執行完 msm_isp_close_node 函數纔出現 pgd = ffffffc059f28000 這個取地址的錯誤。
我們可以打印如下函數中的 munmap(axi_hw->mmap_addr, PAGE_SIZE) 來驗證
代碼路徑(此函數調用 munmap / pthread_join,屬於 vendor 用戶空間代碼,而非 kernel 驅動;具體文件名請以實際源碼為準)
:vendor/qcom/proprietary/mm-camera/mm-camera2/media-controller/modules/iface2/axi/

/** iface_axi_destroy_thread
 *
 *    @thread: information about the thread to be destroyed
 *
 *  Destroys isp hw thread: writes IFACE_THREAD_CMD_DESTROY to the thread's
 *  command pipe, joins the thread, and only then unmaps the page held in
 *  axi_hw->mmap_addr and destroys the thread's semaphore and mutexes.
 *
 *  The page is unmapped after the join on purpose. While the poll thread
 *  is alive it can still run iface_axi_handle_sof_event, which publishes
 *  mmap_addr and reads through it; unmapping first leaves that thread with
 *  an address that faults on access.
 *
 *  NOTE(review): joining the poll thread does not protect other threads
 *  that were already handed a copy of the address (it is stored in
 *  isp_sof.kernel_sof_ptr). Such consumers must not dereference it once
 *  teardown has started; that is outside this function.
 *
 *  Returns 0 on success. Returns -EPIPE if the thread already reported a
 *  pipe read error or if the destroy command cannot be written; in both
 *  cases the thread has not been joined, so the mapping and the
 *  synchronisation objects are deliberately left untouched.
 **/
static int iface_axi_destroy_thread(iface_thread_t *thread)
{
  int len;
  uint32_t cmd = IFACE_THREAD_CMD_DESTROY;
  iface_axi_hw_t *axi_hw;

  axi_hw = (iface_axi_hw_t *)thread->hw_ptr;

  pthread_mutex_lock(&thread->cmd_mutex);

  if (thread->return_code == -EPIPE) {
    CDBG_ERROR("%s: Pipe read error\n", __func__);
    pthread_mutex_unlock(&thread->cmd_mutex);
    return -EPIPE;
  }

  len = write(thread->pipe_fds[1], &cmd, sizeof(cmd));
  if (len != sizeof(cmd)) {
    /* we got error here */
    pthread_mutex_unlock(&thread->cmd_mutex);
    CDBG_ERROR("%s: Pipe write error\n", __func__);
    return -EPIPE;
  }
  /* we do not wait on sem but join to wait the thread to die. */
  if (pthread_join(thread->pid, NULL) != 0)
    CDBG("%s: pthread dead already\n", __func__);

  /*
   * The poll thread is gone, so nothing in it can read the mapping any
   * more. Release the page and reset the field so that a later reader sees
   * "no mapping" instead of a stale address.
   */
  if (axi_hw->mmap_addr != MAP_FAILED) {
    if (munmap(axi_hw->mmap_addr, PAGE_SIZE))
      perror("munmap");
    axi_hw->mmap_addr = MAP_FAILED;
  }

  sem_destroy(&thread->sig_sem);

  pthread_mutex_destroy(&thread->busy_mutex);
  pthread_mutex_unlock(&thread->cmd_mutex);
  pthread_mutex_destroy(&thread->cmd_mutex);

  return 0;
}
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章