一個ixgbe空指針訪問導致宿主機宕機的未知問題

1、在某臺機器上出現宕機,通過kdump日誌,初步看是因爲ixgbe裏有空指針訪問

2、使用crash分析空指針訪問原因

1)、安裝kernel debug包(包含debug調試信息的vmlinux);

2)、使用crash打開vmcore( crash /usr/lib/debug/usr/lib/modules/3.10.0-327/vmlinux /home/vmcore ),先用dis命令看下RIP地址的彙編信息;從彙編指令看此時正在訪問rbx寄存器所指向的內存;

3)、從bt裏看下現場信息,發現rbx確實爲空;而且從調用棧裏看,此時正在執行ixgbe_xmit_frame_ring函數;

4)、對ixgbe_xmit_frame_ring做下反彙編,追蹤下rbx的來源;從彙編裏看出rbx是從rdx裏賦值過來的,按x86-64(System V ABI)的函數調用約定,函數調用時,%rdi,%rsi,%rdx,%rcx,%r8,%r9分別用來傳遞第1、2、3、4、5、6個參數,因此這裏的rdx表示的是ixgbe_xmit_frame_ring的第三個參數;

crash> dis -l ixgbe_xmit_frame_ring
0xffffffffc05cdd90 <ixgbe_xmit_frame_ring>:     nopl   0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc05cdd95 <ixgbe_xmit_frame_ring+5>:   push   %rbp
0xffffffffc05cdd96 <ixgbe_xmit_frame_ring+6>:   mov    %rsp,%rbp
0xffffffffc05cdd99 <ixgbe_xmit_frame_ring+9>:   push   %r15
0xffffffffc05cdd9b <ixgbe_xmit_frame_ring+11>:  push   %r14
0xffffffffc05cdd9d <ixgbe_xmit_frame_ring+13>:  mov    %rsi,%r14
0xffffffffc05cdda0 <ixgbe_xmit_frame_ring+16>:  push   %r13
0xffffffffc05cdda2 <ixgbe_xmit_frame_ring+18>:  mov    %rdi,%r13
0xffffffffc05cdda5 <ixgbe_xmit_frame_ring+21>:  push   %r12
0xffffffffc05cdda7 <ixgbe_xmit_frame_ring+23>:  push   %rbx
0xffffffffc05cdda8 <ixgbe_xmit_frame_ring+24>:  mov    %rdx,%rbx  //rbx賦值的地方
0xffffffffc05cddab <ixgbe_xmit_frame_ring+27>:  sub    $0x40,%rsp
0xffffffffc05cddaf <ixgbe_xmit_frame_ring+31>:  movzwl 0x7e(%rdi),%r15d
0xffffffffc05cddb4 <ixgbe_xmit_frame_ring+36>:  movb   $0x0,-0x35(%rbp)
0xffffffffc05cddb8 <ixgbe_xmit_frame_ring+40>:  mov    %gs:0x28,%rax
0xffffffffc05cddc1 <ixgbe_xmit_frame_ring+49>:  mov    %rax,-0x30(%rbp)
0xffffffffc05cddc5 <ixgbe_xmit_frame_ring+53>:  xor    %eax,%eax
0xffffffffc05cddc7 <ixgbe_xmit_frame_ring+55>:  mov    0x68(%rdi),%eax
0xffffffffc05cddca <ixgbe_xmit_frame_ring+58>:  lea    0x3fff(%rax),%ecx
0xffffffffc05cddd0 <ixgbe_xmit_frame_ring+64>:  sub    0x6c(%rdi),%ecx
0xffffffffc05cddd3 <ixgbe_xmit_frame_ring+67>:  mov    0xdc(%rdi),%edi
0xffffffffc05cddd9 <ixgbe_xmit_frame_ring+73>:  add    0xe0(%r13),%rdi
0xffffffffc05cdde0 <ixgbe_xmit_frame_ring+80>:  shr    $0xe,%ecx
0xffffffffc05cdde3 <ixgbe_xmit_frame_ring+83>:  movzbl (%rdi),%r8d
0xffffffffc05cdde7 <ixgbe_xmit_frame_ring+87>:  test   %r8w,%r8w
0xffffffffc05cddeb <ixgbe_xmit_frame_ring+91>:  je     0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>
0xffffffffc05cdded <ixgbe_xmit_frame_ring+93>:  sub    $0x1,%r8d
0xffffffffc05cddf1 <ixgbe_xmit_frame_ring+97>:  xor    %eax,%eax
0xffffffffc05cddf3 <ixgbe_xmit_frame_ring+99>:  movzwl %r8w,%r8d
0xffffffffc05cddf7 <ixgbe_xmit_frame_ring+103>: add    $0x1,%r8
0xffffffffc05cddfb <ixgbe_xmit_frame_ring+107>: shl    $0x4,%r8
0xffffffffc05cddff <ixgbe_xmit_frame_ring+111>: nop
0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>: mov    0x3c(%rdi,%rax,1),%esi
0xffffffffc05cde04 <ixgbe_xmit_frame_ring+116>: add    $0x10,%rax
0xffffffffc05cde08 <ixgbe_xmit_frame_ring+120>: lea    0x3fff(%rsi),%edx
0xffffffffc05cde0e <ixgbe_xmit_frame_ring+126>: shr    $0xe,%edx
0xffffffffc05cde11 <ixgbe_xmit_frame_ring+129>: add    %edx,%ecx
0xffffffffc05cde13 <ixgbe_xmit_frame_ring+131>: cmp    %r8,%rax
0xffffffffc05cde16 <ixgbe_xmit_frame_ring+134>: jne    0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>
0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>: movzwl 0x58(%rbx),%eax  //訪問空指針的地方
0xffffffffc05cde1c <ixgbe_xmit_frame_ring+140>: movzwl 0x5a(%rbx),%esi
0xffffffffc05cde20 <ixgbe_xmit_frame_ring+144>: add    $0x3,%ecx
0xffffffffc05cde23 <ixgbe_xmit_frame_ring+147>: xor    %edx,%edx

5)、結合源碼分析下函數ixgbe_xmit_frame_ring,不難分析出rbx表示的是tx_ring,由於tx_ring爲空,函數在(經由ixgbe_maybe_stop_tx)調用ixgbe_desc_unused、訪問tx_ring->next_to_clean時出現異常,next_to_clean正好位於tx_ring的0x58偏移處,與RIP信息一致;

/*
 * Excerpt of the ixgbe transmit routine: only the part that estimates how
 * many TX descriptors the skb needs and checks for ring space is shown.
 *
 * skb:     packet to transmit; its head length and fragment sizes feed the
 *          descriptor estimate.
 * adapter: not referenced in the portion shown here.
 * tx_ring: ring handed to ixgbe_maybe_stop_tx() and updated on the busy
 *          path; it is used without any NULL check.
 *
 * Returns NETDEV_TX_BUSY when ixgbe_maybe_stop_tx() reports non-zero. The
 * return on the normal path is not part of this excerpt.
 */
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
			  struct ixgbe_adapter *adapter,
			  struct ixgbe_ring *tx_ring)
{
	struct ixgbe_tx_buffer *first;
	int tso;
	u32 tx_flags = 0;
	unsigned short f;
	/* Start the estimate with the linear (head) part of the skb. */
	u16 count = TXD_USE_COUNT(skb_headlen(skb));
	__be16 protocol = skb->protocol;
	u8 hdr_len = 0;

	/*
	 * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
	 *       + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time
	 */
	/* Add the descriptors needed by each page fragment. */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
		count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);

	/*
	 * The "+ 3" is the 2-descriptor gap plus the context descriptor from
	 * the comment above.
	 * NOTE(review): ixgbe_maybe_stop_tx() is not shown here; the
	 * surrounding analysis attributes the fault at +136 to the read of
	 * tx_ring->next_to_clean performed on behalf of this call.
	 */
	if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
		tx_ring->tx_stats.tx_busy++;
		return NETDEV_TX_BUSY;
	}
	/* The remainder of the function is omitted from this excerpt. */
}

/*
 * Compute a descriptor count from the ring's next_to_clean and next_to_use
 * indices (by its name, the number of currently unused descriptors).
 *
 * ring: dereferenced unconditionally; a NULL ring faults on the very first
 *       read (next_to_clean).
 *
 * Returns ntc - ntu - 1 when next_to_clean is ahead of next_to_use,
 * otherwise ring->count + ntc - ntu - 1, truncated to u16.
 */
static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
{
	u16 ntc = ring->next_to_clean;
	u16 ntu = ring->next_to_use;

	/* Add ring->count only when ntc has not passed ntu (wrap-around case). */
	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
}

6)、再進一步看ixgbe_xmit_frame_ring的調用關係,會發現在ring入參爲空時,tx_ring是由adapter->tx_ring[skb->queue_mapping]得到的;

/*
 * Pad the skb to the minimum length, pick the TX ring and hand the packet
 * to ixgbe_xmit_frame_ring().
 *
 * skb:    packet to transmit; skb->queue_mapping selects the ring when no
 *         explicit ring is given.
 * netdev: net device whose private data is the ixgbe_adapter.
 * ring:   optional explicit TX ring; when NULL the ring is looked up in
 *         adapter->tx_ring[].
 *
 * Returns NETDEV_TX_OK if padding fails, otherwise whatever
 * ixgbe_xmit_frame_ring() returns.
 */
static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
				      struct net_device *netdev,
				      struct ixgbe_ring *ring)
{
	struct ixgbe_adapter *adapter = netdev_priv(netdev);
	struct ixgbe_ring *tx_ring;

	/*
	 * The minimum packet size for olinfo paylen is 17 so pad the skb
	 * in order to meet this minimum size requirement.
	 */
	/* NOTE(review): presumably the skb is already consumed when padding fails - confirm against skb_put_padto(). */
	if (skb_put_padto(skb, 17))
		return NETDEV_TX_OK;
	/*
	 * Per the article's analysis the ring argument is always NULL on this
	 * path, so tx_ring comes from adapter->tx_ring[]. Neither the index
	 * (skb->queue_mapping) nor the resulting pointer is validated here.
	 */
	tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];

	return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
}

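7)、接下來看下adapter->tx_ring[skb->queue_mapping]是否爲空,首先得先找到skb->queue_mapping的值,由於skb是ixgbe_xmit_frame_ring的第一個參數,第一個參數保存在rdi裏,因此先看下rdi描述的skb的信息;從以下的輸出裏可以看出skb->queue_mapping爲0,因此tx_ring即爲adapter->tx_ring[0]所表示的值;(注意:rdi只在函數入口處保存skb。從上面的反彙編看,skb在+18處已被保存到r13,而rdi在+67、+73處被改寫,之後被用來讀取分片數量(看起來對應skb_shinfo(skb)),到崩潰點+136時RDI已不再指向skb;因此以下按RDI解析出的sk_buff內容未必可信,建議改用R13(ffff88132b78eb00)重新確認queue_mapping。)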

crash> bt
PID: 19     TASK: ffff880169748fe0  CPU: 2   COMMAND: "ksoftirqd/2"
 #0 [ffff8801697578d8] machine_kexec at ffffffff8105c54b
 #1 [ffff880169757938] __crash_kexec at ffffffff81105b82
 #2 [ffff880169757a08] crash_kexec at ffffffff81105c70
 #3 [ffff880169757a20] oops_end at ffffffff816bb078
 #4 [ffff880169757a48] no_context at ffffffff816ab189
 #5 [ffff880169757a98] __bad_area_nosemaphore at ffffffff816ab21f
 #6 [ffff880169757ae0] bad_area_nosemaphore at ffffffff816ab389
 #7 [ffff880169757af0] __do_page_fault at ffffffff816bdf3e
 #8 [ffff880169757b50] do_page_fault at ffffffff816be0e5
 #9 [ffff880169757b80] page_fault at ffffffff816ba308
    [exception RIP: ixgbe_xmit_frame_ring+136]
    RIP: ffffffffc05cde18  RSP: ffff880169757c30  RFLAGS: 00010246
    RAX: 00000000000005ea  RBX: 0000000000000000  RCX: 0000000000000001
    RDX: 0000000000000000  RSI: ffff88203d4208c0  RDI: ffff88188fd90ec0
    RBP: ffff880169757c98   R8: 0000000000000000   R9: ffffffff8157d137
    R10: ffff88103fc99f40  R11: ffffea008062e900  R12: ffff88203d420000
    R13: ffff88132b78eb00  R14: ffff88203d4208c0  R15: 0000000000000008
    ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0018
#10 [ffff880169757ca0] ixgbe_xmit_frame at ffffffffc05cec2f [ixgbe]
#11 [ffff880169757cd0] dev_hard_start_xmit at ffffffff815927d1
#12 [ffff880169757d40] sch_direct_xmit at ffffffff815bd2ba
#13 [ffff880169757d90] __qdisc_run at ffffffff815bd470
#14 [ffff880169757dd8] net_tx_action at ffffffff81591cc8
#15 [ffff880169757e10] __do_softirq at ffffffff810916af
#16 [ffff880169757e80] run_ksoftirqd at ffffffff81091878
#17 [ffff880169757e98] smpboot_thread_fn at ffffffff810b9e0f
#18 [ffff880169757ec8] kthread at ffffffff810b16ff
#19 [ffff880169757f50] ret_from_fork at ffffffff816c2cd8
crash> struct sk_buff -x ffff88188fd90ec0
struct sk_buff {
  next = 0x0, 
  prev = 0x0, 
  {
    tstamp = {
      tv64 = 0x0
    }, 
    skb_mstamp = {
      {
        v64 = 0x0, 
        {
          stamp_us = 0x0, 
          stamp_jiffies = 0x0
        }
      }
    }
  }, 
  sk = 0x0, 
  dev = 0x100000000, 
  cb = "\000\000\000\000\000\000\000\000\000\317\025\\\000\352\377\377*\b\000\000\337\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000", 
  _skb_refdst = 0x0, 
  sp = 0x0, 
  len = 0x0, 
  data_len = 0x0, 
  mac_len = 0x0, 
  hdr_len = 0x0, 
  {
    csum = 0x0, 
    {
      csum_start = 0x0, 
      csum_offset = 0x0
    }
  }, 
  priority = 0x0, 
  ignore_df = 0x0, 
  cloned = 0x0, 
  ip_summed = 0x0, 
  nohdr = 0x0, 
  nfctinfo = 0x0, 
  pkt_type = 0x0, 
  fclone = 0x0, 
  ipvs_property = 0x0, 
  peeked = 0x0, 
  nf_trace = 0x0, 
  protocol = 0x0, 
  destructor = 0x0, 
  nfct = 0x0, 
  nf_bridge = 0x0, 
  headers_start = 0xffff88188fd90f58, 
  skb_iif = 0x0, 
  {
    hash = 0x0, 
    __UNIQUE_ID_rh_kabi_hide35 = {
      rxhash = 0x0
    }, 
    {<No data fields>}
  }, 
  vlan_proto = 0x0, 
  vlan_tci = 0x0, 
  tc_index = 0x0, 
  tc_verd = 0x0, 
  queue_mapping = 0x0, 
  ndisc_nodetype = 0x0, 
  pfmemalloc = 0x0, 
  ooo_okay = 0x0, 

8)、進一步分析adapter的值,adapter作爲ixgbe_xmit_frame_ring的第二個參數保存在rsi裏,因此分析下rsi表示的adapter的信息(一開始會報ixgbe_adapter結構未定義,需要先加載ixgbe模塊的調試信息),從以下的輸出信息裏可以看到adapter->tx_ring[0]爲0xffff88088a4f7400,並不爲空;

crash> struct ixgbe_adapter -x  ffff88203d4208c0
struct: invalid data structure reference: ixgbe_adapter
crash> mod -s ixgbe /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
     MODULE       NAME                       SIZE  OBJECT FILE
ffffffffc0600000  ixgbe                    301698  /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug 
crash> 
crash> 
crash> 
crash> 
crash> struct ixgbe_adapter -x  ffff88203d4208c0
struct ixgbe_adapter {
  active_vlans = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 
  netdev = 0xffff88203d420000, 
  pdev = 0xffff8810e923d000, 
  state = 0x140, 
  flags = 0x8050208, 
  flags2 = 0x1, 
  num_tx_queues = 0x8, 
  tx_itr_setting = 0x1, 
  tx_work_limit = 0x100, 
  num_rx_queues = 0x8, 
  rx_itr_setting = 0x1, 
  vxlan_port = 0x0, 
  geneve_port = 0x0, 
  tx_ring = {0xffff88088a4f7400, 0xffff88088a4f7c00, 0xffff880aac240400, 0xffff880234c3c400, 0xffff880234c3ac00, 0xffff880234c3e400, 0xffff880234c3fc00, 0xffff880234c3d400, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, 

3、總結

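根據crash信息,結合源碼分析,訪問空指針是由於ixgbe_xmit_frame_ring的tx_ring參數爲空,tx_ring是從adapter->tx_ring[skb->queue_mapping]裏獲取的,但是adapter->tx_ring[skb->queue_mapping]又不爲空!懷疑可能是有一些併發問題,tx_ring一開始獲取的確實是空的,但是獲取完後adapter的值馬上被重新賦值了(從當時的日誌看,網卡正在重新初始化過程中)。不過需要注意,第7步中queue_mapping爲0的結論是按崩潰時的RDI解析得到的,而RDI在崩潰點之前已被改寫(skb實際保存在R13中),所以該結論仍需用R13重新驗證;從adapter的輸出看,tx_ring中下標8及之後的元素均爲0x0,如果真實的queue_mapping不小於num_tx_queues(0x8),取到的tx_ring本身就是空指針,無需併發也能解釋這次宕機。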

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章