1、在某個機器上出現機器宕機,通過kdump日誌,初步看是因爲ixgbe裏有空指針訪問
2、使用crash分析空指針訪問原因
1)、安裝kernel debug包(包含debug調試信息的vmlinux);
2)、使用crash打開vmcore( crash /usr/lib/debug/usr/lib/modules/3.10.0-327/vmlinux /home/vmcore ),先用dis命令看下RIP地址的彙編信息;從彙編指令看此時正在訪問rbx寄存器;
3)、從bt裏看下現場信息,發現rbx確實爲空;而且從調用棧裏看,此時正在執行ixgbe_xmit_frame_ring函數;
4)、對ixgbe_xmit_frame_ring做下反彙編,追蹤下rbx的來源;從彙編裏看出rbx是從rdx裏賦值過來的,按x86-64(System V ABI)的函數調用約定,函數調用時,%rdi,%rsi,%rdx,%rcx,%r8,%r9分別用來傳遞第1、2、3、4、5、6個參數,因此這裏的rdx表示的是ixgbe_xmit_frame_ring的第三個參數;
crash> dis -l ixgbe_xmit_frame_ring
0xffffffffc05cdd90 <ixgbe_xmit_frame_ring>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc05cdd95 <ixgbe_xmit_frame_ring+5>: push %rbp
0xffffffffc05cdd96 <ixgbe_xmit_frame_ring+6>: mov %rsp,%rbp
0xffffffffc05cdd99 <ixgbe_xmit_frame_ring+9>: push %r15
0xffffffffc05cdd9b <ixgbe_xmit_frame_ring+11>: push %r14
0xffffffffc05cdd9d <ixgbe_xmit_frame_ring+13>: mov %rsi,%r14
0xffffffffc05cdda0 <ixgbe_xmit_frame_ring+16>: push %r13
0xffffffffc05cdda2 <ixgbe_xmit_frame_ring+18>: mov %rdi,%r13
0xffffffffc05cdda5 <ixgbe_xmit_frame_ring+21>: push %r12
0xffffffffc05cdda7 <ixgbe_xmit_frame_ring+23>: push %rbx
0xffffffffc05cdda8 <ixgbe_xmit_frame_ring+24>: mov %rdx,%rbx //rbx賦值的地方
0xffffffffc05cddab <ixgbe_xmit_frame_ring+27>: sub $0x40,%rsp
0xffffffffc05cddaf <ixgbe_xmit_frame_ring+31>: movzwl 0x7e(%rdi),%r15d
0xffffffffc05cddb4 <ixgbe_xmit_frame_ring+36>: movb $0x0,-0x35(%rbp)
0xffffffffc05cddb8 <ixgbe_xmit_frame_ring+40>: mov %gs:0x28,%rax
0xffffffffc05cddc1 <ixgbe_xmit_frame_ring+49>: mov %rax,-0x30(%rbp)
0xffffffffc05cddc5 <ixgbe_xmit_frame_ring+53>: xor %eax,%eax
0xffffffffc05cddc7 <ixgbe_xmit_frame_ring+55>: mov 0x68(%rdi),%eax
0xffffffffc05cddca <ixgbe_xmit_frame_ring+58>: lea 0x3fff(%rax),%ecx
0xffffffffc05cddd0 <ixgbe_xmit_frame_ring+64>: sub 0x6c(%rdi),%ecx
0xffffffffc05cddd3 <ixgbe_xmit_frame_ring+67>: mov 0xdc(%rdi),%edi
0xffffffffc05cddd9 <ixgbe_xmit_frame_ring+73>: add 0xe0(%r13),%rdi
0xffffffffc05cdde0 <ixgbe_xmit_frame_ring+80>: shr $0xe,%ecx
0xffffffffc05cdde3 <ixgbe_xmit_frame_ring+83>: movzbl (%rdi),%r8d
0xffffffffc05cdde7 <ixgbe_xmit_frame_ring+87>: test %r8w,%r8w
0xffffffffc05cddeb <ixgbe_xmit_frame_ring+91>: je 0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>
0xffffffffc05cdded <ixgbe_xmit_frame_ring+93>: sub $0x1,%r8d
0xffffffffc05cddf1 <ixgbe_xmit_frame_ring+97>: xor %eax,%eax
0xffffffffc05cddf3 <ixgbe_xmit_frame_ring+99>: movzwl %r8w,%r8d
0xffffffffc05cddf7 <ixgbe_xmit_frame_ring+103>: add $0x1,%r8
0xffffffffc05cddfb <ixgbe_xmit_frame_ring+107>: shl $0x4,%r8
0xffffffffc05cddff <ixgbe_xmit_frame_ring+111>: nop
0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>: mov 0x3c(%rdi,%rax,1),%esi
0xffffffffc05cde04 <ixgbe_xmit_frame_ring+116>: add $0x10,%rax
0xffffffffc05cde08 <ixgbe_xmit_frame_ring+120>: lea 0x3fff(%rsi),%edx
0xffffffffc05cde0e <ixgbe_xmit_frame_ring+126>: shr $0xe,%edx
0xffffffffc05cde11 <ixgbe_xmit_frame_ring+129>: add %edx,%ecx
0xffffffffc05cde13 <ixgbe_xmit_frame_ring+131>: cmp %r8,%rax
0xffffffffc05cde16 <ixgbe_xmit_frame_ring+134>: jne 0xffffffffc05cde00 <ixgbe_xmit_frame_ring+112>
0xffffffffc05cde18 <ixgbe_xmit_frame_ring+136>: movzwl 0x58(%rbx),%eax //訪問空指針的地方
0xffffffffc05cde1c <ixgbe_xmit_frame_ring+140>: movzwl 0x5a(%rbx),%esi
0xffffffffc05cde20 <ixgbe_xmit_frame_ring+144>: add $0x3,%ecx
0xffffffffc05cde23 <ixgbe_xmit_frame_ring+147>: xor %edx,%edx
5)、結合源碼分析下函數ixgbe_xmit_frame_ring,不難分析出rbx表示的是tx_ring,由於tx_ring爲空,函數在調用ixgbe_desc_unused時,訪問tx_ring->next_to_clean時出現異常,next_to_clean正好位於tx_ring的0x58偏移處,與RIP信息一致;
/*
 * Abridged excerpt of the ixgbe transmit routine as quoted in this analysis;
 * only the descriptor-count check at the start of the function is shown.
 *
 * skb:     packet to transmit
 * adapter: driver private data (unused in the part shown here)
 * tx_ring: ring the packet is queued on; it is passed to
 *          ixgbe_maybe_stop_tx() and dereferenced without a NULL check
 *
 * In the part shown, returns NETDEV_TX_BUSY when ixgbe_maybe_stop_tx()
 * returns non-zero for count + 3 descriptors.
 */
netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
struct ixgbe_adapter *adapter,
struct ixgbe_ring *tx_ring)
{
struct ixgbe_tx_buffer *first;
int tso;
u32 tx_flags = 0;
unsigned short f;
/* descriptors needed for the linear (skb_headlen) part of the packet */
u16 count = TXD_USE_COUNT(skb_headlen(skb));
__be16 protocol = skb->protocol;
u8 hdr_len = 0;
/*
* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
* + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD,
* + 2 desc gap to keep tail from touching head,
* + 1 desc for context descriptor,
* otherwise try next time
*/
/* add the descriptors needed for every page fragment of the skb */
for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
/*
 * The "+ 3" is the 2-descriptor gap plus the context descriptor listed
 * above. Per the disassembly in this write-up, the first read through
 * tx_ring (offset 0x58) happens right after the fragment loop, i.e. here.
 */
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
tx_ring->tx_stats.tx_busy++;
return NETDEV_TX_BUSY;
}
}
/*
 * Return the number of unused descriptors in @ring, computed from
 * next_to_clean (ntc), next_to_use (ntu) and count. When ntc is not ahead
 * of ntu the ring size is added, so the distance wraps around the ring.
 * The trailing "- 1" makes the result one less than that distance.
 *
 * @ring is dereferenced unconditionally, so a NULL ring faults on the
 * first read (ring->next_to_clean).
 */
static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring)
{
u16 ntc = ring->next_to_clean;
u16 ntu = ring->next_to_use;
return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
}
6)、再進一步看ixgbe_xmit_frame_ring的調用關係,會發現tx_ring是由adapter->tx_ring[skb->queue_mapping]得到的;
/*
 * Pad the skb to the minimum length, pick the TX ring and hand the packet
 * to ixgbe_xmit_frame_ring().
 *
 * skb:    packet to transmit
 * netdev: network device; the adapter is obtained via netdev_priv()
 * ring:   explicit TX ring, or NULL to select
 *         adapter->tx_ring[skb->queue_mapping]
 *
 * Returns NETDEV_TX_OK if skb_put_padto() returns non-zero, otherwise the
 * result of ixgbe_xmit_frame_ring(). The selected tx_ring is not checked
 * for NULL before it is passed on.
 */
static netdev_tx_t __ixgbe_xmit_frame(struct sk_buff *skb,
struct net_device *netdev,
struct ixgbe_ring *ring)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
struct ixgbe_ring *tx_ring;
/*
* The minimum packet size for olinfo paylen is 17 so pad the skb
* in order to meet this minimum size requirement.
*/
if (skb_put_padto(skb, 17))
return NETDEV_TX_OK;
// Per this analysis the ring argument is always NULL on this path, so tx_ring is taken from adapter->tx_ring[skb->queue_mapping]
tx_ring = ring ? ring : adapter->tx_ring[skb->queue_mapping];
return ixgbe_xmit_frame_ring(skb, adapter, tx_ring);
}
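7)、接下來看下adapter->tx_ring[skb->queue_mapping]是否爲空,首先得先找到skb->queue_mapping的值,由於skb是ixgbe_xmit_frame_ring的第一個參數,第一個參數保存在rdi裏,因此先看下rdi描述的skb的信息;從以下的輸出裏可以看出skb->queue_mapping爲0,因此tx_ring即爲adapter->tx_ring[0]所表示的值;(注意:rdi只在函數入口處才保存skb。從上面的反彙編可以看到,rdi在+67、+73處已經被改寫,而入口時的skb在+18處被保存到了r13,因此出錯現場裏skb的地址應以R13(ffff88132b78eb00)爲準。下面按RDI解析出來的內容很可能不是真正的skb:例如其中len爲0,而+55處從skb偏移0x68(應是skb->len)讀到RAX裏的值是0x5ea。queue_mapping的值建議再用 struct sk_buff -x ffff88132b78eb00 覈對一次。)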
crash> bt
PID: 19 TASK: ffff880169748fe0 CPU: 2 COMMAND: "ksoftirqd/2"
#0 [ffff8801697578d8] machine_kexec at ffffffff8105c54b
#1 [ffff880169757938] __crash_kexec at ffffffff81105b82
#2 [ffff880169757a08] crash_kexec at ffffffff81105c70
#3 [ffff880169757a20] oops_end at ffffffff816bb078
#4 [ffff880169757a48] no_context at ffffffff816ab189
#5 [ffff880169757a98] __bad_area_nosemaphore at ffffffff816ab21f
#6 [ffff880169757ae0] bad_area_nosemaphore at ffffffff816ab389
#7 [ffff880169757af0] __do_page_fault at ffffffff816bdf3e
#8 [ffff880169757b50] do_page_fault at ffffffff816be0e5
#9 [ffff880169757b80] page_fault at ffffffff816ba308
[exception RIP: ixgbe_xmit_frame_ring+136]
RIP: ffffffffc05cde18 RSP: ffff880169757c30 RFLAGS: 00010246
RAX: 00000000000005ea RBX: 0000000000000000 RCX: 0000000000000001
RDX: 0000000000000000 RSI: ffff88203d4208c0 RDI: ffff88188fd90ec0
RBP: ffff880169757c98 R8: 0000000000000000 R9: ffffffff8157d137
R10: ffff88103fc99f40 R11: ffffea008062e900 R12: ffff88203d420000
R13: ffff88132b78eb00 R14: ffff88203d4208c0 R15: 0000000000000008
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#10 [ffff880169757ca0] ixgbe_xmit_frame at ffffffffc05cec2f [ixgbe]
#11 [ffff880169757cd0] dev_hard_start_xmit at ffffffff815927d1
#12 [ffff880169757d40] sch_direct_xmit at ffffffff815bd2ba
#13 [ffff880169757d90] __qdisc_run at ffffffff815bd470
#14 [ffff880169757dd8] net_tx_action at ffffffff81591cc8
#15 [ffff880169757e10] __do_softirq at ffffffff810916af
#16 [ffff880169757e80] run_ksoftirqd at ffffffff81091878
#17 [ffff880169757e98] smpboot_thread_fn at ffffffff810b9e0f
#18 [ffff880169757ec8] kthread at ffffffff810b16ff
#19 [ffff880169757f50] ret_from_fork at ffffffff816c2cd8
crash> struct sk_buff -x ffff88188fd90ec0
struct sk_buff {
next = 0x0,
prev = 0x0,
{
tstamp = {
tv64 = 0x0
},
skb_mstamp = {
{
v64 = 0x0,
{
stamp_us = 0x0,
stamp_jiffies = 0x0
}
}
}
},
sk = 0x0,
dev = 0x100000000,
cb = "\000\000\000\000\000\000\000\000\000\317\025\\\000\352\377\377*\b\000\000\337\003\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
_skb_refdst = 0x0,
sp = 0x0,
len = 0x0,
data_len = 0x0,
mac_len = 0x0,
hdr_len = 0x0,
{
csum = 0x0,
{
csum_start = 0x0,
csum_offset = 0x0
}
},
priority = 0x0,
ignore_df = 0x0,
cloned = 0x0,
ip_summed = 0x0,
nohdr = 0x0,
nfctinfo = 0x0,
pkt_type = 0x0,
fclone = 0x0,
ipvs_property = 0x0,
peeked = 0x0,
nf_trace = 0x0,
protocol = 0x0,
destructor = 0x0,
nfct = 0x0,
nf_bridge = 0x0,
headers_start = 0xffff88188fd90f58,
skb_iif = 0x0,
{
hash = 0x0,
__UNIQUE_ID_rh_kabi_hide35 = {
rxhash = 0x0
},
{<No data fields>}
},
vlan_proto = 0x0,
vlan_tci = 0x0,
tc_index = 0x0,
tc_verd = 0x0,
queue_mapping = 0x0,
ndisc_nodetype = 0x0,
pfmemalloc = 0x0,
ooo_okay = 0x0,
8)、進一步分析adapter的值,adapter作爲ixgbe_xmit_frame_ring的第二個參數保存在rsi裏(+13處也被保存到了r14,現場中RSI與R14的值一致,均爲ffff88203d4208c0),因此分析下rsi表示的adapter的信息(一開始會報ixgbe_adapter結構未定義,需要先加載ixgbe模塊的調試信息),從以下的輸出信息裏可以看到adapter->tx_ring[0]爲0xffff88088a4f7400,並不會爲空;
crash> struct ixgbe_adapter -x ffff88203d4208c0
struct: invalid data structure reference: ixgbe_adapter
crash> mod -s ixgbe /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
MODULE NAME SIZE OBJECT FILE
ffffffffc0600000 ixgbe 301698 /usr/lib/debug/usr/lib/modules/3.10.0-327/kernel/drivers/net/ethernet/intel/ixgbe/ixgbe.ko.debug
crash>
crash>
crash>
crash>
crash> struct ixgbe_adapter -x ffff88203d4208c0
struct ixgbe_adapter {
active_vlans = {0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
netdev = 0xffff88203d420000,
pdev = 0xffff8810e923d000,
state = 0x140,
flags = 0x8050208,
flags2 = 0x1,
num_tx_queues = 0x8,
tx_itr_setting = 0x1,
tx_work_limit = 0x100,
num_rx_queues = 0x8,
rx_itr_setting = 0x1,
vxlan_port = 0x0,
geneve_port = 0x0,
tx_ring = {0xffff88088a4f7400, 0xffff88088a4f7c00, 0xffff880aac240400, 0xffff880234c3c400, 0xffff880234c3ac00, 0xffff880234c3e400, 0xffff880234c3fc00, 0xffff880234c3d400, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
3、總結
根據crash信息,結合源碼分析,訪問空指針是由於ixgbe_xmit_frame_ring的tx_ring參數爲空,tx_ring是從adapter->tx_ring[skb->queue_mapping]裏獲取的,但是從vmcore裏看adapter->tx_ring[skb->queue_mapping]又不爲空!懷疑可能是有一些併發問題:tx_ring一開始獲取到的確實是空的,但是獲取完後adapter->tx_ring[]裏對應的指針馬上又被重新賦值了(從當時的日誌看,網卡正處於重新初始化的過程中)。