感謝我的老大馬大師,我們花了很多時間才找出第三個步驟。
網卡的 IP/TCP/UDP checksum offload 功能,就是將報文的 IP、TCP、UDP 校驗和交給網卡計算,以節省 CPU 的消耗。
第一步
/*
 * Default Ethernet port configuration.
 *
 * RX uses RSS multi-queue mode with a 40-byte key, and hw_ip_checksum is
 * the only RX feature switch set to 1. TX multi-queue mode is
 * ETH_MQ_TX_NONE.
 */
static struct rte_eth_conf default_port_conf = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_RSS,
		.max_rx_pkt_len = ETHER_MAX_LEN,
		.split_hdr_size = 0, /* header buffer size */
		.header_split   = 0, /* header split: off */
		.hw_ip_checksum = 1, /* IP checksum offload: on */
		.hw_vlan_filter = 0, /* VLAN filtering: off */
		.hw_vlan_strip  = 0, /* VLAN stripping: off */
		.hw_vlan_extend = 0, /* extended VLAN: off */
		.jumbo_frame    = 0, /* jumbo frames: off */
		.hw_strip_crc   = 0, /* hardware CRC stripping: off */
		.enable_lro     = 0, /* LRO: off */
	},
	.rx_adv_conf.rss_conf = {
		.rss_key     = default_rsskey_40bytes,
		.rss_key_len = 40,
		.rss_hf      = ETH_RSS_PROTO_MASK,
	},
	.txmode.mq_mode = ETH_MQ_TX_NONE,
};
文檔出處:http://dpdk.org/doc/api-2.2/rte__mbuf_8h.html
#define PKT_TX_L4_NO_CKSUM (0ULL << 52)
Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved, 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware L4 checksum offload, the user needs to:
- fill l2_len and l3_len in mbuf
- set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM
- set the flag PKT_TX_IPV4 or PKT_TX_IPV6
- calculate the pseudo header checksum and set it in the L4 header (only for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum().
For SCTP, set the crc field to 0.
PKT_TX_L4_NO_CKSUM: Disable L4 cksum of TX pkt.
/*
 * Return the pseudo-header checksum for the L3 header at l3_hdr.
 *
 * The IPv4 helper is used when PKT_TX_IPV4 is set in ol_flags; any other
 * flag combination is handled as IPv6. ol_flags is forwarded unchanged to
 * the selected helper.
 */
static uint16_t
get_psd_sum(void *l3_hdr, uint64_t ol_flags)
{
	/* Not flagged as IPv4: assume the header is IPv6. */
	if ((ol_flags & PKT_TX_IPV4) == 0)
		return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);

	return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
}
/*
 * Prepare an Ethernet frame for hardware TX checksum offload.
 *
 * Walks the Ethernet header (plus at most one VLAN tag), then the IPv4 or
 * IPv6 header, and finally the UDP/TCP/SCTP header. Along the way it:
 *   - stores the L2 and L3 header lengths in info (and, when TSO is
 *     requested through the file-scope tso_segsz, the segment size and
 *     L4 header length),
 *   - zeroes the IPv4 header checksum,
 *   - seeds the UDP/TCP checksum field with the pseudo-header checksum,
 *     or zeroes the SCTP checksum field,
 *   - accumulates the matching PKT_TX_* flags.
 *
 * eth_hdr: start of the frame; headers are modified in place.
 *          NOTE(review): the frame length is not validated here; the
 *          caller must make sure all parsed headers are readable.
 * info:    output for the header lengths / TSO segment size.
 *
 * Returns the PKT_TX_* flags to set on the mbuf, or 0 when the ethertype
 * is neither IPv4 nor IPv6 (nothing to offload).
 */
static uint64_t
process_inner_cksums(struct ether_hdr *eth_hdr, union tunnel_offload_info *info)
{
	void *l3_hdr = NULL;
	uint8_t l4_proto;
	uint16_t ethertype;
	struct ipv4_hdr *ipv4_hdr;
	struct ipv6_hdr *ipv6_hdr;
	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	struct sctp_hdr *sctp_hdr;
	uint64_t ol_flags = 0;

	info->l2_len = sizeof(struct ether_hdr);
	ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);

	/* Skip a single VLAN tag if present. */
	if (ethertype == ETHER_TYPE_VLAN) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		info->l2_len += sizeof(struct vlan_hdr);
		ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
	}

	l3_hdr = (char *)eth_hdr + info->l2_len;

	if (ethertype == ETHER_TYPE_IPv4) {
		ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
		ipv4_hdr->hdr_checksum = 0;
		ol_flags |= PKT_TX_IPV4;
		ol_flags |= PKT_TX_IP_CKSUM;
		/*
		 * The low nibble of version_ihl is the header length in
		 * 32-bit words. Using it instead of sizeof(struct ipv4_hdr)
		 * keeps l3_len and the L4 header offset correct when the
		 * packet carries IP options.
		 */
		info->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4;
		l4_proto = ipv4_hdr->next_proto_id;
	} else if (ethertype == ETHER_TYPE_IPv6) {
		ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
		info->l3_len = sizeof(struct ipv6_hdr);
		l4_proto = ipv6_hdr->proto;
		ol_flags |= PKT_TX_IPV6;
	} else {
		return 0; /* packet type not supported, nothing to do */
	}

	if (l4_proto == IPPROTO_UDP) {
		udp_hdr = (struct udp_hdr *)((char *)l3_hdr + info->l3_len);
		ol_flags |= PKT_TX_UDP_CKSUM;
		udp_hdr->dgram_cksum = get_psd_sum(l3_hdr, ol_flags);
	} else if (l4_proto == IPPROTO_TCP) {
		tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
		ol_flags |= PKT_TX_TCP_CKSUM;
		if (tso_segsz != 0) {
			ol_flags |= PKT_TX_TCP_SEG;
			info->tso_segsz = tso_segsz;
			/*
			 * The high nibble of data_off is the TCP header
			 * length in 32-bit words; ">> 2" converts it to
			 * bytes so TCP options are included in l4_len.
			 */
			info->l4_len = (tcp_hdr->data_off & 0xf0) >> 2;
		}
		/*
		 * Compute the pseudo-header checksum only after the TSO flag
		 * is final: the helper receives ol_flags so that it can
		 * build the TSO variant of the pseudo header (without the
		 * payload length) when PKT_TX_TCP_SEG is set.
		 */
		tcp_hdr->cksum = get_psd_sum(l3_hdr, ol_flags);
	} else if (l4_proto == IPPROTO_SCTP) {
		sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
		sctp_hdr->cksum = 0;
		ol_flags |= PKT_TX_SCTP_CKSUM;
	}

	return ol_flags;
}
/*
 * For each L4 checksum offload (UDP, TCP, SCTP) that the port does not
 * advertise in tx_offload_capa, set the matching "no checksum" bit in the
 * default TX queue flags, then set up the TX queue with that configuration.
 */
if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_UDP_CKSUM) == 0)
	dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMUDP;

if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_CKSUM) == 0)
	dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMTCP;

if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_SCTP_CKSUM) == 0)
	dev_info.default_txconf.txq_flags |= ETH_TXQ_FLAGS_NOXSUMSCTP;

/* NOTE(review): the return value of the queue setup call is not checked. */
rte_eth_tx_queue_setup(port_id, q, TX_QUEUE_SIZE, socketid,
		&dev_info.default_txconf);
參考例子:DPDK 源碼 app 目錄下的 test-pmd 程序,其中的 process_inner_cksums 函數有相關操作。