/*
 * struct sw_flow_key - packet fields collected into a single flow key.
 *
 * The members are grouped below by the protocol layer they describe:
 * IP tunnel, physical, link, network, transport and connection tracking.
 * The whole structure is aligned to the size of a long so that keys can be
 * compared one long at a time.
 */
struct sw_flow_key {
	/* ---- IP tunnel layer ---- */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	struct ip_tunnel_key tun_key;	/* Encapsulating tunnel key. */
	u8 tun_opts[255];		/* Tunnel option bytes. */
	u8 tun_opts_len;		/* Number of bytes used in tun_opts. */

	/* ---- Physical layer ---- */
	struct {
		u32 priority;		/* Packet QoS priority. */
		u32 skb_mark;		/* SKB mark. */
		u16 in_port;		/* Input switch port (or DP_MAX_PORTS). */
	} __packed phy;			/* Safe when right after 'tun_key'. */
	/*
	 * NOTE(review): in this listing 'phy' follows 'tun_opts_len', not
	 * 'tun_key', so the remark above may no longer hold -- confirm the
	 * field order against the upstream header before relying on it.
	 */

	/* ---- Link layer ---- */
	u32 ovs_flow_hash;		/* Datapath computed hash value. */
	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
	u32 recirc_id;			/* Recirculation ID. */
	struct {
		u8 src[ETH_ALEN];	/* Ethernet source address. */
		u8 dst[ETH_ALEN];	/* Ethernet destination address. */
		struct vlan_head vlan;
		struct vlan_head cvlan;
		__be16 type;		/* Ethernet frame type. */
	} eth;

	/* ---- Network layer ---- */
	union {
		struct {
			__be32 top_lse;	/* top label stack entry */
		} mpls;
		struct {
			u8 proto;	/* IP protocol or lower 8 bits of ARP opcode. */
			u8 tos;		/* IP ToS. */
			u8 ttl;		/* IP TTL/hop limit. */
			u8 frag;	/* One of OVS_FRAG_TYPE_*. */
		} ip;
	};
	union {
		struct {
			struct {
				__be32 src;	/* IP source address. */
				__be32 dst;	/* IP destination address. */
			} addr;
			union {
				struct {
					__be32 src;
					__be32 dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
				} arp;
			};
		} ipv4;
		struct {
			struct {
				struct in6_addr src;	/* IPv6 source address. */
				struct in6_addr dst;	/* IPv6 destination address. */
			} addr;
			__be32 label;			/* IPv6 flow label. */
			union {
				struct {
					struct in6_addr src;
					struct in6_addr dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					struct in6_addr target;	/* ND target address. */
					u8 sll[ETH_ALEN];	/* ND source link layer address. */
					u8 tll[ETH_ALEN];	/* ND target link layer address. */
				} nd;
			};
		} ipv6;
		struct ovs_key_nsh nsh;		/* network service header */
	};

	/* ---- Transport layer ---- */
	struct {
		__be16 src;		/* TCP/UDP/SCTP source port. */
		__be16 dst;		/* TCP/UDP/SCTP destination port. */
		__be16 flags;		/* TCP flags. */
	} tp;

	/* ---- Connection tracking (ct) layer ---- */
	/* Filling a hole of two bytes. */
	u8 ct_state;
	u16 ct_zone;			/* Conntrack zone. */
	u8 ct_orig_proto;		/* CT original direction tuple IP protocol. */
	struct {
		/* Connection tracking fields not packed above. */
		struct {
			__be16 src;	/* CT orig tuple tp src port. */
			__be16 dst;	/* CT orig tuple tp dst port. */
		} orig_tp;
		u32 mark;
		struct ovs_key_ct_labels labels;
	} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
struct sw_flow_key結構比較複雜,下面用層次化的圖形將其表示得更清晰一些。
`struct sw_flow_key`
IP隧道層:
物理層
鏈路層
網絡層
傳輸層
ct層:
sw_flow_key解析流程
ip 隧道層處理
/*
 * ovs_flow_key_extract() -- excerpt showing only the IP tunnel step.
 *
 * Fills key->tun_proto, key->tun_key, key->tun_opts and key->tun_opts_len
 * from @tun_info, or zeroes the tunnel fields when @tun_info is NULL.
 *
 * NOTE(review): this is a truncated excerpt; the function is declared to
 * return int but no return statement is visible here.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
if (tun_info) {
// Get the tunnel protocol number.
key->tun_proto = ip_tunnel_info_af(tun_info);
// Copy the tunnel key into the flow key.
memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
// Build-time check: the largest value that fits in
// tun_info->options_len must not exceed sizeof(key->tun_opts).
BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
sizeof(key->tun_opts));
// Copy the tunnel options (option data, options_len bytes long) into
// the key and update tun_opts_len.
// TUN_METADATA_OPTS() presumably yields the destination inside
// key->tun_opts -- confirm against its definition.
if (tun_info->options_len) {
ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
tun_info);
key->tun_opts_len = tun_info->options_len;
} else {
key->tun_opts_len = 0;
}
} else {
// No IP tunnel in use: zero the tunnel protocol, options length
// and tunnel key.
key->tun_proto = 0;
key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
}
物理層處理
/*
 * ovs_flow_key_extract() -- excerpt showing the physical-layer step and the
 * hand-off to key_extract().
 *
 * NOTE(review): truncated excerpt; 'res' and 'err' are not declared in the
 * lines shown and no return of 'err' is visible.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
// Record the packet priority.
key->phy.priority = skb->priority;
// Record the port the packet was received on.
key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
// Record the skb mark and start with a zero flow hash.
key->phy.skb_mark = skb->mark;
key->ovs_flow_hash = 0;
// Determine the link-layer protocol (depends on skb->dev->type) in
// preparation for link-layer parsing; a negative result is an error code.
res = key_extract_mac_proto(skb);
if (res < 0)
return res;
key->mac_proto = res;
key->recirc_id = 0;
// Parse the link, network and transport layers.
err = key_extract(skb, key);
}
key_extract_mac_proto
/*
 * key_extract_mac_proto - classify the MAC-layer protocol of a packet.
 * @skb: packet whose receiving device type and protocol are inspected.
 *
 * Returns MAC_PROTO_ETHERNET for ARPHRD_ETHER devices, and for ARPHRD_NONE
 * devices when skb->protocol is ETH_P_TEB; returns MAC_PROTO_NONE for other
 * packets on ARPHRD_NONE devices. Any other device type triggers a one-time
 * warning and yields -EINVAL.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	if (skb->dev->type == ARPHRD_ETHER)
		return MAC_PROTO_ETHERNET;

	if (skb->dev->type == ARPHRD_NONE)
		return skb->protocol == htons(ETH_P_TEB) ? MAC_PROTO_ETHERNET
							 : MAC_PROTO_NONE;

	/* Unsupported device type. */
	WARN_ON_ONCE(1);
	return -EINVAL;
}
鏈路層、網絡層、傳輸層的代碼分析需要 sk_buff 處理基礎。教程可參考:
https://blog.csdn.net/dog250/article/details/43637053
鏈路層處理
/*
 * key_extract() -- excerpt showing only the link-layer step.
 *
 * Records the MAC header position, copies the Ethernet addresses and
 * ethertype into @key, and records the network header position.
 *
 * NOTE(review): truncated excerpt; the function is declared to return int
 * but no return statement is visible, and 'error' is unused in these lines.
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
int error;
struct ethhdr *eth;
/* At this point skb->data points exactly at the start of the MAC header,
 * so skb_reset_mac_header() can be called directly to record the MAC
 * header position. This is a precondition on the skb passed to
 * key_extract().
 */
skb_reset_mac_header(skb);
/* Presumably clears key->eth.vlan and key->eth.cvlan -- confirm. */
clear_vlan(key);
// No link layer (MAC_PROTO_NONE).
if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
skb_reset_network_header(skb);// Record the network header position directly.
key->eth.type = skb->protocol;
} else {
eth = eth_hdr(skb);// Get the MAC (Ethernet) header.
// Copy the source and destination MAC addresses into the flow key.
ether_addr_copy(key->eth.src, eth->h_source);
ether_addr_copy(key->eth.dst, eth->h_dest);
// Advance skb->data by 2 * ETH_ALEN, i.e. past the two MAC addresses,
// so that it points at the ethertype field (not yet the network header).
__skb_pull(skb, 2 * ETH_ALEN);
// Get the network-layer protocol type for the network-layer parsing.
// Presumably parse_ethertype() also advances skb->data past the
// ethertype (and any VLAN tags) -- confirm against its definition.
key->eth.type = parse_ethertype(skb);
// Record the start of the network-layer header at the current skb->data.
skb_reset_network_header(skb);
// Restore skb->data so that it points at the start of the MAC header again.
__skb_push(skb, skb->data - skb_mac_header(skb));
}
// NOTE(review): the original comment described this as recording the MAC
// header start; the helper name indicates it resets the MAC header
// length (skb->mac_len) -- confirm against the kernel sk_buff API.
skb_reset_mac_len(skb);
}
ethhdr
/*
 * struct ethhdr - Ethernet header: destination address, source address and
 * the protocol type of the payload, with no padding between the members.
 */
struct ethhdr {
	unsigned char h_dest[ETH_ALEN];		/* Destination MAC address. */
	unsigned char h_source[ETH_ALEN];	/* Source MAC address. */
	__u16 h_proto;				/* Protocol type used by the network layer. */
} __attribute__((packed));	/* Tells the compiler not to pad gaps in this struct. */
網絡層
ipv4
/*
 * IPv4 branch of the network-layer parsing (excerpt).
 * Copies addresses, protocol, ToS and TTL from the IP header into the key
 * and classifies the packet's fragmentation state.
 *
 * NOTE(review): truncated excerpt; the result of check_iphdr() stored in
 * 'error' is not tested in the lines shown, and the closing brace of this
 * branch is not visible.
 */
if (key->eth.type == htons(ETH_P_IP)) {
struct iphdr *nh;
__be16 offset;
error = check_iphdr(skb);
nh = ip_hdr(skb);
key->ipv4.addr.src = nh->saddr;
key->ipv4.addr.dst = nh->daddr;
key->ip.proto = nh->protocol;
key->ip.tos = nh->tos;
key->ip.ttl = nh->ttl;
// Fragment offset: a non-zero offset marks a later fragment, and
// parsing stops here for it.
offset = nh->frag_off & htons(IP_OFFSET);
if (offset) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
return 0;
}
// Fragment handling: with offset zero, the "more fragments" flag (or, when
// HAVE_SKB_GSO_UDP is defined, the SKB_GSO_UDP gso type) marks a first
// fragment; otherwise the packet is not fragmented.
#ifdef HAVE_SKB_GSO_UDP
if (nh->frag_off & htons(IP_MF) ||
skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
#else
if (nh->frag_off & htons(IP_MF))
#endif
key->ip.frag = OVS_FRAG_TYPE_FIRST;
else
key->ip.frag = OVS_FRAG_TYPE_NONE;
ARP或RARP
/*
 * ARP / RARP branch of the network-layer parsing (excerpt).
 * For an Ethernet/IPv4 ARP header, stores the opcode in key->ip.proto and the
 * sender/target IP and hardware addresses in key->ipv4; otherwise zeroes
 * key->ip and key->ipv4.
 */
else if (key->eth.type == htons(ETH_P_ARP) ||
key->eth.type == htons(ETH_P_RARP)) {
struct arp_eth_header *arp;
bool arp_available = arphdr_ok(skb);
arp = (struct arp_eth_header *)skb_network_header(skb);
/* 'arp' is only dereferenced when arp_available is true, because the
 * && chain below short-circuits. The remaining tests require Ethernet
 * hardware addresses and 4-byte IPv4 protocol addresses.
 */
if (arp_available &&
arp->ar_hrd == htons(ARPHRD_ETHER) &&
arp->ar_pro == htons(ETH_P_IP) &&
arp->ar_hln == ETH_ALEN &&
arp->ar_pln == 4) {
/* We only match on the lower 8 bits of the opcode. */
if (ntohs(arp->ar_op) <= 0xff)
key->ip.proto = ntohs(arp->ar_op);
else
key->ip.proto = 0;
/* Sender/target IP addresses, then sender/target hardware addresses. */
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
} else {
/* Header missing or not Ethernet/IPv4 ARP: clear the fields. */
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv4, 0, sizeof(key->ipv4));
}
}
mpls
/*
 * MPLS branch of the network-layer parsing (excerpt).
 * Walks the label stack one entry (MPLS_HLEN bytes) at a time, stores the
 * first entry in key->mpls.top_lse, and leaves the skb's inner network
 * header just past the entry that has the bottom-of-stack bit set.
 *
 * NOTE(review): no bounds check on the label stack is visible in this
 * excerpt -- confirm against the full function.
 */
else if (eth_p_mpls(key->eth.type)) {
size_t stack_len = MPLS_HLEN;
/* Start at the first label stack entry, mac_len bytes into the packet. */
skb_set_inner_network_header(skb, skb->mac_len);
while (1) {
__be32 lse;
memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);
/* stack_len still equals MPLS_HLEN only on the first iteration. */
if (stack_len == MPLS_HLEN)
memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);
/* Advance the inner network header past the entry just read. */
skb_set_inner_network_header(skb, skb->mac_len + stack_len);
/* Stop once the bottom-of-stack bit is set. */
if (lse & htonl(MPLS_LS_S_MASK))
break;
stack_len += MPLS_HLEN;
}
}
ipv6
/*
 * IPv6 branch of the network-layer parsing (excerpt).
 * Delegates header parsing to parse_ipv6hdr() and maps its negative results
 * onto the return value of the enclosing function.
 *
 * NOTE(review): truncated excerpt; the closing brace of this branch is not
 * visible.
 */
else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
nh_len = parse_ipv6hdr(skb, key);
if (unlikely(nh_len < 0)) {
switch (nh_len) {
case -EINVAL:
/* Clear the IP fields and IPv6 addresses, then continue as for -EPROTO. */
memset(&key->ip, 0, sizeof(key->ip));
memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
/* fall-through */
case -EPROTO:
/* Point the transport header at the network header and report success. */
skb->transport_header = skb->network_header;
error = 0;
break;
default:
/* Any other negative value is returned to the caller unchanged. */
error = nh_len;
}
return error;
}
/* Later fragments are not parsed any further. */
if (key->ip.frag == OVS_FRAG_TYPE_LATER)
return 0;
#ifdef HAVE_SKB_GSO_UDP
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
key->ip.frag = OVS_FRAG_TYPE_FIRST;
#endif
nsh
/*
 * NSH branch of the network-layer parsing (excerpt).
 * Delegates to parse_nsh() and returns its non-zero result to the caller.
 */
else if (key->eth.type == htons(ETH_P_NSH)) {
error = parse_nsh(skb, key);
if (error)
return error;
}
傳輸層
TCP
//ipv4
if (key->ip.proto == IPPROTO_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
//ipv6
if (key->ip.proto == NEXTHDR_TCP) {
if (tcphdr_ok(skb)) {
struct tcphdr *tcp = tcp_hdr(skb);
key->tp.src = tcp->source;
key->tp.dst = tcp->dest;
key->tp.flags = TCP_FLAGS_BE16(tcp);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
UDP
//ipv4
else if (key->ip.proto == IPPROTO_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
//ipv6
else if (key->ip.proto == NEXTHDR_UDP) {
if (udphdr_ok(skb)) {
struct udphdr *udp = udp_hdr(skb);
key->tp.src = udp->source;
key->tp.dst = udp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
SCTP
//ipv4
else if (key->ip.proto == IPPROTO_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
//ipv6
else if (key->ip.proto == NEXTHDR_SCTP) {
if (sctphdr_ok(skb)) {
struct sctphdr *sctp = sctp_hdr(skb);
key->tp.src = sctp->source;
key->tp.dst = sctp->dest;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
ICMP
//ipv4
else if (key->ip.proto == IPPROTO_ICMP) {
if (icmphdr_ok(skb)) {
struct icmphdr *icmp = icmp_hdr(skb);
/* The ICMP type and code fields use the 16-bit
* transport port fields, so we need to store
* them in 16-bit network byte order.
*/
key->tp.src = htons(icmp->type);
key->tp.dst = htons(icmp->code);
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
//ipv6
else if (key->ip.proto == NEXTHDR_ICMP) {
if (icmp6hdr_ok(skb)) {
error = parse_icmpv6(skb, key, nh_len);
if (error)
return error;
} else {
memset(&key->tp, 0, sizeof(key->tp));
}
}
ct層:
/*
 * ovs_ct_fill_key - reset every connection-tracking field of a flow key.
 * @skb: packet being keyed; not read by this variant.
 * @key: flow key whose ct_state, ct_zone, ct_orig_proto, ct.mark and
 *       ct.labels are set to zero.
 */
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
				   struct sw_flow_key *key)
{
	/* A zero 'ct_orig_proto' marks the original-direction key fields
	 * as absent.
	 */
	key->ct_orig_proto = 0;

	memset(&key->ct.labels, 0, sizeof(key->ct.labels));
	key->ct.mark = 0;

	key->ct_zone = 0;
	key->ct_state = 0;
}
總結:
各層的協議分析一般均按以下幾個步驟進行:
1、定義一個該層協議頭結構體。
2、調用 API 從 skb 中獲取該協議頭。
3、根據協議頭中的字段,將各字段拷貝到 sw_flow_key 中。