Open vSwitch key 模塊詳解

/*
 * struct sw_flow_key - per-packet fields extracted by the datapath.
 *
 * The members are grouped by layer in this listing: IP tunnel, physical,
 * link, network, transport and connection tracking (ct).
 */
struct sw_flow_key {
	/* ---- IP tunnel layer ---- */
	u8 tun_proto;			/* Protocol of encapsulating tunnel. */
	struct ip_tunnel_key tun_key;	/* Encapsulating tunnel key. */
	u8 tun_opts[255];		/* Tunnel option bytes; 255 is the largest length a u8 can express. */
	u8 tun_opts_len;		/* Number of bytes used in 'tun_opts'. */

	/* ---- Physical layer ---- */
	/* NOTE(review): in this listing 'phy' follows 'tun_opts_len', not
	 * 'tun_key' -- confirm the field order against the real header before
	 * relying on the "safe when right after" remark below.
	 */
	struct {
		u32	priority;	/* Packet QoS priority. */
		u32	skb_mark;	/* SKB mark. */
		u16	in_port;	/* Input switch port (or DP_MAX_PORTS). */
	} __packed phy; /* Safe when right after 'tun_key'. */

	/* ---- Link layer ---- */
	u32 ovs_flow_hash;		/* Datapath computed hash value.  */
	u8 mac_proto;			/* MAC layer protocol (e.g. Ethernet). */
	u32 recirc_id;			/* Recirculation ID.  */
	struct {
		u8     src[ETH_ALEN];	/* Ethernet source address. */
		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
		struct vlan_head vlan;
		struct vlan_head cvlan;
		__be16 type;		/* Ethernet frame type. */
	} eth;

	/* ---- Network layer ---- */
	union {
		struct {
			__be32 top_lse;	/* top label stack entry */
		} mpls;
		struct {
			u8     proto;	/* IP protocol or lower 8 bits of ARP opcode. */
			u8     tos;	/* IP ToS. */
			u8     ttl;	/* IP TTL/hop limit. */
			u8     frag;	/* One of OVS_FRAG_TYPE_*. */
		} ip;
	};
	union {
		struct {
			struct {
				__be32 src;	/* IP source address. */
				__be32 dst;	/* IP destination address. */
			} addr;
			union {
				struct {
					__be32 src;
					__be32 dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
				} arp;
			};
		} ipv4;
		struct {
			struct {
				struct in6_addr src;	/* IPv6 source address. */
				struct in6_addr dst;	/* IPv6 destination address. */
			} addr;
			__be32 label;			/* IPv6 flow label. */
			union {
				struct {
					struct in6_addr src;
					struct in6_addr dst;
				} ct_orig;	/* Conntrack original direction fields. */
				struct {
					struct in6_addr target;	/* ND target address. */
					u8 sll[ETH_ALEN];	/* ND source link layer address. */
					u8 tll[ETH_ALEN];	/* ND target link layer address. */
				} nd;
			};
		} ipv6;
		struct ovs_key_nsh nsh;         /* network service header */
	};

	/* ---- Transport layer ---- */
	struct {
		__be16 src;		/* TCP/UDP/SCTP source port. */
		__be16 dst;		/* TCP/UDP/SCTP destination port. */
		__be16 flags;		/* TCP flags. */
	} tp;

	/* ---- Connection tracking (ct) layer ---- */
	/* Filling a hole of two bytes. */
	u8 ct_state;
	u16 ct_zone;			/* Conntrack zone. */
	u8 ct_orig_proto;		/* CT original direction tuple IP protocol. */
	struct {
		/* Connection tracking fields not packed above. */
		struct {
			__be16 src;	/* CT orig tuple tp src port. */
			__be16 dst;	/* CT orig tuple tp dst port. */
		} orig_tp;
		u32 mark;
		struct ovs_key_ct_labels labels;
	} ct;
} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */

struct sw_flow_key結構比較複雜,下面用層次化的圖形將其表示得更清晰一些。

struct sw_flow_key

                                                                         `

IP隧道層:

                                       

物理層

                                                                                

鏈路層

                                       

網絡層

                                   

傳輸層

                                                      

ct層:

                                          

sw_flow_key解析流程

 

ip 隧道層處理

/*
 * ovs_flow_key_extract() -- excerpt: fill in the IP tunnel fields of @key.
 *
 * NOTE(review): only the tunnel-related part of the function is shown here,
 * which is why no return statement appears in this excerpt.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
			 struct sk_buff *skb, struct sw_flow_key *key)
{
	if (tun_info) {
		/* Record the tunnel protocol as reported by ip_tunnel_info_af(). */
		key->tun_proto = ip_tunnel_info_af(tun_info);
		/* Copy the tunnel key out of the tunnel info. */
		memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
		/* Build fails if the largest value 'options_len' can hold
		 * exceeds the size of the 'tun_opts' buffer.
		 */
		BUILD_BUG_ON(((1 << (sizeof(tun_info->options_len) * 8)) - 1) >
			     sizeof(key->tun_opts));
		/* Fetch the tunnel options (option data, not a function table)
		 * into the location given by TUN_METADATA_OPTS() and record
		 * their length in 'tun_opts_len'.
		 */
		if (tun_info->options_len) {
			ip_tunnel_info_opts_get(TUN_METADATA_OPTS(key, tun_info->options_len),
						tun_info);
			key->tun_opts_len = tun_info->options_len;
		} else {
			key->tun_opts_len = 0;
		}
	} else  {
		/* No IP tunnel in use: zero the tunnel protocol, option
		 * length and tunnel key.
		 */
		key->tun_proto = 0;
		key->tun_opts_len = 0;
		memset(&key->tun_key, 0, sizeof(key->tun_key));
	}
}

物理層處理

/*
 * ovs_flow_key_extract() -- excerpt: fill in the physical-layer fields of
 * @key, then hand off to key_extract() for the remaining layers.
 *
 * NOTE(review): this is an abridged excerpt; the declarations of 'res' and
 * 'err' and the final return are not shown.
 */
int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
			 struct sk_buff *skb, struct sw_flow_key *key)
{
	/* Packet priority. */
	key->phy.priority = skb->priority;
	/* Port the packet was received on. */
	key->phy.in_port = OVS_CB(skb)->input_vport->port_no;
	key->phy.skb_mark = skb->mark;
	key->ovs_flow_hash = 0;
	/* Determine the MAC-layer protocol (key_extract_mac_proto() looks at
	 * skb->dev->type) in preparation for link-layer parsing; a negative
	 * result is an error and is returned as-is.
	 */
	res = key_extract_mac_proto(skb); 
	if (res < 0)
		return res;
	key->mac_proto = res;
	key->recirc_id = 0;
	/* Parse the link, network and transport layers. */
	err = key_extract(skb, key);
}

key_extract_mac_proto

/*
 * key_extract_mac_proto - classify the MAC-layer protocol of @skb.
 *
 * Returns MAC_PROTO_ETHERNET for ARPHRD_ETHER devices, and for ARPHRD_NONE
 * devices whose skb->protocol is ETH_P_TEB; MAC_PROTO_NONE for any other
 * packet on an ARPHRD_NONE device. For every other device type it warns
 * once and returns -EINVAL.
 */
static int key_extract_mac_proto(struct sk_buff *skb)
{
	if (skb->dev->type == ARPHRD_ETHER)
		return MAC_PROTO_ETHERNET;

	if (skb->dev->type == ARPHRD_NONE) {
		/* On an ARPHRD_NONE device only ETH_P_TEB packets count as Ethernet. */
		if (skb->protocol != htons(ETH_P_TEB))
			return MAC_PROTO_NONE;
		return MAC_PROTO_ETHERNET;
	}

	/* Unexpected device type. */
	WARN_ON_ONCE(1);
	return -EINVAL;
}

鏈路層、網絡層、傳輸層的代碼分析需要 sk_buff 處理基礎。教程可參考:

https://blog.csdn.net/dog250/article/details/43637053

鏈路層處理

/*
 * key_extract() -- excerpt: link-layer part. Records the MAC header
 * position, fills key->eth and locates the network header.
 *
 * NOTE(review): abridged excerpt; the network/transport parsing and the
 * return statement are not shown here.
 */
static int key_extract(struct sk_buff *skb, struct sw_flow_key *key)
{
	int error;
	struct ethhdr *eth;
	/* On entry skb->data points exactly at the start of the MAC header,
	 * so skb_reset_mac_header() can be called directly to record that
	 * position. This is a precondition on any skb passed to
	 * key_extract().
	 */
	skb_reset_mac_header(skb);
	clear_vlan(key);
	/* No link layer present. */
	if (ovs_key_mac_proto(key) == MAC_PROTO_NONE) {
		skb_reset_network_header(skb);/* data already sits at the network header; record it */
		key->eth.type = skb->protocol;
	} else {
		eth = eth_hdr(skb);/* fetch the MAC header */
		/* Copy the source and destination MAC addresses into the key. */
		ether_addr_copy(key->eth.src, eth->h_source);
		ether_addr_copy(key->eth.dst, eth->h_dest);
		/* Advance skb->data past the two MAC addresses (2 * ETH_ALEN),
		 * i.e. to the protocol-type field that follows them in
		 * struct ethhdr.
		 */
		__skb_pull(skb, 2 * ETH_ALEN);
		/* Determine the network-layer protocol type in preparation
		 * for network-layer parsing.
		 */
		key->eth.type = parse_ethertype(skb);
		/* Record the start of the network-layer header. */
		skb_reset_network_header(skb);
		/* Restore skb->data so it points at the start of the MAC
		 * header again.
		 */
		__skb_push(skb, skb->data - skb_mac_header(skb));
	}
	/* Refresh skb->mac_len -- presumably the distance between the MAC
	 * header and the network header recorded above; confirm against
	 * skb_reset_mac_len().
	 */
	skb_reset_mac_len(skb);
}

ethhdr

/* Ethernet header: destination MAC, source MAC, then the protocol type. */
struct ethhdr {
    unsigned char h_dest[ETH_ALEN];   /* Destination MAC address. */
    unsigned char h_source[ETH_ALEN]; /* Source MAC address. */
    __u16 h_proto;                    /* Protocol type used by the network layer. */
} __attribute__((packed));            /* Tells the compiler not to pad the gaps in this struct. */

網絡層

ipv4

/*
 * IPv4 branch of key_extract() (excerpt): copy addresses, protocol, ToS and
 * TTL from the IP header into the key and classify fragmentation.
 *
 * NOTE(review): the result of check_iphdr() is stored in 'error' but not
 * tested in this excerpt before the header is read -- confirm that the full
 * function bails out on failure.
 */
if (key->eth.type == htons(ETH_P_IP)) {
		struct iphdr *nh;
		__be16 offset;
        
		error = check_iphdr(skb);
        
		nh = ip_hdr(skb);
		key->ipv4.addr.src = nh->saddr;
		key->ipv4.addr.dst = nh->daddr;

		key->ip.proto = nh->protocol;
		key->ip.tos = nh->tos;
		key->ip.ttl = nh->ttl;
/* Fragment offset: a non-zero value marks a later fragment, and the
 * function returns early.
 */
		offset = nh->frag_off & htons(IP_OFFSET);
		if (offset) {
			key->ip.frag = OVS_FRAG_TYPE_LATER;
			return 0;
		}
/* Offset is zero here: IP_MF set (or, with HAVE_SKB_GSO_UDP, a UDP GSO
 * skb) marks a first fragment; otherwise the packet is not fragmented.
 */
#ifdef HAVE_SKB_GSO_UDP
		if (nh->frag_off & htons(IP_MF) ||
			skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
#else
		if (nh->frag_off & htons(IP_MF))
#endif
			key->ip.frag = OVS_FRAG_TYPE_FIRST;
		else
			key->ip.frag = OVS_FRAG_TYPE_NONE;

ARP或RARP

/*
 * ARP/RARP branch of key_extract() (excerpt). For an Ethernet/IPv4 ARP
 * header the opcode goes into key->ip.proto, the sender/target IP addresses
 * into key->ipv4.addr and the hardware addresses into key->ipv4.arp;
 * otherwise key->ip and key->ipv4 are zeroed.
 */
else if (key->eth.type == htons(ETH_P_ARP) ||
		   key->eth.type == htons(ETH_P_RARP)) {
		struct arp_eth_header *arp;
		bool arp_available = arphdr_ok(skb);

		arp = (struct arp_eth_header *)skb_network_header(skb);

		/* 'arp' is only dereferenced when arphdr_ok() succeeded:
		 * 'arp_available' is tested first in the && chain.
		 */
		if (arp_available &&
		    arp->ar_hrd == htons(ARPHRD_ETHER) &&
		    arp->ar_pro == htons(ETH_P_IP) &&
		    arp->ar_hln == ETH_ALEN &&
		    arp->ar_pln == 4) {

			/* We only match on the lower 8 bits of the opcode. */
			if (ntohs(arp->ar_op) <= 0xff)
				key->ip.proto = ntohs(arp->ar_op);
			else
				key->ip.proto = 0;

			memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
			memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
			ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
			ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
		} else {
			memset(&key->ip, 0, sizeof(key->ip));
			memset(&key->ipv4, 0, sizeof(key->ipv4));
		}
	} 

mpls

/*
 * MPLS branch of key_extract() (excerpt): walk the label stack, remember the
 * first (top) entry in key->mpls.top_lse and leave the inner network header
 * just past the last entry read.
 *
 * NOTE(review): no check of the remaining packet length is visible in this
 * excerpt -- confirm that the full function bounds the reads.
 */
else if (eth_p_mpls(key->eth.type)) {
		/* Bytes of label stack covered so far, including the entry being read. */
		size_t stack_len = MPLS_HLEN;

		/* Start at offset skb->mac_len. */
		skb_set_inner_network_header(skb, skb->mac_len);
		while (1) {
			__be32 lse;

			/* Copy out one MPLS_HLEN-byte label stack entry. */
			memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN);

			/* Only true on the first iteration: this is the top entry. */
			if (stack_len == MPLS_HLEN)
				memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN);

			/* Step past the entries covered so far. */
			skb_set_inner_network_header(skb, skb->mac_len + stack_len);
			/* Stop at the entry that has the MPLS_LS_S_MASK bit set. */
			if (lse & htonl(MPLS_LS_S_MASK))
				break;

			stack_len += MPLS_HLEN;
		}
	} 

ipv6

/*
 * IPv6 branch of key_extract() (excerpt): parse the IPv6 header via
 * parse_ipv6hdr() and map its negative results onto the function's return
 * value, then handle the fragment cases.
 */
else if (key->eth.type == htons(ETH_P_IPV6)) {
		int nh_len;             /* IPv6 Header + Extensions */

		nh_len = parse_ipv6hdr(skb, key);
		if (unlikely(nh_len < 0)) {
			switch (nh_len) {
			case -EINVAL:
				/* Clear the IP fields and IPv6 addresses, then
				 * continue into the -EPROTO handling.
				 */
				memset(&key->ip, 0, sizeof(key->ip));
				memset(&key->ipv6.addr, 0, sizeof(key->ipv6.addr));
				/* fall-through */
			case -EPROTO:
				/* Point the transport header at the network
				 * header and report success.
				 */
				skb->transport_header = skb->network_header;
				error = 0;
				break;
			default:
				/* Any other negative value is returned unchanged. */
				error = nh_len;
			}
			return error;
		}

		/* Later fragment: return early. */
		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
			return 0;
#ifdef HAVE_SKB_GSO_UDP
		/* A UDP GSO skb is marked as a first fragment. */
		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
			key->ip.frag = OVS_FRAG_TYPE_FIRST;

#endif

nsh

/*
 * NSH branch of key_extract() (excerpt): delegate to parse_nsh() and return
 * its error code if it is non-zero.
 */
else if (key->eth.type == htons(ETH_P_NSH)) {
		error = parse_nsh(skb, key);
		if (error)
			return error;
	}

傳輸層

TCP

/*
 * TCP handling (excerpt). Both variants copy the source port, destination
 * port and TCP flags into key->tp when tcphdr_ok() accepts the header, and
 * zero key->tp otherwise.
 * NOTE(review): the two variants presumably come from the IPv4 and IPv6
 * branches respectively and are not executed back to back -- confirm.
 */
/* IPv4 variant */
if (key->ip.proto == IPPROTO_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}

		} 
/* IPv6 variant */
if (key->ip.proto == NEXTHDR_TCP) {
			if (tcphdr_ok(skb)) {
				struct tcphdr *tcp = tcp_hdr(skb);
				key->tp.src = tcp->source;
				key->tp.dst = tcp->dest;
				key->tp.flags = TCP_FLAGS_BE16(tcp);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

UDP

/*
 * UDP handling (excerpt). Both variants copy the source and destination
 * ports into key->tp when udphdr_ok() accepts the header, and zero key->tp
 * otherwise.
 */
/* IPv4 variant */
else if (key->ip.proto == IPPROTO_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 
/* IPv6 variant */
else if (key->ip.proto == NEXTHDR_UDP) {
			if (udphdr_ok(skb)) {
				struct udphdr *udp = udp_hdr(skb);
				key->tp.src = udp->source;
				key->tp.dst = udp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

SCTP

/*
 * SCTP handling (excerpt). Both variants copy the source and destination
 * ports into key->tp when sctphdr_ok() accepts the header, and zero key->tp
 * otherwise.
 */
/* IPv4 variant */
else if (key->ip.proto == IPPROTO_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 
/* IPv6 variant */
else if (key->ip.proto == NEXTHDR_SCTP) {
			if (sctphdr_ok(skb)) {
				struct sctphdr *sctp = sctp_hdr(skb);
				key->tp.src = sctp->source;
				key->tp.dst = sctp->dest;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		} 

ICMP

/*
 * ICMP handling (excerpt). The IPv4 variant stores the ICMP type and code in
 * key->tp.src / key->tp.dst; the IPv6 variant delegates to parse_icmpv6().
 * In both, key->tp is zeroed when the header check fails.
 */
/* IPv4 variant */
else if (key->ip.proto == IPPROTO_ICMP) {
			if (icmphdr_ok(skb)) {
				struct icmphdr *icmp = icmp_hdr(skb);
				/* The ICMP type and code fields use the 16-bit
				 * transport port fields, so we need to store
				 * them in 16-bit network byte order.
				 */
				key->tp.src = htons(icmp->type);
				key->tp.dst = htons(icmp->code);
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}
/* IPv6 variant: 'nh_len' is the value returned by parse_ipv6hdr() in the
 * IPv6 network-layer branch.
 */
else if (key->ip.proto == NEXTHDR_ICMP) {
			if (icmp6hdr_ok(skb)) {
				error = parse_icmpv6(skb, key, nh_len);
				if (error)
					return error;
			} else {
				memset(&key->tp, 0, sizeof(key->tp));
			}
		}

ct層:

/*
 * ovs_ct_fill_key - reset every connection-tracking field of @key to zero.
 *
 * @skb is not consulted by this variant of the function.
 */
static inline void ovs_ct_fill_key(const struct sk_buff *skb,
				   struct sw_flow_key *key)
{
	/* A zero 'ct_orig_proto' signals that the key carries no
	 * original-direction tuple fields.
	 */
	key->ct_orig_proto = 0;

	/* Wipe the remaining conntrack state. */
	memset(&key->ct.labels, 0, sizeof(key->ct.labels));
	key->ct.mark = 0;
	key->ct_zone = 0;
	key->ct_state = 0;
}

總結:

各層的協議分析一般均按以下幾個步驟:

1、定義一個該層協議頭結構體。

2、調用api從skb中獲取該協議頭。

3、根據協議頭中的字段,將各字段拷貝到sw_flow_key中。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章