UDP套接字的端口號綁定發生在兩種場景:
- 應用程序顯式地調用bind()系統調用實現端口綁定,一般服務器端編程時會這麼做;
- UDP套接字創建後是可以直接使用sendto()發送數據的,這種情況下會由kernel自動爲該套接字綁定一個可用端口。
當然,無論是哪種情況,它們最終都是使用同一個接口進行端口分配的。
這篇筆記就來分析下UDP套接字的端口綁定相關代碼的實現,主要是bind()系統調用的實現,最後也會簡單的看下端口自動綁定相關的實現。
系統調用bind()的函數調用關係如下圖所示:
注意:bind()可以綁定IP地址和端口號,只不過這裏我們只關心端口的綁定過程
1. 調用入口inet_bind()
實際上,系統調用bind()的真正入口在net/socket.c中,但是由於通用入口實際上做的只是文件描述符到struct socket的映射,然後就調用各個協議族提供的綁定接口,這種轉換不是我們關心的內容,所以我們從AF_INET協議族的綁定入口函數開始分析。
/*
 * inet_bind - AF_INET handler behind the bind() system call.
 *
 * @sock:     socket being bound
 * @uaddr:    requested local address, interpreted as struct sockaddr_in
 * @addr_len: length of @uaddr in bytes
 *
 * The parameters carry the same meaning as those of the bind() system call.
 *
 * Returns 0 on success. On failure returns -EINVAL (address too short,
 * socket not closed, or already bound), -EADDRNOTAVAIL (address not
 * bindable), -EACCES (reserved port without the capability) or
 * -EADDRINUSE (the protocol's get_port() refused the port). When the
 * protocol supplies its own bind() hook, that hook's result is returned.
 */
int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
struct sock *sk = sock->sk;
struct inet_sock *inet = inet_sk(sk);
unsigned short snum;
int chk_addr_ret;
int err;
//If the protocol supplies its own bind() hook, delegate the whole operation
//to it. Per the original note below, that is the RAW socket case.
/* If the socket has its own bind function then use it. (RAW) */
if (sk->sk_prot->bind) {
err = sk->sk_prot->bind(sk, uaddr, addr_len);
goto out;
}
//Validate the address length supplied by the caller
err = -EINVAL;
if (addr_len < sizeof(struct sockaddr_in))
goto out;
//Classify the requested IP address; the result is compared against
//RTN_LOCAL / RTN_MULTICAST / RTN_BROADCAST below. The lookup is done
//against init_net.
chk_addr_ret = inet_addr_type(&init_net, addr->sin_addr.s_addr);
//Reject an address that is neither INADDR_ANY nor local, multicast or
//broadcast, unless non-local binds are allowed through
//sysctl_ip_nonlocal_bind or the socket's freebind flag.
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
* allowing applications to make a non-local bind solves
* several problems with systems using dynamic addressing.
* (ie. your servers still start up even if your ISDN link
* is temporarily down)
*/
err = -EADDRNOTAVAIL;
if (!sysctl_ip_nonlocal_bind &&
!inet->freebind &&
addr->sin_addr.s_addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
chk_addr_ret != RTN_MULTICAST &&
chk_addr_ret != RTN_BROADCAST)
goto out;
//Keep the requested port in host byte order in snum
snum = ntohs(addr->sin_port);
err = -EACCES;
//A non-zero port below PROT_SOCK (the original note gives this boundary
//as 1024) may only be bound by a caller holding CAP_NET_BIND_SERVICE
if (snum && snum < PROT_SOCK && !capable(CAP_NET_BIND_SERVICE))
goto out;
/* We keep a pair of addresses. rcv_saddr is the one
* used by hash lookups, and saddr is used for transmit.
*
* In the BSD API these are the same except where it
* would be illegal to use them (multicast/broadcast) in
* which case the sending device address is used.
*/
lock_sock(sk);
/* Check these errors (active socket, double bind). */
err = -EINVAL;
//Refuse the bind if the socket is not in TCP_CLOSE state or already has a
//port: inet->num is non-zero once a port has been bound, and its
//network-order copy is stored in inet->sport further down
if (sk->sk_state != TCP_CLOSE || inet->num)
goto out_release_sock;
//Record the requested IP address in both address fields; for a multicast
//or broadcast address saddr is cleared so the sending device address is
//used for transmit (see the comment above lock_sock())
inet->rcv_saddr = inet->saddr = addr->sin_addr.s_addr;
if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
inet->saddr = 0; /* Use device */
//Let the protocol perform the actual port allocation. Per the original
//note this is inet_csk_get_port() for TCP and udp_v4_get_port() for UDP,
//which differs from the call-graph figure because of a kernel change.
//On failure the addresses recorded above are rolled back.
/* Make sure we are allowed to bind here. */
if (sk->sk_prot->get_port(sk, snum)) {
inet->saddr = inet->rcv_saddr = 0;
err = -EADDRINUSE;
goto out_release_sock;
}
//Bind succeeded: flag which of address and port were explicitly requested
if (inet->rcv_saddr)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
//inet->sport holds the bound port in network byte order, inet->num holds
//the same port in host byte order
inet->sport = htons(inet->num);
inet->daddr = 0;
inet->dport = 0;
//Reset the socket's cached routing information
sk_dst_reset(sk);
err = 0;
out_release_sock:
release_sock(sk);
out:
return err;
}
注:inet_bind()實際上是AF_INET協議族提供的通用bind()處理過程,TCP和UDP是共用該函數的,它們只有在get_port()處有區別。
2. UDP端口分配udp_v4_get_port()
/*
 * udp_v4_get_port - IPv4 UDP implementation of the get_port() hook.
 *
 * @sk:   socket to bind
 * @snum: requested port in host byte order, 0 to have one picked
 *
 * Delegates to udp_lib_get_port(), passing ipv4_rcv_saddr_equal() as the
 * callback that decides whether two sockets' bound local addresses match.
 * Returns whatever udp_lib_get_port() returns.
 */
int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
	int rc;

	rc = udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal);
	return rc;
}
/**
 * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6
 *
 * @sk: socket struct in question
 * @snum: port number to look up
 * @saddr_comp: AF-dependent comparison of bound local IP addresses
 *
 * When @snum is 0 a free port inside the local port range is searched
 * for, otherwise @snum itself is checked for conflicts. On success the
 * port is stored in the socket and the socket is added to the UDP hash
 * table if it was not hashed yet.
 *
 * Returns 0 on success, 1 if no usable port was found.
 */
int udp_lib_get_port(struct sock *sk, unsigned short snum,
int (*saddr_comp)(const struct sock *sk1,
const struct sock *sk2))
{
struct udp_hslot *hslot;
//Bound UDP sockets are kept in the hash table the protocol points to
struct udp_table *udptable = sk->sk_prot->h.udp_table;
int error = 1;
struct net *net = sock_net(sk);
//No specific port requested: pick an unused one automatically
if (!snum) {
int low, high, remaining;
unsigned rand;
unsigned short first, last;
//Bitmap of PORTS_PER_CHAIN bits; bit (port / UDP_HTABLE_SIZE) marks a
//port of the chain currently being examined as in use.
//NOTE(review): the original note described this as holding 65536 bits,
//but it is sized PORTS_PER_CHAIN and indexed by port / UDP_HTABLE_SIZE -
//confirm the macro's value.
DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN);
//Fetch the port range used for automatic allocation. Per the original
//note the bounds come from sysctl_local_ports.range[0] and range[1],
//start out as [32768, 61000], can be changed through
//"/proc/sys/net/ipv4/ip_local_port_range", and are shared by TCP and UDP.
inet_get_local_port_range(&low, &high);
remaining = (high - low) + 1;
//Pick a random starting port by scaling rand into the range
rand = net_random();
first = (((u64)rand * remaining) >> 32) + low;
/*
* force rand to be an odd multiple of UDP_HTABLE_SIZE
*/
rand = (rand | 1) * UDP_HTABLE_SIZE;
for (last = first + UDP_HTABLE_SIZE; first != last; first++) {
//hslot is the hash chain that port 'first' maps to
hslot = &udptable->hash[udp_hashfn(net, first)];
//Clear the bitmap before scanning this chain
bitmap_zero(bitmap, PORTS_PER_CHAIN);
spin_lock_bh(&hslot->lock);
//Mark in the bitmap every port on this chain that conflicts with sk.
//The port argument is ignored by the callee when a bitmap is passed.
udp_lib_lport_inuse(net, snum, hslot, bitmap, sk, saddr_comp);
snum = first;
/*
* Iterate on all possible values of snum for this hash.
* Using steps of an odd multiple of UDP_HTABLE_SIZE
* give us randomization and full range coverage.
*/
do {
//Inside [low, high] and not marked in use: take this port.
//The jump leaves with hslot->lock still held.
if (low <= snum && snum <= high &&
!test_bit(snum / UDP_HTABLE_SIZE, bitmap))
goto found;
snum += rand;
} while (snum != first);
//Nothing free on this chain: drop its lock and try the next chain
spin_unlock_bh(&hslot->lock);
}
goto fail;
} else {
//A specific port was requested: check whether it can be used
hslot = &udptable->hash[udp_hashfn(net, snum)];
spin_lock_bh(&hslot->lock);
//The port conflicts with an existing socket, so the bind fails
if (udp_lib_lport_inuse(net, snum, hslot, NULL, sk, saddr_comp))
goto fail_unlock;
}
found:
//A usable port was found; hslot->lock is held on every path reaching here
inet_sk(sk)->num = snum;
sk->sk_hash = snum;
if (sk_unhashed(sk)) {
//Insert the socket into the hash chain
sk_nulls_add_node_rcu(sk, &hslot->head);
//Add 1 to the in-use count kept per network namespace and protocol
//(presumably feeds socket usage statistics - confirm)
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
error = 0;
fail_unlock:
spin_unlock_bh(&hslot->lock);
fail:
return error;
}
2.1 判斷端口是否可用udp_lib_lport_inuse()
/*
 * udp_lib_lport_inuse - look for sockets on a hash chain that conflict with @sk
 *
 * @net:        network namespace to restrict the search to
 * @num:        port to test; only consulted when @bitmap is NULL
 * @hslot:      hash chain to walk
 * @bitmap:     optional bitmap collecting the ports in use on the chain
 * @sk:         socket that wants to bind
 * @saddr_comp: address comparison callback; non-zero means a conflict
 *
 * Two modes, selected by @bitmap:
 *  - @bitmap != NULL: for every conflicting socket on the chain, bit
 *    (sk_hash / UDP_HTABLE_SIZE) is set in @bitmap; the function then
 *    returns 0.
 *  - @bitmap == NULL: returns 1 as soon as a conflicting socket whose
 *    sk_hash equals @num is found, 0 when there is none.
 *
 * A socket on the chain conflicts with @sk when it lives in @net, is not
 * @sk itself, at least one of the two has sk_reuse clear, their device
 * bindings overlap (either one is unbound or both name the same device),
 * and @saddr_comp(sk, other) returns non-zero.
 */
static int udp_lib_lport_inuse(struct net *net, __u16 num,
			       const struct udp_hslot *hslot,
			       unsigned long *bitmap,
			       struct sock *sk,
			       int (*saddr_comp)(const struct sock *sk1,
						 const struct sock *sk2))
{
	struct sock *other;
	struct hlist_nulls_node *pos;

	sk_nulls_for_each(other, pos, &hslot->head) {
		/* Different namespace, or the caller's own socket. */
		if (!net_eq(sock_net(other), net))
			continue;
		if (other == sk)
			continue;

		/* Single-port mode only cares about sockets bound to num. */
		if (!bitmap && other->sk_hash != num)
			continue;

		/* Both sockets asked for address reuse: no conflict. */
		if (other->sk_reuse && sk->sk_reuse)
			continue;

		/* Both are bound to a device, but not to the same one. */
		if (other->sk_bound_dev_if && sk->sk_bound_dev_if &&
		    other->sk_bound_dev_if != sk->sk_bound_dev_if)
			continue;

		/* Local addresses do not collide. */
		if (!(*saddr_comp)(sk, other))
			continue;

		if (!bitmap)
			return 1;
		__set_bit(other->sk_hash / UDP_HTABLE_SIZE, bitmap);
	}
	return 0;
}
綜上,UDP的端口選擇過程還是非常複雜,並且對一個端口是否已經被使用的條件判定也非常複雜,沒有真正理解。
3. 端口的自動綁定
系統調用中的各種send()到了內核就只有一個入口,對於AF_INET協議族就是inet_sendmsg()。
/*
 * inet_sendmsg - AF_INET entry point for sending a message on a socket.
 *
 * If the socket has no local port yet (inet->num is 0), one is bound
 * automatically through inet_autobind() before sending. Returns -EAGAIN
 * when that automatic bind fails; otherwise returns the result of the
 * protocol's sendmsg() handler.
 */
int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
		 size_t size)
{
	struct sock *sk = sock->sk;

	/* Bind a port on demand; skipped entirely once a port is set. */
	if (!inet_sk(sk)->num) {
		if (inet_autobind(sk))
			return -EAGAIN;
	}

	return sk->sk_prot->sendmsg(iocb, sk, msg, size);
}
/*
 * inet_autobind - bind the socket to an automatically chosen port.
 *
 * Takes the socket lock and, if no port is bound yet, asks the protocol's
 * get_port() hook for one by passing port 0, the same hook used by an
 * explicit bind(). On success the port is also stored in network byte
 * order in inet->sport.
 *
 * Returns 0 if the socket has a port on exit, -EAGAIN if get_port() failed.
 */
static int inet_autobind(struct sock *sk)
{
	struct inet_sock *inet;
	int rc = 0;

	/* We may need to bind the socket. */
	lock_sock(sk);
	inet = inet_sk(sk);

	/* Already bound: nothing to do. */
	if (inet->num)
		goto unlock;

	if (sk->sk_prot->get_port(sk, 0)) {
		rc = -EAGAIN;
		goto unlock;
	}
	inet->sport = htons(inet->num);

unlock:
	release_sock(sk);
	return rc;
}
注:端口的自動綁定流程屬於AF_INET協議族的通用處理,TCP和UDP都符合這個流程。