9.7.1 Why
當單工模式下TCP數據發送方發送了一些數據後就不再發數據,數據接收方也不會發送報文,這時TCP連接處於靜止狀態(比如Telnet應用)。保活功能可以使用保活定時器向對端發送探測報文來確定對端的連接是否正常,如果對端有迴應則繼續維持連接,否則關閉連接,釋放資源。開啓保活功能需要使用SO_KEEPALIVE socket選項。
9.7.2 When
設置保活定時器的時機主要有三個:
(1)客戶端發送SYN後收到SYN|ACK,調用tcp_finish_connect函數完成連接時:
5291 void tcp_finish_connect(struct sock *sk, struct sk_buff *skb)
5292 {
5293 struct tcp_sock *tp = tcp_sk(sk);
5294 struct inet_connection_sock *icsk = inet_csk(sk);
5295
5296 tcp_set_state(sk, TCP_ESTABLISHED);
...
5317 if (sock_flag(sk, SOCK_KEEPOPEN)) //應用進程開啓keepalive服務
5318 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
...
5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5374 const struct tcphdr *th, unsigned int len)
5375 {
...
5480 tcp_finish_connect(sk, skb);
...
(2)服務器端發送SYN|ACK後收到合法的ACK,調用tcp_create_openreq_child創建子socket時:
381 struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
382 {
383 struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
384
385 if (newsk != NULL) {
...
441 if (sock_flag(newsk, SOCK_KEEPOPEN)) //應用進程開啓keepalive服務
442 inet_csk_reset_keepalive_timer(newsk,
443 keepalive_time_when(newtp));
...
(3)使用SO_KEEPALIVE socket選項開啓保活功能時:
621 int sock_setsockopt(struct socket *sock, int level, int optname,
622 char __user *optval, unsigned int optlen)
623 {
...
727 case SO_KEEPALIVE:
728 #ifdef CONFIG_INET
729 if (sk->sk_protocol == IPPROTO_TCP &&
730 sk->sk_type == SOCK_STREAM)
731 tcp_set_keepalive(sk, valbool);
732 #endif
733 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
734 break;
tcp_set_keepalive函數用於開啓或關閉keepalive服務:
546 void tcp_set_keepalive(struct sock *sk, int val)
547 {
548 if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
549 return;
550
551 if (val && !sock_flag(sk, SOCK_KEEPOPEN))
552 inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk)));
553 else if (!val)
554 inet_csk_delete_keepalive_timer(sk); //拆除keepalive定時器
555 }
拆除keepalive定時器只能使用SO_KEEPALIVE socket選項。Keepalive定時器的超時時間由keepalive_time_when函數決定:
1114 static inline int keepalive_time_when(const struct tcp_sock *tp)
1115 {
1116 return tp->keepalive_time ? : sysctl_tcp_keepalive_time;
1117 }
其中sysctl_tcp_keepalive_time由net.ipv4.tcp_keepalive_time內核參數設定,tp->keepalive_time由TCP_KEEPIDLE socket選項設置:
static int do_tcp_setsockopt(struct sock *sk, int level,
int optname, char __user *optval, unsigned int optlen)
{
...
2525 case TCP_KEEPIDLE:
2526 if (val < 1 || val > MAX_TCP_KEEPIDLE)
2527 err = -EINVAL;
2528 else {
2529 tp->keepalive_time = val * HZ;
2530 if (sock_flag(sk, SOCK_KEEPOPEN) &&
2531 !((1 << sk->sk_state) &
2532 (TCPF_CLOSE | TCPF_LISTEN))) {
2533 u32 elapsed = keepalive_time_elapsed(tp);
2534 if (tp->keepalive_time > elapsed)
2535 elapsed = tp->keepalive_time - elapsed;
2536 else
2537 elapsed = 0;
2538 inet_csk_reset_keepalive_timer(sk, elapsed);
2539 }
2540 }
2541 break;
Keepalive定時器默認超時時間爲TCP_KEEPALIVE_TIME(2小時)。
9.7.3 What
保活定時器的超時處理函數爲tcp_keepalive_timer:
558 static void tcp_keepalive_timer (unsigned long data)
559 {
560 struct sock *sk = (struct sock *) data;
561 struct inet_connection_sock *icsk = inet_csk(sk);
562 struct tcp_sock *tp = tcp_sk(sk);
563 u32 elapsed;
564
565 /* Only process if socket is not in use. */
566 bh_lock_sock(sk);
567 if (sock_owned_by_user(sk)) {
568 /* Try again later. */
569 inet_csk_reset_keepalive_timer (sk, HZ/20);
570 goto out;
571 }
572
573 if (sk->sk_state == TCP_LISTEN) { //SYN-ACK定時器超時處理
574 tcp_synack_timer(sk);
575 goto out;
576 }
577
578 if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) {//FIN_WAIT2定時器超時處理
...
589 }
590
591 if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) //keepalive功能未開啓或socket已經關閉
592 goto out;
593
594 elapsed = keepalive_time_when(tp);
595
596 /* It is alive without keepalive 8) */
597 if (tp->packets_out || tcp_send_head(sk)) //有包在網絡中或有數據未發
598 goto resched; //連接是活動的,無需keepalive定時器操心
599
600 elapsed = keepalive_time_elapsed(tp); //計算自收到最後一個包到現在經歷了多長時間
601
602 if (elapsed >= keepalive_time_when(tp)) { //自收到最後一個包到現在經歷的時間已達到保活定時器的超時門限
603 /* If the TCP_USER_TIMEOUT option is enabled, use that
604 * to determine when to timeout instead.
605 */
606 if ((icsk->icsk_user_timeout != 0 && //應用進程使用TCP_USER_TIMEOUT socket選項設置了超時時間
607 elapsed >= icsk->icsk_user_timeout && //未活動時間超過應用進程的限制
608 icsk->icsk_probes_out > 0) || //發送過探測報文
609 (icsk->icsk_user_timeout == 0 && //應用進程未設置超時時間
610 icsk->icsk_probes_out >= keepalive_probes(tp))) { //探測次數超過用戶設定的門限
611 tcp_send_active_reset(sk, GFP_ATOMIC); //發送RST復位連接
612 tcp_write_err(sk); //發送出錯報告,關閉本端連接
613 goto out;
614 }
615 if (tcp_write_wakeup(sk) <= 0) { //發送探測包
616 icsk->icsk_probes_out++; //此計數在收到ACK時清零
617 elapsed = keepalive_intvl_when(tp); //設置超時時間爲探測間隔時間
618 } else { //由於底層隊列擁塞包沒有發送出去
619 /* If keepalive was lost due to local congestion,
620 * try harder.
621 */
622 elapsed = TCP_RESOURCE_PROBE_INTERVAL; //縮短時間間隔爲0.5s
623 }
624 } else {
625 /* It is tp->rcv_tstamp + keepalive_time_when(tp) */
626 elapsed = keepalive_time_when(tp) - elapsed; //設置keepalive在從收到最後一個包開始到一個超時週期時再超時
627 }
628
629 sk_mem_reclaim(sk); //回收內存資源
630
631 resched:
632 inet_csk_reset_keepalive_timer (sk, elapsed); //重新設置保活定時器
633 goto out;
...
638 out:
639 bh_unlock_sock(sk);
640 sock_put(sk);
641 }
keepalive_probes函數返回最大探測次數:
1119 static inline int keepalive_probes(const struct tcp_sock *tp)
1120 {
1121 return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
1122 }
其中,tp->keepalive_probes由TCP_KEEPCNT socket選項設定,sysctl_tcp_keepalive_probes(默認爲TCP_KEEPALIVE_PROBES,即9)由net.ipv4.tcp_keepalive_probes內核參數設定。
keepalive_intvl_when函數返回探測間隔時間:
1109 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
1110 {
1111 return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl;
1112 }
其中,tp->keepalive_intvl由TCP_KEEPINTVL socket選項設定,sysctl_tcp_keepalive_intvl(默認是TCP_KEEPALIVE_INTVL,即75s)由net.ipv4.tcp_keepalive_intvl內核參數設定。
現總結一下keepalive定時器的特性。如果應用進程開啓了保活功能,在連接建立完成伊始就會設置keepalive定時器,應用進程也可以使用socket選項設置或禁用它;每次定時器超時的時候,在自收到最後一個包到現在經歷的時間達到保活定時器的超時門限的情況下,如果達到了應用進程設定的超時上限或最大探測次數則發送RST報文給對端並關閉連接,否則發送探測報文(發送探測報文的過程見9.6 堅持(Persist)定時器),增加探測計數,並將超時時間設置爲keepalive_intvl,等待下次超時。當收到ACK時探測計數清零,收包時間也會刷新,整個探測過程重新開始。