9.8 延遲ACK定時器

9.8.1 Why

        TCP在收到數據後必須發送ACK給對端,但如果每收到一個包就給一個ACK的話會使得網絡中被注入過多報文。TCP的做法是在收到數據時不立即發送ACK,而是設置一個定時器,如果在定時器超時之前有數據發送給對端,則ACK會被攜帶在數據中捎帶過去;超時則由定時器發送ACK。這樣就減少了報文的發送,提高了協議的效率。

9.8.2 When

        設置延遲ACK的時機主要有以下幾個:

(1)發送SYN後收到SYN|ACK時:

 5373 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5374                      const struct tcphdr *th, unsigned int len)
5375 {
...
5385     if (th->ack) {
...
5486         if (sk->sk_write_pending ||
5487             icsk->icsk_accept_queue.rskq_defer_accept ||
5488             icsk->icsk_ack.pingpong) {
5489             /* Save one ACK. Data will be ready after
5490              * several ticks, if write_pending is set.
5491              *
5492              * It may be deleted, but with this feature tcpdumps
5493              * look so _wonderfully_ clever, that I was not able
5494              * to stand against the temptation 8)     --ANK
5495              */
5496             inet_csk_schedule_ack(sk);
5497             icsk->icsk_ack.lrcvtime = tcp_time_stamp;
5498             tcp_enter_quickack_mode(sk);
5499             inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
5500                           TCP_DELACK_MAX, TCP_RTO_MAX); //設置延遲ACK定時器,超時時間200ms
(2)發送ACK時無法申請skb:
 net/ipv4/tcp_output.c
3027 void tcp_send_ack(struct sock *sk)
3028 {
3029     struct sk_buff *buff;
3030
3031     /* If we have been reset, we may not send again. */
3032     if (sk->sk_state == TCP_CLOSE)
3033         return;
3034
3035     /* We are not putting this on the write queue, so
3036      * tcp_transmit_skb() will set the ownership to this
3037      * sock.
3038      */
3039     buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC));
3040     if (buff == NULL) {
3041         inet_csk_schedule_ack(sk);     
3042         inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
3043         inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
3044                       TCP_DELACK_MAX, TCP_RTO_MAX);      //超時時間200ms
3045         return;
3046     }
...
(3)有數據放入prequeue隊列中時:

1919 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1920 {
...
1933     if (tp->ucopy.memory > sk->sk_rcvbuf) {
...
1945     } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1946         wake_up_interruptible_sync_poll(sk_sleep(sk),
1947                        POLLIN | POLLRDNORM | POLLRDBAND);
1948         if (!inet_csk_ack_scheduled(sk))
1949             inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1950                           (3 * tcp_rto_min(sk)) / 4,
1951                           TCP_RTO_MAX);
 ...

(4)調用__tcp_ack_snd_check函數發送ACK時(發送ACK的具體情況見5.4 ACK發送與接收):

4758 static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
4759 {
4760     struct tcp_sock *tp = tcp_sk(sk);
4761 
4762         /* More than one full frame received... */
4763     if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
4764          /* ... and right edge of window advances far enough.
4765           * (tcp_recvmsg() will send ACK otherwise). Or...
4766           */
4767          __tcp_select_window(sk) >= tp->rcv_wnd) ||
4768         /* We ACK each frame or... */
4769         tcp_in_quickack_mode(sk) ||
4770         /* We have out of order data. */
4771         (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
4772         /* Then ack it now */
4773         tcp_send_ack(sk);
4774     } else {
4775         /* Else, send delayed ack. */
4776         tcp_send_delayed_ack(sk);
4777     }
4778 }
        根據上述代碼我們總結一下(4)中使用延遲ACK定時器的條件:

(1)自上次發送窗口更新以來新收到的數據不超過一個MSS,或者新計算出的通告窗口小於當前通告窗口

(2)沒有處於快速ACK模式

(3)無亂序數據

        上述條件都滿足時會調用tcp_send_delayed_ack來設置延遲ACK定時器:

2974 void tcp_send_delayed_ack(struct sock *sk)
2975 {
2976     struct inet_connection_sock *icsk = inet_csk(sk);
2977     int ato = icsk->icsk_ack.ato;
2978     unsigned long timeout;
2979
2980     if (ato > TCP_DELACK_MIN) {
2981         const struct tcp_sock *tp = tcp_sk(sk);
2982         int max_ato = HZ / 2;
2983
2984         if (icsk->icsk_ack.pingpong ||  //交互模式
2985             (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))  //需要儘快發送ACK
2986             max_ato = TCP_DELACK_MAX;  //TCP_DELACK_MAX爲200ms,小於默認上限HZ/2(500ms),因此這裏是收緊而不是放寬延遲上限
2987
2988         /* Slow path, intersegment interval is "high". */
2989
2990         /* If some rtt estimate is known, use it to bound delayed ack.
2991          * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements
2992          * directly.
2993          */
2994         if (tp->srtt) {
2995             int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2996
2997             if (rtt < max_ato)
2998                 max_ato = rtt;
2999         }
3000
3001         ato = min(ato, max_ato);
3002     }
3003
3004     /* Stay within the limit we were given */
3005     timeout = jiffies + ato;    //超時時間爲ato:ato不大於TCP_DELACK_MIN(1/25s,即40ms)時保持原值,否則被限制在max_ato以內
3006
3007     /* Use new timeout only if there wasn't a older one earlier. */
3008     if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) {    //已經設置了延遲ACK定時器
3009         /* If delack timer was blocked or is about to expire,
3010          * send ACK now.
3011          */
3012         if (icsk->icsk_ack.blocked ||    //延遲ACK定時器被阻塞,可能是在延遲ACK定時器超時時socket被應用進程鎖定導致ACK無法發送
3013             time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) {    //定時器即將超時(剩餘時間不超過ato/4)
3014             tcp_send_ack(sk);    //立即發送ACK
3015             return;
3016         }
3017
3018         if (!time_before(timeout, icsk->icsk_ack.timeout))
3019             timeout = icsk->icsk_ack.timeout;
3020     }
3021     icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
3022     icsk->icsk_ack.timeout = timeout;
3023     sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);    //安裝延遲ACK定時器
3024 }
        發送ACK時清除延遲ACK定時器:

 178 static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 179 {
 180     tcp_dec_quickack_mode(sk, pkts);
 181     inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
 182 }
...
 828 static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 829                 gfp_t gfp_mask)
 830 {
...
 940     if (likely(tcb->tcp_flags & TCPHDR_ACK))
 941         tcp_event_ack_sent(sk, tcp_skb_pcount(skb));

9.8.3 What

        延遲ACK定時器的超時函數爲tcp_delack_timer,實際的處理工作由tcp_delack_timer_handler完成:

197 void tcp_delack_timer_handler(struct sock *sk)
198 {
199     struct tcp_sock *tp = tcp_sk(sk);
200     struct inet_connection_sock *icsk = inet_csk(sk);
201
202     sk_mem_reclaim_partial(sk);
203
204     if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
205         goto out;
206
207     if (time_after(icsk->icsk_ack.timeout, jiffies)) {    //未到超時時間
208         sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout);
209         goto out;
210     }
211     icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
212
213     if (!skb_queue_empty(&tp->ucopy.prequeue)) { //處理prequeue隊列
214         struct sk_buff *skb;
215
216         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
217
218         while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
219             sk_backlog_rcv(sk, skb);
220
221         tp->ucopy.memory = 0;
222     }
223
224     if (inet_csk_ack_scheduled(sk)) { //需要發送ACK
225         if (!icsk->icsk_ack.pingpong) { //非交互模式:延遲ACK未能被數據捎帶,將ATO加倍(不超過RTO)
226             /* Delayed ACK missed: inflate ATO. */
227             icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
228         } else {  //交互模式:退出交互模式並將ATO重置爲TCP_ATO_MIN
229             /* Delayed ACK missed: leave pingpong mode and
230              * deflate ATO.
231              */
232             icsk->icsk_ack.pingpong = 0;
233             icsk->icsk_ack.ato      = TCP_ATO_MIN;
234         }
235         tcp_send_ack(sk); //發送ACK
236         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKS);
237     }
238
239 out:
240     if (sk_under_memory_pressure(sk))
241         sk_mem_reclaim(sk);
242 }
243
244 static void tcp_delack_timer(unsigned long data)
245 {
246     struct sock *sk = (struct sock *)data;
247
248     bh_lock_sock(sk);
249     if (!sock_owned_by_user(sk)) {
250         tcp_delack_timer_handler(sk);
251     } else {
252         inet_csk(sk)->icsk_ack.blocked = 1;    //標識延遲ACK被鎖定,以後安裝延遲ACK定時器時要立即發送ACK
253         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
254         /* deleguate our work to tcp_release_cb() */
255         if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
256             sock_hold(sk);
257     }
258     bh_unlock_sock(sk);
259     sock_put(sk);
260 }
        255:如果延遲ACK定時器超時時socket被應用進程鎖定,則設置TCP_DELACK_TIMER_DEFERRED標記,這樣在應用進程釋放socket時會調用tcp_release_cb函數:
 741 void tcp_release_cb(struct sock *sk)
 742 {
...
 748         flags = tp->tsq_flags;
...
 761     if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
 762         tcp_delack_timer_handler(sk);
 763         __sock_put(sk);
 764     }
...
        tcp_delack_timer_handler函數最終也會獲得運行機會。


發佈了79 篇原創文章 · 獲贊 46 · 訪問量 22萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章