12.3 擁塞控制流程

12.3.1 TCP擁塞狀態

        TCP擁塞狀態共有5個:

135 enum tcp_ca_state {
136     TCP_CA_Open = 0,
137 #define TCPF_CA_Open    (1<<TCP_CA_Open)
138     TCP_CA_Disorder = 1,
139 #define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
140     TCP_CA_CWR = 2,
141 #define TCPF_CA_CWR (1<<TCP_CA_CWR)
142     TCP_CA_Recovery = 3,
143 #define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
144     TCP_CA_Loss = 4
145 #define TCPF_CA_Loss    (1<<TCP_CA_Loss)
146 };

        Open:是初始狀態,也是正常狀態

        Disorder:當第一次由於SACK塊或重複確認而檢測到擁塞時進入此狀態;此狀態下擁塞窗口不變,TCP需要保持網絡中的包的數量不變;TCP在進入Recovery狀態之前要進入本狀態

        CWR(Congestion Window Reduced):此狀態下TCP會減小擁塞窗口,但不會重傳已發送數據;這個狀態在發生本地擁塞或收到顯式擁塞通告(ECN)後設置

        Recovery:減小擁塞窗口直至到達ssthresh但不能增加擁塞窗口,會重傳數據

        Loss:所有已發送數據都會被標記爲丟失,擁塞窗口減小到一個報文段,然後數據發送端使用慢啓動算法增大擁塞窗口。這個狀態下不能使用快速重傳算法

12.3.2 擁塞窗口

        TCP在調用tcp_write_xmit函數發送數據時會檢查擁塞窗口:

1811 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1812                int push_one, gfp_t gfp)
1813 { 
...
1842         cwnd_quota = tcp_cwnd_test(tp, skb);
1843         if (!cwnd_quota) {    //擁塞窗口不允許發送數據
1844             if (push_one == 2)  //發送丟失探測報文是允許的
1845                 /* Force out a loss probe pkt. */
1846                 cwnd_quota = 1;
1847             else    //其它的報文不允許
1848                 break;
1849         }
...
         tcp_cwnd_test函數用來檢查擁塞窗口是否允許發送數據:

1407 static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1408                      const struct sk_buff *skb)
1409 {
1410     u32 in_flight, cwnd;
1411 
1412     /* Don't be strict about the congestion window for the final FIN.  */
1413     if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
1414         tcp_skb_pcount(skb) == 1)   
1415         return 1;    //不需要分段的帶FIN標記位的報文是不受擁塞窗口限制的
1416 
1417     in_flight = tcp_packets_in_flight(tp);  //得到在網絡中的報文數量
1418     cwnd = tp->snd_cwnd;
1419     if (in_flight < cwnd)
1420         return (cwnd - in_flight);    //擁塞窗口是允許同時在網絡中的報文數量上限,減去已在網絡中的報文數量就是現在還允許發送的數量
1421 
1422     return 0;
1423 }
        可見擁塞窗口的值保存在tp->snd_cwnd中,這個值由擁塞控制算法來計算。

12.3.3 擁塞控制的起點

        TCP的擁塞控制是從ACK的處理開始的:

3325 static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) 
3326 {
3327     struct inet_connection_sock *icsk = inet_csk(sk);
3328     struct tcp_sock *tp = tcp_sk(sk);
3329     u32 prior_snd_una = tp->snd_una;
3330     u32 ack_seq = TCP_SKB_CB(skb)->seq;
3331     u32 ack = TCP_SKB_CB(skb)->ack_seq;
3332     bool is_dupack = false;
3333     u32 prior_in_flight;
3334     u32 prior_fackets;
3335     int prior_packets = tp->packets_out;
3336     int prior_sacked = tp->sacked_out;
3337     int pkts_acked = 0;
3338     int previous_packets_out = 0;
3339 
3340     /* If the ack is older than previous acks
3341      * then we can probably ignore it.
3342      */
3343     if (before(ack, prior_snd_una)) {
3344         /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */
3345         if (before(ack, prior_snd_una - tp->max_window)) {
3346             tcp_send_challenge_ack(sk);
3347             return -1;
3348         }
3349         goto old_ack;
3350     }
3351 
3352     /* If the ack includes data we haven't sent yet, discard
3353      * this segment (RFC793 Section 3.9).
3354      */
3355     if (after(ack, tp->snd_nxt))
3356         goto invalid_ack;
...
3374     if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {    //處於快速處理路徑並且有新被確認的數據
3375         /* Window is constant, pure forward advance.
3376          * No more checks are required.
3377          * Note, we use the fact that SND.UNA>=SND.WL2.
3378          */
3379         tcp_update_wl(tp, ack_seq);
3380         tp->snd_una = ack;
3381         flag |= FLAG_WIN_UPDATE;
3382 
3383         tcp_ca_event(sk, CA_EVENT_FAST_ACK);  //處理快速ACK擁塞事件
3384 
3385         NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPACKS);
3386     } else {  //處於慢速處理路徑,或ACK沒有確認新數據(ack未超過prior_snd_una)
3387         if (ack_seq != TCP_SKB_CB(skb)->end_seq)
3388             flag |= FLAG_DATA;  //包中有數據
3389         else
3390             NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPPUREACKS);
3391 
3392         flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
3393 
3394         if (TCP_SKB_CB(skb)->sacked)
3395             flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3396 
3397         if (TCP_ECN_rcv_ecn_echo(tp, tcp_hdr(skb)))    //TCP開啓了ECN功能且在ACK中發現了ecn標記
3398             flag |= FLAG_ECE;
3399 
3400         tcp_ca_event(sk, CA_EVENT_SLOW_ACK);    //處理慢速ACK擁塞事件
3401     }
...
3409     if (!prior_packets)
3410         goto no_queue;
...
3413     previous_packets_out = tp->packets_out;
3414     flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una);
3415 
3416     pkts_acked = previous_packets_out - tp->packets_out;
3417 
3418     if (tcp_ack_is_dubious(sk, flag)) {
3419         /* Advance CWND, if state allows this. */
3420         if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
3421             tcp_cong_avoid(sk, ack, prior_in_flight);
3422         is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
3423         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3424                       prior_packets, is_dupack, flag);
3425     } else {
3426         if (flag & FLAG_DATA_ACKED)
3427             tcp_cong_avoid(sk, ack, prior_in_flight);
3428     }
3429 
3430     if (tp->tlp_high_seq)
3431         tcp_process_tlp_ack(sk, ack, flag);
3432 
3433     if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
3434         struct dst_entry *dst = __sk_dst_get(sk);
3435         if (dst)
3436             dst_confirm(dst);
3437     }
3438 
3439     if (icsk->icsk_pending == ICSK_TIME_RETRANS)
3440         tcp_schedule_loss_probe(sk);
3441     return 1;
3442 
3443 no_queue:
3444     /* If data was DSACKed, see if we can undo a cwnd reduction. */
3445     if (flag & FLAG_DSACKING_ACK)  
3446         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3447                       prior_packets, is_dupack, flag);
3448     /* If this ack opens up a zero window, clear backoff.  It was
3449      * being used to time the probes, and is probably far higher than
3450      * it needs to be for normal retransmission.
3451      */
3452     if (tcp_send_head(sk))
3453         tcp_ack_probe(sk);
3454 
3455     if (tp->tlp_high_seq)
3456         tcp_process_tlp_ack(sk, ack, flag);
3457     return 1;
3458 
3459 invalid_ack:
3460     SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3461     return -1;
3462 
3463 old_ack:
3464     /* If data was SACKed, tag it and see if we should send more data.
3465      * If data was DSACKed, see if we can undo a cwnd reduction.
3466      */
3467     if (TCP_SKB_CB(skb)->sacked) { 
3468         flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una);
3469         tcp_fastretrans_alert(sk, pkts_acked, prior_sacked,
3470                       prior_packets, is_dupack, flag);
3471     }
3472 
3473     SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3474     return 0;


        To be continued...



發佈了79 篇原創文章 · 獲贊 46 · 訪問量 22萬+
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章