默認情況下sysctl_tcp_recovery的值爲1(TCP_RACK_LOSS_DETECTION),也可通過PROC文件:/proc/sys/net/ipv4/tcp_recovery進行修改,如果設置了標誌位TCP_RACK_NO_DUPTHRESH,表明RACK不使用重複ACK閾值(DupAck Threshold),默認未設置此標誌。
/* Bit flags that can be combined in the tcp_recovery sysctl
 * (sysctl_tcp_recovery); each one is tested with a bitwise AND.
 */
#define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */
#define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */
#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */
$ cat /proc/sys/net/ipv4/tcp_recovery
1
如下tcp_rack_reo_wnd函數,如果當前套接口沒有觀察到亂序發生(reord_seen爲零),並且當前處於擁塞恢復狀態(TCP_CA_Recovery)或者TCP_CA_Loss擁塞丟失狀態,返回亂序窗口零,以便儘快地確認丟失報文,觸發重傳。反之,如果擁塞狀態不處於以上的兩種狀態,但RACK開啓了DUPTHRESH的支持(未設置TCP_RACK_NO_DUPTHRESH),並且SACK確認報文數量達到或超過了亂序等級(sacked_out >= reordering),很有可能發生了丟包,接下來可能會進入TCP_CA_Recovery或者TCP_CA_Loss狀態,也返回值爲零的亂序窗口,以便RACK作出快速響應。
否則,函數tcp_rack_reo_wnd返回正常的亂序窗口。
/* Compute the RACK reordering window for a socket.
 *
 * Returns 0 when no reordering has been observed on the socket and either
 * the congestion state is TCP_CA_Recovery or higher, or sacked_out has
 * reached tp->reordering while TCP_RACK_NO_DUPTHRESH is not set in
 * sysctl_tcp_recovery. Otherwise returns the smaller of
 * (min_rtt / 4) * reo_wnd_steps and srtt_us >> 3.
 *
 * NOTE: this is an excerpt; the closing brace of the function is not shown.
 */
static u32 tcp_rack_reo_wnd(const struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
/* The zero-window shortcuts below apply only while no reordering was seen. */
if (!tp->reord_seen) {
/* If reordering has not been observed, be aggressive during
* the recovery or starting the recovery by DUPACK threshold.
*/
/* Already in Recovery (or a higher state): no settling delay. */
if (inet_csk(sk)->icsk_ca_state >= TCP_CA_Recovery)
return 0;
/* SACKed count reached the reordering degree and the DUPACK
 * threshold has not been disabled via the sysctl flag.
 */
if (tp->sacked_out >= tp->reordering &&
!(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
return 0;
}
/* To be more reordering resilient, allow min_rtt/4 settling delay.
* Use min_rtt instead of the smoothed RTT because reordering is
* often a path property and less related to queuing or delayed ACKs.
* Upon receiving DSACKs, linearly increase the window up to the
* smoothed RTT.
*/
return min((tcp_min_rtt(tp) >> 2) * tp->rack.reo_wnd_steps, tp->srtt_us >> 3);
如下丟失報文檢查函數,如果當前報文的發送時間戳加上最近測量的RTT和亂序窗口時長,不大於當前TCP時間(即remaining <= 0),即認爲此報文已經丟失。當以上tcp_rack_reo_wnd函數返回的亂序窗口時長爲0時,報文更容易被認定爲丟失。
/* Walk the time-sorted sent queue and mark as lost every skb whose
 * tcp_rack_skb_timeout() result is zero or negative.
 *
 * @sk:          socket being examined
 * @reo_timeout: out-parameter, reset to 0 on entry
 *
 * NOTE: this is an excerpt. The local declarations (tp, skb, n, reo_wnd,
 * remaining), the rest of the loop body and any code that updates
 * *reo_timeout are not shown here.
 */
static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout)
{
/* Start from "no timeout pending". */
*reo_timeout = 0;
/* Reordering window used for every skb checked in this pass. */
reo_wnd = tcp_rack_reo_wnd(sk);
list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, tcp_tsorted_anchor) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
...
/* A packet is lost if it has not been s/acked beyond
* the recent RTT plus the reordering window.
*/
remaining = tcp_rack_skb_timeout(tp, skb, reo_wnd);
if (remaining <= 0) {
tcp_mark_skb_lost(sk, skb);
/* Unlink the skb from the time-sorted list once it is marked lost. */
list_del_init(&skb->tcp_tsorted_anchor);
如下tcp_rack_mark_lost函數,根據以上tcp_rack_detect_loss函數返回的timeout超時時間值,設置ICSK_TIME_REO_TIMEOUT定時器。
/* Run RACK loss detection and, when it reports a non-zero timeout,
 * arm the ICSK_TIME_REO_TIMEOUT timer.
 *
 * NOTE: this is an excerpt; the declaration of `timeout`, the elided
 * statements and the closing braces are not shown.
 */
void tcp_rack_mark_lost(struct sock *sk)
{
...
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
/* Convert microseconds to jiffies and add TCP_TIMEOUT_MIN. */
timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
/* NOTE(review): icsk_rto is passed as the last argument, presumably
 * as the upper bound for the timer - confirm against
 * inet_csk_reset_xmit_timer().
 */
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
timeout, inet_csk(sk)->icsk_rto);
如下定時器到期處理函數,如果tcp_rack_detect_loss函數標記了新的丟失報文,函數tcp_xmit_retransmit_queue將進行立即重傳。
/* Handler for the reordering timer: re-run RACK loss detection and, if the
 * number of packets in flight changed, enter Recovery when not already
 * there and retransmit.
 *
 * NOTE: this is an excerpt; the remainder of the function is not shown.
 */
void tcp_rack_reo_timeout(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, prior_inflight;
/* Snapshot in-flight count so a change after detection can be noticed. */
prior_inflight = tcp_packets_in_flight(tp);
tcp_rack_detect_loss(sk, &timeout);
/* A different in-flight count means detection changed the loss accounting. */
if (prior_inflight != tcp_packets_in_flight(tp)) {
if (inet_csk(sk)->icsk_ca_state != TCP_CA_Recovery) {
tcp_enter_recovery(sk, false);
/* Only call tcp_cwnd_reduction() when the congestion-control
 * ops do not provide their own cong_control hook.
 */
if (!inet_csk(sk)->icsk_ca_ops->cong_control)
tcp_cwnd_reduction(sk, 1, 0);
}
tcp_xmit_retransmit_queue(sk);
除了以上的亂序超時處理函數之外,在函數tcp_identify_packet_loss中,也調用tcp_rack_mark_lost標記丟失報文。
/* Identify lost packets for the current ACK.
 *
 * Returns immediately when the retransmit queue is empty. On the RACK
 * path it calls tcp_rack_mark_lost() and sets FLAG_LOST_RETRANS in
 * *ack_flag if tp->retrans_out decreased across that call.
 *
 * NOTE: this is an excerpt; the Reno branch body and the closing braces
 * are not shown.
 */
static void tcp_identify_packet_loss(struct sock *sk, int *ack_flag)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Nothing outstanding to examine. */
if (tcp_rtx_queue_empty(sk))
return;
if (unlikely(tcp_is_reno(tp))) {
...
} else if (tcp_is_rack(sk)) {
/* Remember retrans_out so a drop after marking can be detected. */
u32 prior_retrans = tp->retrans_out;
tcp_rack_mark_lost(sk);
if (prior_retrans > tp->retrans_out)
*ack_flag |= FLAG_LOST_RETRANS;
以上函數的調用位於ACK處理函數tcp_fastretrans_alert中,對應於最開始的函數tcp_rack_reo_wnd,如果套接口位於TCP_CA_Recovery或者TCP_CA_Loss狀態,標記丟失報文儘快重傳。否則,如果套接口處於其他的擁塞狀態,tcp_time_to_recover函數將儘快判斷是否進入TCP_CA_Recovery狀態。
/* ACK-time state processing, dispatched on the current congestion state.
 *
 * Every branch shown here calls tcp_identify_packet_loss(). In the default
 * branch, tcp_time_to_recover() decides whether to enter Recovery; if it
 * returns false, tcp_try_to_open() is called and the function returns.
 * On the path that reaches the end, *rexmit is set to REXMIT_LOST.
 *
 * NOTE: this is a heavily elided excerpt. `icsk`, `flag` and `fast_rexmit`
 * are used but their declarations are not shown.
 * NOTE(review): `flag` is presumably derived from *ack_flag in the elided
 * code - confirm against the full source.
 */
static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
int num_dupack, int *ack_flag, int *rexmit)
{
switch (icsk->icsk_ca_state) {
case TCP_CA_Recovery:
...
/* Already in Recovery: just look for further losses. */
tcp_identify_packet_loss(sk, ack_flag);
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, num_dupack, rexmit);
tcp_identify_packet_loss(sk, ack_flag);
...
/* fall through */
default:
...
tcp_identify_packet_loss(sk, ack_flag);
/* Not yet time to enter Recovery: try to open and stop here. */
if (!tcp_time_to_recover(sk, flag)) {
tcp_try_to_open(sk, flag);
return;
}
...
/* Otherwise enter Recovery state */
tcp_enter_recovery(sk, (flag & FLAG_ECE));
fast_rexmit = 1;
}
...
/* Report through the out-parameter that lost packets should be retransmitted. */
*rexmit = REXMIT_LOST;
如果在tcp_rack_detect_loss函數中標記了丟失報文,lost_out不爲零,tcp_time_to_recover函數返回true,套接口進入TCP_CA_Recovery擁塞狀態,重傳丟失報文(REXMIT_LOST)。
/* Decide whether the socket should enter Recovery.
 *
 * Returns true as soon as tp->lost_out is non-zero.
 *
 * NOTE: this is an excerpt; any further checks and the final return are
 * not shown.
 */
static bool tcp_time_to_recover(struct sock *sk, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
/* Trick#1: The loss is proven. */
if (tp->lost_out)
return true;
內核版本 5.0