Kernel log: TCP: Possible SYN flooding on port

The kernel prints the message "TCP: Possible SYN flooding on port". With the kernel's syncookie mechanism present, how do we tune the relevant parameters to increase the number of concurrent SYN requests the kernel will accept?

1. Where the kernel prints this message

int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)

{

        struct tcp_extend_values tmp_ext;

        struct tcp_options_received tmp_opt;

        u8 *hash_location;

        struct request_sock *req;

        struct inet_request_sock *ireq;

        struct tcp_sock *tp = tcp_sk(sk);

        struct dst_entry *dst = NULL;

        __be32 saddr = ip_hdr(skb)->saddr;

        __be32 daddr = ip_hdr(skb)->daddr;

        __u32 isn = TCP_SKB_CB(skb)->when;

#ifdef CONFIG_SYN_COOKIES

        int want_cookie = 0;

#else

#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */

#endif

        /* Never answer to SYNs send to broadcast or multicast */

        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))

                goto drop;

        /* TW buckets are converted to open requests without

         * limitations, they conserve resources and peer is

         * evidently real one.

         */

        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {

                if (net_ratelimit())

                        syn_flood_warning(skb);

We can ignore isn here: for an initial SYN it is always 0, so the warning fires purely on the queue-full check.

Now look at inet_csk_reqsk_queue_is_full(), called in the test above:

static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)

{

        return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);

}

static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)

{                                            

        return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;

}

This code is quite clever: a right shift is used to test whether the pending SYN (request) queue is full -- qlen >> max_qlen_log is non-zero exactly when qlen reaches 2^max_qlen_log. So to allow more concurrent SYN requests, we only need to increase max_qlen_log.
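As a quick illustration, here is a minimal user-space sketch (not kernel code; max_qlen_log = 8 is just an example value) showing that the shift behaves like a comparison against 2^max_qlen_log:

#include <stdio.h>

int main(void)
{
        int max_qlen_log = 8;   /* queue capacity is 2^8 = 256 pending requests */
        int qlen;

        for (qlen = 254; qlen <= 257; qlen++)
                /* a non-zero result means "full", just like reqsk_queue_is_full() */
                printf("qlen=%d full=%d\n", qlen, qlen >> max_qlen_log);
        return 0;
}

The output flips from full=0 to full=1 at qlen = 256, so raising max_qlen_log by one doubles the number of pending SYN requests allowed before the warning is triggered.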

2. How to increase max_qlen_log

The TCP transmission control block (connection-oriented socket) structure:

/**inet_connection_sock - INET connection oriented sock

*

* @icsk_accept_queue:     FIFO of established children

* @icsk_bind_hash:        Bind node

* @icsk_timeout:          Timeout

* @icsk_retransmit_timer: Resend (no ack)

* @icsk_rto:              Retransmit timeout

* @icsk_pmtu_cookie       Last pmtu seen by socket

* @icsk_ca_ops            Pluggable congestion control hook

* @icsk_af_ops            Operations which are AF_INET{4,6} specific

* @icsk_ca_state:         Congestion control state

* @icsk_retransmits:      Number of unrecovered [RTO] timeouts

* @icsk_pending:          Scheduled timer event

* @icsk_backoff:          Backoff

* @icsk_syn_retries:      Number of allowed SYN (or equivalent) retries

* @icsk_probes_out:       unanswered 0 window probes

* @icsk_ext_hdr_len:      Network protocol overhead (IP/IPv6 options)

* @icsk_ack:              Delayed ACK control data

* @icsk_mtup;             MTU probing control data

*/

struct inet_connection_sock {

        /* inet_sock has to be the first member! */

        struct inet_sock          icsk_inet;

        struct request_sock_queue icsk_accept_queue;

        struct inet_bind_bucket   *icsk_bind_hash;

        unsigned long             icsk_timeout;

        struct timer_list         icsk_retransmit_timer;

        struct timer_list         icsk_delack_timer;

        __u32                     icsk_rto;

        __u32                     icsk_pmtu_cookie;

        const struct tcp_congestion_ops *icsk_ca_ops;

        const struct inet_connection_sock_af_ops *icsk_af_ops;

        unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);

        __u8                      icsk_ca_state;

        __u8                      icsk_retransmits;

        __u8                      icsk_pending;

        __u8                      icsk_backoff;

        __u8                      icsk_syn_retries;

        __u8                      icsk_probes_out;

        __u16                     icsk_ext_hdr_len;

        struct {

                __u8              pending;       /* ACK is pending                         */

                __u8              quick;         /* Scheduled number of quick acks         */

                __u8              pingpong;      /* The session is interactive             */

                __u8              blocked;       /* Delayed ACK was blocked by socket lock */

                __u32             ato;           /* Predicted tick of soft clock           */

                unsigned long     timeout;       /* Currently scheduled timeout            */

                __u32             lrcvtime;      /* timestamp of last received data packet */

                __u16             last_seg_size; /* Size of last incoming segment          */

                __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */

        } icsk_ack;

        struct {

                int               enabled;

                /* Range of MTUs to search */

                int               search_high;

                int               search_low;

                /* Information on the current probe. */

                int               probe_size;

        } icsk_mtup;

        u32                       icsk_ca_priv[16];

        u32                       icsk_user_timeout;

#define ICSK_CA_PRIV_SIZE       (16 * sizeof(u32))

};

The icsk_accept_queue member holds the transmission control blocks for connections that are still being established as well as those that have completed the handshake but have not yet been accept()ed.

/** struct request_sock_queue - queue of request_socks

*    

* @rskq_accept_head - FIFO head of established children

* @rskq_accept_tail - FIFO tail of established children

* @rskq_defer_accept - User waits for some data after accept()

* @syn_wait_lock - serializer  

*    

* %syn_wait_lock is necessary only to avoid proc interface having to grab the main

* lock sock while browsing the listening hash (otherwise it's deadlock prone).

*    

* This lock is acquired in read mode only from listening_get_next() seq_file

* op and it's acquired in write mode _only_ from code that is actively

* changing rskq_accept_head. All readers that are holding the master sock lock

* don't need to grab this lock in read mode too as rskq_accept_head. writes

* are always protected from the main sock lock.

*/   

struct request_sock_queue {     

        struct request_sock     *rskq_accept_head;

        struct request_sock     *rskq_accept_tail;

        rwlock_t                syn_wait_lock; 

        u8                      rskq_defer_accept;

        /* 3 bytes hole, try to pack */

        struct listen_sock      *listen_opt;   

};            

The list pointed to by rskq_accept_head and rskq_accept_tail holds the transmission control blocks that have completed the three-way handshake; listen_opt holds those still in the SYN_RECV state.

Here we take a closer look at listen_opt:

struct listen_sock {

        u8                      max_qlen_log;

        /* 3 bytes hole, try to use */

        int                     qlen;

        int                     qlen_young;

        int                     clock_hand;

        u32                     hash_rnd;

        u32                     nr_table_entries;

        struct request_sock     *syn_table[0];

};

The syn_table here is the hash table of pending requests; its size determines how many half-open connections in the SYN_RECV state can exist at the same time.

Let's trace how this value is set, starting from the listen() system call:

/*

*     Perform a listen. Basically, we allow the protocol to do anything

*      necessary for a listen, and if that works, we mark the socket as

*      ready for listening.

*/

SYSCALL_DEFINE2(listen, int, fd, int, backlog)

{

        struct socket *sock;

        int err, fput_needed;

        int somaxconn;

        sock = sockfd_lookup_light(fd, &err, &fput_needed);

        if (sock) {

                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;

                /* Cap backlog so it never exceeds net.core.somaxconn */

                if ((unsigned)backlog > somaxconn)

                        backlog = somaxconn;

                err = security_socket_listen(sock, backlog);

                if (!err)

                        err = sock->ops->listen(sock, backlog);

                fput_light(sock->file, fput_needed);

        }

        return err;

}

/*

*      Move a socket into listening state.

*/

int inet_listen(struct socket *sock, int backlog)

{

        struct sock *sk = sock->sk;

        unsigned char old_state;

        int err;

        lock_sock(sk);

        err = -EINVAL;

        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)

                goto out;

        old_state = sk->sk_state;

        if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))

                goto out;

        /* Really, if the socket is already in listen state

         * we can only allow the backlog to be adjusted.

         */

        if (old_state != TCP_LISTEN) {

                err = inet_csk_listen_start(sk, backlog);

                if (err)

                        goto out;

        }

        sk->sk_max_ack_backlog = backlog;

        err = 0;

out:

        release_sock(sk);

        return err;

}

int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)

{

        struct inet_sock *inet = inet_sk(sk);

        struct inet_connection_sock *icsk = inet_csk(sk);

        int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

        if (rc != 0)

                return rc;

      

        sk->sk_max_ack_backlog = 0;

        sk->sk_ack_backlog = 0;

        inet_csk_delack_init(sk);

      

        /* There is race window here: we announce ourselves listening,

         * but this transition is still not validated by get_port().

         * It is OK, because this socket enters to hash table only

         * after validation is complete.

         */   

        sk->sk_state = TCP_LISTEN;

        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {

                inet->inet_sport = htons(inet->inet_num);

                sk_dst_reset(sk);

                sk->sk_prot->hash(sk);

                return 0;

        }

        sk->sk_state = TCP_CLOSE;

        __reqsk_queue_destroy(&icsk->icsk_accept_queue);

        return -EADDRINUSE;

}

int reqsk_queue_alloc(struct request_sock_queue *queue,

                      unsigned int nr_table_entries)

{

        size_t lopt_size = sizeof(struct listen_sock);

        struct listen_sock *lopt;

/* From this we can see that listen_sock->max_qlen_log is the log2 of the hash
   table size: take min(nr_table_entries, sysctl_max_syn_backlog), clamp it to
   at least 8, add 1, and round up to the next power of two.
   For example: nr_table_entries = 128, sysctl_max_syn_backlog = 20480
                min(nr_table_entries, sysctl_max_syn_backlog) = 128
                roundup_pow_of_two(128 + 1) = 256
                max_qlen_log = 8
*/

        nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);

        nr_table_entries = max_t(u32, nr_table_entries, 8);

        nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);

        lopt_size += nr_table_entries * sizeof(struct request_sock *);

        if (lopt_size > PAGE_SIZE)

                lopt = vzalloc(lopt_size);

        else

                lopt = kzalloc(lopt_size, GFP_KERNEL);

        if (lopt == NULL)

                return -ENOMEM;

        for (lopt->max_qlen_log = 3;

             (1 << lopt->max_qlen_log) < nr_table_entries;

             lopt->max_qlen_log++);

        get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));

        rwlock_init(&queue->syn_wait_lock);

        queue->rskq_accept_head = NULL;

        lopt->nr_table_entries = nr_table_entries;

        write_lock_bh(&queue->syn_wait_lock);

        queue->listen_opt = lopt;

        write_unlock_bh(&queue->syn_wait_lock);

        return 0;

}
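To make the rounding concrete, here is a small stand-alone user-space sketch (roundup_pow_of_two() below is a simplified stand-in for the kernel helper, and the inputs are just the example values from the comment above) that reproduces the computation of nr_table_entries and max_qlen_log:

#include <stdio.h>

/* simplified stand-in for the kernel's roundup_pow_of_two() */
static unsigned int roundup_pow_of_two(unsigned int n)
{
        unsigned int r = 1;

        while (r < n)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned int backlog = 128;                  /* nr_table_entries passed to reqsk_queue_alloc() */
        unsigned int tcp_max_syn_backlog = 20480;    /* sysctl_max_syn_backlog */
        unsigned int n, max_qlen_log;

        n = backlog < tcp_max_syn_backlog ? backlog : tcp_max_syn_backlog;
        if (n < 8)
                n = 8;
        n = roundup_pow_of_two(n + 1);               /* 256 */

        for (max_qlen_log = 3; (1u << max_qlen_log) < n; max_qlen_log++)
                ;                                    /* loop exits at 8 */

        printf("nr_table_entries=%u max_qlen_log=%u\n", n, max_qlen_log);
        return 0;
}

It prints nr_table_entries=256 max_qlen_log=8, matching the worked example in the comment above.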

From the analysis above, to increase the number of concurrent SYN requests, the following three parameters must all be raised together:

(1) net.core.somaxconn

(2) net.ipv4.tcp_max_syn_backlog

(3) the backlog argument of the listen() system call

Notice that the backlog argument of listen() not only limits the maximum number of fully established connections waiting to be accept()ed, it also limits the number of connections in the SYN_RECV state.
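As an illustrative sketch of putting all three together (port 8080 and a backlog of 4096 are arbitrary example values, not recommendations): first raise the two sysctls, for example with "sysctl -w net.core.somaxconn=4096" and "sysctl -w net.ipv4.tcp_max_syn_backlog=4096", then pass a matching backlog to listen(), roughly like this:

#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr;

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        addr.sin_port = htons(8080);             /* example port only */

        if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                perror("bind");
                return 1;
        }

        /* This backlog is capped by net.core.somaxconn before it becomes
         * nr_table_entries, which in turn determines max_qlen_log. */
        if (listen(fd, 4096) < 0) {
                perror("listen");
                return 1;
        }

        pause();                                 /* keep the listener alive */
        close(fd);
        return 0;
}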

 

Reposted from http://forever.blog.chinaunix.net
