先給出如下C程序.在此代碼中,我們故意屏蔽了發送端的close socket函數.運行後我們會發現發送端有大量的CLOSE_WAIT狀態的連接,此時接收端是大量的FIN_WAIT_2狀態的連接.
main.c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
/**
 * Open a TCP connection to an IPv4 peer.
 *
 * @param remoteip   Peer address in IPv4 dotted-decimal notation; must not
 *                   be NULL.
 * @param remoteport Peer TCP port in host byte order (0..65535).
 * @return A connected socket descriptor on success; the caller owns it and
 *         is responsible for close()-ing it. -1 on any failure, in which
 *         case no descriptor is left open.
 */
int socket_connect(const char *remoteip, int remoteport){
    int socketfd = -1;
    in_addr_t peer_addr;
    struct sockaddr_in sa;

    /* Validate the arguments before creating the socket so that the
     * early-exit paths have nothing to clean up. */
    if(remoteip == NULL || remoteport < 0 || remoteport > 65535){
        printf("Error: invalid remote address or port\n");
        return -1;
    }
    peer_addr = inet_addr(remoteip);
    if(peer_addr == INADDR_NONE){
        /* inet_addr() reports an unparsable string with INADDR_NONE. */
        printf("Error: invalid remote ip [%s]\n", remoteip);
        return -1;
    }

    socketfd = socket(AF_INET, SOCK_STREAM, 0);
    if(socketfd < 0){
        printf("Error: create socket error\n");
        return -1;
    }

    /*
     * Intentionally disabled. Enabling this block turns on TCP keepalive
     * with shortened timers (idle time before the first probe, interval
     * between probes, number of probes), so that a half-closed connection
     * is detected quickly instead of after the system default.

    int keepAlive = 1;
    int keepIdle = 1;
    int keepInterval = 1;
    int keepCount = 2;

    if(setsockopt(socketfd, SOL_SOCKET, SO_KEEPALIVE, &keepAlive, sizeof(keepAlive)) < 0) {
        printf("Error: set socket [%d] opt keep alive error\n", socketfd);
        close(socketfd);
        return -1;
    }
    if(setsockopt(socketfd, IPPROTO_TCP, TCP_KEEPIDLE, &keepIdle, sizeof(keepIdle)) < 0) {
        printf("Error: set socket [%d] opt keep idle error\n", socketfd);
        close(socketfd);
        return -1;
    }
    if(setsockopt(socketfd, IPPROTO_TCP, TCP_KEEPINTVL, &keepInterval, sizeof(keepInterval)) < 0) {
        printf("Error: set socket [%d] opt keep interval error\n", socketfd);
        close(socketfd);
        return -1;
    }
    if(setsockopt(socketfd, IPPROTO_TCP, TCP_KEEPCNT, &keepCount, sizeof(keepCount)) < 0) {
        printf("Error: set socket [%d] opt keep count error\n", socketfd);
        close(socketfd);
        return -1;
    }
    */

    memset(&sa, 0, sizeof(sa));
    sa.sin_family = AF_INET;
    sa.sin_port = htons((unsigned short)remoteport);
    sa.sin_addr.s_addr = peer_addr;

    if(connect(socketfd, (struct sockaddr *)&sa, sizeof(sa)) < 0){
        printf("Error: connect exception\n");
        /* Do not leak the descriptor when the connection attempt fails. */
        close(socketfd);
        return -1;
    }
    return socketfd;
}
/**
 * Create a TCP socket bound to INADDR_ANY:port and put it in listening mode.
 *
 * @param port              Local TCP port in host byte order (0..65535).
 * @param listen_queue_size Backlog value handed to listen().
 * @return The listening socket descriptor on success (owned by the caller),
 *         or -1 on failure, in which case no descriptor is left open.
 */
int socket_listen(int port, int listen_queue_size){
    int s = -1;
    int bReuseaddr = 1;
    struct sockaddr_in addr;

    if(port < 0 || port > 65535){
        printf("Info: invalid listen port %d\n", port);
        return -1;
    }

    s = socket(AF_INET, SOCK_STREAM, 0);
    if(s < 0){
        return -1;
    }

    /* Allow the port to be re-bound while connections from a previous run
     * are still lingering on it. */
    if(setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &bReuseaddr, sizeof(bReuseaddr)) < 0){
        close(s);
        return -1;
    }

    /* Zero the whole structure so the padding bytes are not left
     * uninitialised when it is passed to bind(). */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons((unsigned short)port);
    addr.sin_addr.s_addr = htonl(INADDR_ANY);

    if(bind(s, (struct sockaddr *)&addr, sizeof(addr)) < 0){
        printf("Info: bind port %d failed\n", port);
        close(s);
        return -1;
    }
    if(listen(s, listen_queue_size) < 0){
        close(s);
        return -1;
    }
    return s;
}
/**
 * Accept one pending connection on a listening socket.
 *
 * @param listen_socket Descriptor passed to accept().
 * @param remoteip      Currently unused; the peer address is not copied out.
 * @return The accepted connection's descriptor on success (owned by the
 *         caller), or -1 if accept() fails.
 */
int socket_accept(int listen_socket,char* remoteip){
    struct sockaddr_in remote_addr;
    socklen_t len = sizeof(remote_addr);
    int s = -1;

    (void)remoteip;
    memset(&remote_addr, 0, sizeof(remote_addr));

    s = accept(listen_socket, (struct sockaddr *)&remote_addr, &len);
    /* Only negative values signal failure; 0 is a legal descriptor. */
    if(s < 0){
        printf("accept failed, errno = %d\n", errno);
        return -1;
    }
    return s;
}
/**
 * Demo driver for observing CLOSE_WAIT / FIN_WAIT_2 connections.
 *
 * Takes exactly one numeric argument:
 *   non-zero - sender: connects to the peer every 4 seconds and deliberately
 *              never closes the resulting sockets;
 *   zero     - receiver: listens, accepts connections and closes each one
 *              immediately.
 *
 * Both modes loop forever; the function only returns (-1) on a usage error
 * or when a socket operation fails.
 */
int main(int argc, char* argv[]){
    const char *const peer_ip = "10.224.55.145";
    const int port = 8765;
    const int backlog = 100;
    char *end = NULL;
    long sender = 0;
    int ret = 0;
    int listen_sock = -1;

    if(argc != 2){
        printf("usage: ./test 1\n");
        return -1;
    }

    /* Reject non-numeric input instead of silently treating it as 0. */
    errno = 0;
    sender = strtol(argv[1], &end, 10);
    if(end == argv[1] || *end != '\0' || errno == ERANGE){
        printf("usage: ./test 1\n");
        return -1;
    }

    if(sender){
        while(1){
            printf("new round connect\n");
            sleep(4);
            ret = socket_connect(peer_ip, port);
            if(ret < 0){
                printf("socket connect failed\n");
                return -1;
            }
            printf("socket connect success, sock = %d\n", ret);
            /* The socket is intentionally left open: once the receiver
             * closes its end, this side stays in CLOSE_WAIT, which is the
             * state this demo is meant to produce. */
            // close(ret);
        }
    }else{
        listen_sock = socket_listen(port, backlog);
        if(listen_sock < 0){
            printf("socket listen failed\n");
            return -1;
        }
        while(1){
            printf("before accept socket\n");
            ret = socket_accept(listen_sock, NULL);
            if(ret < 0){
                printf("socket accept failed\n");
                close(listen_sock);
                return -1;
            }
            printf("socket accept success, sock = %d\n", ret);
            /* The receiver is the side that actively closes. */
            close(ret);
        }
    }
    return 0;
}
上面程序相當於接收端主動斷掉當前連接,那麼雙方關閉此TCP連接需要的四次揮手如下:
I.   SERVER --- FIN --> CLIENT
II.  CLIENT --- ACK --> SERVER
III. CLIENT --- FIN --> SERVER
IV.  SERVER --- ACK --> CLIENT
接收端關閉連接,步驟I,II沒問題.由於發送端沒有關閉連接,所以步驟III不會繼續執行.此時,發送端等待自己關閉連接,而接收端等待發送端發送FIN報文.所以,此時,發送端處於CLOSE_WAIT狀態,接收端處於FIN_WAIT_2狀態.
同理,發送端主動關閉連接,而屏蔽接收端的close socket函數.則發送端會有大量FIN_WAIT_2狀態的連接,而接收端會有大量的CLOSE_WAIT狀態的連接.當然,造成這種問題也可能是在關閉連接前要處理大量事情.
如何解決此問題?通過設置SO_KEEPALIVE選項能保證此連接會在2個小時後被檢測到有問題,此選項一般用於對端以一種非優雅的方式斷開的情況.如果我們不能接受如此之長的等待時間,可以通過設置TCP_KEEPIDLE,TCP_KEEPINTVL,TCP_KEEPCNT三個選項來縮短這個時間,從而使CLOSE_WAIT狀態的連接儘快釋放,而不會越來越多. 如程序socket_connect函數中屏蔽代碼實現的.
TCP_KEEPIDLE:開始首次KeepAlive探測前的TCP空閒時間.
TCP_KEEPINTVL:兩次KeepAlive探測間的時間間隔.
TCP_KEEPCNT:判定斷開前的KeepAlive探測次數.
注意:這些屬性都必須在客戶端(發送端)設置.如果在服務器端(接收端)accept後設置,只能釋放接收端未能釋放的連接.