我对服务器设置和拆卸进行了压力测试。主线程是服务器线程,另一个线程是客户端线程。
在客户端线程中,我在一个紧密循环里依次调用 socket()、connect()、send()、recv()、shutdown() 和 close(),故意创建大量短连接。只要其中任何一个套接字调用出错,该循环就会终止。
在主线程中,我调用 listen(),然后启动客户端线程。我 accept() 少量连接,对每个连接调用一次 recv() 和一次 send(),然后对已接受的套接字调用 shutdown() 和 close()。之后我对侦听套接字调用 shutdown() 和 close(),并 join 客户端线程。
大约每 1500 次迭代会出现一次这样的情况:客户端线程卡在 recv() 上,而主线程卡在 pthread_join(client_thread) 中。“netstat -n -p tcp”显示两个处于 ESTABLISHED 状态的 TCP/IP 条目,分别对应该连接的两端。我的调试打印表明,客户端的成功 connect 与我对侦听套接字调用 close() 几乎是同时发生的。服务器从未对该客户端连接调用 accept()。如果此时我用 CTRL-C 终止程序,客户端一侧进入 FIN_WAIT_2,服务器一侧进入 CLOSE_WAIT。FIN_WAIT_2 最终会消失,但 CLOSE_WAIT 即使在注销并重新登录之后仍然残留。
// C / POSIX system headers
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/un.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <pthread.h>
#include <unistd.h>
// C++ standard library (printf; exit/rand/abs; std::memset)
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
// Address list filled in by getaddrinfo() in main(). start_server() and
// short_connect() read its first entry to create, bind and connect sockets.
struct addrinfo *res = 0;
// Incremented by short_connect() at the start of every connection attempt and
// printed in its send/recv log lines.
int count = 0;
/// Releases a socket descriptor when the object goes out of scope: the
/// destructor logs, calls shutdown(SHUT_RDWR) and then close() on `s`.
///
/// Deliberately left as a plain aggregate so that both `tSocketCloser c = {fd};`
/// and "declare first, assign `s` afterwards" keep working.
/// Copying is not disabled, so do not copy an instance: both copies would
/// close the same descriptor.
struct tSocketCloser {
    int s;  ///< Descriptor to release; a negative value means there is nothing to close.

    ~tSocketCloser() {
        // socket() and accept() report failure as -1. Calling shutdown()/close()
        // on that value only yields EBADF (clobbering errno) and logs a close
        // that never happened, so skip invalid descriptors entirely.
        if (s < 0)
            return;
        printf("clientClosing: %d\n", s);
        shutdown(s, SHUT_RDWR);
        close(s);
        printf("clientClosed: %d\n", s);
    }
};
/// Creates a listening socket from the first entry of the global `res`.
///
/// Steps: socket(), SO_REUSEADDR, bind() to res->ai_addr, listen() with a
/// backlog of 1024. Every failure is reported on stdout and terminates the
/// process with exit(-1).
///
/// @return the descriptor of the listening socket.
int start_server() {
    const int listenSocket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
    printf("listenSocket: %d\n", listenSocket);
    if (listenSocket < 0)
    {
        // Previously a failed socket() only surfaced later as a misleading
        // "SO_REUSEADDR" error; report the real cause instead.
        printf("socket %d\n", listenSocket);
        exit(-1);
    }

    int one = 1;
    int ret = ::setsockopt(listenSocket, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    if (ret)
    {
        printf("SO_REUSEADDR %d\n", ret);
        exit(-1);
    }
    /* SO_LINGER {1,0} and TCP_NODELAY were here */
    // O_NONBLOCK code was here

    // ai_addrlen already has the socklen_t type bind() expects, so pass it as is.
    ret = ::bind(listenSocket, res->ai_addr, res->ai_addrlen);
    if (ret)
    {
        printf("Bind %d\n", ret);
        exit(-1);
    }

    ret = ::listen(listenSocket, 1024);
    if (ret)
    {
        printf("listen %d\n", ret);
        exit(-1);
    }
    return listenSocket;
}
/// Serves a few connections on `listenSocket`, then shuts it down and closes it.
///
/// Accepts rand() % 3 connections (0 to 2). For each one it performs a single
/// recv() followed by a single send(); the accepted socket is released by
/// tSocketCloser at the end of the iteration. Any failure stops the accept
/// loop early. Afterwards the function sleeps for up to 999 microseconds and
/// then calls shutdown() and close() on the listening socket.
///
/// @param listenSocket descriptor returned by start_server().
void stop_server(int listenSocket) {
    const int iters = rand() % 3;
    for (int i = 0; i < iters; ++i)
    {
        struct sockaddr_storage clientAddress;
        // accept() takes a socklen_t*; use the real type instead of casting an int*.
        socklen_t size = sizeof(clientAddress);
        tSocketCloser otherSock;
        otherSock.s = ::accept(
            listenSocket,
            reinterpret_cast<struct sockaddr *>(&clientAddress),
            &size);
        printf("accept: %d\n", otherSock.s);
        if (otherSock.s < 0)
        {
            // Nothing was accepted; stop here rather than failing later in
            // setsockopt() with an unrelated message.
            break;
        }

        int one = 1;
        int ret = setsockopt(otherSock.s, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));
        if (ret)
        {
            printf("SO_NOSIGPIPE %d\n", ret);
            break;
        }

        char buffer[2048] = {0};
        ret = recv(otherSock.s, buffer, sizeof(buffer), 0);
        if (ret == -1)
            break;
        // The reply is always the whole buffer, regardless of how many bytes
        // were received.
        ret = send(otherSock.s, buffer, sizeof(buffer), 0);
        if (ret == -1)
            break;
    }

    // Random delay so the close below lands at varying points relative to the
    // client's connect().
    int sleep_time = abs(rand() % 1000);
    usleep(sleep_time);

    printf("serverClosing: %d\n", listenSocket);
    shutdown(listenSocket, SHUT_RDWR);
    close(listenSocket);
    printf("serverClosed: %d\n", listenSocket);
}
void *short_connect(void *)
{
while(true) {
++count;
int connectSocket = -1;
int ret = 0;
int one = 1;
connectSocket = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
tSocketCloser closer = {connectSocket};
/* SO_LINGER {1,0} and TCP_NODELAY were here */
ret = setsockopt(connectSocket, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(one));
if(ret)
{
printf("client SO_NOSIGPIPE %d\n", ret);
return NULL;
}
// O_NONBLOCK code was here
ret = connect(connectSocket, res->ai_addr, static_cast<int>(res->ai_addrlen));
if(ret)
{
printf("bad connect %d\n", ret);
return NULL;
}
printf("good connect %d\n",connectSocket);
char buffer[1024] = {0};
ret = send(connectSocket, buffer, sizeof(buffer), 0);
printf("%d: send %d\n", count, ret);
if(ret == -1)
return NULL;
ret = recv(connectSocket, buffer, sizeof(buffer), 0);
printf("%d: recv %d\n", count, ret);
if(ret == -1)
return NULL;
printf("Success!\n");
}
}
/// Stress-test driver.
///
/// Resolves the wildcard address for port 9999 into the global `res`, then
/// runs 1000 rounds of: start the listening socket, start the client thread,
/// serve and close the listener via stop_server(), and join the client thread.
///
/// @return 0 after all rounds complete; exits early if getaddrinfo() or
///         pthread_create() fails.
int main() {
    struct addrinfo hints;
    char port[sizeof("65536") + 1] = "9999";

    std::memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;

    // Wildcard address
    const int error = getaddrinfo(NULL, port, &hints, &res);
    if (error) {
        printf("getaddrinfo %d\n", error);
        exit(error);
    }

    for (int i = 0; i < 1000; ++i)
    {
        int sock = start_server();

        pthread_t clientThread = 0;
        const int rc = pthread_create(&clientThread, NULL, short_connect, NULL);
        if (rc)
        {
            // Without a client thread, stop_server() could block in accept()
            // forever and pthread_join() would be given an invalid thread id.
            printf("pthread_create %d\n", rc);
            close(sock);
            exit(-1);
        }

        stop_server(sock);

        void *ignore;
        pthread_join(clientThread, &ignore);
    }

    // The client thread has been joined, so nothing reads `res` any more.
    freeaddrinfo(res);
    res = 0;
    return 0;
}
这是一些略带注释的输出:
listenSocket: 4 //what a good run looks like...
good connect 5
accept: 6
42: send 1024
clientClosing: 6
42: recv 1024
Success!
clientClosing: 5
clientClosed: 5
clientClosed: 6
good connect 5
accept: 6
43: send 1024
clientClosing: 6
43: recv 1024
clientClosed: 6
Success!
clientClosing: 5
clientClosed: 5
good connect 5 //client connects
44: send 1024
serverClosing: 4 //server starting close...
serverClosed: 4 //server done closing
44: recv -1 //recv errors out, as it should. Note the lack of accept() calls
clientClosing: 5 //client teardown
clientClosed: 5
listenSocket: 4 //what a bad run looks like...
good connect 5
accept: 6
45: send 1024
clientClosing: 6
45: recv 1024
clientClosed: 6
Success!
clientClosing: 5
clientClosed: 5
good connect 5
accept: 6
46: send 1024
clientClosing: 6
clientClosed: 6
46: recv 1024
Success!
clientClosing: 5
clientClosed: 5
serverClosing: 4 //server starting close...
good connect 5 //client connect
serverClosed: 4 //server done closing
47: send 1024 //successful send from client
//stuck in recv(), so we get no further prints
所以最大的问题是……如何在不进入这种状态的情况下关闭我的侦听套接字?CLOSE_WAIT 状态表明我需要关闭已接受的连接,但我手上并没有可供关闭的套接字/文件描述符。更奇怪的是,杀死同时承载服务器和客户端的这个程序也不会让这些套接字被清理掉(数小时之后,netstat 中仍然能看到 CLOSE_WAIT 套接字)。
这一切都发生在OS X 10.8.3 x86_64上。