场景
南浔项目,服务器日志打印大量的异常日志:
TSocket::write_partial() send() <Host: ::ffff:41.230.95.17 Port: 63165>errno = 10053
日志提示服务器向客户端发送数据失败,随后服务器主动断开连接。从日志可以看出,服务器能够正常接收到客户端的登录请求和心跳信息。
客户端每次登录后,服务器都无法成功发送回复;服务器捕获到 10053 网络错误后关闭连接,客户端随即重新连接,又因同样的错误被断开,于是一直反复尝试连接。
分析
10053 错误码(WSAECONNABORTED):你的主机中的软件中止了一个已建立的连接。目前不清楚程序在哪里调用了中止连接的方法。至少 Thrift 上层没有调用关闭的方法:上层的任何操作都有日志输出佐证,而根据日志,是先发生了 10053 错误,上层才开始主动关闭连接的。因此只能是底层的逻辑出现了异常。
代码
uint32_t TSocket::write_partial(const uint8_t* buf, uint32_t len) {
if (socket_ == -1) {
return -1;
throw TTransportException(TTransportException::NOT_OPEN, "Called write on non-open socket");
}
uint32_t sent = 0;
int flags = 0;
#ifdef MSG_NOSIGNAL
// Note the use of MSG_NOSIGNAL to suppress SIGPIPE errors, instead we
// check for the EPIPE return condition and close the socket in that case
flags |= MSG_NOSIGNAL;
#endif // ifdef MSG_NOSIGNAL
int b = send(socket_, const_cast_sockopt(buf + sent), len - sent, flags);
++g_socket_syscalls;
if (b < 0)
{
if (errno == EWOULDBLOCK || errno == EAGAIN)
{
return 0;
}
// Fail on a send error
int errno_copy = errno;
GlobalOutput.perror("TSocket::write_partial() send() " + getSocketInfo(), errno_copy);
if (errno_copy == EPIPE || errno_copy == ECONNRESET || errno_copy == ENOTCONN)
{
close();
return -1;
//throw TTransportException(TTransportException::NOT_OPEN, "write() send()", errno_copy);
}
//在这里程序抛出了异常,然后被上层的代码捕获到,然后上层的处理程序
//立即调用关闭接口,中止了这个链接
throw TTransportException(TTransportException::UNKNOWN, "write() send()", errno_copy);
}
// Fail on blocked send
if (b == 0) {
throw TTransportException(TTransportException::NOT_OPEN, "Socket send returned 0.");
}
return b;
}
调用逻辑
/**
 * Sends all len bytes of buf, calling write_partial() repeatedly until the
 * whole buffer has been handed over.
 *
 * @param buf  start of the data to send
 * @param len  total number of bytes to send
 * @throws TTransportException TIMED_OUT when write_partial() reports that
 *         zero bytes were written
 */
void TSocket::write(const uint8_t* buf, uint32_t len) {
  for (uint32_t offset = 0; offset < len;) {
    const uint32_t chunk = write_partial(buf + offset, len - offset);
    if (chunk != 0) {
      offset += chunk;
      continue;
    }
    // A zero-byte result should only occur when the timeout configured with
    // SO_SNDTIMEO has expired, so report it as a timeout.
    throw TTransportException(TTransportException::TIMED_OUT,
                              "send timeout expired");
  }
}
猜测
客户端当时可能还没有准备好接收数据(需要现场抓包分析,确认当时的 TCP 状态);如果是这样,即使发送失败,也可以先不断开连接。
调整测试
当前尝试的方案是:发送数据失败时不抛出异常,而是等待客户端主动断开连接,因为服务器与客户端之间有心跳保活机制。
uint32_t TSocket::write_partial(const uint8_t* buf, uint32_t len) {
if (socket_ == -1) {
return -1;
throw TTransportException(TTransportException::NOT_OPEN, "Called write on non-open socket");
}
uint32_t sent = 0;
int flags = 0;
#ifdef MSG_NOSIGNAL
// Note the use of MSG_NOSIGNAL to suppress SIGPIPE errors, instead we
// check for the EPIPE return condition and close the socket in that case
flags |= MSG_NOSIGNAL;
#endif // ifdef MSG_NOSIGNAL
int b = send(socket_, const_cast_sockopt(buf + sent), len - sent, flags);
++g_socket_syscalls;
if (b < 0)
{
if (errno == EWOULDBLOCK || errno == EAGAIN)
{
return 0;
}
// Fail on a send error
int errno_copy = errno;
GlobalOutput.perror("TSocket::write_partial() send() " + getSocketInfo(), errno_copy);
if (errno_copy == EPIPE || errno_copy == ECONNRESET || errno_copy == ENOTCONN)
{
close();
return -1;
//throw TTransportException(TTransportException::NOT_OPEN, "write() send()", errno_copy);
}
//直接返回-1
return -1;
//throw TTransportException(TTransportException::UNKNOWN, "write() send()", errno_copy);
}
// Fail on blocked send
if (b == 0) {
throw TTransportException(TTransportException::NOT_OPEN, "Socket send returned 0.");
}
return b;
}
标签:partial,TSocket,errno,send,write,copy,TTransportException,sent
From: https://blog.51cto.com/fengyuzaitu/6161100