如果在多个系统调用中完成TCP套接字，为什么会减慢速度?

本文介绍了如果在多个系统调用中完成TCP套接字，为什么会减慢速度?的处理方法，对大家解决问题具有一定的参考价值，需要的朋友们下面随着小编来一起学习吧！

问题描述

为什么以下代码运行缓慢?这里的"慢"是指慢100到1000倍.它只是直接在TCP套接字上重复执行读/写操作.奇怪的是，只有当读取和写入都各自使用两次函数调用时(如下所示)，它才会保持缓慢.如果我更改服务器或客户端代码以使用单个函数调用(如注释中所述)，它将变得非常快.

代码段:

// Minimal reproduction (pseudo-code: the "..." placeholders are not valid C).
// Each of the 2000 iterations is one exchange: the server writes 10 bytes and
// then issues two 10-byte reads; the client issues one 10-byte read and then
// two 10-byte writes.
// NOTE(review): amServer is not declared in this snippet -- presumably a flag
// selecting the server or client role. Return values of read()/write() are
// ignored here.
int main(...) {
  int sock = ...; // open TCP socket
  int i;
  char buf[100000];
  for(i=0;i<2000;++i)
  { if(amServer)
    { write(sock,buf,10);
      // Single-call alternative to the two reads below:
      // read(sock,buf,20);
      read(sock,buf,10);
      read(sock,buf,10);
    }else
    { read(sock,buf,10);
      // Single-call alternative to the two writes below:
      // write(sock,buf,20);
      write(sock,buf,10);
      write(sock,buf,10);
    }
  }
  close(sock);
}

我们在一个更大的程序中偶然发现了这个问题，它实际上是使用stdio缓冲的.当有效负载大小超过缓冲区大小一点点时，它就变得迟钝了.然后我做了一些strace的挖掘工作，最后将问题归结为这个问题.我可以通过使用缓冲策略来解决这个问题，但是我真的很想知道这里到底发生了什么.在我的计算机上，当我将两个读取调用更改为一个调用时，它从0.030 s持续到一分钟以上(在本地和远程计算机上都经过测试).

这些测试是在各种Linux发行版和各种内核版本上完成的.结果相同.

具有网络样板的完全可运行的代码:

#include <netdb.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/ip.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/*
 * Resolve name/port with getaddrinfo() and copy the first AF_INET result
 * into *res.
 *
 * res must point to storage large enough for the copied address; the callers
 * in this file pass a struct sockaddr_in.
 *
 * Returns 0 on success, -1 if resolution fails or no AF_INET entry exists.
 */
static int getsockaddr(const char* name,const char* port, struct sockaddr* res)
{
    struct addrinfo* head = NULL;

    /* getaddrinfo() signals failure with any non-zero EAI_* code, which is
       not guaranteed to be negative. */
    if(getaddrinfo(name,port,NULL,&head) != 0) return -1;

    /* Walk with a separate cursor so the list head stays available for
       freeaddrinfo(). */
    const struct addrinfo* it = head;
    while(it != NULL && it->ai_family != AF_INET) it = it->ai_next;

    int rc = -1;
    if(it != NULL)
    {
        memcpy(res,it->ai_addr,it->ai_addrlen);
        rc = 0;
    }

    /* Release the whole list on every path, including "no IPv4 entry". */
    freeaddrinfo(head);
    return rc;
}
// Open a TCP/IPv4 socket and connect it to *sa.
// Returns the connected descriptor, or -1 on failure (nothing is left open).
// used as sock=tcpConnect(...); ...; close(sock);
static int tcpConnect(struct sockaddr_in* sa)
{
    int outsock;
    if((outsock=socket(AF_INET,SOCK_STREAM,0))<0) return -1;
    if(connect(outsock,(struct sockaddr*)sa,sizeof(*sa))<0)
    {
        // Do not leak the descriptor when the connection attempt fails.
        close(outsock);
        return -1;
    }
    return outsock;
}
/*
 * Resolve server/port to an IPv4 address and open a TCP connection to it.
 * Returns the connected socket descriptor, or -1 if resolution or the
 * connection attempt fails.
 */
int tcpConnectTo(const char* server, const char* port)
{
    struct sockaddr_in addr;
    if(getsockaddr(server,port,(struct sockaddr*)&addr) < 0)
        return -1;

    const int fd = tcpConnect(&addr);
    return (fd < 0) ? -1 : fd;
}

/*
 * Create a TCP/IPv4 socket listening on the given decimal port on all local
 * addresses (INADDR_ANY), with SO_REUSEADDR enabled.
 *
 * Returns the listening descriptor, or -1 on failure. On setsockopt/bind/
 * listen failure a message is printed to stderr and the socket is closed, so
 * no descriptor is leaked.
 */
int tcpListenAny(const char* portn)
{
    /* "%hu" requires an unsigned short; scan into exactly that type. */
    unsigned short port;
    if(sscanf(portn,"%hu",&port)<1) return -1;

    int outsock=socket(AF_INET,SOCK_STREAM,0);
    if(outsock<0) return -1;

    int reuse = 1;
    struct sockaddr_in sa = { .sin_family=AF_INET, .sin_port=htons(port) };
    sa.sin_addr.s_addr = htonl(INADDR_ANY);

    /* Run the setup steps in order and remember which one failed. */
    const char* failure = NULL;
    if(setsockopt(outsock,SOL_SOCKET,SO_REUSEADDR,
              (const char*)&reuse,sizeof(reuse))<0) failure = "setsockopt() failed\n";
    else if(bind(outsock,(struct sockaddr*)&sa,sizeof(sa))<0) failure = "Bind failed\n";
    else if(listen(outsock,SOMAXCONN)<0) failure = "Listen failed\n";

    if(failure != NULL)
    {
        fputs(failure,stderr);
        close(outsock);
        return -1;
    }
    return outsock;
}

/*
 * Listen on the given port, accept a single incoming connection, and close
 * the listening socket.
 *
 * Returns the connected descriptor, or -1 if listening or accepting fails.
 */
int tcpAccept(const char* port)
{
    int listenSock = tcpListenAny(port);
    /* tcpListenAny() has already reported its own failure; do not hand an
       invalid descriptor to accept(). */
    if(listenSock<0) return -1;

    int sock = accept(listenSock,NULL,NULL);
    /* The listening socket is no longer needed whether accept() succeeded or
       not. */
    close(listenSock);
    if(sock<0) return fprintf(stderr,"Accept failed\n"),-1;
    return sock;
}

/*
 * Write n bytes from buf to fd, repeating write() until everything has been
 * handed to the kernel.
 *
 * If write() fails, the error is reported with perror() and the function
 * returns early; a zero-byte write also ends the loop so it cannot spin.
 */
void writeLoop(int fd,const char* buf,size_t n)
{
    while(n)
    {
        ssize_t written = write(fd,buf,n);
        if(written <= 0)
        {
            /* Subtracting -1 from the unsigned counter would make it grow
               and loop forever, so stop here instead. */
            if(written < 0) perror("write");
            return;
        }
        /* Continue after the bytes already written on a partial write. */
        buf += written;
        n -= (size_t)written;
    }
}
/*
 * Read n bytes from fd into buf, repeating read() until the request is
 * satisfied.
 *
 * Stops early at end-of-file (read() returns 0) or on error, which is
 * reported with perror(); in both cases fewer than n bytes are in buf.
 */
void readLoop(int fd,char* buf,size_t n)
{
    while(n)
    {
        ssize_t got = read(fd,buf,n);
        if(got <= 0)
        {
            /* 0 means the peer closed the stream; -1 is an error. Either
               would otherwise keep this loop running forever. */
            if(got < 0) perror("read");
            return;
        }
        /* Append after the bytes already received on a short read. */
        buf += got;
        n -= (size_t)got;
    }
}
/*
 * Ping-pong benchmark driver.
 *
 * Usage: round {server_addr|--} port
 * With "--" as the first argument the program accepts one connection on the
 * given port (server role); otherwise it connects to server_addr:port
 * (client role). It then runs 4000 exchanges in which the server sends
 * 10 bytes and reads 20, while the client reads 10 bytes and sends 20.
 *
 * Returns 0 on success, -1 on a usage or connection error.
 */
int main(int argc,char* argv[])
{
    if(argc<3)
    {
        fprintf(stderr,"Usage: round {server_addr|--} port\n");
        return -1;
    }

    const bool amServer = !strcmp("--",argv[1]);
    const int sock = amServer ? tcpAccept(argv[2])
                              : tcpConnectTo(argv[1],argv[2]);
    if(sock<0)
    {
        fprintf(stderr,"Connection failed\n");
        return -1;
    }

    char buf[100000] = { 0 };
    for(int round=0;round!=4000;++round)
    {
        if(!amServer)
        {
            // Client: wait for the request, then reply.
            readLoop(sock,buf,10);
            writeLoop(sock,buf,20);
            // Two-call alternative for the reply:
            //writeLoop(sock,buf,10);
            //writeLoop(sock,buf,10);
            continue;
        }

        // Server: send the request, then wait for the reply.
        writeLoop(sock,buf,10);
        readLoop(sock,buf,20);
        // Two-call alternative for the read:
        //readLoop(sock,buf,10);
        //readLoop(sock,buf,10);
    }

    close(sock);
    return 0;
}

此版本与前面的代码段稍有不同，因为它在循环中读取/写入.因此，在此版本中，即使只调用一次readLoop，两次单独的写入也会自动导致两次单独的read()调用.除此之外，问题依然存在.

解决方案

有趣.您成了 Nagle算法与 TCP延迟确认共同作用的受害者.

Nagle的算法是TCP中使用的一种机制，用于延迟小段的传输，直到积累了足够的数据为止，使其值得在网络上构建和发送段.摘自维基百科文章:

但是，TCP通常采用称为 TCP延迟确认的技术，该技术把一批ACK应答累积在一起发送(因为TCP使用累积确认)，以减少网络流量.

该维基百科文章进一步提到了这一点:

(强调我的)

在您的特定情况下，由于服务器在读取回复之前不会发送更多数据，因此客户端会造成延迟:如果客户端写两次，则第二次写入将被延迟.

因此，当客户端进行两次写调用时，将发生以下情况:

客户端发出第一次写入.
服务器接收到一些数据.它暂不确认，希望会有更多数据到达(这样就可以把多个ACK合并成一个ACK).
客户端发出第二次写入.先前的写入尚未得到确认，因此Nagle算法会推迟发送，直到有更多数据到达(收集到足够构成一个段的数据)，或者先前的写入得到确认.
服务器已经厌倦了等待，并在500毫秒后确认了该段.
客户端最终完成第二次写入.

只写一次时，将发生以下情况:

客户端发出第一次写入.
服务器接收到一些数据.它暂不确认，希望会有更多数据到达(这样就可以把多个ACK合并成一个ACK).
服务器写入套接字. ACK是TCP首部的一部分，因此如果本来就要发送数据，不妨顺带确认上一个段，没有任何额外开销.于是它就这么做了.
与此同时，客户端写入了一次，因此它已经在等待下一次读取-没有第二次写入等待服务器的ACK .

如果要在客户端继续写入两次，则需要禁用Nagle的算法.这是算法作者本人提出的解决方案:

(请参阅Wikipedia上的引用)

正如David Schwartz在评论中提到的，出于各种原因，这可能不是最好的主意，但它说明了这一点，并表明这确实是造成延迟的原因.

要禁用它，您需要使用setsockopt(2)在套接字上设置TCP_NODELAY选项.

这可以在tcpConnectTo()中为客户端完成:

/*
 * Resolve server/port to an IPv4 address, connect to it over TCP, and turn
 * on TCP_NODELAY for the new connection.
 *
 * Returns the connected socket descriptor, or -1 if resolution or the
 * connection attempt fails. A failure to set TCP_NODELAY is only reported
 * via perror(); the socket is still returned.
 */
int tcpConnectTo(const char* server, const char* port)
{
    struct sockaddr_in addr;
    if(getsockaddr(server,port,(struct sockaddr*)&addr) < 0)
        return -1;

    const int fd = tcpConnect(&addr);
    if(fd < 0)
        return -1;

    /* Ask the stack to send small writes immediately instead of coalescing. */
    int enable = 1;
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable)) < 0)
        perror("setsockopt(2) error");

    return fd;
}

在服务器的tcpAccept()中:

/*
 * Listen on the given port, accept a single incoming connection, close the
 * listening socket, and turn on TCP_NODELAY for the accepted connection.
 *
 * Returns the connected descriptor, or -1 if listening or accepting fails.
 * A failure to set TCP_NODELAY is only reported via perror(); the socket is
 * still returned.
 */
int tcpAccept(const char* port)
{
    int listenSock = tcpListenAny(port);
    /* tcpListenAny() has already reported its own failure; do not hand an
       invalid descriptor to accept(). */
    if(listenSock<0) return -1;

    int sock = accept(listenSock,NULL,NULL);
    /* The listening socket is no longer needed whether accept() succeeded or
       not. */
    close(listenSock);
    if(sock<0) return fprintf(stderr,"Accept failed\n"),-1;

    int val = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) < 0)
        perror("setsockopt(2) error");

    return sock;
}

看到它带来的巨大变化很有趣.

如果您不想弄乱套接字选项，那就足以确保客户端在下一次读取之前只写一次，并且只写一次.您仍然可以让服务器读取两次:

// Excerpt of the exchange loop from main(): i, amServer, sock and buf are
// declared outside this fragment.
// In this variant the client sends its 20-byte reply with one writeLoop()
// call, while the server consumes it with two 10-byte readLoop() calls.
for(i=0;i<4000;++i)
{
    if(amServer)
    { writeLoop(sock,buf,10);
        // Single-call alternative to the two reads below:
        //readLoop(sock,buf,20);
        readLoop(sock,buf,10);
        readLoop(sock,buf,10);
    }else
    { readLoop(sock,buf,10);
        writeLoop(sock,buf,20);
        // Two-call alternative to the single write above:
        //writeLoop(sock,buf,10);
        //writeLoop(sock,buf,10);
    }
}

Why is the following code slow? And by slow I mean 100x-1000x slow. It just repeatedly performs read/write directly on a TCP socket. The curious part is that it remains slow only if I use two function calls for both read AND write as shown below. If I change either the server or the client code to use a single function call (as in the comments), it becomes super fast.

Code snippet:

// Minimal reproduction (pseudo-code: the "..." placeholders are not valid C).
// Each of the 2000 iterations is one exchange: the server writes 10 bytes and
// then issues two 10-byte reads; the client issues one 10-byte read and then
// two 10-byte writes.
// NOTE(review): amServer is not declared in this snippet -- presumably a flag
// selecting the server or client role. Return values of read()/write() are
// ignored here.
int main(...) {
  int sock = ...; // open TCP socket
  int i;
  char buf[100000];
  for(i=0;i<2000;++i)
  { if(amServer)
    { write(sock,buf,10);
      // Single-call alternative to the two reads below:
      // read(sock,buf,20);
      read(sock,buf,10);
      read(sock,buf,10);
    }else
    { read(sock,buf,10);
      // Single-call alternative to the two writes below:
      // write(sock,buf,20);
      write(sock,buf,10);
      write(sock,buf,10);
    }
  }
  close(sock);
}

We stumbled on this in a larger program that was actually using stdio buffering. It mysteriously became sluggish the moment the payload size exceeded the buffer size by a small margin. Then I did some digging around with strace, and finally boiled the problem down to this. I can solve this by fooling around with the buffering strategy, but I'd really like to know what on earth is going on here. On my machine, it goes from 0.030 s to over a minute (tested both locally and over remote machines) when I change the two read calls to a single call.

These tests were done on various Linux distros, and various kernel versions. Same result.

Fully runnable code with networking boilerplate:

#include <netdb.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netinet/ip.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

/*
 * Resolve name/port with getaddrinfo() and copy the first AF_INET result
 * into *res.
 *
 * res must point to storage large enough for the copied address; the callers
 * in this file pass a struct sockaddr_in.
 *
 * Returns 0 on success, -1 if resolution fails or no AF_INET entry exists.
 */
static int getsockaddr(const char* name,const char* port, struct sockaddr* res)
{
    struct addrinfo* head = NULL;

    /* getaddrinfo() signals failure with any non-zero EAI_* code, which is
       not guaranteed to be negative. */
    if(getaddrinfo(name,port,NULL,&head) != 0) return -1;

    /* Walk with a separate cursor so the list head stays available for
       freeaddrinfo(). */
    const struct addrinfo* it = head;
    while(it != NULL && it->ai_family != AF_INET) it = it->ai_next;

    int rc = -1;
    if(it != NULL)
    {
        memcpy(res,it->ai_addr,it->ai_addrlen);
        rc = 0;
    }

    /* Release the whole list on every path, including "no IPv4 entry". */
    freeaddrinfo(head);
    return rc;
}
// Open a TCP/IPv4 socket and connect it to *sa.
// Returns the connected descriptor, or -1 on failure (nothing is left open).
// used as sock=tcpConnect(...); ...; close(sock);
static int tcpConnect(struct sockaddr_in* sa)
{
    int outsock;
    if((outsock=socket(AF_INET,SOCK_STREAM,0))<0) return -1;
    if(connect(outsock,(struct sockaddr*)sa,sizeof(*sa))<0)
    {
        // Do not leak the descriptor when the connection attempt fails.
        close(outsock);
        return -1;
    }
    return outsock;
}
/*
 * Resolve server/port to an IPv4 address and open a TCP connection to it.
 * Returns the connected socket descriptor, or -1 if resolution or the
 * connection attempt fails.
 */
int tcpConnectTo(const char* server, const char* port)
{
    struct sockaddr_in addr;
    if(getsockaddr(server,port,(struct sockaddr*)&addr) < 0)
        return -1;

    const int fd = tcpConnect(&addr);
    return (fd < 0) ? -1 : fd;
}

/*
 * Create a TCP/IPv4 socket listening on the given decimal port on all local
 * addresses (INADDR_ANY), with SO_REUSEADDR enabled.
 *
 * Returns the listening descriptor, or -1 on failure. On setsockopt/bind/
 * listen failure a message is printed to stderr and the socket is closed, so
 * no descriptor is leaked.
 */
int tcpListenAny(const char* portn)
{
    /* "%hu" requires an unsigned short; scan into exactly that type. */
    unsigned short port;
    if(sscanf(portn,"%hu",&port)<1) return -1;

    int outsock=socket(AF_INET,SOCK_STREAM,0);
    if(outsock<0) return -1;

    int reuse = 1;
    struct sockaddr_in sa = { .sin_family=AF_INET, .sin_port=htons(port) };
    sa.sin_addr.s_addr = htonl(INADDR_ANY);

    /* Run the setup steps in order and remember which one failed. */
    const char* failure = NULL;
    if(setsockopt(outsock,SOL_SOCKET,SO_REUSEADDR,
              (const char*)&reuse,sizeof(reuse))<0) failure = "setsockopt() failed\n";
    else if(bind(outsock,(struct sockaddr*)&sa,sizeof(sa))<0) failure = "Bind failed\n";
    else if(listen(outsock,SOMAXCONN)<0) failure = "Listen failed\n";

    if(failure != NULL)
    {
        fputs(failure,stderr);
        close(outsock);
        return -1;
    }
    return outsock;
}

/*
 * Listen on the given port, accept a single incoming connection, and close
 * the listening socket.
 *
 * Returns the connected descriptor, or -1 if listening or accepting fails.
 */
int tcpAccept(const char* port)
{
    int listenSock = tcpListenAny(port);
    /* tcpListenAny() has already reported its own failure; do not hand an
       invalid descriptor to accept(). */
    if(listenSock<0) return -1;

    int sock = accept(listenSock,NULL,NULL);
    /* The listening socket is no longer needed whether accept() succeeded or
       not. */
    close(listenSock);
    if(sock<0) return fprintf(stderr,"Accept failed\n"),-1;
    return sock;
}

/*
 * Write n bytes from buf to fd, repeating write() until everything has been
 * handed to the kernel.
 *
 * If write() fails, the error is reported with perror() and the function
 * returns early; a zero-byte write also ends the loop so it cannot spin.
 */
void writeLoop(int fd,const char* buf,size_t n)
{
    while(n)
    {
        ssize_t written = write(fd,buf,n);
        if(written <= 0)
        {
            /* Subtracting -1 from the unsigned counter would make it grow
               and loop forever, so stop here instead. */
            if(written < 0) perror("write");
            return;
        }
        /* Continue after the bytes already written on a partial write. */
        buf += written;
        n -= (size_t)written;
    }
}
/*
 * Read n bytes from fd into buf, repeating read() until the request is
 * satisfied.
 *
 * Stops early at end-of-file (read() returns 0) or on error, which is
 * reported with perror(); in both cases fewer than n bytes are in buf.
 */
void readLoop(int fd,char* buf,size_t n)
{
    while(n)
    {
        ssize_t got = read(fd,buf,n);
        if(got <= 0)
        {
            /* 0 means the peer closed the stream; -1 is an error. Either
               would otherwise keep this loop running forever. */
            if(got < 0) perror("read");
            return;
        }
        /* Append after the bytes already received on a short read. */
        buf += got;
        n -= (size_t)got;
    }
}
/*
 * Ping-pong benchmark driver.
 *
 * Usage: round {server_addr|--} port
 * With "--" as the first argument the program accepts one connection on the
 * given port (server role); otherwise it connects to server_addr:port
 * (client role). It then runs 4000 exchanges in which the server sends
 * 10 bytes and reads 20, while the client reads 10 bytes and sends 20.
 *
 * Returns 0 on success, -1 on a usage or connection error.
 */
int main(int argc,char* argv[])
{
    if(argc<3)
    {
        fprintf(stderr,"Usage: round {server_addr|--} port\n");
        return -1;
    }

    const bool amServer = !strcmp("--",argv[1]);
    const int sock = amServer ? tcpAccept(argv[2])
                              : tcpConnectTo(argv[1],argv[2]);
    if(sock<0)
    {
        fprintf(stderr,"Connection failed\n");
        return -1;
    }

    char buf[100000] = { 0 };
    for(int round=0;round!=4000;++round)
    {
        if(!amServer)
        {
            // Client: wait for the request, then reply.
            readLoop(sock,buf,10);
            writeLoop(sock,buf,20);
            // Two-call alternative for the reply:
            //writeLoop(sock,buf,10);
            //writeLoop(sock,buf,10);
            continue;
        }

        // Server: send the request, then wait for the reply.
        writeLoop(sock,buf,10);
        readLoop(sock,buf,20);
        // Two-call alternative for the read:
        //readLoop(sock,buf,10);
        //readLoop(sock,buf,10);
    }

    close(sock);
    return 0;
}

EDIT: This version is slightly different from the other snippet in that it reads/writes in a loop. So in this version, two separate writes automatically cause two separate read() calls, even if readLoop is called only once. But otherwise the problem still remains.

解决方案

Interesting. You are a victim of Nagle's algorithm together with TCP delayed acknowledgements.

Nagle's algorithm is a mechanism used in TCP to defer transmission of small segments until enough data has accumulated to make it worth building and sending a segment over the network. From the Wikipedia article:

However, TCP typically employs something known as TCP delayed acknowledgements, which is a technique that consists of accumulating together a batch of ACK replies (because TCP uses cumulative ACKS), to reduce network traffic.

That wikipedia article further mentions this:

(Emphasis mine)

In your specific case, since the server doesn't send more data before reading the reply, the client is causing the delay: if the client writes twice, the second write will be delayed.

So, when the client makes 2 write calls, this is what happens:

Client issues the first write.
The server receives some data. It doesn't acknowledge it in the hope that more data will arrive (so it can batch up a bunch of ACKs in one single ACK).
Client issues the second write. The previous write has not been acknowledged, so Nagle's algorithm defers transmission until more data arrives (until enough data has been collected to make a segment) or the previous write is ACKed.
Server is tired of waiting and after 500 ms acknowledges the segment.
Client finally completes the 2nd write.

With 1 write, this is what happens:

Client issues the first write.
The server receives some data. It doesn't acknowledge it in the hope that more data will arrive (so it can batch up a bunch of ACKs in one single ACK).
The server writes to the socket. An ACK is part of the TCP header, so if you're writing, you might as well acknowledge the previous segment at no extra cost. Do it.
Meanwhile, the client wrote once, so it was already waiting on the next read - there was no 2nd write waiting for the server's ACK.

If you want to keep writing twice on the client side, you need to disable the Nagle's algorithm. This is the solution proposed by the algorithm author himself:

(See the citation on Wikipedia)

As mentioned by David Schwartz in the comments, this may not be the greatest idea for various reasons, but it illustrates the point and shows that this is indeed causing the delay.

To disable it, you need to set the TCP_NODELAY option on the sockets with setsockopt(2).

This can be done in tcpConnectTo() for the client:

/*
 * Resolve server/port to an IPv4 address, connect to it over TCP, and turn
 * on TCP_NODELAY for the new connection.
 *
 * Returns the connected socket descriptor, or -1 if resolution or the
 * connection attempt fails. A failure to set TCP_NODELAY is only reported
 * via perror(); the socket is still returned.
 */
int tcpConnectTo(const char* server, const char* port)
{
    struct sockaddr_in addr;
    if(getsockaddr(server,port,(struct sockaddr*)&addr) < 0)
        return -1;

    const int fd = tcpConnect(&addr);
    if(fd < 0)
        return -1;

    /* Ask the stack to send small writes immediately instead of coalescing. */
    int enable = 1;
    if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &enable, sizeof(enable)) < 0)
        perror("setsockopt(2) error");

    return fd;
}

And in tcpAccept() for the server:

/*
 * Listen on the given port, accept a single incoming connection, close the
 * listening socket, and turn on TCP_NODELAY for the accepted connection.
 *
 * Returns the connected descriptor, or -1 if listening or accepting fails.
 * A failure to set TCP_NODELAY is only reported via perror(); the socket is
 * still returned.
 */
int tcpAccept(const char* port)
{
    int listenSock = tcpListenAny(port);
    /* tcpListenAny() has already reported its own failure; do not hand an
       invalid descriptor to accept(). */
    if(listenSock<0) return -1;

    int sock = accept(listenSock,NULL,NULL);
    /* The listening socket is no longer needed whether accept() succeeded or
       not. */
    close(listenSock);
    if(sock<0) return fprintf(stderr,"Accept failed\n"),-1;

    int val = 1;
    if (setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val)) < 0)
        perror("setsockopt(2) error");

    return sock;
}

It's interesting to see the huge difference this makes.

If you'd rather not mess with the socket options, it's enough to ensure that the client writes once - and only once - before the next read. You can still have the server read twice:

// Excerpt of the exchange loop from main(): i, amServer, sock and buf are
// declared outside this fragment.
// In this variant the client sends its 20-byte reply with one writeLoop()
// call, while the server consumes it with two 10-byte readLoop() calls.
for(i=0;i<4000;++i)
{
    if(amServer)
    { writeLoop(sock,buf,10);
        // Single-call alternative to the two reads below:
        //readLoop(sock,buf,20);
        readLoop(sock,buf,10);
        readLoop(sock,buf,10);
    }else
    { readLoop(sock,buf,10);
        writeLoop(sock,buf,20);
        // Two-call alternative to the single write above:
        //writeLoop(sock,buf,10);
        //writeLoop(sock,buf,10);
    }
}

这篇关于如果在多个系统调用中完成TCP套接字，为什么会减慢速度?的文章就介绍到这了，希望我们推荐的答案对大家有所帮助，也希望大家多多支持！