I/O模型

Unix下可用的I/O模型有五种:

  • 阻塞式I/O
  • 非阻塞式I/O
  • I/O复用(select和poll、epoll)
  • 信号驱动式I/O(SIGIO)
  • 异步I/O(POSIX的aio_系列函数)

select()和poll()在Unix系统中存在时间长,主要优势在于可移植性,主要缺点在于当同时检查大量的文件描述符时性能拓展性不佳。

epoll API的关键优势在于能让应用高效地检查大量的文件描述符,主要缺点是专属于Linux系统的API。

网络编程-I/O复用-LMLPHP

I/O复用-select

select()首次出现在BSD系统的套接字API中。

select()系统调用的用途:在一段指定的时间内,监听用户感兴趣的文件描述符上的可读、可写和异常事件。

系统调用select()会一直阻塞,直到一个或多个文件描述符集合成为就绪态。

#include <sys/select.h>
#include <sys/time.h>

//若有就绪描述符则返回其数目,若超时则返回0,若出错则返回-1
int select(int maxfdp1, fd_set *readset, fd_set *writeset,
           fd_set *exceptset, const struct timeval *timeout);

探究下fd_set的结构

/*typesizes.h*/
#define __FD_SETSIZE		1024

/*select.h*/
typedef long int __fd_mask;

//long int类型共有多少bits
#define __NFDBITS	(8 * (int) sizeof (__fd_mask))

typedef struct
{
    //long int型数组,数组大小 = 描述符最大数 / long int的位数
    //数组大小为 __FD_SETSIZE bits
    __fd_mask fds_bits[__FD_SETSIZE / __NFDBITS];
} fd_set;

select()程序示例:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdarg.h>

static void usageError(const char* progName){
    fprintf(stderr, "Usage: %s {timeout | -} fd-num[rw]...\n", progName);
    fprintf(stderr, "    - means infinite timeout; \n");
    fprintf(stderr, "    r = monitor for read\n");
    fprintf(stderr, "    w = monitor for wirite\n\n");
    fprintf(stderr, "    e.g.: %s - 0rw 1w\n", progName);
    exit(1);
}
void cmdLineErr(const char *format, ...)
{
    va_list argList;

    fflush(stdout);           /* Flush any pending stdout */

    fprintf(stderr, "Command-line usage error: ");
    va_start(argList, format);
    vfprintf(stderr, format, argList);
    va_end(argList);

    fflush(stderr);           /* In case stderr is not line-buffered */
    exit(EXIT_FAILURE);
}

int main(int argc, char* argv[]){
    fd_set readfds, writefds;
    int ready, nfds, fd, numRead, j;
    struct timeval timeout;
    struct timeval *pto;
    char buf[10];

    if(argc < 2 || strcmp(argv[1], "--help") == 0){
        usageError(argv[0]);
    }

    if(strcmp(argv[1], "-") == 0){
        pto = NULL;
    }
    else{
        pto = &timeout;
        timeout.tv_sec = strtol(argv[1], NULL, 0);
        timeout.tv_usec = 0;
    }

    nfds = 0;
    FD_ZERO(&readfds);
    FD_ZERO(&writefds);

    for(j = 2; j < argc; j++){
        numRead = sscanf(argv[j], "%d%2[rw]", &fd, buf);
        if(numRead != 2){
            usageError(argv[0]);
        }
        if(fd >= FD_SETSIZE){
            cmdLineErr("file descriptor exceeds limit (%d)\n", FD_SETSIZE);
        }
        if(fd >= nfds){
            nfds = fd + 1;
        }
        if(strchr(buf, 'r') != NULL){
            FD_SET(fd, &readfds);
        }
        if(strchr(buf, 'w') != NULL){
            FD_SET(fd, &writefds);
        }
    }
    ready = select(nfds, &readfds, &writefds, NULL, pto);
    if(ready == -1){
        printf("errExit(select)");
        exit(1);
    }
    printf("ready = %d\n", ready);
    for(fd = 0; fd < nfds; fd++){
        printf("%d: %s%s\n",fd, FD_ISSET(fd, &readfds) ? "r" : "",
        FD_ISSET(fd, &writefds) ? "w" : "");
    }
    if(pto != NULL){
        printf("timeout after select(): %ld.%03ld\n",
               (long) timeout.tv_sec, (long) timeout.tv_usec / 1000);
    }
    exit(0);
}

select处理正常数据和带外数据:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>

int main(int argc, char* argv[]){
    if(argc <= 2){
        printf("usage: %s ip_adress port_number\n", basename(argv[0]));
        return 1;
    }
    const char* ip = argv[1];
    int port = atoi(argv[2]);

    int ret = 0;
    struct sockaddr_in address;
    bzero(&address, sizeof(address));
    address.sin_family = AF_INET;
    inet_pton(AF_INET, ip, &address.sin_addr);
    address.sin_port = htons(port);

    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    assert(listenfd >= 0);
    ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address));
    assert(ret != -1);
    ret = listen(listenfd, 5);
    assert(ret != -1);

    struct sockaddr_in client_address;
    socklen_t client_addrlength = sizeof(client_address);
    int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
    if(connfd < 0){
        printf("error is: %d\n", errno);
        close(listenfd);
    }

    char buf[1024];
    fd_set read_fds;
    fd_set exception_fds;
    FD_ZERO(&read_fds);
    FD_ZERO(&exception_fds);

    while(true){
        memset(buf, '\0', sizeof(buf));
        FD_SET(connfd, &read_fds);
        FD_SET(connfd, &exception_fds);
        ret = select(connfd + 1, &read_fds, NULL, &exception_fds, NULL);
        if(ret < 0){
            printf("selection failure\n");
            break;
        }
        if(FD_ISSET(connfd, &read_fds)){
            ret = recv(connfd, buf, sizeof(buf)-1, 0);
            if(ret <= 0){
                break;
            }
            printf("get %d bytes of normal data: %s\n", ret, buf);
        }
        else if(FD_ISSET(connfd, &exception_fds)){
            ret = recv(connfd, buf, sizeof(buf)-1, MSG_OOB);
            if(ret <= 0){
                break;
            }
            printf("get %d bytes of oob data: %s\n", ret, buf);
        }
    }
    close(connfd);
    close(listenfd);
    return 0;
}

I/O复用-poll

poll函数起源于SVR3,最初局限于流设备,SVR4取消了这种限制,允许poll工作在任何描述符上。

poll提供的功能与select类似,不过在处理流设备时,它能够提供额外的信息。

#include <poll.h>

struct pollfd{
    int		fd;
    short	events;	//指定要测试的条件
    short	revents;//返回描述符的状态
}
//若有就绪描述符返回其数目,超时返回0,出错返回-1
int poll(struct pollfd *fdarray, unsigned long nfds, int timeout);

poll示例程序:

#include <time.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdarg.h>

static void usageError(const char* progName){
    fprintf(stderr, "Usage: %s {timeout | -} fd-num[rw]...\n", progName);
    fprintf(stderr, "    - means infinite timeout; \n");
    fprintf(stderr, "    r = monitor for read\n");
    fprintf(stderr, "    w = monitor for wirite\n\n");
    fprintf(stderr, "    e.g.: %s - 0rw 1w\n", progName);
    exit(1);
}

int main(int argc, char* argv[]){
    int numPipes, j, ready, randPipe, numWrites;
    int (*pfds)[2];//指向数组的指针
    struct pollfd *pollFd;

    if(argc < 2 || strcmp(argv[1], "--help") == 0){
        printf("%s num-pipes [num-writes]\n", argv[0]);
        exit(1);
    }

    numPipes = strtol(argv[1], NULL, 10);

    pfds = (int (*)[2])calloc(numPipes, sizeof(int [2]));
    if(pfds == NULL){
        printf("error malloc");
        exit(1);
    }
    pollFd = (pollfd*)calloc(numPipes, sizeof(struct pollfd));
    if(pollFd == NULL){
        printf("error malloc");
        exit(1);
    }

    for(j = 0; j < numPipes; j++){
        if(pipe(pfds[j]) == -1){
            printf("error pipe %d", j);
            exit(1);
        }
    }

    numWrites = (argc > 2) ? strtol(argv[2], NULL, 10) : 1;
    srandom((int)time(NULL));
    for(j = 0; j < numWrites; j++){
        randPipe = random() % numPipes;
        printf("Writing to fd: %3d (read fd: %3d)\n",
                pfds[randPipe][1], pfds[randPipe][0]);
        if (write(pfds[randPipe][1], "a", 1) == -1){
            printf("write %d", pfds[randPipe][1]);
            exit(1);
        }
    }

    for(j = 0; j < numPipes; j++){
        pollFd[j].fd = pfds[j][0];
        pollFd[j].events = POLLIN;
    }

    ready = poll(pollFd, numPipes, -1);
    if(ready == -1){
        printf("poll error");
        exit(1);
    }

    printf("poll() returned: %d\n", ready);

    for(j = 0; j < numPipes; j++){
        if(pollFd[j].revents & POLLIN){
            printf("Readable: %d %3d\n", j, pollFd[j].fd);
        }
    }
    return 0;
}

I/O复用-epoll

epoll API由三组系统调用组成;

  • epoll_create()创建一个epoll实例
  • epoll_ctl()操作同epoll实例相关联的兴趣列表
  • epoll_wait()返回与epoll相关联的就绪列表中的成员

epoll实例:epoll API的核心数据结构,和一个打开的文件描述符相关联。这个文件描述符不用来做IO操作,相反它是内核数据结构的句柄,这些内核数据结构实现了两个目的:

  • 记录兴趣列表
  • 维护就绪列表

epoll_create

#include <sys/epoll.h>
int epoll_create(int size);

参数size指定我们想要通过epoll实例来检查的描述符个数,不是上限,只是告知内核应该如何为内部数据结构划分初始大小。

函数返回epoll实例的文件描述符,该文件描述符不需要时需要close()。

当所有与epoll实例相关的文件描述符都被关闭时,实例被销毁,相关资源释放。(多个文件描述符可能引用到相同的epoll实例,这是由于调用了fork()或dup()这样的类似函数所致)。

epoll_ctl

#include <sys/epoll.h>
int epoll_ctl(int epfd, int op, int fd, struct epoll *ev);

成功返回0,失败返回-1并设置errno。

参数fd:指明修改兴趣列表中哪一个文件描述符的设定

参数op:指定需要执行的操作

  • EPOLL_CTL_ADD:添加
  • EPOLL_CTL_MOD:修改
  • EPOLL_CTL_DEL:删除

参数ev:

struct epoll_event{
    uint32_t	 events;//epoll事件,位掩码
    epoll_data_t data;	//用户数据
}
typedef union epoll_data{
    void		*ptr;
    int			fd;
    uint32_t	u32;
    uint64_t	u64;
}epoll_data_t;
  • 结构体epoll_event在的events字段是一个位掩码,指定待检查的描述符fd上感兴趣的事件集合
  • data字段是一个联合体,当描述符fd成为就绪态时,联合体的成员可用来指定传回给调用进程的信息
    • 联合体成员不能一起使用,常用fd
    • 想要将文件描述符和用户数据关联起来,以实现快速的数据访问,只能使用其它手段,比如放弃使用fd,而在ptr指向的用户数据中包含fd

epoll_wait

#include <sys/epoll.h>
int epoll_wait(int epfd, struct epoll_event *evlist, int maxevents, int timeout);

成功返回就绪态的文件描述符的个数,失败返回-1并设置errno

参数evlist指向的结构体数组中返回的是有关就绪态文件描述符的信息。数组evlist的空间由调用者负责申请,所包含的元素个数在参数maxevents中指定。

在数组evlist中每个元素返回的都是单个就绪态文件描述符的信息:

  • events字段返回在该描述符上已经发生的事件掩码
  • data字段返回的是适用epoll_ctl()注册监听事件时在ev.data中所指定的值。data字段是唯一可获知同这个事件相关的文件描述符号的途径,因此,在调用epoll_ctl()时要么将ev.data.fd设为文件描述符号,要么将ev.data.ptr设为指向包含文件描述符号的结构体

参数timeout用来确定epoll_wait()的阻塞行为:

  • timeout为-1,调用将一直阻塞,直到兴趣列表中的文件描述符上有事件发生,或者直到捕获到一个信号为止
  • timeout为0,执行一次非阻塞式的检查
  • timeout大于0,调用将阻塞至多timeout毫秒,直到文件描述符上有事件发生,或者直到捕获到一个信号为止

在多线程程序中,可以在一个线程中使用epoll_ctl()将文件描述符添加到另一个线程中由epoll_wait()所监视的epoll实例的兴趣列表中去。这些对兴趣列表的修改将立刻得到处理,而epoll_wait()调用将返回有关新添加的文件描述符的就绪信息。

epoll事件:除了有一个额外的前缀E外,大多数位掩码的名称同poll中对应的事件掩码名称相同。例外情况:

  • EPOLLET:epoll支持边缘触发
  • EPOLLONESHOT:只触发一次,触发完标记为非激活状态,需要使用EPOLL_CTL_MOD操作重新激活对这个文件描述符的检查

网络编程-I/O复用-LMLPHP

epoll程序示例:

#include <sys/epoll.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>


#define MAX_BUF     1000
#define MAX_EVENTS  5

int main(int argc, char* argv[]){
    int epfd, ready, fd, s, j, numOpenFds;
    struct epoll_event ev;
    struct epoll_event evlist[MAX_EVENTS];
    char buf[MAX_BUF];

    if(argc < 2 || strcmp(argv[1], "--help")==0){
        printf("usage: %s file...\n", argv[0]);
        exit(1);
    }

    epfd = epoll_create(argc - 1);
    if(epfd == -1){
        printf("error epoll_create");
        exit(1);
    }

    for(j = 1; j < argc; j++){
        fd = open(argv[j], O_RDONLY);
        if(fd == -1){
            printf("error open");
            exit(1);
        }
        printf("Opened \"%s\" on fd %d\n", argv[j], fd);

        ev.events = EPOLLIN;
        ev.data.fd = fd;
        if(epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1){
            printf("error epoll_ctl");
            exit(1);
        }
    }

    numOpenFds = argc - 1;

    while(numOpenFds > 0){
        printf("About to epoll_wait()\n");
        ready = epoll_wait(epfd, evlist, MAX_EVENTS, -1);
        if(ready == -1){
            if(errno == EINTR)continue;
            else{
                printf("error epoll_wait");
                exit(1);
            }
        }
        printf("Ready: %d\n", ready);

        for(j = 0; j < ready; j++){
            printf("  fd = %d; events: %s%s%s\n", evlist[j].data.fd,
                (evlist[j].events & EPOLLIN)  ? "EPOLLIN ":"",
                (evlist[j].events & EPOLLHUP) ? "EPOLLHUP":"",
                (evlist[j].events & EPOLLERR) ? "EPOLLERR":"");
            if(evlist[j].events & EPOLLIN){
                s = read(evlist[j].data.fd, buf, MAX_BUF);
                if(s == -1){
                    printf("error read");
                }
                printf("    read %d bytes : %.*s",s,s,buf);
            }
            else if(evlist[j].events & (EPOLLHUP | EPOLLERR)){
                printf("    closing fd %d\n", evlist[j].data.fd);
                if(close(evlist[j].data.fd) == -1){
                    printf("error close");
                    exit(1);
                }
                numOpenFds--;
            }
        }
    }
    printf("All file descriptors closed; bye\n");
    exit(0);
}

ET模式比LT模式触发事件的次数更少:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <pthread.h>

#define MAX_EVENT_NUMBER 1024
#define BUFFER_SIZE 10

int setnonblocking(int fd){
    int old_option = fcntl(fd, F_GETFL);
    int new_option = old_option | O_NONBLOCK;
    fcntl(fd, F_SETFL, new_option);
    return old_option;
}

void addfd(int epollfd, int fd, bool enable_et){
    epoll_event event;
    event.data.fd = fd;
    event.events = EPOLLIN;
    if(enable_et){
        event.events |= EPOLLET;
    }
    epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
    setnonblocking(fd);
}

void lt(epoll_event *events, int number, int epollfd, int listenfd){
    char buf[BUFFER_SIZE];
    for(int i = 0; i < number; i++){
        int sockfd = events[i].data.fd;
        if(sockfd == listenfd){
            struct sockaddr_in client_address;
            socklen_t client_addrlength = sizeof(client_address);
            int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
            addfd(epollfd, connfd, false);
        }
        else if(events[i].events & EPOLLIN){
            printf("event trigger once\n");
            memset(buf, '\0', BUFFER_SIZE);
            int ret = recv(sockfd, buf, BUFFER_SIZE-1,0);
            if(ret <= 0){
                close(sockfd);
                continue;
            }
            printf("get %d bytes of content: %s\n", ret, buf);
        }
        else{
            printf("something else happened \n");
        }
    }
}

void et(epoll_event* events, int number, int epollfd, int listenfd){
    char buf[BUFFER_SIZE];
    for(int i = 0; i < number; i++){
        int sockfd = events[i].data.fd;
        if(sockfd == listenfd){
            struct sockaddr_in client_address;
            socklen_t client_addrlength = sizeof(client_address);
            int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
            addfd(epollfd, connfd, true);
        }
        else if(events[i].events & EPOLLIN){
            printf("event trigger once\n");
            while(true){
                memset(buf, '\0',BUFFER_SIZE);
                int ret = recv(sockfd, buf, BUFFER_SIZE-1, 0);
                if(ret < 0){
                    if((errno == EAGAIN) || (errno == EWOULDBLOCK)){
                        printf("read later\n");
                        break;
                    }
                    close(sockfd);
                    break;
                }
                else if(ret == 0){
                    close(sockfd);
                }
                else{
                    printf("get %d bytes of content: %s\n",ret, buf);
                }
            }
        }
        else{
            printf("something else happend \n");
        }
    }
}

int main(int argc, char* argv[]){
    if(argc <= 2){
        printf("usage: %s ip_address port_number\n", basename(argv[0]));
        return 1;
    }
    const char *ip = argv[1];
    int port  = atoi(argv[2]);

    int ret = 0;
    struct sockaddr_in address;
    bzero(&address, sizeof(address));
    address.sin_family = AF_INET;
    inet_pton(AF_INET, ip, &address.sin_addr);
    address.sin_port = htons(port);

    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    assert(listenfd >= 0);

    ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address));
    assert(ret != -1);

    ret = listen(listenfd, 5);
    assert(ret != -1);

    epoll_event events[MAX_EVENT_NUMBER];
    int epollfd = epoll_create(5);
    assert(epollfd != -1);
    addfd(epollfd, listenfd, true);

    while(true){
        int ret = epoll_wait(epollfd, events, MAX_EVENT_NUMBER, -1);
        if(ret < 0){
            printf("epoll failure\n");
            break;
        }
        lt(events, ret, epollfd, listenfd);
        //et(events, ret, epollfd, listenfd);
    }
    close(listenfd);
    return 0;
}
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <fcntl.h>
#include <stdlib.h>
#include <sys/epoll.h>
#include <pthread.h>

#define MAX_EVENT_NUMBER    1024
#define BUFFER_SIZE         1024
struct fds{
    int epollfd;
    int sockfd;
};

int setnonblocking(int fd){
    int old_option = fcntl(fd, F_GETFL);
    int new_option = old_option | O_NONBLOCK;
    fcntl(fd, F_SETFL, new_option);
    return old_option;
}

void addfd(int epollfd, int fd, bool oneshot){
    epoll_event event;
    event.data.fd = fd;
    event.events = EPOLLIN | EPOLLET;
    if(oneshot){
        event.events |= EPOLLONESHOT;
    }
    epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &event);
    setnonblocking(fd);
}

void reset_oneshot(int epollfd, int fd){
    epoll_event event;
    event.data.fd = fd;
    event.events = EPOLLIN | EPOLLET | EPOLLONESHOT;
    epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &event);
}

void *worker(void *arg){
    int sockfd = ((fds*)arg)->sockfd;
    int epollfd = ((fds*)arg)->epollfd;
    printf("start new thread to receive data on fd: %d\n", sockfd);
    char buf[BUFFER_SIZE];
    memset(buf, '\0', BUFFER_SIZE);
    while(1){
        int ret = recv(sockfd, buf, BUFFER_SIZE-1, 0);
        if(ret == 0){
            close(sockfd);
            printf("foreiner closed the connection\n");
            break;
        }
        else if(ret < 0){
            if(errno == EAGAIN){
                reset_oneshot(epollfd, sockfd);
                printf("read later\n");
                break;
            }
        }
        else{
            printf("get content: %s\n", buf);
            sleep(5);
        }
    }
    printf("end thread receving data on fd : %d\n", sockfd);
}

int main(int argc, char* argv[]){
    if(argc < 2){
        printf("usage: %s ip_address port_number\n", basename(argv[0]));
        return 1;
    }
    const char* ip = argv[1];
    int port = atoi(argv[2]);

    int ret = 0;
    struct sockaddr_in address;
    bzero(&address, sizeof(address));
    address.sin_family = AF_INET;
    inet_pton(AF_INET, ip, &address.sin_addr);
    address.sin_port = htons(port);

    int listenfd = socket(PF_INET, SOCK_STREAM, 0);
    assert(listenfd >= 0);

    ret = bind(listenfd, (struct sockaddr*)&address, sizeof(address));
    assert(ret != -1);

    ret = listen(listenfd, 5);
    assert(ret != -1);

    epoll_event events[MAX_EVENT_NUMBER];
    int epollfd = epoll_create(5);
    assert(epollfd != -1);

    addfd(epollfd, listenfd, false);

    while(1){
        int ret = epoll_wait(epollfd, events, MAX_EVENT_NUMBER, -1);
        if(ret < 0){
            printf("epoll failure\n");
            break;
        }
        for(int i = 0; i < ret; i++){
            int sockfd = events[i].data.fd;
            if(sockfd == listenfd){
                struct sockaddr_in client_address;
                socklen_t client_addrlength = sizeof(client_address);
                int connfd = accept(listenfd, (struct sockaddr*)&client_address, &client_addrlength);
                addfd(epollfd, connfd, true);
            }
            else if(events[i].events & EPOLLIN){
                pthread_t thread;
                fds fds_for_new_worker;
                fds_for_new_worker.epollfd = epollfd;
                fds_for_new_worker.sockfd = sockfd;
                pthread_create(&thread, NULL, worker, (void*)&fds_for_new_worker);
            }
            else{
                printf("something else happened \n");
            }
        }
    }
    close(listenfd);
    return 0;
}xxxxxxxxxx c
01-28 13:27