3.  SYSCALL_DEFINE2(listen, int, fd, int, backlog)函数

该函数由用户空间通过listen系统调用触发执行。在Linux内核中还是使用System call vectors实现,入口在net/socket.c文件中:

SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
{
        ……
        case SYS_LISTEN:
                err = sys_listen(a0, a1);
                break;
        ……
}

其中sys_listen(a0, a1)最终调用的是下面的SYSCALL_DEFINE2(listen, int, fd, int, backlog)进行处理。

SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
        struct socket *sock;
        int err, fput_needed;
        int somaxconn; // 表示socket监听(listen)的backlog上限

        /* 类似于bind中的调用,这里也是根据创建时返回的文件描述符查找对应的socket实例(该实例在create中创建)。
         * 具体查找方式就不再分析,不熟悉请参考上篇文章,这里主要通过file实例中的private_data成员获取到sock */
        sock = sockfd_lookup_light(fd, &err, &fput_needed);
        if (sock) {
                /* /proc/sys/net/core# cat somaxconn
                 * 128
                 * 这里默认是128,Hadoop集群时一般都会增大该值。 */
                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
                if ((unsigned int)backlog > somaxconn) // 如果backlog值大于somaxconn,backlog就为somaxconn,也就是最大值不能大于somaxconn
                        backlog = somaxconn;

                err = security_socket_listen(sock, backlog);
                /* 调用对应的socket层的listen函数,如果是TCP的话就是inet_listen,
                 * 根据net/ipv4/af_inet.c文件中的
                 * const struct proto_ops inet_stream_ops = { .listen = inet_listen, }
                 * 定义 */
                if (!err)
                        err = sock->ops->listen(sock, backlog);

                fput_light(sock->file, fput_needed);
        }
        return err;
}

3.1  backlog

通过man listen我们看看对backlog的解释:

The behavior of the backlog argument on TCP sockets changed with Linux 2.2. Now it specifies the queue length for completely established sockets waiting to be accepted, instead of the number of incomplete connection requests. The maximum length of the queue for incomplete sockets can be set using /proc/sys/net/ipv4/tcp_max_syn_backlog. When syncookies are enabled there is no logical maximum length and this setting is ignored.
See tcp(7) for more information.

If the backlog argument is greater than the value in /proc/sys/net/core/somaxconn, then it is silently truncated to that value; the default value in this file is 128. In kernels before 2.4.25, this limit was a hard coded value, SOMAXCONN, with the value 128.

上面的解释的大体意思为:从Linux 2.2内核版本开始,backlog的行为发生了改变,现在该参数指定的是等待被accept的全连接队列的长度,而不是半连接请求的队列长度。全连接需要在完成三次握手之后才算建立。半连接队列的最大长度可以使用/proc/sys/net/ipv4/tcp_max_syn_backlog进行设置,这个默认值为:

cat /proc/sys/net/ipv4/tcp_max_syn_backlog
1024

当syncookies被启用后,该参数被忽略掉。如果backlog值大于/proc/sys/net/core/somaxconn,它将被截断为该值,默认值为128。也就是当传参backlog的值 >= somaxconn时,已完成连接队列的长度最多就是somaxconn。

3.2  inet_listen函数

该函数主要是做一些检查工作,例如当前连接的状态、sock的类型,最主要的处理在inet_csk_listen_start函数中。

int inet_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        /* 检查sock的状态是否为SS_UNCONNECTED,sock的类型是否为SOCK_STREAM,
         * 只有SOCK_STREAM类型的sock才需要进行listen,建立socket后的初始状态为SS_UNCONNECTED */
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
                goto out;

        /* 获取sock的当前状态,后续要变成老状态 */
        old_state = sk->sk_state;
        /* 当前连接的状态必须是CLOSED状态或LISTEN状态 */
        if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         * 如果现在的状态不是监听状态 */
        if (old_state != TCP_LISTEN) {
                /* Check special setups for testing purpose to enable TFO w/o
                 * requiring TCP_FASTOPEN sockopt.
                 * Note that only TCP sockets (SOCK_STREAM) will reach here.
                 * Also fastopenq may already been allocated because this
                 * socket was in TCP_LISTEN state previously but was
                 * shutdown() (rather than close()).
                 */
                if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
                    inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) {
                        if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
                                err = fastopen_init_queue(sk, backlog);
                        else if ((sysctl_tcp_fastopen &
                                  TFO_SERVER_WO_SOCKOPT2) != 0)
                                err = fastopen_init_queue(sk,
                                    ((uint)sysctl_tcp_fastopen) >> 16);
                        else
                                err = 0;
                        if (err)
                                goto out;
                }
                err = inet_csk_listen_start(sk, backlog); // 启动监听功能
                if (err)
                        goto out;
        }
        /* 如果socket已经处于监听状态,这里只是对backlog进行了调整 */
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

3.2.1  inet_csk_listen_start

该函数使TCP传输控制块进入监听状态,实现监听的过程是:为管理连接请求的散列表分配存储空间,接着使TCP的sock状态迁移到LISTEN状态,然后将sock加入到监听散列表中。

int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
        struct inet_sock *inet = inet_sk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);
        /* 分配并初始化连接请求队列(半连接队列和全连接队列) */
        int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

        if (rc != 0)
                return rc;

        sk->sk_max_ack_backlog = 0; // 全连接队列的最大长度
        sk->sk_ack_backlog = 0; // 全连接队列的当前长度
        inet_csk_delack_init(sk);

        /* There is race window here: we announce ourselves listening,
         * but this transition is still not validated by get_port().
         * It is OK, because this socket enters to hash table only
         * after validation is complete.
         */
        sk->sk_state = TCP_LISTEN; // 设置现在的状态为TCP_LISTEN状态
        /* 检查端口号是否可用,防止bind后修改。
         * struct proto tcp_prot = {
         *         .unhash   = inet_unhash,
         *         .get_port = inet_csk_get_port,
         * }
         * 这里调用的get_port函数与bind时调用的是同一个函数,如果正确则返回0。
         * 其中inet_num就是bind时绑定的端口,如果没有绑定端口,则进行绑定端口操作 */
        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {
                inet->inet_sport = htons(inet->inet_num);
                sk_dst_reset(sk);
                sk->sk_prot->hash(sk); /* 把Socket添加到监听HASH表中,struct proto tcp_prot = { .hash = inet_hash, } */
                return 0;
        }
        sk->sk_state = TCP_CLOSE; // 如果端口不再可用,设置socket的状态为TCP_CLOSE,并销毁全连接队列
        __reqsk_queue_destroy(&icsk->icsk_accept_queue);
        return -EADDRINUSE;
}

3.2.2  reqsk_queue_alloc

int reqsk_queue_alloc(struct request_sock_queue *queue,
                      unsigned int nr_table_entries)
{
        size_t lopt_size = sizeof(struct listen_sock);
        struct listen_sock *lopt;

        /* 这里nr_table_entries传进来的最大值是128,sysctl_max_syn_backlog值为256,
         * 所以,这里最小值不会小于8,最大值不会大于128,在[8, 128]之间 */
        nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
        nr_table_entries = max_t(u32, nr_table_entries, 8);
        /* 将nr_table_entries + 1向上取整到最接近的2^n,再赋给nr_table_entries */
        nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
        lopt_size += nr_table_entries * sizeof(struct request_sock *); // 确定队列大小
        if (lopt_size > PAGE_SIZE)
                lopt = vzalloc(lopt_size); // 如果申请的空间大于1页,则申请虚拟地址连续的空间
        else
                lopt = kzalloc(lopt_size, GFP_KERNEL); // 小于1页,在常规内存中分配内存
        if (lopt == NULL)
                return -ENOMEM;
        /* for循环是计算nr_table_entries以2为底的对数,计算的结果就存储在max_qlen_log成员中,
         * 例如:如果nr_table_entries = 1024,max_qlen_log = 10 */
        for (lopt->max_qlen_log = 3;
             (1 << lopt->max_qlen_log) < nr_table_entries;
             lopt->max_qlen_log++);
        /* 上面的这几行代码实际上是确认了半连接队列的长度,这个值还受系统配置sysctl_max_syn_backlog的影响,
         * 所以如果想调大监听套接字的半连接队列,除了增大listen()的backlog参数外,
         * 还需要调整sysctl_max_syn_backlog系统配置的值,proc文件为/proc/sys/net/ipv4/tcp_max_syn_backlog */
        get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); // 得到一个随机数,用于HASH
        rwlock_init(&queue->syn_wait_lock);
        queue->rskq_accept_head = NULL; // 全连接队列置为空
        lopt->nr_table_entries = nr_table_entries; // 半连接队列的最大长度

        write_lock_bh(&queue->syn_wait_lock);
        queue->listen_opt = lopt; // 初始化半连接队列,其实就是icsk_accept_queue.listen_opt->syn_table
        write_unlock_bh(&queue->syn_wait_lock);

        return 0;
}

3.2.3  inet_hash函数

void inet_hash(struct sock *sk)
{
        if (sk->sk_state != TCP_CLOSE) {
                local_bh_disable();
                __inet_hash(sk);
                local_bh_enable();
        }
}

static void __inet_hash(struct sock *sk)
{
        struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
        struct inet_listen_hashbucket *ilb;

        /* Socket不处于监听状态 */
        if (sk->sk_state != TCP_LISTEN) {
                __inet_hash_nolisten(sk, NULL); /* 这里对应的是已经建立连接的 */
                return;
        }

        WARN_ON(!sk_unhashed(sk));
        /* 根据监听的端口号,查找相对应的HASH */
        ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];

        spin_lock(&ilb->lock);
        /* 把sock添加到监听HASH桶的头部,连接到sk->sk_nulls_node */
        __sk_nulls_add_node_rcu(sk, &ilb->head);
        sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
        spin_unlock(&ilb->lock);
}

3.3 流程和总结

(1)listen初始化了半连接队列和全连接队列
(2)实现侦听:使TCP传输控制块的状态迁移到LISTEN状态,然后将传输控制块添加到侦听散列表中
10-15 14:48
查看更多