声明:版权所有,转载请保留出处 http://forever.blog.chinaunix.net 内核版本基于SLES 11 SP2 3.0.13内核(该内核syncookie机制存在bug,导致0.0.0.0数据包) 内核输出“TCP: Possible SYN flooding on port”信息,在内核syncookie机制存在bug的情况下如何调整相应的参数增大并发syn值?1.     内核输出该信息的位置 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb){        struct tcp_extend_values tmp_ext;        struct tcp_options_received tmp_opt;        u8 *hash_location;        struct request_sock *req;        struct inet_request_sock *ireq;        struct tcp_sock *tp = tcp_sk(sk);        struct dst_entry *dst = NULL;        __be32 saddr = ip_hdr(skb)->saddr;        __be32 daddr = ip_hdr(skb)->daddr;        __u32 isn = TCP_SKB_CB(skb)->when; #ifdef CONFIG_SYN_COOKIES        int want_cookie = 0; #else #define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */ #endif         /* Never answer to SYNs send to broadcast or multicast */        if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))                goto drop;         /* TW buckets are converted to open requests without         * limitations, they conserve resources and peer is         * evidently real one.         */        if (inet_csk_reqsk_queue_is_full(sk) && !isn) {                if (net_ratelimit())                        syn_flood_warning(skb); isn这里可以不关心(对于syn连接,这个值一定是0)。看一下标红的函数(inet_csk_reqsk_queue_is_full): static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk){        return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);} static inline int reqsk_queue_is_full(const struct request_sock_queue *queue){                                                     return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;} 这个代码非常的巧妙,通过右移操作来判断并发syn队列是否已满(当qlen >= 2^max_qlen_log时,右移结果非0,即队列已满)。要想增大并发syn请求,只需要增大max_qlen_log的值就可以了。 2.     
如何增大max_qlen_log值 TCP传输控制块结构体 /** inet_connection_sock - INET connection oriented sock * * @icsk_accept_queue:     FIFO of established children * @icsk_bind_hash:        Bind node * @icsk_timeout:          Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto:              Retransmit timeout * @icsk_pmtu_cookie       Last pmtu seen by socket * @icsk_ca_ops            Pluggable congestion control hook * @icsk_af_ops            Operations which are AF_INET{4,6} specific * @icsk_ca_state:         Congestion control state * @icsk_retransmits:      Number of unrecovered [RTO] timeouts * @icsk_pending:          Scheduled timer event * @icsk_backoff:          Backoff * @icsk_syn_retries:      Number of allowed SYN (or equivalent) retries * @icsk_probes_out:       unanswered 0 window probes * @icsk_ext_hdr_len:      Network protocol overhead (IP/IPv6 options) * @icsk_ack:              Delayed ACK control data * @icsk_mtup;             MTU probing control data */ struct inet_connection_sock {        /* inet_sock has to be the first member! 
*/        struct inet_sock          icsk_inet;        struct request_sock_queue icsk_accept_queue;        struct inet_bind_bucket   *icsk_bind_hash;        unsigned long             icsk_timeout;        struct timer_list         icsk_retransmit_timer;        struct timer_list         icsk_delack_timer;        __u32                     icsk_rto;        __u32                     icsk_pmtu_cookie;        const struct tcp_congestion_ops *icsk_ca_ops;        const struct inet_connection_sock_af_ops *icsk_af_ops;        unsigned int              (*icsk_sync_mss)(struct sock *sk, u32 pmtu);        __u8                      icsk_ca_state;        __u8                      icsk_retransmits;        __u8                      icsk_pending;        __u8                      icsk_backoff;        __u8                      icsk_syn_retries;        __u8                      icsk_probes_out;        __u16                     icsk_ext_hdr_len;        struct {                __u8              pending;       /* ACK is pending                         */                __u8              quick;         /* Scheduled number of quick acks         */                __u8              pingpong;      /* The session is interactive             */                __u8              blocked;       /* Delayed ACK was blocked by socket lock */                __u32             ato;           /* Predicted tick of soft clock           */                unsigned long     timeout;       /* Currently scheduled timeout            */                __u32             lrcvtime;      /* timestamp of last received data packet */                __u16             last_seg_size; /* Size of last incoming segment          */                __u16             rcv_mss;       /* MSS used for delayed ACK decisions     */        } icsk_ack;        struct {                int               enabled;                 /* Range of MTUs to search */                int               search_high;                int               search_low;    
             /* Information on the current probe. */                int               probe_size;        } icsk_mtup;        u32                       icsk_ca_priv[16];        u32                       icsk_user_timeout; #define ICSK_CA_PRIV_SIZE       (16 * sizeof(u32))}; 其中红色标注的变量(icsk_accept_queue)用来保存正在建立连接和已建立连接但未被accept的传输控制块。 /** struct request_sock_queue - queue of request_socks *      * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children * @rskq_defer_accept - User waits for some data after accept() * @syn_wait_lock - serializer    *      * %syn_wait_lock is necessary only to avoid proc interface having to grab the main * lock sock while browsing the listening hash (otherwise it's deadlock prone). *      * This lock is acquired in read mode only from listening_get_next() seq_file * op and it's acquired in write mode _only_ from code that is actively * changing rskq_accept_head. All readers that are holding the master sock lock * don't need to grab this lock in read mode too as rskq_accept_head. writes * are always protected from the main sock lock. */    struct request_sock_queue {              struct request_sock     *rskq_accept_head;        struct request_sock     *rskq_accept_tail;        rwlock_t                syn_wait_lock;          u8                      rskq_defer_accept;        /* 3 bytes hole, try to pack */        struct listen_sock      *listen_opt;    };             其中rskq_accept_head和rskq_accept_tail指向的链表保存已完成三次握手的传输控制块;listen_opt中保存处于SYN_RECV状态的传输控制块。这里我们重点看一下listen_opt: struct listen_sock {        u8                      max_qlen_log;        /* 3 bytes hole, try to use */        int                     qlen;        int                     qlen_young;        int                     clock_hand;        u32                     hash_rnd;        u32                     nr_table_entries;        struct request_sock     *syn_table[0];}; 这里syn_table的大小将会影响同时存在SYN_RECV状态的半连接的数量。通过listen系统调用跟踪一下这个数值的设置:/* *     Perform a listen. 
Basically, we allow the protocol to do anything *     necessary for a listen, and if that works, we mark the socket as *     ready for listening. */ SYSCALL_DEFINE2(listen, int, fd, int, backlog){        struct socket *sock;        int err, fput_needed;        int somaxconn;         sock = sockfd_lookup_light(fd, &err, &fput_needed);        if (sock) {                somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; /*这里限制backlog的值不会大于net.core.somaxconn的值*/                if ((unsigned)backlog > somaxconn)                        backlog = somaxconn;                 err = security_socket_listen(sock, backlog);                if (!err)                        err = sock->ops->listen(sock, backlog);                 fput_light(sock->file, fput_needed);        }        return err;} /* *     Move a socket into listening state. */ int inet_listen(struct socket *sock, int backlog){        struct sock *sk = sock->sk;        unsigned char old_state;        int err;         lock_sock(sk);         err = -EINVAL;        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)                goto out;         old_state = sk->sk_state;        if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))                goto out;         /* Really, if the socket is already in listen state         * we can only allow the backlog to be adjusted.         
*/        if (old_state != TCP_LISTEN) {                err = inet_csk_listen_start(sk, backlog);                if (err)                        goto out;        }        sk->sk_max_ack_backlog = backlog;        err = 0; out:        release_sock(sk);        return err;} int inet_csk_listen_start(struct sock *sk, const int nr_table_entries){        struct inet_sock *inet = inet_sk(sk);        struct inet_connection_sock *icsk = inet_csk(sk);        int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);         if (rc != 0)                return rc;               sk->sk_max_ack_backlog = 0;        sk->sk_ack_backlog = 0;        inet_csk_delack_init(sk);               /* There is race window here: we announce ourselves listening,         * but this transition is still not validated by get_port().         * It is OK, because this socket enters to hash table only         * after validation is complete.         */            sk->sk_state = TCP_LISTEN;        if (!sk->sk_prot->get_port(sk, inet->inet_num)) {                inet->inet_sport = htons(inet->inet_num);                 sk_dst_reset(sk);                sk->sk_prot->hash(sk);                 return 0;        }         sk->sk_state = TCP_CLOSE;        __reqsk_queue_destroy(&icsk->icsk_accept_queue);        return -EADDRINUSE;} int reqsk_queue_alloc(struct request_sock_queue *queue,                      unsigned int nr_table_entries){        size_t lopt_size = sizeof(struct listen_sock);        struct listen_sock *lopt; /*这里可以看出listen_sock->max_qlen_log为nr_table_entries和sysctl_max_syn_backlog的最小值(且不小于8)加1,   并向上取整到2的幂之后,再取以2为底的对数。   比如: nr_table_entries = 128, sysctl_max_syn_backlog = 20480,               min(nr_table_entries, sysctl_max_syn_backlog) = 128               roundup_pow_of_two(128+1) = 256               max_qlen_log = 8*/        nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);        nr_table_entries = max_t(u32, nr_table_entries, 8);        nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);   
    lopt_size += nr_table_entries * sizeof(struct request_sock *);        if (lopt_size > PAGE_SIZE)                lopt = vzalloc(lopt_size);        else                lopt = kzalloc(lopt_size, GFP_KERNEL);        if (lopt == NULL)                return -ENOMEM;         for (lopt->max_qlen_log = 3;             (1 << lopt->max_qlen_log) < nr_table_entries;             lopt->max_qlen_log++);        get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));        rwlock_init(&queue->syn_wait_lock);        queue->rskq_accept_head = NULL;        lopt->nr_table_entries = nr_table_entries;        write_lock_bh(&queue->syn_wait_lock);        queue->listen_opt = lopt;       write_unlock_bh(&queue->syn_wait_lock);         return 0;} 经过上面的分析,要想增大并发syn值需要同时调整如下三个参数: (1) net.core.somaxconn (2) net.ipv4.tcp_max_syn_backlog (3) listen系统调用的backlog参数  看来listen系统调用的backlog参数不仅影响已完成三次握手等待accept的最大连接数,还影响SYN_RECV状态的连接数。
10-12 05:42
查看更多