tcp客户端与服务器端建立连接需要经过三次握手过程,本文主要分析客户端主动打开中的第一次握手部分,即客户端发送syn段到服务器端;

tcp_v4_connect为发起连接主流程,首先对必要参数进行检查,获取路由信息,改变连接状态成SYN_SENT,再调用inet_hash_connect将控制块加入到ehash,最后调用tcp_connect发送syn;

 /* This will initiate an outgoing connection.
  *
  * Validates the destination address, looks up a route, switches the socket
  * to TCP_SYN_SENT, binds/hashes it through inet_hash_connect() and finally
  * sends the SYN through tcp_connect().
  *
  * @sk:       socket being connected
  * @uaddr:    destination address, interpreted as struct sockaddr_in
  * @addr_len: length of @uaddr in bytes
  *
  * Returns 0 on success or a negative errno.  On any failure after the state
  * change, the socket is put back into TCP_CLOSE and its destination port and
  * route capabilities are cleared.
  */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
	struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__be16 orig_sport, orig_dport;
	__be32 daddr, nexthop;
	struct flowi4 *fl4;
	struct rtable *rt;
	int err;
	struct ip_options_rcu *inet_opt;
	/* Per-netns TIME_WAIT bookkeeping, handed to inet_hash_connect(). */
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	/* Address too short. */
	if (addr_len < sizeof(struct sockaddr_in))
		return -EINVAL;

	/* Wrong address family. */
	if (usin->sin_family != AF_INET)
		return -EAFNOSUPPORT;

	/* By default the next hop is the destination itself. */
	nexthop = daddr = usin->sin_addr.s_addr;

	/* Fetch the socket's IP options. */
	inet_opt = rcu_dereference_protected(inet->inet_opt,
					     lockdep_sock_is_held(sk));

	/* Source routing is in use: route towards the first hop in the option. */
	if (inet_opt && inet_opt->opt.srr) {
		if (!daddr)
			return -EINVAL;
		nexthop = inet_opt->opt.faddr;
	}

	/* Remember the ports used for the initial route lookup. */
	orig_sport = inet->inet_sport;
	orig_dport = usin->sin_port;

	/* Route lookup. */
	fl4 = &inet->cork.fl.u.ip4;
	rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
			      RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
			      IPPROTO_TCP,
			      orig_sport, orig_dport, sk);
	/* Lookup failed. */
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		if (err == -ENETUNREACH)
			IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
		return err;
	}

	/* Multicast and broadcast routes are rejected. */
	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
		ip_rt_put(rt);
		return -ENETUNREACH;
	}

	/* Without source routing, take the destination from the flow key. */
	if (!inet_opt || !inet_opt->opt.srr)
		daddr = fl4->daddr;

	/* No source address set yet: use the one from the flow key. */
	if (!inet->inet_saddr)
		inet->inet_saddr = fl4->saddr;
	/* The receive address follows the source address. */
	sk_rcv_saddr_set(sk, inet->inet_saddr);

	/* A recorded timestamp together with a different destination means
	 * the socket carries state from an earlier use; reinitialise it.
	 */
	if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
		/* Reset inherited state */
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		if (likely(!tp->repair))
			tp->write_seq = 0;
	}

	/* Record destination port and address. */
	inet->inet_dport = usin->sin_port;
	sk_daddr_set(sk, daddr);

	/* Extension header length equals the IP option length, if any. */
	inet_csk(sk)->icsk_ext_hdr_len = 0;
	if (inet_opt)
		inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;

	/* Start from the default MSS clamp. */
	tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;

	/* Socket identity is still unknown (sport may be zero).
	 * However we set state to SYN-SENT and not releasing socket
	 * lock select source port, enter ourselves into the hash tables and
	 * complete initialization after this.
	 */
	tcp_set_state(sk, TCP_SYN_SENT);

	/* Bind a local port and insert the socket into ehash. */
	err = inet_hash_connect(tcp_death_row, sk);
	if (err)
		goto failure;

	/* Set the transmit hash. */
	sk_set_txhash(sk);

	/* The ports may have changed since the first lookup; redo the route
	 * lookup with the final ports.
	 */
	rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
			       inet->inet_sport, inet->inet_dport, sk);
	if (IS_ERR(rt)) {
		err = PTR_ERR(rt);
		/* rt is an error pointer; the failure path must not put it. */
		rt = NULL;
		goto failure;
	}

	/* OK, now commit destination to socket. */
	sk->sk_gso_type = SKB_GSO_TCPV4;
	/* Store the route's dst and device capabilities in the socket. */
	sk_setup_caps(sk, &rt->dst);
	/* Presumably the route reference now belongs to the socket; make sure
	 * the failure path does not drop it.
	 */
	rt = NULL;

	if (likely(!tp->repair)) {
		/* Pick an initial send sequence number unless one is set. */
		if (!tp->write_seq)
			tp->write_seq = secure_tcp_seq(inet->inet_saddr,
						       inet->inet_daddr,
						       inet->inet_sport,
						       usin->sin_port);
		/* Timestamp offset. */
		tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr,
						 inet->inet_daddr);
	}

	/* Seed the IP header id. */
	inet->inet_id = tp->write_seq ^ jiffies;

	/* Fast Open may defer the connect; if so, return the err it stored. */
	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto failure;

	/* Send the SYN. */
	err = tcp_connect(sk);
	if (err)
		goto failure;

	return 0;

failure:
	/*
	 * This unhashes the socket and releases the local port,
	 * if necessary.
	 */
	tcp_set_state(sk, TCP_CLOSE);
	ip_rt_put(rt);
	sk->sk_route_caps = 0;
	inet->inet_dport = 0;
	return err;
}

__inet_hash_connect将端口检查通过的控制块加入到ehash;函数对是否设置端口进行了不同处理,若未设置端口,则需要查找一个端口;函数还调用check_established检查是否可以复用处在TIME_WAIT的控制块,以及调用inet_ehash_nolisten将端口对应的控制块加入到ehash;

 /* Bind @sk to a local port for an outgoing connection and insert it into
  * the established hash.
  *
  * If the socket already has a local port (inet_num != 0), only uniqueness
  * has to be settled.  Otherwise the local port range is scanned, first the
  * ports sharing the parity of the range start, then the other parity.
  *
  * @death_row:         supplies the hash tables (death_row->hashinfo)
  * @sk:                socket being connected
  * @port_offset:       caller-supplied offset added to the static hint to
  *                     choose where the scan starts
  * @check_established: callback deciding whether the connection identity is
  *                     usable for a given port; returns 0 if it is, and may
  *                     hand back a TIME_WAIT socket through its last argument
  *
  * Returns 0 on success, -EADDRNOTAVAIL if no port in the range is usable,
  * -ENOMEM if a bind bucket cannot be allocated, or the callback's result
  * when the port was already set.
  */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u32 port_offset,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_timewait_sock *tw = NULL;
	struct inet_bind_hashbucket *head;
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb;
	u32 remaining, offset;
	int ret, i, low, high;
	/* Shared across calls; advanced after every successful search. */
	static u32 hint;

	/* The socket already has a local port. */
	if (port) {
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		/* Bind bucket this socket is attached to. */
		tb = inet_csk(sk)->icsk_bind_hash;
		spin_lock_bh(&head->lock);
		/* This socket is the only owner of the port. */
		if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
			/* Insert the socket into ehash. */
			inet_ehash_nolisten(sk, NULL);
			spin_unlock_bh(&head->lock);
			return 0;
		}
		/* Drop the lock but keep BHs disabled across the callback;
		 * they are re-enabled right after it returns.
		 */
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = check_established(death_row, sk, port, NULL);
		local_bh_enable();
		return ret;
	}

	/* No port chosen yet: search the local port range. */
	inet_get_local_port_range(net, &low, &high);
	high++; /* [32768, 60999] -> [32768, 61000[ */
	remaining = high - low;
	/* Round down to an even count so stepping by 2 keeps the parity. */
	if (likely(remaining > 1))
		remaining &= ~1U;

	offset = (hint + port_offset) % remaining;
	/* In first pass we try ports of @low parity.
	 * inet_csk_get_port() does the opposite choice.
	 */
	offset &= ~1U;
other_parity_scan:
	port = low + offset;
	/* Walk the candidate ports of the current parity. */
	for (i = 0; i < remaining; i += 2, port += 2) {
		if (unlikely(port >= high))
			port -= remaining;
		/* Skip reserved ports. */
		if (inet_is_local_reserved_port(net, port))
			continue;
		/* Bind hash bucket for this port. */
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		spin_lock_bh(&head->lock);

		/* Does not bother with rcv_saddr checks, because
		 * the established check is already unique enough.
		 */
		inet_bind_bucket_for_each(tb, &head->chain) {
			/* Found an existing bucket for this port. */
			if (net_eq(ib_net(tb), net) && tb->port == port) {
				/* Buckets created below are tagged -1, so
				 * a value >= 0 presumably means the bucket
				 * was set up for reuse elsewhere; skip it.
				 */
				if (tb->fastreuse >= 0 ||
				    tb->fastreuseport >= 0)
					goto next_port;
				WARN_ON(hlist_empty(&tb->owners));
				/* Usable if the established check passes
				 * (possibly by recycling a TIME_WAIT socket).
				 */
				if (!check_established(death_row, sk,
						       port, &tw))
					goto ok;
				goto next_port;
			}
		}

		/* Nobody is using the port: create its bind bucket. */
		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					     net, head, port);
		if (!tb) {
			spin_unlock_bh(&head->lock);
			return -ENOMEM;
		}
		/* Tag the new bucket with the default reuse markers. */
		tb->fastreuse = -1;
		tb->fastreuseport = -1;
		goto ok;
next_port:
		spin_unlock_bh(&head->lock);
		cond_resched();
	}

	/* offset was even, so after the first pass it is odd and the other
	 * parity is scanned; after the second pass it is even and we stop.
	 */
	offset++;
	if ((offset & 1) && remaining > 1)
		goto other_parity_scan;

	return -EADDRNOTAVAIL;

ok:
	hint += i + 2;

	/* Head lock still held and bh's disabled */
	/* Add the socket to the port's owner list. */
	inet_bind_hash(sk, tb, port);
	/* Not hashed yet: set the source port and insert into ehash. */
	if (sk_unhashed(sk)) {
		inet_sk(sk)->inet_sport = htons(port);
		inet_ehash_nolisten(sk, (struct sock *)tw);
	}
	/* A TIME_WAIT socket was handed back: remove it from the bind list. */
	if (tw)
		inet_twsk_bind_unhash(tw, hinfo);
	spin_unlock(&head->lock);
	/* Deschedule and release the TIME_WAIT socket. */
	if (tw)
		inet_twsk_deschedule_put(tw);
	local_bh_enable();
	return 0;
}

__inet_check_established用于检查使用相同端口且处于TIME_WAIT状态的控制块是否可以复用;

 /* called with local bh disabled
  *
  * Check whether the connection identity of @sk with local port @lport is
  * free in the established hash, and if so insert @sk there.
  *
  * A matching socket in TCP_TIME_WAIT that twsk_unique() accepts does not
  * count as a conflict; it is removed from the chain and either returned
  * through @twp or, when @twp is NULL, descheduled here.
  *
  * Returns 0 on success and -EADDRNOTAVAIL if the identity is already taken.
  */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/* Note the swap: daddr is our receive address, saddr is the peer. */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct net *net = sock_net(sk);
	unsigned int hash = inet_ehashfn(net, daddr, lport,
					 saddr, inet->inet_dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
	struct sock *sk2;
	const struct hlist_nulls_node *node;
	struct inet_timewait_sock *tw = NULL;

	spin_lock(lock);

	/* Walk the ehash chain. */
	sk_nulls_for_each(sk2, node, &head->chain) {
		/* Different hash: cannot be the same identity. */
		if (sk2->sk_hash != hash)
			continue;

		/* Same identity found. */
		if (likely(INET_MATCH(sk2, net, acookie,
				      saddr, daddr, ports, dif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				/* The TIME_WAIT socket may be recycled. */
				if (twsk_unique(sk, sk2, twp))
					break;
			}
			/* Not in TIME_WAIT, or not recyclable. */
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	/* Insert the socket into the ehash chain. */
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		/* Remove the recycled TIME_WAIT socket from the chain. */
		sk_nulls_del_node_init_rcu((struct sock *)tw);
		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	/* Bump the protocol's in-use counter. */
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	/* Hand the TIME_WAIT socket to the caller, or dispose of it here. */
	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}

inet_ehash_nolisten用于将控制块加入ehash,并根据结果做不同处理;

 /* Insert @sk into the established hash via inet_ehash_insert(), passing
  * @osk through unchanged.
  *
  * On success the protocol's in-use counter is incremented.  On failure the
  * socket is counted as an orphan, marked closed and dead, and destroyed.
  *
  * Returns the result of inet_ehash_insert().
  */
bool inet_ehash_nolisten(struct sock *sk, struct sock *osk)
{
	/* Try the insertion. */
	bool ok = inet_ehash_insert(sk, osk);

	if (ok) {
		/* Inserted: account for one more socket in use. */
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
	} else {
		/* Bump the orphan count. */
		percpu_counter_inc(sk->sk_prot->orphan_count);
		/* Mark the connection as closed. */
		sk->sk_state = TCP_CLOSE;
		/* Flag the socket as dead. */
		sock_set_flag(sk, SOCK_DEAD);
		/* Destroy the socket. */
		inet_csk_destroy_sock(sk);
	}
	return ok;
}

tcp_connect用于构造syn包并发送之,发送之后需要设置syn包的重传定时器;

 /* Build a SYN and send it off.
  *
  * Rebuilds the header/route, initialises the connection state, queues a
  * data-less SYN segment, transmits it (with data when a Fast Open request is
  * pending) and arms the retransmit timer.
  *
  * Returns 0 on success, -EHOSTUNREACH if the header cannot be rebuilt,
  * -ENOBUFS if no skb can be allocated, or -ECONNREFUSED when the transmit
  * path reports it.  Other transmit errors are not propagated.
  */
int tcp_connect(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *buff;
	int err;

	/* Check/rebuild the route. */
	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
		return -EHOSTUNREACH; /* Routing failure or similar. */

	/* Initialise the connection-related fields of the socket. */
	tcp_connect_init(sk);

	/* In repair mode no SYN is sent; the connect is finished directly. */
	if (unlikely(tp->repair)) {
		tcp_finish_connect(sk, NULL);
		return 0;
	}

	/* Allocate an skb with no payload. */
	buff = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
	if (unlikely(!buff))
		return -ENOBUFS;

	/* Set up the control info of the data-less SYN; it uses up one
	 * sequence number.
	 */
	tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
	/* Record when the SYN was sent. */
	tp->retrans_stamp = tcp_time_stamp;
	/* Put it on the send queue. */
	tcp_connect_queue_skb(sk, buff);
	/* ECN (explicit congestion notification) setup for the SYN. */
	tcp_ecn_send_syn(sk, buff);

	/* Send off SYN; include data in Fast Open. */
	err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) :
	      tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
	if (err == -ECONNREFUSED)
		return err;

	/* We change tp->snd_nxt after the tcp_transmit_skb() call
	 * in order to make this packet get counted in tcpOutSegs.
	 */
	tp->snd_nxt = tp->write_seq;
	tp->pushed_seq = tp->write_seq;
	TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);

	/* Timer for repeating the SYN until an answer. */
	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
				  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
	return 0;
}
05-26 13:35