kernel 校验和实现

Kernel checksum implementation

1) TCP包的错误检测使用16位累加和校验. 除了TCP包本身,
TCP校验数据块还包括由源IP地址, 目的IP地址, TCP包长度, TCP协议号组成的12字节伪头标.
2) 校验和为16位字的反码和(one's complement sum), 数据块长度为奇数时, 数据块末尾添零处理.
校验和的计算与顺序无关, 可以从数据块开始计算, 也可以从末尾开始向前计算.
3) 为了提高计算效率, TCP包的校验和并不一次算出,
而是采用32位部分累加和(skb->csum)进行增量计算.
csum_partial()用来计算数据块的32位部分累加和, 累加和可以用csum_fold()折叠为16位校验和.
csum_partial_copy_nocheck()可在拷贝用户数据的同时计算出它的部分累加和.
4) 为了加快执行速度, csum_partial()将8个32位字分为一组用分立的指令进行32位累加,
这样可加长循环体中指令长度, 提高CPU指令流水线的效率.
5) 并不是所有的TCP包都必须校验, skb->ip_summed用来控制校验操作.
对于loopback设备的收发包, 其skb->ip_summed设为CHECKSUM_UNNECESSARY, 忽略校验过程.
如果 skb->ip_summed == CHECKSUM_HW, 说明TCP包本身的校验已经由硬件链路层完成; 新kernel(2.6.x)已经不使用这个选项.

下面我们介绍具体实现, 基于kernel-2.6.x, x86 architecture.
[函数实现]
TCP包接收校验的初始化
/*
 * Initialise checksum state for a received TCP segment.
 *
 * If skb->ip_summed is CHECKSUM_COMPLETE, skb->csum is combined with the
 * pseudo-header through tcp_v4_check(); when that yields zero the packet is
 * marked CHECKSUM_UNNECESSARY and 0 is returned.
 *
 * Otherwise skb->csum is seeded with the unfolded pseudo-header sum (initial
 * sum 0). Packets with skb->len <= 76 are then checked right away via
 * __skb_checksum_complete(); for longer packets 0 is returned here and
 * skb->csum keeps the seed.
 *
 * Returns 0, or the result of __skb_checksum_complete() for short packets.
 */
static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
{
	const struct iphdr *iph = ip_hdr(skb);

	/* A full-packet sum is already available in skb->csum. */
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* Add the pseudo-header and fold; zero means the packet is good. */
		if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	/* Seed skb->csum with the pseudo-header sum, starting from 0. */
	skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, skb->len, IPPROTO_TCP, 0);

	/*
	 * NOTE(review): 76 is the short-packet threshold used by upstream 2.6
	 * kernels; confirm against the exact kernel version being described.
	 */
	if (skb->len <= 76) {
		return __skb_checksum_complete(skb); /* checksum the data now */
	}
	return 0;
}
附加伪头进行校验
/*
 * Fold a partial sum together with the TCP pseudo-header.
 *
 * Forwards to csum_tcpudp_magic() with the protocol fixed to IPPROTO_TCP.
 * Note that the argument order differs: len comes first here, but third in
 * csum_tcpudp_magic().
 */
static inline __sum16 tcp_v4_check(int len, __be32 saddr, __be32 daddr, __wsum base)
{
	__sum16 folded = csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);

	return folded;
}
/*
 * Compute the folded 16-bit checksum over the pseudo-header fields plus an
 * existing partial sum: csum_tcpudp_nofold() followed by csum_fold().
 */
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, __wsum sum)
{
	__wsum unfolded = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);

	return csum_fold(unfolded);
}
生成包含伪头的累加和(源,目的,长度,协议号)
/*
 * Add the pseudo-header fields (destination address, source address, and
 * length + protocol) to a 32-bit partial sum, without folding to 16 bits.
 *
 * Carries are propagated with adcl and the last carry is wrapped back into
 * the sum. The "0" constraint ties the incoming sum to the same register as
 * the output operand %0.
 *
 * NOTE(review): the "<< 8" on (len + proto) presumably places the host-order
 * value in the byte lanes that match network byte order on little-endian
 * x86; confirm before reusing on another architecture.
 */
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, unsigned short proto, __wsum sum)
{
	__asm__(
		"addl %1, %0 ;\n"	/* sum += daddr */
		"adcl %2, %0 ;\n"	/* sum += saddr + carry */
		"adcl %3, %0 ;\n"	/* sum += ((len + proto) << 8) + carry */
		"adcl $0, %0 ;\n"	/* wrap the final carry back in */
		: "=r" (sum)
		: "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum)
	);
	return sum;
}
将32位累加和折叠成16位校验和
/*
 * Fold a 32-bit partial sum into a 16-bit checksum.
 *
 * The low half of the sum (shifted into the upper 16 bits) is added to the
 * high half (low 16 bits masked off); "adcl $0xffff" then adds 0xffff plus
 * the carry of that addition. The upper 16 bits of the result are inverted
 * and returned.
 *
 * The "0" constraint makes the masked high half start out in the same
 * register as the output operand %0.
 */
static inline __sum16 csum_fold(__wsum sum)
{
	__asm__(
		"addl %1, %0 ;\n"
		"adcl $0xffff, %0 ;\n"
		: "=r" (sum)
		: "r" ((__force u32)sum << 16), "0" ((__force u32)sum & 0xffff0000)
	);
	return (__force __sum16)(~(__force u32)sum >> 16);
}
基于伪头累加和,完成全包校验
/*
 * Finish checksum verification of a TCP packet if it is still needed.
 *
 * Returns 0 without further work when skb->ip_summed is
 * CHECKSUM_UNNECESSARY; otherwise returns 1 if __tcp_checksum_complete()
 * yields a non-zero value and 0 if it yields zero.
 */
static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_UNNECESSARY)
		return 0;

	return __tcp_checksum_complete(skb) ? 1 : 0;
}
/*
 * Checksum the whole packet: calls __skb_checksum_complete_head() with the
 * full skb->len as the length.
 */
__sum16 __skb_checksum_complete(struct sk_buff *skb)
{
	const int whole_len = skb->len;

	return __skb_checksum_complete_head(skb, whole_len);
}
/*
 * Checksum the first len bytes of the packet, starting at offset 0 and
 * seeded with skb->csum, then fold the result to 16 bits.
 *
 * When the folded sum is zero the packet is marked CHECKSUM_UNNECESSARY.
 * If ip_summed was CHECKSUM_COMPLETE at that point,
 * netdev_rx_csum_fault() is called for the receiving device first.
 *
 * Returns the folded sum; zero means the checksum matched.
 */
__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
{
	__sum16 sum;

	sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
	if (likely(!sum)) {
		if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
			netdev_rx_csum_fault(skb->dev);
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	}
	return sum;
}
/*
 * Accumulate a partial checksum over len bytes of the packet, starting at
 * offset and continuing from the partial sum csum.
 *
 * Only the first stage is shown in this excerpt: the part of the requested
 * range that lies within the first skb_headlen(skb) bytes of skb->data is
 * summed with csum_partial(). If that covers the whole request, the sum is
 * returned immediately.
 */
__wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum)
{
	int start = skb_headlen(skb);
	int i, copy = start - offset;	/* i is not used in the visible part */
	int pos = 0;

	/* Checksum header. */
	if (copy > 0) {
		if (copy > len)
			copy = len;
		csum = csum_partial(skb->data + offset, copy, csum);
		if ((len -= copy) == 0)
			return csum;
		offset += copy;
		pos = copy;
	}
	/* ...... remainder of the function is omitted in this excerpt ...... */
}
计算32位中间累加和
/*
 * Compute the 32-bit intermediate (partial) sum of a buffer.
 *
 * Placeholder only: the body is not reproduced in this excerpt.
 */
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
{
// Implemented in the assembly file arch/x86/lib/checksum_32.S
}
基于TCP用户数据的中间累加和, 生成TCP包校验码
/*
 * Fill in th->check for an outgoing TCP segment.
 *
 * sk:  socket supplying the source and destination addresses
 * len: length passed to the pseudo-header computation
 * skb: packet whose TCP header is updated
 *
 * CHECKSUM_PARTIAL: only the inverted pseudo-header checksum (seed 0) is
 * stored, and csum_start/csum_offset record where the checksum field lives.
 * Otherwise: the full checksum is computed by summing the TCP header on top
 * of skb->csum and folding it with the pseudo-header.
 */
void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
{
	struct inet_sock *inet = inet_sk(sk);
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		/* Pseudo-header only. */
		th->check = ~tcp_v4_check(len, inet->saddr, inet->daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		/*
		 * Full TCP checksum. "th->doff << 2" is the byte count handed to
		 * csum_partial(); doff is presumably the header length in 32-bit
		 * words.
		 */
		th->check = tcp_v4_check(len, inet->saddr, inet->daddr, csum_partial((char *)th, th->doff << 2, skb->csum));
	}
}
在拷贝用户数据时同时计算累加和
/*
 * Copy len bytes from src to dst while accumulating the partial checksum.
 *
 * Delegates to csum_partial_copy_generic() (arch/x86/lib/checksum_32.S),
 * passing NULL for its last two arguments.
 */
unsigned int csum_partial_copy_nocheck(const char *src, char *dst, int len, int sum)
{
	unsigned int acc;

	acc = csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
	return acc;
}
ip头校验和计算
/*
 * Compute the checksum of an IP header.
 *
 * iph: start of the header
 * ihl: header length in 32-bit words
 *
 * The first four words are summed with straight-line instructions and the
 * remaining (ihl - 4) words in the loop at label 1. The 32-bit sum is then
 * folded to 16 bits and inverted.
 *
 * NOTE(review): when ihl <= 4 the code jumps straight to label 2 and returns
 * the first word unfolded; callers are presumably expected to pass ihl >= 5.
 */
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	unsigned int sum;

	__asm__ __volatile__(
		"movl (%1), %0 ;\n"		/* sum = word at iph */
		"subl $4, %2 ;\n"		/* ihl -= 4 */
		"jbe 2f ;\n"			/* nothing beyond four words: skip */
		"addl 4(%1), %0 ;\n"		/* sum += word at iph+4 */
		"adcl 8(%1), %0 ;\n"		/* sum += word at iph+8 + carry */
		"adcl 12(%1), %0 ;\n"		/* sum += word at iph+12 + carry */
		"1: adcl 16(%1), %0 ;\n"	/* sum += word at iph+16 + carry */
		"lea 4(%1), %1 ;\n"		/* iph += 4; lea leaves the flags alone */
		"decl %2 ;\n"			/* decl does not modify the carry flag */
		"jne 1b ;\n"
		"adcl $0, %0 ;\n"		/* add the last carry */
		"movl %0, %2 ;\n"
		"shrl $16, %0 ;\n"
		"addw %w2, %w0 ;\n"		/* high half + low half */
		"adcl $0, %0 ;\n"
		"notl %0 ;\n"
		"2: ;\n"
		/*
		 * Since the input registers which are loaded with iph and ihl are
		 * modified, we must also specify them as outputs, or gcc will
		 * assume they contain their original values. The "1" and "2"
		 * constraints bind each input to the matching output operand.
		 */
		: "=r" (sum), "=r" (iph), "=r" (ihl)
		: "1" (iph), "2" (ihl)
		: "memory"
	);
	return (__force __sum16)sum;
}
递减ip->ttl,更新校验和
static inline int ip_decrease_ttl(struct iphdr *iph)
{
u32 check = (__force u32)iph->check;
check += (__force u32)htons(0x0100);
iph->check = (__force __sum16)(check + (check>=0xFFFF));
return --iph->ttl;
}
/*
 * Add two 32-bit partial sums with end-around carry: if the 32-bit addition
 * wraps (res < addend), one is added back.
 */
static inline __wsum csum_add(__wsum csum, __wsum addend)
{
	u32 res = (__force u32)csum;
	res += (__force u32)addend;
	return (__force __wsum)(res + (res < (__force u32)addend));
}

/*
 * Subtract a partial sum by adding its bitwise complement.
 */
static inline __wsum csum_sub(__wsum csum, __wsum addend)
{
	return csum_add(csum, ~addend);
}

/*
 * Add the partial sum csum2 of a block located at byte offset `offset` to
 * csum. When the offset is odd, the two bytes of each 16-bit half of csum2
 * are swapped first.
 */
static inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset)
{
	u32 sum = (__force u32)csum2;
	if (offset & 1)
		sum = ((sum & 0xFF00FF) << 8) + ((sum >> 8) & 0xFF00FF);
	return csum_add(csum, (__force __wsum)sum);
}
/*
 * Remove the partial sum csum2 of a block located at byte offset `offset`
 * from csum. When the offset is odd, the two bytes of each 16-bit half of
 * csum2 are swapped before the subtraction.
 */
static inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset)
{
	u32 sum = (__force u32)csum2;
	if (offset & 1)
		sum = ((sum & 0xFF00FF) << 8) + ((sum >> 8) & 0xFF00FF);
	return csum_sub(csum, (__force __wsum)sum);
}
[/函数实现]
03-31 14:31