linux内核协议栈 UDP之数据报接收过程

UDP报文接收概述

UDP数据报的接收要分两部分来看:

  1. 网络层接收完数据包后递交给UDP后,UDP的处理过程。该过程UDP需要做的工作就是接收数据包并对其进行校验,校验成功后将其放入接收队列 sk_receive_queue 中等待用户空间程序来读取。
  2. 用户空间程序调用read()等系统调用读取已经放入接收队列 sk_receive_queue 中的数据。

linux内核协议栈 UDP之数据报接收过程

文章插图
 
从IP层接收数据包 udp_rcv()

该函数是在AF_INET协议族初始化时,由UDP注册给网络层的回调函数。当网络层代码处理完一个输入数据包后,如果该数据包是发往本机的,并且其上层协议就是UDP,那么会调用该回调函数。
int udp_rcv(struct sk_buff *skb){ return __udp4_lib_rcv(skb, &udp_table, IPPROTO_UDP);} @skb: 输入数据包@udptable:已绑定端口的UDP传输控制块,将从该哈希表查找给skb属于哪个套接字@proto:L4协议号,到这里可能是IPPROTO_UDP或者IPPROTO_UDPLITEint __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,int proto){ struct sock *sk; struct udphdr *uh; unsigned short ulen; struct rtable *rt = skb_rtable(skb); __be32 saddr, daddr; struct net *net = dev_net(skb->dev);/**Validate the packet.*/ //调整SKB内部数据布局,使得线性地址空间中至少包含UDP首部 if (!pskb_may_pull(skb, sizeof(struct udphdr)))goto drop;/* No space for header. */uh= udp_hdr(skb); ulen = ntohs(uh->len); //skb中的数据长度不能小于UDP首部指示的数据包长度,即数据包是完整的 if (ulen > skb->len)goto short_packet;if (proto == IPPROTO_UDP) {//1. UDP数据包长度必须大于首部长度//2. pskb_trim_rcum()会去掉可能的填充(UDP数据包过小,IP可能会填充),然后重新计算校验和if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))goto short_packet;uh = udp_hdr(skb); } //计算校验和 if (udp4_csum_init(skb, uh, proto))goto csum_error;//获取数据包中的源IP和目的IP地址 saddr = ip_hdr(skb)->saddr; daddr = ip_hdr(skb)->daddr; //对于多播或者广播报文的处理 if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable);//根据报文的源端口号和目的端口号查询udptable,寻找应该接收该数据包的传输控制块 sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); //找到了处理该数据包的传输控制块,调用udp_queue_rcv_skb()接收数据包if (sk != NULL) {int ret = udp_queue_rcv_skb(sk, skb);sock_put(sk);/* a return value > 0 means to resubmit the input, but* it wants the return to be -protocol, or 0*/if (ret > 0)return -ret;return 0; } //到这里,说明没有传输控制块接收该数据包,做些统计然后丢弃该数据包//IPSec相关 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))goto drop; nf_reset(skb);/* No socket. 
Drop packet silently, if checksum is wrong */ if (udp_lib_checksum_complete(skb))goto csum_error; //累计输入数据包错误统计值,并且回复端口不可达ICMP报文 UDP_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);/** Hmm.We got an UDP packet to a port to which we* don't wanna listen.Ignore it.*/ kfree_skb(skb); return 0; short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%un",proto == IPPROTO_UDPLITE ? "-Lite" : "",&saddr,ntohs(uh->source),ulen,skb->len,&daddr,ntohs(uh->dest)); goto drop; csum_error: /** RFC1122: OK.Discards the bad packet silently (as far as* the network is concerned, anyway) as per 4.1.3.4 (MUST).*/ LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %dn",proto == IPPROTO_UDPLITE ? "-Lite" : "",&saddr,ntohs(uh->source),&daddr,ntohs(uh->dest),ulen);drop: UDP_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0;}疑惑:为何校验和的计算和验证要分udp4_csum_init()和udp_lib_checksum_complete()两步完成???
查找数据包所属套接字 __udp4_lib_lookup_skb()

如上,非常关键的一步就是根据数据包中的目的地址信息寻找应该由谁来处理该数据包。
/*
 * __udp4_lib_lookup_skb - find the socket that should receive @skb.
 * @skb:      the input packet
 * @sport:    source port taken from the UDP header (network byte order)
 * @dport:    destination port taken from the UDP header (network byte order)
 * @udptable: hash table to search
 *
 * Returns the socket attached to the skb if skb_steal_sock() yields one,
 * otherwise the result of __udp4_lib_lookup() on the packet's addresses,
 * ports and input interface.
 */
static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb,
						 __be16 sport, __be16 dport,
						 struct udp_table *udptable)
{
	struct sock *sk;
	const struct iphdr *iph = ip_hdr(skb);

	/*
	 * The network layer may already have looked up a socket for this
	 * packet; in that case the result was recorded in skb->sk.
	 */
	if (unlikely(sk = skb_steal_sock(skb)))
		return sk;
	else
		/* No earlier lookup result: search the hash table now. */
		return __udp4_lib_lookup(dev_net(skb_dst(skb)->dev), iph->saddr, sport,
					 iph->daddr, dport, inet_iif(skb),
					 udptable);
}

/*
 * __udp4_lib_lookup - pick the best-matching socket from the UDP hash table.
 * @net:      network namespace, passed to udp_hashfn() and compute_score()
 * @saddr:    source address of the packet
 * @sport:    source port of the packet (network byte order)
 * @daddr:    destination address of the packet
 * @dport:    destination port of the packet (network byte order)
 * @dif:      input network device interface of the packet
 * @udptable: hash table to search
 *
 * Returns the highest-scoring socket with its sk_refcnt incremented, or
 * NULL if no socket scored above -1 or the reference could not be taken.
 */
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
		__be16 sport, __be32 daddr, __be16 dport,
		int dif, struct udp_table *udptable)
{
	struct sock *sk, *result;
	struct hlist_nulls_node *node;
	/* The destination port (host byte order) is the hash key. */
	unsigned short hnum = ntohs(dport);
	unsigned int hash = udp_hashfn(net, hnum);
	struct udp_hslot *hslot = &udptable->hash[hash];
	int score, badness;

	rcu_read_lock();
begin:
	/* Walk the collision chain and keep the highest-scoring socket. */
	result = NULL;
	badness = -1;
	sk_nulls_for_each_rcu(sk, node, &hslot->head) {
		score = compute_score(sk, net, saddr, hnum, sport,
				      daddr, dport, dif);
		if (score > badness) {
			result = sk;
			badness = score;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != hash)
		goto begin;

	if (result) {
		/* Take a reference; give up if the refcount already hit zero. */
		if (unlikely(!atomic_inc_not_zero(&result->sk_refcnt)))
			result = NULL;
		/*
		 * Re-score now that the reference is held; if the socket no
		 * longer matches as well as before, release it and restart.
		 */
		else if (unlikely(compute_score(result, net, saddr, hnum, sport,
				  daddr, dport, dif) < badness)) {
			sock_put(result);
			goto begin;
		}
	}
	rcu_read_unlock();
	return result;
}


推荐阅读