写点什么

linux 内核协议栈 邻居协议之 ARP 协议处理初始化

用户头像
赖猫
关注
发布于: 2021 年 02 月 18 日

前言


上面分析完了通用邻居层的架构以及代码处理,下面分析 ipv4 的邻居协议 arp。对于 linux 邻居协议层,我认为通用邻居层是最重要的实现,arp 协议层的处理,主要是涉及三个方面:


  1. 处理 arp 请求、应答,并创建相应的邻居项

  2. 发送 arp 请求,并创建相应的邻居项

  3. 处理应用层通过 ioctl 创建或者删除邻居项的请求。以上三方面的处理最终都会调用通用邻居层的函数。


1 arp 协议相关数据结构


1.1 arp 协议格式



1.2 arp 数据结构


/*
 * Fixed part of an ARP packet header as used on Ethernet.
 * The 16-bit fields are declared __be16 (big-endian on the wire).
 */
struct arphdr {
	__be16		ar_hrd;		/* hardware address format */
	__be16		ar_pro;		/* protocol address format */
	unsigned char	ar_hln;		/* hardware address length */
	unsigned char	ar_pln;		/* protocol address length */
	__be16		ar_op;		/* ARP opcode (command) */

#if 0
	/*
	 * Address section as it looks on Ethernet. It is compiled out
	 * because this part is variable sized.
	 */
	unsigned char	ar_sha[ETH_ALEN];	/* sender hardware address */
	unsigned char	ar_sip[4];		/* sender IP address */
	unsigned char	ar_tha[ETH_ALEN];	/* target hardware address */
	unsigned char	ar_tip[4];		/* target IP address */
#endif
};
复制代码


1.3 arp 操作码


/*
 * ARP protocol opcodes (values carried in arphdr.ar_op).
 * Each macro must live on its own line: a #define that follows another
 * on the same line is not a directive but part of the first macro's body.
 */
#define	ARPOP_REQUEST	1		/* ARP request */
#define	ARPOP_REPLY	2		/* ARP reply */
#define	ARPOP_RREQUEST	3		/* RARP request */
#define	ARPOP_RREPLY	4		/* RARP reply */
#define	ARPOP_InREQUEST	8		/* InARP request */
#define	ARPOP_InREPLY	9		/* InARP reply */
#define	ARPOP_NAK	10		/* (ATM)ARP NAK */
复制代码


2 arp 协议初始化 arp_init()


  1. 使用 neigh_table_init() 初始化 arp 协议对应的邻居表 arp_tbl

  2. 为 arp 协议注册协议处理函数 arp_rcv()。网络设备驱动处理完的数据,会由函数 netif_receive_skb 继续进行二、三层协议的处理。对于进入 netif_receive_skb 的数据包,如果桥接层没有对其进行处理,则会遍历 ptype_base 哈希数组中每一个 hash 链表上所有已注册的协议处理函数,查找与 skb 数据包协议类型相同的协议处理函数;对于 arp 数据包来说,就会通过 deliver_skb 调用到函数 arp_rcv,进行 arp 数据包的处理。

  3. 向 proc 文件系统中注册 arp 相关的 proc 文件

  4. 向 netdev_chain 通知链中注册 arp 的事件通知函数,主要是处理二层地址改变的事件


/*
 * Packet-type entry that routes received ETH_P_ARP frames to arp_rcv().
 * It is registered with dev_add_pack() from arp_init().
 */
static struct packet_type arp_packet_type __read_mostly = {
	.type =	cpu_to_be16(ETH_P_ARP),
	.func =	arp_rcv,
};

/*
 * arp_init - ARP protocol initialisation, called once on startup.
 *
 * Steps, in order:
 *   1. initialise the ARP neighbour table arp_tbl;
 *   2. register arp_rcv() as the handler for ETH_P_ARP packets;
 *   3. register the ARP proc entries;
 *   4. with CONFIG_SYSCTL, register the sysctl entries for arp_tbl.parms;
 *   5. register arp_netdev_notifier for network-device events.
 */
void __init arp_init(void)
{
	neigh_table_init(&arp_tbl);

	dev_add_pack(&arp_packet_type);
	arp_proc_init();
#ifdef CONFIG_SYSCTL
	neigh_sysctl_register(NULL, &arp_tbl.parms, "ipv4", NULL);
#endif
	register_netdevice_notifier(&arp_netdev_notifier);
}
复制代码


2.1 邻居表初始化 neigh_table_init()


  1. 调用 neigh_table_init_no_netlink() 初始化邻居表的成员值

  2. 将该邻居表添加到邻居表链表 neigh_tables 中


/*
 * neigh_table_init - initialise a neighbour table and add it to neigh_tables
 * @tbl: neighbour table supplied by the protocol
 *
 * The table members are set up by neigh_table_init_no_netlink(), then the
 * table is linked at the head of the global neigh_tables list under
 * neigh_tbl_lock. If a table with the same family was already on the list,
 * the new one is still added, but an error is logged and the stack dumped.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *clash;

	neigh_table_init_no_netlink(tbl);

	write_lock(&neigh_tbl_lock);
	/* Walk the list until a table of the same family is found, if any. */
	clash = neigh_tables;
	while (clash && clash->family != tbl->family)
		clash = clash->next;
	/* Link the new table in at the head. */
	tbl->next = neigh_tables;
	neigh_tables = tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(clash)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
复制代码


2.1.1 初始化邻居表成员 neigh_table_init_no_netlink()


  1. 设置邻居表的 reachable_time

  2. 确定邻居项的大小 entry_size,用于后续创建邻居项(较早版本的内核会在这里为该邻居表申请 slab 缓存,下面列出的代码版本中已没有这一步)

  3. 为邻居表的邻居项 hash 数组申请缓存

  4. 创建一个带有延迟功能的工作队列,用于进行邻居项的垃圾回收


static void neigh_table_init_no_netlink(struct neigh_table *tbl){	unsigned long now = jiffies;	unsigned long phsize; 	write_pnet(&tbl->Index of /, &init_net);	atomic_set(&tbl->parms.refcnt, 1);	tbl->parms.reachable_time =			  neigh_rand_reach_time(tbl->parms.base_reachable_time); 	tbl->stats = alloc_percpu(struct neigh_statistics);	if (!tbl->stats)		panic("cannot create neighbour cache statistics"); #ifdef CONFIG_PROC_FS	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,			      &neigh_stat_seq_fops, tbl))		panic("cannot create neighbour proc dir entry");#endif 	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3)); 	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 	if (!tbl->nht || !tbl->phash_buckets)		panic("cannot allocate neighbour cache hashes"); 	if (!tbl->entry_size)		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +					tbl->key_len, NEIGH_PRIV_ALIGN);	else		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN); 	rwlock_init(&tbl->lock);    /*创建带有延迟功能的工作队列,并将创建的工作添加      到工作队列keventd_wq中去,并开启一个定时器延迟tbl->parms.reachable_time      后,调用queue_work执行函数neigh_periodic_work          */	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);	skb_queue_head_init_class(&tbl->proxy_queue,			&neigh_table_proxy_queue_class); 	tbl->last_flush = now;	tbl->last_rand	= now + tbl->parms.reachable_time * 20;}
复制代码


2.2 arp 表初始值 struct neigh_table arp_tbl


struct neigh_table arp_tbl = {	.family		= AF_INET,	.key_len	= 4,//ipv4 地址长度	.hash		= arp_hash,//arp 协议的hash函数	.constructor	= arp_constructor,//邻居表现初始化函数,初始化neighbour项中与协议相关的成员	.proxy_redo	= parp_redo,	.id		= "arp_cache",	.parms		= {		.tbl			= &arp_tbl,		.base_reachable_time	= 30 * HZ,		.retrans_time		= 1 * HZ,		.gc_staletime		= 60 * HZ,		.reachable_time		= 30 * HZ,		.delay_probe_time	= 5 * HZ,		.queue_len_bytes	= 64*1024,		.ucast_probes		= 3,		.mcast_probes		= 3,		.anycast_delay		= 1 * HZ,		.proxy_delay		= (8 * HZ) / 10,		.proxy_qlen		= 64,		.locktime		= 1 * HZ,	},	.gc_interval	= 30 * HZ,	.gc_thresh1	= 128,	.gc_thresh2	= 512,	.gc_thresh3	= 1024,};EXPORT_SYMBOL(arp_tbl);
复制代码


这个邻居表设置了以下内容:arp 邻居项的初始化函数 arp_constructor;邻居项异步垃圾回收相关的阈值 gc_thresh1、gc_thresh2、gc_thresh3;一个邻居项发送 arp request 数据包的最大次数 ucast_probes、mcast_probes;重传时间 retrans_time;邻居项状态转换相关的时间间隔值 base_reachable_time、reachable_time、delay_probe_time;以及 arp 协议相关的 hash 函数 arp_hash。


3 arp 协议邻居项的初始化函数 arp_constructor()


该函数用于 arp 协议中,初始化 neighbour 项中与 arp 协议相关的项


  1. 设置邻居项的状态

  2. 设置邻居项的 ops 指针

  3. 设置邻居项的 output 函数指针


static int arp_constructor(struct neighbour *neigh){	__be32 addr = *(__be32 *)neigh->primary_key;	struct net_device *dev = neigh->dev;	struct in_device *in_dev;	struct neigh_parms *parms; 	rcu_read_lock();	in_dev = __in_dev_get_rcu(dev);	if (in_dev == NULL) {		rcu_read_unlock();		return -EINVAL;	} 	neigh->type = inet_addr_type(dev_net(dev), addr); 	parms = in_dev->arp_parms;	__neigh_parms_put(neigh->parms);	neigh->parms = neigh_parms_clone(parms);	rcu_read_unlock();		//对于以太网设备,其dev->header_ops为eth_header_ops	if (!dev->header_ops) {		neigh->nud_state = NUD_NOARP;		neigh->ops = &arp_direct_ops;		neigh->output = neigh_direct_output;	} else {		/* Good devices (checked by reading texts, but only Ethernet is		   tested)		   ARPHRD_ETHER: (ethernet, apfddi)		   ARPHRD_FDDI: (fddi)		   ARPHRD_IEEE802: (tr)		   ARPHRD_METRICOM: (strip)		   ARPHRD_ARCNET:		   etc. etc. etc.		   ARPHRD_IPDDP will also work, if author repairs it.		   I did not it, because this driver does not work even		   in old paradigm.		 */ #if 1		/* So... these "amateur" devices are hopeless.		   The only thing, that I can say now:		   It is very sad that we need to keep ugly obsolete		   code to make them happy.		   They should be moved to more reasonable state, now		   they use rebuild_header INSTEAD OF hard_start_xmit!!!		   Besides that, they are sort of out of date		   (a lot of redundant clones/copies, useless in 2.1),		   I wonder why people believe that they work.		 
*/		switch (dev->type) {		default:			break;		case ARPHRD_ROSE:#if IS_ENABLED(CONFIG_AX25)		case ARPHRD_AX25:#if IS_ENABLED(CONFIG_NETROM)		case ARPHRD_NETROM:#endif			neigh->ops = &arp_broken_ops;			neigh->output = neigh->ops->output;			return 0;#else			break;#endif		}#endif		/*		对于组播类型的 neighbour 项,则将该邻居项的状态设置为 NUD_NOARP		对于不需要 arp 的设备或者回环设备,将 nud_state 设置为 NUD_NOARP		对于广播类型或者点对点设备的邻居项,不需要 arp		*/		if (neigh->type == RTN_MULTICAST) {			neigh->nud_state = NUD_NOARP;			arp_mc_map(addr, neigh->ha, dev, 1);		} else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {			neigh->nud_state = NUD_NOARP;			memcpy(neigh->ha, dev->dev_addr, dev->addr_len);		} else if (neigh->type == RTN_BROADCAST ||			   (dev->flags & IFF_POINTOPOINT)) {			neigh->nud_state = NUD_NOARP;			memcpy(neigh->ha, dev->broadcast, dev->addr_len);		}		/*		如果设备的header_ops->cache存在,则将邻居项的ops设置为arp_hh_ops,		对于以太网设备,其header_ops->cache为eth_header_cache,所以对于以太网设备		其neighbour->ops为arp_hh_ops		*/		if (dev->header_ops->cache)			neigh->ops = &arp_hh_ops;		else			neigh->ops = &arp_generic_ops;				//对于邻居项状态为有效状态时,则将neigh->output设置为neigh->ops->connected_output		if (neigh->nud_state & NUD_VALID)			neigh->output = neigh->ops->connected_output;		else			neigh->output = neigh->ops->output;	}	return 0;}
复制代码


Linux、C/C++技术交流群:【960994558】整理了一些个人觉得比较好的学习书籍、大厂面试题、技术教学视频资料共享在里面(包括 C/C++,Linux,Nginx,ZeroMQ,MySQL,Redis,fastdfs,MongoDB,ZK,流媒体,CDN,P2P,K8S,Docker,TCP/IP,协程,DPDK 等等.),有需要的可以自行添加哦!~




用户头像

赖猫

关注

还未添加个人签名 2020.11.28 加入

纸上得来终觉浅,绝知此事要躬行

评论

发布
暂无评论
linux内核协议栈 邻居协议之ARP协议处理初始化