网络模块初始化-net( 网络名字空间 )

[ include/net/net_namespace.h ]

We want to make it look to user space like the kernel implements multiple network stacks.

To implement this, some of the currently global variables in the network stack need to have one instance per network namespace, or the global data structure needs to have a network namespace field.

Currently control enters the network stack in one of 4 major ways: through operations on a socket, through a packet coming in from a network device, through miscellaneous syscalls from a process, and through operations on a virtual filesystem. So the current design calls for placing a pointer to struct net (the network namespace structure) on network devices, sockets, processes, and on filesystems so we have a clear understanding of which network namespace operations should be done in the context of.

Packets do not contain a pointer to a network namespace structure. Instead their network namespace is derived from which network device or which socket they are passing through.

On the input path we only need to look at the network namespace to determine which routing tables to use, and which sockets the packet can be destined for.

Similarly on the output path we only need to consult the network namespace for the output routing tables which point to which network devices we can use.

So while there are accesses to the network namespace as we process each packet they are in well contained spots that occur rarely.

Where the network namespace appears most is on the control, setup, and clean up code paths, in the network stack that we change rarely. There we currently don't have anything except a global context so modifications are necessary, but since the network parameter is not implicit it should not require much thought to use.

The implementation strategy follows the classic global lock reduction pattern. First all of the interfaces at a given level in the network stack are made to filter out traffic from anything except the initial network namespace, and then those interfaces are allowed to see packets from any network namespace. Then some subset of those interfaces are taught to handle packets from all namespaces, after the more specific protocol layers below them have been made to filter those packets.

对于网络模块,有很多全局性的构造和析构操作,实现这些操作用了面向对象的方法:
  1. 定义了一个全局列表,所有的网络名字空间都注册到此列表:
    [ net/core/net_namespace.c ]
    LIST_HEAD(net_namespace_list);	// list of all struct net instances (network namespaces)
    EXPORT_SYMBOL_GPL(net_namespace_list);
  2. 内核定义了一个默认的网络名字空间,如果没有自定义的加入,这就是唯一的一个,所有的网络相关的全局变量都记录在这里

    [ net/core/net_namespace.c ]

    /* The default network namespace defined by the kernel. */
    struct net init_net = {
    	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),	// empty list: both pointers refer to the head itself
    };
    EXPORT_SYMBOL(init_net);
    
  3. 定义了pernet_operations结构,来封装函数接口

    [ include/net/net_namespace.h ]

    /*
     * A set of per-network-namespace callbacks that can be registered on
     * the list of pernet operations.
     */
    struct pernet_operations {
    	struct list_head list;	/* linkage into the list of registered operations */
    	int (*init)(struct net *net);	/* optional; ops_init() calls it for a namespace */
    	void (*exit)(struct net *net);	/* counterpart of init, run when a namespace is destroyed or the ops are unregistered */
    	void (*exit_batch)(struct list_head *net_exit_list);	/* takes a list rather than one net; NOTE(review): callers not shown here */
    	int *id;	/* optional; receives the ID allocated at registration */
    	size_t size;	/* bytes of private data allocated per namespace when id is also set */
    }; 
    
  4. 定义了一个全局列表,所有的pernet_operations对象都注册到此列表上

    [ net/core/net_namespace.c ]

    /*
     *	Our network namespace constructor/destructor lists
     */
    
    static LIST_HEAD(pernet_list);	// list of all registered pernet_operations
    static struct list_head *first_device = &pernet_list;	// where subsystems are inserted: the first device entry, or &pernet_list when no device is registered
    当对象注册到pernet_list中时,会自动调用init进行初始化(一般是初始化net中的成员变量),注销时,调用exit。当注册时,是对net_namespace_list中所有的网络名字空间进行操作。
注册函数如下:

[ net/core/net_namespace.c ]

/**
 *	register_pernet_subsys - add a subsystem's per-namespace operations
 *	@ops: pernet operations structure describing the subsystem
 *
 *	The init and exit callbacks in @ops run when a network namespace
 *	is created and destroyed, respectively.
 *
 *	Registration itself runs init for every network namespace that
 *	already exists, which gives kernel modules a race free view of the
 *	set of network namespaces.
 *
 *	For a newly created namespace the init methods run in registration
 *	order; on destruction the exit methods run in the reverse order.
 *
 *	The entry is inserted at first_device while net_mutex is held.
 *
 *	Return: the result of register_pernet_operations().
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int ret;

	mutex_lock(&net_mutex);
	ret = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *	unregister_pernet_subsys - remove a subsystem's per-namespace operations
 *	@ops: pernet operations structure to unregister
 *
 *	Takes @ops off the list consulted when network namespaces are
 *	created or destroyed, and runs the exit method for every network
 *	namespace that currently exists. The work is done with net_mutex
 *	held.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
[ net/core/net_namespace.c ]
/**
 *	register_pernet_device - add a device's per-namespace operations
 *	@ops: pernet operations structure describing the device
 *
 *	The init and exit callbacks in @ops run when a network namespace
 *	is created and destroyed, respectively.
 *
 *	Registration itself runs init for every network namespace that
 *	already exists, which gives kernel modules a race free view of the
 *	set of network namespaces.
 *
 *	For a newly created namespace the init methods run in registration
 *	order; on destruction the exit methods run in the reverse order.
 *
 *	The entry is inserted at &pernet_list while net_mutex is held.
 *
 *	Return: the result of register_pernet_operations().
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int ret;

	mutex_lock(&net_mutex);
	ret = register_pernet_operations(&pernet_list, ops);
	/* The first device registered successfully becomes first_device. */
	if (ret == 0 && first_device == &pernet_list)
		first_device = &ops->list;
	mutex_unlock(&net_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 *	unregister_pernet_device - remove a device's per-namespace operations
 *	@ops: pernet operations structure to unregister
 *
 *	Takes @ops off the list consulted when network namespaces are
 *	created or destroyed, and runs the exit method for every network
 *	namespace that currently exists. The work is done with net_mutex
 *	held.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	/* If first_device points at this entry, advance it to the next one. */
	if (first_device == &ops->list)
		first_device = ops->list.next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
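这两组函数一组用于注册网络设备,一组用于注册网络子系统。不同之处在于:子系统通过list_add_tail插入到first_device之前,设备则插入到&pernet_list之前(即整个列表的尾部)。初始化时first_device指向&pernet_list,成功注册第一个设备后first_device指向该设备的ops->list。因此最后这个列表的结构是:所有子系统在前,所有设备在后,first_device指向第一个设备,即 pernet_list -> 子系统1 -> ... -> 子系统N -> 设备1(first_device) -> ... -> 设备M。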

这两组函数都调用了下面一组函数:

[ net/core/net_namespace.c ]

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	/* ops->id大部分情况下都为空指针
	 */
	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, 1, ops->id);	// 得到新的ID
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);	// 最大的ID,全局量,在net_alloc_generic时使用
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}

/*
 * Unregister @ops and, if it was given a generic ID, release that ID
 * once rcu_barrier() has returned.
 */
static void unregister_pernet_operations(struct pernet_operations *ops)
{
	__unregister_pernet_operations(ops);
	rcu_barrier();

	if (!ops->id)
		return;

	ida_remove(&net_generic_ids, *ops->id);
}
最后执行操作的是下面的函数:

[ net/core/net_namespace.c ]

static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);	// 将ops加入到list前面
	if (ops->init || (ops->id && ops->size)) {	// 如果定义了init函数或是定义了ID和在小
		/* 对每个net_namespace_list中的net进行操作
		 * 将ops插入到net的私有数据空间中,使的net可以支持此ops
		 */
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);	// 将net加入到临时列表中,当出错时将ops从当中删除
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
    struct net *net;
    LIST_HEAD(net_exit_list);

    list_del(&ops->list);
    for_each_net(net)
        list_add_tail(&net->exit_list, &net_exit_list);
    ops_exit_list(ops, &net_exit_list);
    ops_free_list(ops, &net_exit_list);
} 
在加入列表后,实际调用init函数的操作如下:

[ net/core/net_namespace.c ]

/*
 * Initialise @ops for one network namespace.
 *
 * When @ops has both an id and a size, a zeroed private area of
 * ops->size bytes is allocated and stored in @net under that id.  The
 * optional ops->init hook is then called.
 *
 * Returns 0 on success, -ENOMEM if the private area cannot be allocated,
 * or the error returned by net_assign_generic() or ops->init().  On
 * failure the private area is freed and no pointer to it is left behind
 * in @net.
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	/* Private data is only set up when both the id and the size are given. */
	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		/* Publish the private area in the namespace's generic array. */
		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

	/*
	 * init failed after the private area was published: clear the slot
	 * before freeing, otherwise the namespace keeps a dangling pointer.
	 * The slot already exists, so net_assign_generic() only stores the
	 * value here; it neither allocates nor fails.
	 */
	if (data)
		net_assign_generic(net, *ops->id, NULL);

cleanup:
	kfree(data);

out:
	return err;
}
当ops包含有私有数据时,要把私有数据的指针放入net中进行保存:

[ net/core/net_namespace.c ]

/*
 * Store @data in slot @id of @net's generic pointer array, replacing the
 * array with a larger one first when the current one is too short.
 *
 * Must be called with net_mutex held, and @id must be non-zero (the slot
 * index is id - 1).
 *
 * Returns 0 on success or -ENOMEM if a new array cannot be allocated.
 */
static int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	old_ng = rcu_dereference_protected(net->gen, lockdep_is_held(&net_mutex));	// current generic array
	ng = old_ng;
	if (old_ng->len >= id)	// current array already has a slot for this id: store in place
		goto assign;

	ng = net_alloc_generic();	// allocate a new, larger array
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));	// copy the existing slots into the new array

	rcu_assign_pointer(net->gen, ng);	// publish the new array in net
	kfree_rcu(old_ng, rcu);	// free the old array after a grace period
assign:
	ng->ptr[id - 1] = data;	// ids are 1-based, slots are 0-based
	return 0;
}
分配私有空间的操作如下:

[ net/core/net_namespace.c ]

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

/*
 * Allocate a zeroed net_generic with room for max_gen_ptrs pointer slots.
 *
 * Returns the new structure with ->len set to the slot count, or NULL if
 * the allocation fails.
 */
static struct net_generic *net_alloc_generic(void)
{
	/* Everything before ptr[] plus max_gen_ptrs pointer slots (slots, not bytes). */
	size_t alloc_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
	struct net_generic *ng = kzalloc(alloc_size, GFP_KERNEL);

	if (!ng)
		return NULL;

	ng->len = max_gen_ptrs;	/* number of slots in ptr[] */
	return ng;
}

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章