Namespace of Process/PID in Linux

命名空間是一種資源劃分方案。資源有很多類別,比如:process IDs, hostnames, user IDs, file names, and some names associated with network access, and interprocess communication.

Process ID (pid)

一個PID命名空間具有獨立的process IDs空間。PID命名空間是有層級的,上一級命名空間可以看到其所有下級的PIDs。從而最初始的命名空間可看到所有的進程。PID命名空間中第一個進程ID爲1,它和init進程一樣特殊,最典型的就是負責接管所有的孤兒進程(orphaned processes)。終止進程PID 1將直接終止其所在的以及所有下級PID命名空間裏的進程。

現在讓我們走進Linux的實現:

1. 進程與命名空間[1]

/*
 * Excerpt of the task descriptor: only the namespace-related member is
 * shown; the "..." lines stand for members omitted from this excerpt.
 */
struct task_struct {
...
/* Namespaces: pointer to the nsproxy that groups this task's namespaces */
struct nsproxy			*nsproxy;
...
};

每個進程綁定的命名空間[2]

/*
 * A structure to contain pointers to all per-process
 * namespaces - fs (mount), uts, network, sysvipc, etc.
 *
 * The pid namespace is an exception -- it's accessed using
 * task_active_pid_ns.  The pid namespace here is the
 * namespace that children will use.
 *
 * 'count' is the number of tasks holding a reference.
 * The count for each namespace, then, will be the number
 * of nsproxies pointing to it, not the number of tasks.
 *
 * The nsproxy is shared by tasks which share all namespaces.
 * As soon as a single namespace is cloned or unshared, the
 * nsproxy is copied.
 */
struct nsproxy {
	/* number of tasks holding a reference to this nsproxy */
	atomic_t count;
	struct uts_namespace *uts_ns;                       // UTS = UNIX Timesharing System
	struct ipc_namespace *ipc_ns;
	struct mnt_namespace *mnt_ns;
	/* PID namespace that children will use, not the task's own one */
	struct pid_namespace *pid_ns_for_children;
	struct net 	     *net_ns;                       // network-related namespace parameters
	struct time_namespace *time_ns;                
	/* NOTE(review): presumably analogous to pid_ns_for_children -- confirm */
	struct time_namespace *time_ns_for_children;
	struct cgroup_namespace *cgroup_ns;
};
/* nsproxy instance defined elsewhere; its initial contents are not shown here */
extern struct nsproxy init_nsproxy;

上述結構只提供子進程的PID命名空間,進程自身的PID命名空間通過下面函數獲得[3]

/*
 * task_active_pid_ns() - PID namespace a task itself lives in.
 * @tsk: task to query.
 *
 * Looks up the task's struct pid and returns the namespace that pid was
 * allocated in.  The result is NULL when the task has no struct pid
 * attached, so callers must be prepared to handle that.
 */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *own_pid = task_pid(tsk);

	return ns_of_pid(own_pid);
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);


// https://github.com/torvalds/linux/blob/master/include/linux/sched.h#L1309
/*
 * task_pid() - fetch the struct pid stored in a task.
 * @task: task to read from; dereferenced unconditionally, must not be NULL.
 *
 * Returns the value of task->thread_pid without taking any reference.
 */
static inline struct pid *task_pid(struct task_struct *task)
{
	struct pid *attached = task->thread_pid;

	return attached;
}


// https://github.com/torvalds/linux/blob/master/include/linux/pid.h#L144
/*
 * ns_of_pid() - PID namespace in which @pid was allocated.
 * @pid: struct pid to inspect, may be NULL.
 *
 * The namespace is read from the upid entry at index pid->level.
 *
 * NOTE:
 * 	Callers normally pass the struct pid attached to a task (see
 * 	attach_pid(), detach_pid()), so @pid is expected to be non-NULL.
 * 	A NULL @pid yields a NULL namespace, which the caller must handle.
 */
static inline struct pid_namespace *ns_of_pid(struct pid *pid)
{
	if (!pid)
		return NULL;

	return pid->numbers[pid->level].ns;
}


// Test whether a pid is the init process of its PID namespace
/*
 * is_child_reaper() - true when @pid is the init process of the namespace
 * it was allocated in.
 * @pid: struct pid to test; dereferenced unconditionally, must not be NULL.
 *
 * The decision is made on the pid number (1) rather than on
 * pid_ns->child_reaper, because this may be called before copy_process
 * has assigned child_reaper.
 */
static inline bool is_child_reaper(struct pid *pid)
{
	int nr_in_own_ns = pid->numbers[pid->level].nr;

	return nr_in_own_ns == 1;
}

2. PID命名空間數據結構 [4]

/*
 * Values stored in pid_namespace's hide_pid field.
 * The numeric values are spelled out explicitly and must not change.
 */
enum {
	HIDEPID_OFF = 0,
	HIDEPID_NO_ACCESS = 1,
	HIDEPID_INVISIBLE = 2,
};

/*
 * Descriptor of one PID namespace.  Namespaces are linked to their parent
 * through 'parent', and 'level' records the position in that hierarchy
 * (the statically defined init_pid_ns uses level 0).
 */
struct pid_namespace {
	struct kref kref;
	struct idr idr;
	struct rcu_head rcu;
	unsigned int pid_allocated;
	/* task acting as init for this namespace; assigned in copy_process */
	struct task_struct *child_reaper;
	struct kmem_cache *pid_cachep;
	unsigned int level;
	struct pid_namespace *parent;        // parent namespace
#ifdef CONFIG_PROC_FS
	struct dentry *proc_self;
	struct dentry *proc_thread_self;
#endif
#ifdef CONFIG_BSD_PROCESS_ACCT
	struct fs_pin *bacct;
#endif
	struct user_namespace *user_ns;
	struct ucounts *ucounts;
	kgid_t pid_gid;
	/* one of the HIDEPID_* values */
	int hide_pid;
	int reboot;	/* group exit code if this pidns was rebooted */
	struct ns_common ns;
} __randomize_layout;

/* the statically defined PID namespace with level 0 */
extern struct pid_namespace init_pid_ns;

2.1 初始化全局默認PID命名空間[5]

/*
 * Static definition of the level-0 PID namespace.  No parent is set, so
 * it stays NULL through static initialisation; init_task is its child
 * reaper and init_user_ns its owning user namespace.
 *
 * Historical note carried over from the original comment:
 * PID-map pages start out as NULL, they get allocated upon
 * first use and are never deallocated. This way a low pid_max
 * value does not cause lots of bitmaps to be allocated, but
 * the scheme scales to up to 4 million PIDs, runtime.
 *
 * NOTE(review): the initializer below sets up an IDR rather than PID-map
 * pages, so the paragraph above may predate the current allocator --
 * confirm before relying on it.
 */
struct pid_namespace init_pid_ns = {
	/* members listed in the order struct pid_namespace declares them */
	.kref		= KREF_INIT(2),
	.idr		= IDR_INIT(init_pid_ns.idr),
	.pid_allocated	= PIDNS_ADDING,
	.child_reaper	= &init_task,
	.level		= 0,
	.user_ns	= &init_user_ns,
	.ns.inum	= PROC_PID_INIT_INO,
#ifdef CONFIG_PID_NS
	/* namespace operations exist only when PID namespaces are configured */
	.ns.ops		= &pidns_operations,
#endif
};
EXPORT_SYMBOL_GPL(init_pid_ns);

3. PID與Namespace的關聯[6]

/*
 * Statically initialised struct pid: one reference, level 0, and a single
 * upid entry that gives it number 0 inside init_pid_ns.
 */
struct pid init_struct_pid = {
	.count = REFCOUNT_INIT(1),
	.level = 0,
	/* three task lists explicitly initialised as empty */
	.tasks = {
		{ .first = NULL }, { .first = NULL }, { .first = NULL },
	},
	.numbers = {
		{
			.nr = 0,		/* the PID value */
			.ns = &init_pid_ns,	/* the namespace it belongs to */
		},
	},
};

3.1 Related Structs[7]

/*
 * Kinds of ID a struct pid can be used as.  The values count up from 0
 * and PIDTYPE_MAX, being last, equals the number of real kinds; it sizes
 * the tasks[] array in struct pid.
 */
enum pid_type {
	PIDTYPE_PID,	/* 0 */
	PIDTYPE_TGID,	/* 1 */
	PIDTYPE_PGID,	/* 2 */
	PIDTYPE_SID,	/* 3 */
	PIDTYPE_MAX,	/* 4: count of the kinds above, not a kind itself */
};

/*
 * struct upid is used to get the id of the struct pid, as it is
 * seen in particular namespace. Later the struct pid is found with
 * find_pid_ns() using the int nr and struct pid_namespace *ns.
 */

struct upid {
	/* the numeric id as seen in namespace 'ns' */
	int nr;
	/* the namespace in which 'nr' is valid */
	struct pid_namespace *ns;
};

/*
 * Kernel-internal representation of a process identifier.  The numeric
 * value it has in a given namespace is stored in the numbers[] array.
 */
struct pid
{
	/* reference count */
	refcount_t count;
	/* index into numbers[] of the entry for the namespace the pid was allocated in */
	unsigned int level;
	spinlock_t lock;
	/* lists of tasks that use this pid */
	struct hlist_head tasks[PIDTYPE_MAX];
	struct hlist_head inodes;
	/* wait queue for pidfd notifications */
	wait_queue_head_t wait_pidfd;
	struct rcu_head rcu;
	/*
	 * Declared with one element, but indexed with 'level' by ns_of_pid()
	 * and is_child_reaper().
	 * NOTE(review): presumably allocated with level + 1 entries, one per
	 * namespace from the root down -- confirm against the allocation site.
	 */
	struct upid numbers[1];
};

4. PID命名空間的創建

兩種方式:

(1)fork/clone:創建子進程時通過 clone 傳入特定的 CLONE_NEW* 選項(例如 CLONE_NEWPID)。

(2)unshare系統調用將進程的某些部分從父進程分離,其中也包括命名空間。注意:unshare(CLONE_NEWPID) 不會改變調用進程自身所在的PID命名空間,只影響其之後創建的子進程(對應 nsproxy 中的 pid_ns_for_children)。

預定義的Fork/Clone的命名空間FLAG:

// https://github.com/torvalds/linux/blob/master/include/uapi/linux/sched.h#L8
/*
 * cloning flags:
 *
 * Only the namespace-related flags are listed; the "..." lines stand for
 * flags omitted from this excerpt.  Each value shown is a distinct single
 * bit, so the flags can be OR-ed together.
 */
...
#define CLONE_NEWNS	0x00020000	/* New mount namespace group */
...
#define CLONE_NEWCGROUP		0x02000000	/* New cgroup namespace */
#define CLONE_NEWUTS		0x04000000	/* New utsname namespace */
#define CLONE_NEWIPC		0x08000000	/* New ipc namespace */
#define CLONE_NEWUSER		0x10000000	/* New user namespace */
#define CLONE_NEWPID		0x20000000	/* New pid namespace */
#define CLONE_NEWNET		0x40000000	/* New network namespace */

[1] https://github.com/torvalds/linux/blob/master/include/linux/sched.h#L922

[2] https://github.com/torvalds/linux/blob/master/include/linux/nsproxy.h#L16

[3] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L487 

[4] https://github.com/torvalds/linux/blob/master/include/linux/pid_namespace.h

[5] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L73

[6] https://github.com/torvalds/linux/blob/master/kernel/pid.c#L56

[7] https://github.com/torvalds/linux/blob/master/include/linux/pid.h#L48

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章