multi-thread & fork

1 要點

fork會共用原來的代碼段,對於數據段和堆棧進行“寫時拷貝”,對於內核全局對象的引用(例如文件句柄)則將其引用計數+1。

因此fork會產生一個和原來進程佔用內存一樣的進程。注意只是和原來進程的內存模型一樣,而不會產生和父進程一樣的多線程進程:fork後的子進程是一個單線程進程,其他線程不會被複制到子進程中(相當於默認終止),這個唯一的線程即是發生fork調用時的那個線程。


2  原型分析

在kernel/fork.c中我們找到了fork的內核實現函數do_fork:

/*
 * do_fork() as quoted from kernel/fork.c.  This is an excerpt: each "..."
 * marks code that the article elided, so the listing is not compilable.
 *
 * The only step shown is the call to copy_process(), which receives
 * clone_flags, stack_start, regs, stack_size and child_tidptr unchanged,
 * plus NULL for its pid argument and a local `trace` value whose
 * computation is in the elided part.  parent_tidptr is not used in the
 * visible portion.
 */
long do_fork(unsigned long clone_flags,
	      unsigned long stack_start,
	      struct pt_regs *regs,
	      unsigned long stack_size,
	      int __user *parent_tidptr,
	      int __user *child_tidptr)
{
	...

	/* Create the new task structure; p is declared in the elided code. */
	p = copy_process(clone_flags, stack_start, regs, stack_size,
			 child_tidptr, NULL, trace);
	/*
	 * Do this prior waking up the new thread - the thread pointer
	 * might get invalid after that point, if the thread exits quickly.
	 */
	...
}
省略的部分爲標誌判斷,和新進程任務調度代碼,其核心工作都是由copy_process完成。

/*
 * copy_process() as quoted from kernel/fork.c.  This is an excerpt: each
 * "..." marks code that the article elided (including the declarations of
 * p and retval and all of the error labels), so it is not compilable.
 *
 * Visible flow: run the security hook, duplicate the current task structure
 * with dup_task_struct(current), do scheduler setup, then call a series of
 * copy_*() helpers with clone_flags and the new task.  When a step fails,
 * control jumps to a bad_fork_cleanup_* label named after the step that
 * completed just before it.  The last visible step, copy_thread(), is the
 * one the article examines next.
 */
static struct task_struct *copy_process(unsigned long clone_flags,
					unsigned long stack_start,
					struct pt_regs *regs,
					unsigned long stack_size,
					int __user *child_tidptr,
					struct pid *pid,
					int trace)
{
...

	retval = security_task_create(clone_flags);
	if (retval)
		goto fork_out;

	/* Preset the error code reported if the duplication below fails. */
	retval = -ENOMEM;
	p = dup_task_struct(current);
	if (!p)
		goto fork_out;

	ftrace_graph_init_task(p);

	rt_mutex_init_task(p);

...

	/* Perform scheduler related setup. Assign this task to a CPU. */
	sched_fork(p);

	retval = perf_event_init_task(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	retval = audit_alloc(p);
	if (retval)
		goto bad_fork_cleanup_policy;
	/* copy all the process information */
	retval = copy_semundo(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_audit;
	retval = copy_files(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_semundo;
	retval = copy_fs(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_files;
	retval = copy_sighand(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_fs;
	retval = copy_signal(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_sighand;
	retval = copy_mm(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_signal;
	retval = copy_namespaces(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_mm;
	retval = copy_io(clone_flags, p);
	if (retval)
		goto bad_fork_cleanup_namespaces;
	/* Highlighted by the article: per-thread state is set up here. */
	retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
...
}
爲了結構清晰,省略了大量代碼。

/*
 * copy_thread - initialise the new task's saved registers and thread state.
 *
 * @clone_flags: clone flags; only CLONE_SETTLS is examined here.
 * @sp:          value stored as the child's saved stack pointer.
 * @unused:      not referenced in this function (copy_process passes
 *               stack_size in this position).
 * @p:           the new task being set up.
 * @regs:        register snapshot that is copied into the child.
 *
 * Returns 0 on success, -ENOMEM if duplicating the I/O bitmap fails, or the
 * error returned by do_set_thread_area() when CLONE_SETTLS is requested.
 */
int copy_thread(unsigned long clone_flags, unsigned long sp,
	unsigned long unused,
	struct task_struct *p, struct pt_regs *regs)
{
	struct pt_regs *childregs;
	struct task_struct *tsk;
	int err;

	/* Start the child from a copy of the caller-supplied registers. */
	childregs = task_pt_regs(p);
	*childregs = *regs;
	/*
	 * Zero the child's ax.  NOTE(review): presumably ax carries the
	 * syscall return value, which is how the child sees fork() return 0
	 * - confirm against the arch ABI.
	 */
	childregs->ax = 0;
	childregs->sp = sp;

	/* Thread stack pointers: sp at the saved regs, sp0 just past them. */
	p->thread.sp = (unsigned long) childregs;
	p->thread.sp0 = (unsigned long) (childregs+1);

	/* The child's saved instruction pointer is ret_from_fork. */
	p->thread.ip = (unsigned long) ret_from_fork;

	task_user_gs(p) = get_user_gs(regs);

	p->fpu_counter = 0;
	p->thread.io_bitmap_ptr = NULL;
	tsk = current;
	/*
	 * This value is never read: the failure path below returns the
	 * literal -ENOMEM, and err is reassigned to 0 before its first use.
	 */
	err = -ENOMEM;

	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	/*
	 * If the current task has TIF_IO_BITMAP set, give the child its own
	 * copy of the I/O bitmap and set the same flag on it.
	 */
	if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
		p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
						IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		set_tsk_thread_flag(p, TIF_IO_BITMAP);
	}

	err = 0;

	/*
	 * Set a new TLS for the child thread?
	 * The user_desc pointer is taken from the child's saved si register.
	 */
	if (clone_flags & CLONE_SETTLS)
		err = do_set_thread_area(p, -1,
			(struct user_desc __user *)childregs->si, 0);

	/* On TLS failure, release the I/O bitmap copy made above. */
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
copy_thread的主要工作是設置線程棧, tls, 寄存器等信息。

從上面可以看出,對於多線程fork,並不會產生一個多線程進程,只會產生一個和多線程進程佔用內存一樣大小的單線程進程,其posix線程id即是父進程中調用fork的那個線程的posix線程id。


3 測試結論

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>

#define __NR_gettid 186
/*
 * Thread start routine for the fork experiment.
 *
 * Prints the calling thread's pthread id and then sleeps for a very long
 * time so the thread stays alive while the process is inspected.
 *
 * @arg: unused; present so the function matches the start-routine type
 *       expected by pthread_create(), void *(*)(void *).
 *
 * Returns NULL if the sleep ever completes.
 */
void *f1(void *arg)
{
	(void)arg;

	/*
	 * pthread_t is an opaque type; cast it explicitly so the argument
	 * matches the %ld conversion (it is an integer type on Linux/glibc).
	 */
	printf("tid:%ld\n", (long)pthread_self());
	sleep(100000000);
	return NULL;
}

/*
 * Experiment driver: start 20 threads running f1, then fork once.
 *
 * The child prints its pid and thread id, sleeps 30 seconds so it can be
 * inspected, and exits.  The parent prints its own ids, waits for the child
 * and then blocks in pause() so it can be inspected as well.
 *
 * Returns 0 from the child on success, EXIT_FAILURE if thread creation or
 * fork fails; on the success path the parent never returns (pause()).
 */
int main(void)
{
	pthread_t pth1[20];
	int i;

	/* Index 0..19: the original loop wrote one element past the array. */
	for (i = 0; i < 20; i++) {
		int err = pthread_create(&pth1[i], NULL, f1, NULL);

		if (err != 0) {
			/* pthread_create returns the error number directly. */
			fprintf(stderr, "pthread_create failed: %d\n", err);
			return EXIT_FAILURE;
		}
		sleep(1);
	}

	printf("create thread finish!!!\n");
	sleep(10);

	/*
	 * Flush before forking so any buffered output is not duplicated in
	 * the child's copy of the stdio buffer.
	 */
	fflush(stdout);

	pid_t ret = fork();
	if (ret < 0) {
		perror("fork");
		return EXIT_FAILURE;
	}

	if (ret == 0) {
		printf("child: parent pid: %d, tid:%ld\n",
		       (int)getpid(), (long)pthread_self());
		sleep(30);
		printf("child exit.\n");
		return 0;
	}

	printf("parent: parent pid: %d, tid:%ld\n",
	       (int)getpid(), (long)pthread_self());

	int status;
	if (waitpid(-1, &status, 0) < 0)
		perror("waitpid");

	/* Keep the parent (and its threads) alive for inspection. */
	pause();
	return 0;
}


由上圖也可以看出,父進程有20個線程,子進程只有一個線程,但他們佔用的內存一樣大。

4 總結

多線程中調用fork並不會導致內存泄露,因爲子進程退出後,所有資源由系統自動銷燬,但是如果子進程進入死循環,則有可能導致資源不足。

另一方面,由於子進程複製了父進程的內存及變量狀態,而持有鎖的其他線程在子進程中並不存在,fork時已被其他線程鎖定的全局鎖、信號量在子進程中將永遠得不到釋放,再次加鎖就會造成重複鎖定(死鎖)的問題。所以儘量不要在多線程中調用fork,如果必須,在調用fork後立即調用exec覆蓋子進程是一個不錯的方案,對於無法立即執行exec的程序,需要通過pthread_atfork()註冊處理函數,在fork前後對各個鎖等資源進行加鎖和釋放。



發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章