之前一直以为master进程负责监听请求,当请求到来后,分发给worker进程进行处理。
最近发现好像并不是这么回事。先看fpm_main.c文件中的main()方法,里面有这么一段:
if (0 > fpm_init(argc, argv, fpm_config ? fpm_config : CGIG(fpm_config), fpm_prefix, fpm_pid, test_conf, php_allow_to_run_as_root, force_daemon, force_stderr)) {
if (fpm_globals.send_config_pipe[1]) {
int writeval = 0;
zlog(ZLOG_DEBUG, "Sending \"0\" (error) to parent via fd=%d", fpm_globals.send_config_pipe[1]);
zend_quiet_write(fpm_globals.send_config_pipe[1], &writeval, sizeof(writeval));
close(fpm_globals.send_config_pipe[1]);
}
return FPM_EXIT_CONFIG;
}
...
fcgi_fd = fpm_run(&max_requests);
...
request = fpm_init_request(fcgi_fd);
其中fpm_init()方法主要负责解析php-fpm.conf文件,获取进程相关参数、初始化配置等;之后是fpm_run()方法,代码如下:
/*
 * Create the initial children of every worker pool, then diverge by role.
 *
 * children: return listening socket (fpm_globals.listening_socket), after
 *           storing fpm_globals.max_requests into *max_requests
 * parent: never return
 */
int fpm_run(int *max_requests) /* {{{ */
{
struct fpm_worker_pool_s *wp;
/* create initial children in all pools */
for (wp = fpm_worker_all_pools; wp; wp = wp->next) {
int is_parent;
is_parent = fpm_children_create_initial(wp);
/* zero means we are running in a child: skip the remaining pools and the parent-only event loop */
if (!is_parent) {
goto run_child;
}
/* handle error */
/* a result of 2 sets the TERMINATING state and then runs the event loop with argument 1 */
/* NOTE(review): the meaning of the event-loop argument (1 here, 0 below) is not visible in this excerpt; confirm in fpm_events.c */
if (is_parent == 2) {
fpm_pctl(FPM_PCTL_STATE_TERMINATING, FPM_PCTL_ACTION_SET);
fpm_event_loop(1);
}
}
/* run event loop forever */
fpm_event_loop(0);
run_child: /* only workers reach this point */
fpm_cleanups_run(FPM_CLEANUP_CHILD);
/* hand the request limit and the listening socket back to the caller */
*max_requests = fpm_globals.max_requests;
return fpm_globals.listening_socket;
}
从注释可以知道,该函数在子进程(worker)中返回监听的套接字,而在主进程(master)中永不返回。再看函数体:php-fpm可以有多个pool,每个pool下有多个worker进程;循环中对每个pool调用fpm_children_create_initial()函数,它最终会调用fpm_children_make()函数,并在其中执行fork()生成worker进程。生成子进程后,父进程调用fpm_event_loop()函数进入无限循环状态,而子进程则返回监听套接字,随后在该套接字上调用accept()等待请求(见后文的strace输出)。
实验如下,先看服务器运行的php-fpm进程:
[root@localhost src]# ps -ef | grep php
root 18204 1 0 Jun07 ? 00:00:15 php-fpm: master process (/usr/local/etc/php-fpm.conf)
www 18205 18204 0 Jun07 ? 00:00:04 php-fpm: pool www
www 18206 18204 0 Jun07 ? 00:00:04 php-fpm: pool www
以上可以看到master进程pid为18204,worker进程有两个,pid分别为18205和18206。通过strace跟踪master进程:
[root@localhost fpm]# strace -t -p 18204
strace: Process 18204 attached
15:14:21 epoll_wait(9, [], 1, 484) = 0
15:14:22 getsockopt(8, SOL_TCP, TCP_INFO, "\n\0\0\0\0\0\0\0@B\17\0\0\0\0\0\30\2\0\0\0\0\0\0\0\0\0\0\200\0\0\0"..., [104]) = 0
15:14:22 epoll_wait(9, [], 1, 1000) = 0
15:14:23 getsockopt(8, SOL_TCP, TCP_INFO, "\n\0\0\0\0\0\0\0@B\17\0\0\0\0\0\30\2\0\0\0\0\0\0\0\0\0\0\200\0\0\0"..., [104]) = 0
15:14:23 epoll_wait(9, [], 1, 1000) = 0
15:14:24 getsockopt(8, SOL_TCP, TCP_INFO, "\n\0\0\0\0\0\0\0@B\17\0\0\0\0\0\30\2\0\0\0\0\0\0\0\0\0\0\200\0\0\0"..., [104]) = 0
15:14:24 epoll_wait(9, [], 1, 1000) = 0
15:14:25 getsockopt(8, SOL_TCP, TCP_INFO, "\n\0\0\0\0\0\0\0@B\17\0\0\0\0\0\30\2\0\0\0\0\0\0\0\0\0\0\200\0\0\0"..., [104]) = 0
可以发现master进程在不断循环调用epoll_wait()和getsockopt()这两个系统调用,用来处理异步信号事件和定时器事件。假如这时kill掉一个worker进程:
kill -9 18205
再看跟踪的master进程会有如下变化:
15:14:39 epoll_wait(9, 0x2193460, 1, 1000) = -1 EINTR (Interrupted system call)
15:14:39 --- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_KILLED, si_pid=18205, si_uid=1001, si_status=SIGKILL, si_utime=396, si_stime=41} ---
15:14:39 write(7, "C", 1) = 1
15:14:39 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
15:14:39 epoll_wait(9, [{EPOLLIN, {u32=17865216, u64=17865216}}], 1, 657) = 1
15:14:39 read(5, "C", 1) = 1
15:14:39 wait4(-1, [{WIFSIGNALED(s) && WTERMSIG(s) == SIGKILL}], WNOHANG|WSTOPPED, NULL) = 18205
15:14:39 write(3, "[13-Jun-2020 15:14:39] WARNING: "..., 123) = 123
15:14:39 clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fd55147ba10) = 23221
15:14:39 write(3, "[13-Jun-2020 15:14:39] NOTICE: ["..., 62) = 62
15:14:39 wait4(-1, 0x7ffd8e55a8fc, WNOHANG|WSTOPPED, NULL) = 0
15:14:39 read(5, 0x7ffd8e55a9df, 1) = -1 EAGAIN (Resource temporarily unavailable)
可以看到,epoll_wait()在执行过程中被SIGCHLD信号中断,master随后通过wait4()回收已退出的子进程,最终调用clone()重新生成一个worker进程,新子进程的pid为23221。处理完后,master继续循环调用epoll_wait()和getsockopt()这两个系统调用。这时再查看php-fpm进程进行验证:
[root@localhost src]# ps -ef | grep php
root 18204 1 0 Jun07 ? 00:00:15 php-fpm: master process (/usr/local/etc/php-fpm.conf)
www 18206 18204 0 Jun07 ? 00:00:04 php-fpm: pool www
www 23221 18204 0 15:14 ? 00:00:00 php-fpm: pool www
接下来再跟踪worker进程,看看在干嘛:
[root@localhost fpm]# strace -t -p 18206
strace: Process 18206 attached
16:04:32 accept(0,
发现worker进程阻塞在accept()方法这了。这时我发起一个请求,会看到如下变化:
[root@localhost fpm]# strace -t -p 18206
strace: Process 18206 attached
16:04:32 accept(0, {sa_family=AF_INET, sin_port=htons(32934), sin_addr=inet_addr("127.0.0.1")}, [16]) = 3
16:07:02 poll([{fd=3, events=POLLIN}], 1, 5000) = 1 ([{fd=3, revents=POLLIN}])
16:07:02 times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 807001318
16:07:02 read(3, "\1\1\0\1\0\10\0\0", 8) = 8
16:07:02 read(3, "\0\1\0\0\0\0\0\0", 8) = 8
16:07:02 read(3, "\1\4\0\1\3w\1\0", 8) = 8
16:07:02 read(3, "\17\30SCRIPT_FILENAME/home/www/blog/"..., 888) = 888
16:07:02 read(3, "\1\4\0\1\0\0\0\0", 8) = 8
16:07:02 lstat("/home/www/blog/index.php", {st_mode=S_IFREG|0755, st_size=382, ...}) = 0
16:07:02 lstat("/home/www/blog", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
16:07:02 lstat("/home/www", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
16:07:02 lstat("/home", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
16:07:02 stat("/home/www/blog/.user.ini", 0x7ffcfb49b160) = -1 ENOENT (No such file or directory)
16:07:02 rt_sigaction(SIGPROF, NULL, {SIG_DFL, [], 0}, 8) = 0
...
16:07:02 rt_sigprocmask(SIG_UNBLOCK, [PROF], NULL, 8) = 0
16:07:02 mmap(NULL, 65536, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3fcbea7000
16:07:02 open("/home/www/blog/index.php", O_RDONLY) = 5
16:07:02 fstat(5, {st_mode=S_IFREG|0755, st_size=382, ...}) = 0
16:07:02 fstat(5, {st_mode=S_IFREG|0755, st_size=382, ...}) = 0
16:07:02 fstat(5, {st_mode=S_IFREG|0755, st_size=382, ...}) = 0
16:07:02 mmap(NULL, 382, PROT_READ, MAP_SHARED, 5, 0) = 0x7f3fcbf35000
16:07:02 getcwd("/usr/local/php/bin", 4095) = 19
16:07:02 chdir("/home/www/blog") = 0
16:07:02 munmap(0x7f3fcbf35000, 382) = 0
16:07:02 close(5) = 0
16:07:02 chdir("/usr/local/php/bin") = 0
16:07:02 times({tms_utime=0, tms_stime=0, tms_cutime=0, tms_cstime=0}) = 807001319
16:07:02 stat("/etc/localtime", {st_mode=S_IFREG|0644, st_size=528, ...}) = 0
16:07:02 write(4, "127.0.0.1 - 13/Jun/2020:16:07:0"..., 61) = 61
16:07:02 setitimer(ITIMER_PROF, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
16:07:02 write(3, "\1\6\0\1\0N\2\0X-Powered-By: PHP/7.2.4\r"..., 104) = 104
16:07:02 shutdown(3, SHUT_WR) = 0
16:07:02 recvfrom(3, "\1\5\0\1\0\0\0\0", 8, 0, NULL, NULL) = 8
16:07:02 recvfrom(3, "", 8, 0, NULL, NULL) = 0
16:07:02 close(3) = 0
16:07:02 setitimer(ITIMER_PROF, {it_interval={0, 0}, it_value={0, 0}}, NULL) = 0
16:07:02 accept(0,
由上可以看出,是worker进程监听socket,当请求过来时,worker进程对其进行处理,处理完后又重新调用accept()方法陷入阻塞状态。