Erlang進程監控的實現原理

Erlang支持Monitor和Link兩種監控進程的方式,使得所有進程可以連成一個整體。當某個進程出錯退出時,監控進程會收到該進程退出的消息通知。有了這些特點,使用Erlang建立一個簡單,並且健壯的系統就不是什麼難事。前面有文章分析了兩種方式的用法,這裏分析下monitor和link的實現。

源碼分析

monitor 和link實現有點類似,下面以monitor爲例做說明(erlang版本R16B02)

erlang:monitor/2的實現

/*
 * bif.c -- implementation of the erlang:monitor/2 BIF.
 *
 * BIF_ARG_1 must be the atom 'process'; BIF_ARG_2 is the target, which is
 * dispatched on its term type:
 *   - internal pid          -> local_pid_monitor()
 *   - external pid          -> remote_monitor() by pid, unless its dist
 *                              entry is this node's own, in which case it
 *                              is handled like an internal pid
 *   - atom                  -> local_name_monitor()
 *   - 2-tuple {Name, Node}  -> local_name_monitor() or remote_monitor() by
 *                              name, depending on Node's dist entry
 * Anything else prepares a BADARG error.
 *
 * Returns whatever the selected helper (or the error macro) put in `ret`.
 */
BIF_RETTYPE monitor_2(BIF_ALIST_2)
{
    Eterm target = BIF_ARG_2;
    BIF_RETTYPE ret;
    DistEntry  *dep = NULL; 
    int deref_de = 0; /* set when dep has to be released with erts_deref_dist_entry() below */

    /* Only erlang:monitor(process, Target) is accepted here */
    if (BIF_ARG_1 != am_process) {
	goto error;
    }

    if (is_internal_pid(target)) { // target is an internal pid
    local_pid:
	ret = local_pid_monitor(BIF_P, target); // local-process path
    } else if (is_external_pid(target)) { // target is an external pid
	dep = external_pid_dist_entry(target);
	if (dep == erts_this_dist_entry) // the pid belongs to this node after all: take the local path
	    goto local_pid;
	ret = remote_monitor(BIF_P, BIF_ARG_1, BIF_ARG_2, dep, target, 0); // remote process, by pid (byname = 0)
    } else if (is_atom(target)) { // target is an atom: monitor by name on this node
	ret = local_name_monitor(BIF_P, target);
    } else if (is_tuple(target)) { // target is a tuple: must be {Name, Node}
	Eterm *tp = tuple_val(target);
	Eterm remote_node;
	Eterm name;
	if (arityval(*tp) != 2) 
	    goto error;
	/* tp[0] is the arity word checked above; elements start at tp[1] */
	remote_node = tp[2];
	name = tp[1];
	if (!is_atom(remote_node) || !is_atom(name)) {
	    goto error;
	}
	if (!erts_is_alive && remote_node != am_Noname) {
	    goto error; /* Remote monitor from (this) undistributed node */
	}
	dep = erts_sysname_to_connected_dist_entry(remote_node);
	if (dep == erts_this_dist_entry) {
	    /* Node names this node: plain local name monitor */
	    deref_de = 1;
	    ret = local_name_monitor(BIF_P, name);
	} else {
	    /* dep may be NULL here; it is passed to remote_monitor() either way
	       and only released afterwards when non-NULL */
	    if (dep)
		deref_de = 1;
	    ret = remote_monitor(BIF_P, BIF_ARG_1, BIF_ARG_2, dep, name, 1);
	}
    } else {
    error:
	ERTS_BIF_PREP_ERROR(ret, BIF_P, BADARG);
    }
    /* Only a dist entry obtained from erts_sysname_to_connected_dist_entry()
       is released here; the external_pid_dist_entry() path leaves deref_de
       at 0.  Presumably the former hands out a counted reference -- confirm. */
    if (deref_de) {
	deref_de = 0;
	erts_deref_dist_entry(dep);
    }

    return ret;
}
現在,看下本節點進程的監控處理:
/*
 * bif.c -- set up a monitor from process p on a process of this node.
 *
 * p      : the calling (monitoring) process
 * target : internal pid of the process to monitor
 *
 * Always returns a freshly made reference.  Three outcomes:
 *   - target is p itself: nothing is recorded;
 *   - target cannot be looked up: a monitor message with reason 'noproc'
 *     is queued to p right away;
 *   - otherwise a MON_ORIGIN record is added to p and a MON_TARGET record
 *     to the target, both keyed by the new reference.
 */
static BIF_RETTYPE local_pid_monitor(Process *p, Eterm target)
{
    BIF_RETTYPE ret;
    Eterm mon_ref;
    Process *rp;
    /* Locks p is considered to hold once its LINK lock is taken below
       (presumably MAIN is already held by the running BIF -- confirm) */
    ErtsProcLocks p_locks = ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_LINK;

    mon_ref = erts_make_ref(p);
    ERTS_BIF_PREP_RET(ret, mon_ref);
    if (target == p->common.id) { // a process monitoring itself: return the ref, record nothing
	return ret;
    }

    erts_smp_proc_lock(p, ERTS_PROC_LOCK_LINK); // take p's LINK lock before its monitor data is modified
    rp = erts_pid2proc_opt(p, p_locks,
			   target, ERTS_PROC_LOCK_LINK,  // the target is looked up with its LINK lock as well
			   ERTS_P2P_FLG_ALLOW_OTHER_X);
    if (!rp) {
	/* Lookup failed: drop p's LINK lock, keep p_locks in sync, and report
	   'noproc' to the caller through a monitor message */
	erts_smp_proc_unlock(p, ERTS_PROC_LOCK_LINK);
	p_locks &= ~ERTS_PROC_LOCK_LINK;
	erts_queue_monitor_message(p, &p_locks,
				   mon_ref, am_process, target, am_noproc);
    }
    else {
	ASSERT(rp != p);

	// record on the monitoring process: "I monitor target"
	erts_add_monitor(&ERTS_P_MONITORS(p), MON_ORIGIN, mon_ref, target, NIL); 
	// record on the monitored process: "I am monitored by p"
	erts_add_monitor(&ERTS_P_MONITORS(rp), MON_TARGET, mon_ref, p->common.id, NIL); 

	erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_LINK);
    }

    /* Release everything still recorded in p_locks except MAIN.  p_locks was
       passed by pointer above, so its value here may differ from the
       initial one. */
    erts_smp_proc_unlock(p, p_locks & ~ERTS_PROC_LOCK_MAIN);

    return ret;
}
實際上,這裏只是修改兩個進程的監控數據:監控者記錄一份 MON_ORIGIN,被監控者記錄一份 MON_TARGET,兩者使用同一個引用(mon_ref)。
來看下erts_add_monitor的實現:
/*
 * erl_monitors.c -- insert a new monitor record into a monitor tree.
 *
 * root                 : address of the tree's root pointer
 * type, ref, pid, name : passed unchanged to create_monitor()
 *
 * The tree is ordered by `ref` through CMP_MON_REF.  While descending, the
 * link followed at every level and the direction taken are pushed on two
 * stacks, which are handed to insertion_rotation() once the walk ends.
 * Finding a node with an equal ref is treated as fatal (erl_exit).
 */
void erts_add_monitor(ErtsMonitor **root, Uint type, Eterm ref, Eterm pid, 
		      Eterm name)
{
    ErtsMonitor **slot = root;
    void *path[STACK_NEED];
    int dirs[STACK_NEED+1];
    int npath = 0;
    int ndirs = 0;
    int inserted = 0;

    /* Bottom-of-stack marker for the direction stack */
    dirs[ndirs++] = DIR_END;

    for (;;) {
	ErtsMonitor *node = *slot;
	Sint order;

	if (node == NULL) {
	    /* Empty link reached: this is where the new record goes */
	    inserted = 1;
	    *slot = create_monitor(type,ref,pid,name);
	    break;
	}

	order = CMP_MON_REF(ref,node->ref);
	if (order == 0) {
	    /* Equal key is an error for monitors */
	    erl_exit(1,"Insertion of already present monitor!");
	    break;
	}

	/* Remember the link we came through, then step down one level */
	path[npath++] = slot;
	if (order < 0) {
	    dirs[ndirs++] = DIR_LEFT;
	    slot = &(node->left);
	} else {
	    dirs[ndirs++] = DIR_RIGHT;
	    slot = &(node->right);
	}
    }

    insertion_rotation(dirs, ndirs, path, npath, inserted);
}
再看下這個宏,取的就是進程結構的監控數據。就是說,每個進程都有一份監控數據,記錄了監控和被監控信息,保存爲AVL樹結構。
/* Expands to the `common.u.alive.monitors` field of process P.  Call sites
 * take its address and pass it as the tree root to erts_add_monitor() and
 * erts_remove_monitor(). */
#define ERTS_P_MONITORS(P)	((P)->common.u.alive.monitors)

進程監控的處理

前面分析過,建立監控只是在監控進程和被監控進程上各記錄一份監控數據,那麼進程退出時是怎麼處理的?
/*
 * erl_monitors.c -- visit every record of a monitor tree and hand each one
 * to a callback.
 *
 * root    : root of the tree to sweep (NULL means nothing to do)
 * doit    : callback, invoked as doit(node, context) once per node
 * context : opaque pointer forwarded to doit
 *
 * The walk is iterative and post-order: a node is given to doit only after
 * both of its subtrees have been swept, and the walk does not read the node
 * again after the call.
 */
void erts_sweep_monitors(ErtsMonitor *root, 
			 void (*doit)(ErtsMonitor *, void *),
			 void *context) 
{
    ErtsMonitor *pending[STACK_NEED];
    int dirs[STACK_NEED+1];
    int npending = 0;
    int ndirs = 0;
    int top_dir;

    /* Bottom-of-stack marker: seeing it again means the sweep is complete */
    dirs[ndirs++] = DIR_END;

    for (;;) {
	/* Go down along left links, stacking every node passed */
	while (root != NULL) {
	    dirs[ndirs++] = DIR_LEFT;
	    pending[npending++] = root;
	    root = root->left;
	}

	top_dir = dirs[ndirs-1];
	if (top_dir == DIR_END) {
	    return;
	}
	if (top_dir == DIR_LEFT) {
	    /* Left subtree finished; the right one is still to do */
	    dirs[ndirs-1] = DIR_RIGHT;
	    root = pending[npending-1]->right;
	    continue;
	}

	/* Both subtrees finished: pop the node and run the callback on it */
	--npending;
	(*doit)(pending[npending],context);
	--ndirs;
    }
}

什麼時候會觸發監控回調?

1.進程關閉
2.分佈式端口關閉

以上都會觸發監控回調,這裏以進程關閉做說明:
/*
 * erl_process.c -- process exit handling (abridged excerpt; the `//...`
 * markers stand for code left out, including the declarations of mon, lnk
 * and reason).
 *
 * The visible part takes the exiting process's monitor and link trees and
 * sweeps each of them with a per-record callback.
 */
void erts_continue_exit_process(Process *p)
{
    //...
    mon = ERTS_P_MONITORS(p);
    lnk = ERTS_P_LINKS(p);

    //...

    if (lnk) { // links: sweep them with doit_exit_link
	DeclareTmpHeap(tmp_heap,4,p);
	Eterm exit_tuple;
	Uint exit_tuple_sz;
	Eterm* hp;
	UseTmpHeap(4,p);
	hp = &tmp_heap[0];
	/* {'EXIT', Pid, Reason} built once on the temporary heap and shared
	   by all callback invocations through the context */
	exit_tuple = TUPLE3(hp, am_EXIT, p->common.id, reason);
	exit_tuple_sz = size_object(exit_tuple);
	{
	    ExitLinkContext context = {p, reason, exit_tuple, exit_tuple_sz};
	    erts_sweep_links(lnk, &doit_exit_link, &context);
	}
	UnUseTmpHeap(4,p);
    }

    { // monitors: sweep them with doit_exit_monitor
	ExitMonitorContext context = {reason, p};
	erts_sweep_monitors(mon,&doit_exit_monitor,&context); /* Allocates TmpHeap, but we
								 have none here */
    }

    //...
}
看下以上代碼中回調函數的處理
/*
 * erl_process.c -- per-record callback used when an exiting process's
 * monitor tree is swept.
 *
 * mon       : one monitor record taken from the exiting process
 * vpcontext : ExitMonitorContext*; this function reads its `p` (the exiting
 *             process) and `reason` fields
 *
 * MON_ORIGIN records (the exiting process was monitoring someone): the
 * matching record on the other side is removed, and for remote targets a
 * demonitor signal is sent.
 * MON_TARGET records (someone was monitoring the exiting process): the
 * watcher is notified -- a port monitor is fired, a monitor message is
 * queued to a local process, or a monitor-exit signal is sent to a remote
 * node.
 * In every case `mon` itself is destroyed before returning.
 */
static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext)
{
    ExitMonitorContext *pcontext = vpcontext;
    DistEntry *dep;
    ErtsMonitor *rmon;
    Process *rp;

    if (mon->type == MON_ORIGIN) { // the exiting process monitors someone else: remove the other side's record
	/* We are monitoring someone else, we need to demonitor that one.. */
	if (is_atom(mon->pid)) { /* remote by name */
	    ASSERT(is_node_name_atom(mon->pid));
	    dep = erts_sysname_to_connected_dist_entry(mon->pid);
	    if (dep) { // a connected dist entry exists for that node name
		erts_smp_de_links_lock(dep);
		// first remove the record kept in the DistEntry
		rmon = erts_remove_monitor(&(dep->monitors), mon->ref);
		erts_smp_de_links_unlock(dep);
		if (rmon) { // then ask the remote node to drop its side
		    ErtsDSigData dsd;
		    int code = erts_dsig_prepare(&dsd, dep, NULL,
						 ERTS_DSP_NO_LOCK, 0);
		    if (code == ERTS_DSIG_PREP_CONNECTED) {
			code = erts_dsig_send_demonitor(&dsd,
							rmon->pid,
							mon->name,
							mon->ref,
							1);
			ASSERT(code == ERTS_DSIG_SEND_OK);
		    }
		    erts_destroy_monitor(rmon);
		}
		/* dep came from a by-name lookup, so it is released here */
		erts_deref_dist_entry(dep);
	    }
	} else {
	    ASSERT(is_pid(mon->pid));
	    if (is_internal_pid(mon->pid)) { // monitored process is on this node
		rp = erts_pid2proc(NULL, 0, mon->pid, ERTS_PROC_LOCK_LINK);
		if (!rp) {
		    goto done;
		}
		// remove the MON_TARGET record from the monitored process
		rmon = erts_remove_monitor(&ERTS_P_MONITORS(rp), mon->ref);
		erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_LINK);
		if (rmon == NULL) {
		    goto done;
		}
		erts_destroy_monitor(rmon);
	    } else { /* remote by pid */
		ASSERT(is_external_pid(mon->pid));
		dep = external_pid_dist_entry(mon->pid);
		ASSERT(dep != NULL);
		if (dep) {
		    erts_smp_de_links_lock(dep);
			// first remove the record kept in the DistEntry
		    rmon = erts_remove_monitor(&(dep->monitors), mon->ref);
		    erts_smp_de_links_unlock(dep);
		    if (rmon) {// then ask the remote node to drop its side
			ErtsDSigData dsd;
			int code = erts_dsig_prepare(&dsd, dep, NULL,
						     ERTS_DSP_NO_LOCK, 0);
			if (code == ERTS_DSIG_PREP_CONNECTED) {
			    code = erts_dsig_send_demonitor(&dsd,
							    rmon->pid,
							    mon->pid,
							    mon->ref,
							    1);
			    ASSERT(code == ERTS_DSIG_SEND_OK);
			}
			erts_destroy_monitor(rmon);
		    }
		}
	    }
	}
    } else { // someone monitors the exiting process: notify that watcher
	ASSERT(mon->type == MON_TARGET);
	ASSERT(is_pid(mon->pid) || is_internal_port(mon->pid));
	if (is_internal_port(mon->pid)) { // watcher is a port on this node
	    Port *prt = erts_id2port(mon->pid);
	    if (prt == NULL) {
		goto done;
	    }
	    erts_fire_port_monitor(prt, mon->ref);
	    erts_port_release(prt); 
	} else if (is_internal_pid(mon->pid)) { // watcher is a process on this node
	    Eterm watched;
	    DeclareTmpHeapNoproc(lhp,3);
	    ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK
				      | ERTS_PROC_LOCKS_MSG_SEND);
	    rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks);
	    if (rp == NULL) {
		goto done;
	    }
	    UseTmpHeapNoproc(3);
		// first remove the watcher's own MON_ORIGIN record
	    rmon = erts_remove_monitor(&ERTS_P_MONITORS(rp), mon->ref);
	    if (rmon) {
		erts_destroy_monitor(rmon);
		/* Monitored by name: report {Name, ThisNode}; otherwise the pid */
		watched = (is_atom(mon->name)
			   ? TUPLE2(lhp, mon->name, 
				    erts_this_dist_entry->sysname)
			   : pcontext->p->common.id);
		// then queue the exit notification to the watcher
		// (presumably delivered as {'DOWN',Ref,process,Watched,Reason} -- confirm in erts_queue_monitor_message)
		erts_queue_monitor_message(rp, &rp_locks, mon->ref, am_process, 
					   watched, pcontext->reason);
	    }
	    UnUseTmpHeapNoproc(3);
	    /* else: demonitor while we exited, i.e. do nothing... */
	    erts_smp_proc_unlock(rp, rp_locks);
	} else { // watcher is a process on a remote node
	    ASSERT(is_external_pid(mon->pid));    
	    dep = external_pid_dist_entry(mon->pid);
	    ASSERT(dep != NULL);
	    if (dep) {
		erts_smp_de_links_lock(dep);
		// first remove the record kept in the DistEntry
		rmon = erts_remove_monitor(&(dep->monitors), mon->ref);
		erts_smp_de_links_unlock(dep);
		if (rmon) {// then send the exit notification to the remote node
		    ErtsDSigData dsd;
		    int code = erts_dsig_prepare(&dsd, dep, NULL,
						 ERTS_DSP_NO_LOCK, 0);
		    if (code == ERTS_DSIG_PREP_CONNECTED) {
			code = erts_dsig_send_m_exit(&dsd,
						     mon->pid,
						     (rmon->name != NIL
						      ? rmon->name
						      : rmon->pid),
						     mon->ref,
						     pcontext->reason);
			ASSERT(code == ERTS_DSIG_SEND_OK);
		    }
		    erts_destroy_monitor(rmon);
		}
	    }
	}
    }
 done:
    /* As the monitors are previously removed from the process, 
       distribution operations will not cause monitors to disappear,
       we can safely delete it. */
       
    erts_destroy_monitor(mon);
}


跨節點進程監控的實現

前面說到本節點的處理,那跨節點進程監控是怎麼實現的,有什麼區別?
/*
 * bif.c -- set up a monitor on a process that lives on another node.
 *
 * p        : the calling (monitoring) process
 * bifarg1,
 * bifarg2  : the original BIF arguments, re-used when the call has to trap
 * dep      : dist entry of the remote node (the caller may pass NULL)
 * target   : remote pid, or the registered name when byname is non-zero
 * byname   : non-zero for a {Name, Node} monitor
 *
 * Depending on the result of erts_dsig_prepare() the call either traps to
 * dmonitor_p_trap, fails with BADARG (the connection lacks the needed
 * DFLAG_DIST_MONITOR* flag), or records the monitor locally and sends a
 * monitor signal to the remote node, returning the new reference.
 */
static BIF_RETTYPE remote_monitor(Process *p, Eterm bifarg1, Eterm bifarg2,
	       DistEntry *dep, Eterm target, int byname)
{
    ErtsDSigData dsd;
    BIF_RETTYPE ret;
    int code;

    erts_smp_proc_lock(p, ERTS_PROC_LOCK_LINK);
    code = erts_dsig_prepare(&dsd, dep, p, ERTS_DSP_RLOCK, 0); // prepare to send a distribution signal; the result selects the branch below
    switch (code) {
    case ERTS_DSIG_PREP_NOT_ALIVE: // falls through: handled by the trap as well
	/* Let the dmonitor_p trap handle it */
    case ERTS_DSIG_PREP_NOT_CONNECTED: // not connected: handled by the trap
	erts_smp_proc_unlock(p, ERTS_PROC_LOCK_LINK);
	ERTS_BIF_PREP_TRAP2(ret, dmonitor_p_trap, p, bifarg1, bifarg2); // trap with the original arguments (presumably into erlang:dmonitor_p/2 -- confirm)
	break;
    case ERTS_DSIG_PREP_CONNECTED: // connected: the signal can be sent
	/* The connection must advertise monitor support, and name-monitor
	   support as well for a by-name request */
	if (!(dep->flags & DFLAG_DIST_MONITOR)
	    || (byname && !(dep->flags & DFLAG_DIST_MONITOR_NAME))) {
	    erts_smp_de_runlock(dep);
	    erts_smp_proc_unlock(p, ERTS_PROC_LOCK_LINK);
	    ERTS_BIF_PREP_ERROR(ret, p, BADARG);
	}
	else {
	    Eterm p_trgt, p_name, d_name, mon_ref;

	    mon_ref = erts_make_ref(p);

	    /* By name: the process-side record stores the node name as its
	       pid field and the registered name as its name.  By pid: it
	       stores the remote pid and no name. */
	    if (byname) {
		p_trgt = dep->sysname;
		p_name = target;
		d_name = target;
	    }
	    else {
		p_trgt = target;
		p_name = NIL;
		d_name = NIL;
	    }

	    erts_smp_de_links_lock(dep);
		// record on the monitoring process (MON_ORIGIN)
	    erts_add_monitor(&ERTS_P_MONITORS(p), MON_ORIGIN, mon_ref, p_trgt,
			     p_name);
		// matching record on the DistEntry (MON_TARGET, holds the watcher's pid)
	    erts_add_monitor(&(dep->monitors), MON_TARGET, mon_ref, p->common.id,
			     d_name);

	    erts_smp_de_links_unlock(dep);
	    erts_smp_de_runlock(dep);
	    erts_smp_proc_unlock(p, ERTS_PROC_LOCK_LINK);

		// send the monitor signal to the remote node, after all locks above are released
	    code = erts_dsig_send_monitor(&dsd, p->common.id, target, mon_ref);
	    if (code == ERTS_DSIG_SEND_YIELD)
		ERTS_BIF_PREP_YIELD_RETURN(ret, p, mon_ref);
	    else
		ERTS_BIF_PREP_RET(ret, mon_ref);
	}
	break;
    default: // any other prepare result is reported as an internal error
	/* NOTE(review): unlike the other arms, this one returns without
	   releasing the ERTS_PROC_LOCK_LINK taken at the top -- confirm the
	   arm is unreachable with the arguments used here. */
	ASSERT(! "Invalid dsig prepare result");
	ERTS_BIF_PREP_ERROR(ret, p, EXC_INTERNAL_ERROR);
	break;
    }

    return ret;
}
接着,看下發消息給遠程節點的處理。
/*
 * dist.c -- send a DOP_MONITOR_P control message to a remote node.
 *
 * dsdp    : prepared distribution-signal data, handed on to dsig_send()
 * watcher : second tuple element (the monitoring side)
 * watched : third tuple element (the remote pid or registered name)
 * ref     : fourth tuple element (the monitor reference)
 *
 * Returns the result of dsig_send().
 */
int erts_dsig_send_monitor(ErtsDSigData *dsdp, Eterm watcher, Eterm watched,
		       Eterm ref)
{
    DeclareTmpHeapNoproc(ctl_heap,5);
    Eterm ctl_msg;
    int send_res;

    UseTmpHeapNoproc(5);

    /* Build {DOP_MONITOR_P, Watcher, Watched, Ref} on the temporary heap
       and send it as a control message */
    ctl_msg = TUPLE4(&ctl_heap[0],
		     make_small(DOP_MONITOR_P),
		     watcher, watched, ref);
    send_res = dsig_send(dsdp, ctl_msg, THE_NON_VALUE, 0);

    UnUseTmpHeapNoproc(5);
    return send_res;
}
看下遠程接收到這個消息後的處理。
/*
 * dist.c -- handling of a message received from another node (abridged
 * excerpt; the `// ...` markers stand for code left out, including the
 * declarations of type, tuple, tuple_arity, watcher, watched, ref and rp).
 *
 * Only the DOP_MONITOR_P case is shown: a process on the sending node asks
 * to monitor a local process, identified by pid or by registered name.
 */
int erts_net_message(Port *prt,
		     DistEntry *dep,
		     byte *hbuf,
		     ErlDrvSizeT hlen,
		     byte *buf,
		     ErlDrvSizeT len)
{
    // ...
    switch (type = unsigned_val(tuple[1])) {
    // ...
	
	// handles {DOP_MONITOR_P, Remote pid, local pid or name, ref}
    case DOP_MONITOR_P: {
	/* A remote process wants to monitor us, we get:
	   {DOP_MONITOR_P, Remote pid, local pid or name, ref} */
	Eterm name;
	
	if (tuple_arity != 4) {
	    goto invalid_message;
	}

	watcher = tuple[2];
	watched = tuple[3];  /* local proc to monitor */
	ref     = tuple[4];

	if (is_not_ref(ref)) {
	    goto invalid_message;
	}

	/* Look the local process up with its LINK lock, by registered name
	   or by pid */
	if (is_atom(watched)) {
	    name = watched;
	    rp = erts_whereis_process(NULL, 0,
				      watched, ERTS_PROC_LOCK_LINK,
				      ERTS_P2P_FLG_ALLOW_OTHER_X);
	}
	else {
	    name = NIL;
	    rp = erts_pid2proc_opt(NULL, 0,
				   watched, ERTS_PROC_LOCK_LINK,
				   ERTS_P2P_FLG_ALLOW_OTHER_X);
	}

	if (!rp) { // no such local process: answer with a monitor-exit signal, reason 'noproc'
	    ErtsDSigData dsd;
	    int code;
	    code = erts_dsig_prepare(&dsd, dep, NULL, ERTS_DSP_NO_LOCK, 0);
	    if (code == ERTS_DSIG_PREP_CONNECTED) {
		code = erts_dsig_send_m_exit(&dsd, watcher, watched, ref,
					     am_noproc);
		ASSERT(code == ERTS_DSIG_SEND_OK);
	    }
	}
	else {
	    /* For a by-name request the records store the resolved pid; the
	       name is kept separately in `name` */
	    if (is_atom(watched))
		watched = rp->common.id;
	    erts_smp_de_links_lock(dep);
		// record on the DistEntry (MON_ORIGIN, holds the local pid)
	    erts_add_monitor(&(dep->monitors), MON_ORIGIN, ref, watched, name);
		// record on the monitored process (MON_TARGET, holds the remote watcher)
	    erts_add_monitor(&ERTS_P_MONITORS(rp), MON_TARGET, ref, watcher, name);
	    erts_smp_de_links_unlock(dep);
	    erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_LINK);
	}

	break;
    }

    //...
}

對比本節點和跨節點的處理
本節點進程監控處理如下:(進程X監控進程Y)
/**********************************************************************
 *        Process X          Process Y
 *       +-------------+    +-------------+
 * Type: | MON_ORIGIN  |    | MON_TARGET  |
 *       +-------------+    +-------------+
 * Pid:  | Pid(Y)      |    | Pid(X)      |
 *       +-------------+    +-------------+
 **********************************************************************/
跨節點的處理:(節點A的進程X按註冊名監控節點B的進程Y;若是按Pid監控,進程X記錄的Pid欄位是Pid(Y)而不是節點名)
/**********************************************************************
 *                    Node A              |           Node B
 *       ---------------------------------+----------------------------------
 *        Process X (@A)   Distentry @A       Distentry @B     Process Y (@B)
 *                         for node B         for node A       
 *       +-------------+  +-------------+    +-------------+  +-------------+
 * Type: | MON_ORIGIN  |  | MON_TARGET  |    | MON_ORIGIN  |  | MON_TARGET  |
 *       +-------------+  +-------------+    +-------------+  +-------------+
 * Pid:  | Atom(node B)|  | Pid(X)      |    | Pid(Y)      |  | Pid(X)      |
 *       +-------------+  +-------------+    +-------------+  +-------------+
 **********************************************************************/
對比就是多了一步DistEntry的處理,這是由跨節點網絡的不穩定性決定的。遠程進程出現異常,可能是進程掛了,也有可能是節點連接出問題。當遠程節點出現異常,就要觸發這個節點關聯進程的處理。

小結

從上面的分析可以瞭解,進程監控實際只是在監控進程和被監控進程上各記錄一份監控數據(跨節點時兩端的DistEntry也各記錄一份),然後在進程退出時遍歷這些數據:清理自己監控別人的記錄,並通知所有監控自己的進程。

參考:http://blog.csdn.net/mycwq/article/details/46961489
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章