Linux中的通知鏈技術

在Linux 內核中，各個子系統之間有很強的相互關係，某些子系統可能對其它子系統產生的事件感興趣。爲了讓某個子系統在發生某個事件時通知感興趣的子系統， Linux 內核引入了通知鏈技術。通知鏈只能夠在內核的子系統之間使用，而不能夠在內核和用戶空間進行事件的通知。

1 數據結構：

通知鏈有四種類型：

· 原子通知鏈（ Atomic notifier chains ）：通知鏈元素的回調函數（當事件發生時要執行的函數）在中斷上下文或原子上下文中運行，不允許阻塞。對應的鏈表頭結構：

/*
 * Head of an atomic notifier chain. Callbacks on this kind of chain are
 * not allowed to block.
 */
struct atomic_notifier_head {

spinlock_t lock;	/* NOTE(review): presumably serialises updates to the chain - confirm */

struct notifier_block *head;	/* first notifier_block on the chain */

};

· 可阻塞通知鏈（ Blocking notifier chains ）：通知鏈元素的回調函數在進程上下文中運行，允許阻塞。對應的鏈表頭：

/*
 * Head of a blocking notifier chain. Callbacks on this kind of chain run
 * in process context and are allowed to block.
 */
struct blocking_notifier_head {

struct rw_semaphore rwsem;	/* NOTE(review): presumably protects the chain - confirm */

struct notifier_block *head;	/* first notifier_block on the chain */

};

· 原始通知鏈（ Raw notifier chains ）：對通知鏈元素的回調函數沒有任何限制，所有鎖和保護機制都由調用者維護。對應的鏈表頭：

/*
 * Head of a raw notifier chain. It carries no lock of its own: all locking
 * and protection is left to the caller.
 */
struct raw_notifier_head {

struct notifier_block *head;	/* first notifier_block on the chain */

};

· SRCU 通知鏈（ SRCU notifier chains ）：可阻塞通知鏈的一種變體。對應的鏈表頭：

/*
 * Head of an SRCU notifier chain, a variant of the blocking notifier chain.
 */
struct srcu_notifier_head {

struct mutex mutex;	/* NOTE(review): presumably serialises chain updates - confirm */

struct srcu_struct srcu;	/* NOTE(review): presumably the SRCU state used by readers - confirm */

struct notifier_block *head;	/* first notifier_block on the chain */

};

通知鏈的核心結構：

/*
 * One entry on a notifier chain: a callback plus its position in the list.
 */
struct notifier_block {

/* Callback; notifier_call_chain() invokes it as notifier_call(nb, val, v). */
int (*notifier_call)(struct notifier_block *, unsigned long, void *);

struct notifier_block *next;	/* next entry on the same chain */

int priority;	/* notifier_chain_register() places higher values nearer the head */

};

其中notifier_call 是通知鏈要執行的函數指針， next 用來連接其它的通知結構， priority 是這個通知的優先級，同一條鏈上的 notifier_block{} 是按優先級從高到低排列的。內核代碼中一般把通知鏈命名爲 xxx_chain, xxx_notifier_chain 這種形式的變量名。

2 運作機制

通知鏈的運作機制包括兩個角色：

· 被通知者：對某一事件感興趣一方。定義了當事件發生時，相應的處理函數，即回調函數。但需要事先將其註冊到通知鏈中（被通知者註冊的動作就是在通知鏈中增加一項）。

· 通知者：事件的通知者。當檢測到某事件，或者本身產生事件時，通知所有對該事件感興趣的一方事件發生。他定義了一個通知鏈，其中保存了每一個被通知者對事件的處理函數（回調函數）。通知這個過程實際上就是遍歷通知鏈中的每一項，然後調用相應的事件處理函數。

包括以下過程：

· 通知者定義通知鏈

· 被通知者向通知鏈中註冊回調函數

· 當事件發生時，通知者發出通知（執行通知鏈中所有元素的回調函數）

被通知者調用 notifier_chain_register 函數註冊回調函數，該函數按照優先級將回調函數加入到通知鏈中：

/*
 * Insert @n into the chain whose head pointer is @nl, keeping the chain
 * sorted by descending priority. @n goes in front of the first entry whose
 * priority is strictly lower, so entries of equal priority stay in
 * registration order. Always returns 0.
 */
static int notifier_chain_register(struct notifier_block **nl,
		struct notifier_block *n)
{
	/* Advance the link pointer until it addresses the insertion slot. */
	for (; *nl != NULL; nl = &(*nl)->next) {
		if ((*nl)->priority < n->priority)
			break;
	}

	/* Hook the tail behind @n first, then publish @n into the chain. */
	n->next = *nl;
	rcu_assign_pointer(*nl, n);

	return 0;
}

注銷回調函數則使用 notifier_chain_unregister 函數，即將回調函數從通知鏈中刪除：

/*
 * Unlink @n from the chain whose head pointer is @nl.
 *
 * Returns 0 once @n has been found and unlinked, or -ENOENT when the walk
 * reaches the end of the chain without meeting @n.
 */
static int notifier_chain_unregister(struct notifier_block **nl,
		struct notifier_block *n)
{
	for (; *nl != NULL; nl = &(*nl)->next) {
		if (*nl != n)
			continue;

		/* Make the link that pointed at @n skip over it. */
		rcu_assign_pointer(*nl, n->next);
		return 0;
	}

	return -ENOENT;
}

通知者調用 notifier_call_chain 函數通知事件的到達，這個函數會遍歷通知鏈中所有的元素，然後依次調用每一個的回調函數（即完成通知動作）：

/**

* notifier_call_chain - Informs the registered notifiers about an event.

* @nl: Pointer to head of the blocking notifier chain

* @val: Value passed unmodified to notifier function

* @v: Pointer passed unmodified to notifier function

* @nr_to_call: Number of notifier functions to be called. Don't care

* value of this parameter is -1.

* @nr_calls: Records the number of notifications sent. Don't care

* value of this field is NULL.

* @returns: notifier_call_chain returns the value returned by the

* last notifier function called.

static int __kprobes notifier_call_chain(struct notifier_block **nl,

unsigned long val, void *v,

int nr_to_call, int *nr_calls)

{

int ret = NOTIFY_DONE;

struct notifier_block *nb, *next_nb;

nb = rcu_dereference(*nl);

while (nb && nr_to_call) {

next_nb = rcu_dereference(nb->next);

#ifdef CONFIG_DEBUG_NOTIFIERS

if (unlikely(!func_ptr_is_kernel_text(nb->notifier_call))) {

WARN(1, "Invalid notifier called!");

nb = next_nb;

continue;

}

#endif

ret = nb->notifier_call(nb, val, v);

if (nr_calls)

(*nr_calls)++;

if ((ret & NOTIFY_STOP_MASK) == NOTIFY_STOP_MASK)

break;

nb = next_nb;

nr_to_call--;

}

return ret;

}

參數nl 是通知鏈的頭部， val 表示事件類型， v 用來指向通知鏈上的函數執行時需要用到的參數，一般不同的通知鏈，參數類型也不一樣，例如當通知一個網卡被註冊時， v 就指向 net_device 結構， nr_to_call 表示準備最多通知幾個， -1 表示整條鏈都通知， nr_calls 非空的話，返回通知了多少個。

每個被執行的notifier_block 回調函數的返回值可能取值爲以下幾個：

·NOTIFY_DONE ：表示對相關的事件類型不關心

·NOTIFY_OK ：順利執行

·NOTIFY_BAD ：執行有錯

·NOTIFY_STOP ：停止執行後面的回調函數

·NOTIFY_STOP_MASK ：停止執行的掩碼

notifier_call_chain() 把最後一個被調用的回調函數的返回值作爲它的返回值；如果沒有任何回調函數被調用，則返回初始值 NOTIFY_DONE 。

3 內核網絡代碼中對通知鏈的使用

內核網絡部分使用的一些通知鏈：

·inetaddr_chain ： ipv4 地址變動時的通知鏈

·netdev_chain ：網絡設備狀態變動時的通知鏈

網絡代碼中對通知鏈的調用一般都有一個包裝函數，例如對netdev_chain 的註冊就是由 register_netdevice_notifier() 函數完成的：

/*
 * register_netdevice_notifier - add @nb to netdev_chain and replay device state
 * @nb: notifier block to register
 *
 * Registers @nb on netdev_chain under the RTNL lock. Unless dev_boot_phase
 * is set, @nb's callback is then invoked directly for every device in
 * every net: once with NETDEV_REGISTER, and once more with NETDEV_UP when
 * the device has IFF_UP set.
 *
 * If a NETDEV_REGISTER replay fails, the devices replayed so far receive
 * NETDEV_GOING_DOWN/NETDEV_DOWN (when up) and NETDEV_UNREGISTER, and @nb is
 * removed from the chain again.
 *
 * Returns 0 on success, the error from raw_notifier_chain_register(), or
 * the errno decoded from the failing callback.
 */
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			/*
			 * Stop at the device whose replay failed. A plain
			 * break would leave only the inner loop, and devices
			 * in later nets would get teardown events without
			 * ever having seen NETDEV_REGISTER.
			 */
			if (dev == last)
				goto outroll;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

outroll:
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}

這個函數主要完成兩件事情：

1）把參數struct notifier_block *nb 註冊到 netdev_chain 通知鏈上；

2）對系統中所有已經註冊的網絡設備，用新增的這個回調函數補發一次 NETDEV_REGISTER 事件；對其中已經激活（設置了 IFF_UP ）的設備，再補發一次 NETDEV_UP 事件，使新註冊的被通知者得到完整的設備狀態。如果處於啓動階段（ dev_boot_phase 非零），則跳過這一步。

dev_boot_phase定義如下，表示在啓動階段。

/* Starts at 1; while non-zero, register_netdevice_notifier() skips replaying device events. */
static int dev_boot_phase = 1;

例如，在啓動階段的網絡模塊初始化過程中，有一個調用過程inet_init()-->ip_init()-->ip_rt_init()-->devinet_init() ，會向 netdev_chain 通知鏈註冊一個名爲 ip_netdev_notifier 的通知塊（ notifier_block ）：

register_netdevice_notifier(&ip_netdev_notifier);

而ip_netdev_notifier 定義爲：

/*
 * Notifier block handed to register_netdevice_notifier(); its callback is
 * inetdev_event(). No other field is set explicitly here.
 */
static struct notifier_block ip_netdev_notifier = {

.notifier_call = inetdev_event,

};

inetdev_event()實現爲：

/*
 * inetdev_event - notifier callback reacting to net_device events
 * @this:  notifier block this callback was invoked through (unused)
 * @event: NETDEV_* event code
 * @ptr:   the struct net_device the event refers to
 *
 * Must be called with the RTNL lock held (checked by ASSERT_RTNL()).
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER,
 * or on NETDEV_CHANGEMTU if the new MTU is valid, and nothing else is done.
 * Otherwise the event is dispatched through the switch below.
 *
 * Returns NOTIFY_DONE, except that a failed inetdev_init() during
 * NETDEV_REGISTER returns notifier_from_errno(-ENOMEM).
 */
static int inetdev_event(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *dev = ptr;
	struct in_device *in_dev = __in_dev_get_rtnl(dev);

	ASSERT_RTNL();

	if (!in_dev) {
		if (event == NETDEV_REGISTER) {
			in_dev = inetdev_init(dev);
			if (!in_dev)
				return notifier_from_errno(-ENOMEM);
			if (dev->flags & IFF_LOOPBACK) {
				IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
				IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
			}
		} else if (event == NETDEV_CHANGEMTU) {
			/* Re-enabling IP */
			if (inetdev_valid_mtu(dev->mtu))
				in_dev = inetdev_init(dev);
		}
		goto out;
	}

	switch (event) {
	case NETDEV_REGISTER:
		/* An in_device already exists at registration time. */
		printk(KERN_DEBUG "inetdev_event: bug\n");
		dev->ip_ptr = NULL;
		break;
	case NETDEV_UP:
		if (!inetdev_valid_mtu(dev->mtu))
			break;
		if (dev->flags & IFF_LOOPBACK) {
			struct in_ifaddr *ifa;

			/* Give the loopback device INADDR_LOOPBACK with an 8-bit prefix. */
			if ((ifa = inet_alloc_ifa()) != NULL) {
				ifa->ifa_local =
				  ifa->ifa_address = htonl(INADDR_LOOPBACK);
				ifa->ifa_prefixlen = 8;
				ifa->ifa_mask = inet_make_mask(8);
				in_dev_hold(in_dev);
				ifa->ifa_dev = in_dev;
				ifa->ifa_scope = RT_SCOPE_HOST;
				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
				inet_insert_ifa(ifa);
			}
		}
		ip_mc_up(in_dev);
		/* fall through */
	case NETDEV_CHANGEADDR:
		if (IN_DEV_ARP_NOTIFY(in_dev))
			arp_send(ARPOP_REQUEST, ETH_P_ARP,
				 in_dev->ifa_list->ifa_address,
				 dev,
				 in_dev->ifa_list->ifa_address,
				 NULL, dev->dev_addr, NULL);
		break;
	case NETDEV_DOWN:
		ip_mc_down(in_dev);
		break;
	case NETDEV_CHANGEMTU:
		if (inetdev_valid_mtu(dev->mtu))
			break;
		/* disable IP when MTU is not enough */
		/* fall through */
	case NETDEV_UNREGISTER:
		inetdev_destroy(in_dev);
		break;
	case NETDEV_CHANGENAME:
		/* Do not notify about label change, this event is
		 * not interesting to applications using netlink.
		 */
		inetdev_changename(dev, in_dev);

		devinet_sysctl_unregister(in_dev);
		devinet_sysctl_register(in_dev);
		break;
	}
out:
	return NOTIFY_DONE;
}

在註冊的時候傳遞的是 NETDEV_REGISTER 事件：在 in_dev 不爲空時，只執行 switch 語句中的 NETDEV_REGISTER 分支（打印一條調試信息，並執行 dev->ip_ptr = NULL ）；在 in_dev 爲空時，調用 inetdev_init() 函數分配一個 struct in_device ，此時只有 Loopback 設備還會額外設置 NOXFRM 和 NOPOLICY 兩個配置項。

4 舉例

這個例子由參考文章二給出。

在這裏，寫了一個簡單的通知鏈表的代碼。

實際上，整個通知鏈的編寫也就兩個過程：
    首先是定義自己的通知鏈的頭節點，並將要執行的函數註冊到自己的通知鏈中。
    其次則是由另外的子系統來通知這個鏈，讓其上面註冊的函數運行。

    這裏將第一個過程分成了兩步來寫，第一步是定義了頭節點和一些自定義的註冊函數（針對該頭節點的），第二步則是使用自定義的註冊函數註冊了一些通知鏈節點。分別在代碼buildchain.c 與 regchain.c 中。
    發送通知信息的代碼爲notify.c 。

代碼1 buildchain.c
    它的作用是自定義一個通知鏈表test_chain ，然後再自定義兩個函數分別向這個通知鏈中加入或刪除節點，最後再定義一個函數通知這個 test_chain 鏈。

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/module.h>
MODULE_LICENSE("GPL");

/*
* 定義自己的通知鏈頭結點以及註冊和卸載通知鏈的外包函數
*/

/*
 * RAW_NOTIFIER_HEAD defines the head node of a notifier chain; every other
 * notifier_block on the chain can be reached through this head node.
 */
static RAW_NOTIFIER_HEAD(test_chain);

/*
 * Custom registration function: adds the notifier_block @nb to the
 * test_chain list defined above and returns whatever
 * raw_notifier_chain_register() returns.
 * raw_notifier_chain_register() in turn calls notifier_chain_register().
 */
int register_test_notifier(struct notifier_block *nb)
{
        return raw_notifier_chain_register(&test_chain, nb);
}
EXPORT_SYMBOL(register_test_notifier);

/*
 * Custom unregistration function: removes the notifier_block @nb from
 * test_chain and returns whatever raw_notifier_chain_unregister() returns.
 */
int unregister_test_notifier(struct notifier_block *nb)
{
        return raw_notifier_chain_unregister(&test_chain, nb);
}
EXPORT_SYMBOL(unregister_test_notifier);

/*
 * Custom notification function: notifies test_chain, i.e. asks the nodes on
 * the list headed by test_chain to run their callbacks.
 * @val and @v are handed on unchanged to raw_notifier_call_chain(), whose
 * result is returned.
 */
int test_notifier_call_chain(unsigned long val, void *v)
{
        return raw_notifier_call_chain(&test_chain, val, v);
}
EXPORT_SYMBOL(test_notifier_call_chain);

/*
* init and exit
*/
/*
 * Module init: only logs that the module was loaded. The chain head itself
 * is defined statically, so nothing needs to be set up here.
 * Always returns 0.
 */
static int __init init_notifier(void)
{
        /* "\n", not "/n": the log line must end in a real newline. */
        printk("init_notifier\n");
        return 0;
}

/*
 * Module exit: only logs that the module is being unloaded.
 */
static void __exit exit_notifier(void)
{
        /* "\n", not "/n": the log line must end in a real newline. */
        printk("exit_notifier\n");
}
module_init(init_notifier);
module_exit(exit_notifier);

代碼2 regchain.c
該代碼的作用是將test_notifier1 test_notifier2 test_notifier3 這三個節點加到之前定義的 test_chain 這個通知鏈表上，同時每個節點都註冊了一個函數。

#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/module.h>
MODULE_LICENSE("GPL");

/*
* 註冊通知鏈
*/

extern int register_test_notifier(struct notifier_block*);
extern int unregister_test_notifier(struct notifier_block*);

/*
 * Callback of test_notifier1: logs the event number it was called with.
 * @this and @ptr are unused. Always returns 0.
 */
static int test_event1(struct notifier_block *this, unsigned long event, void *ptr)
{
        /* event is unsigned long, so it needs %lu; the line ends in "\n". */
        printk("In Event 1: Event Number is %lu\n", event);
        return 0;
}

/*
 * Callback of test_notifier2: logs the event number it was called with.
 * @this and @ptr are unused. Always returns 0.
 */
static int test_event2(struct notifier_block *this, unsigned long event, void *ptr)
{
        /* event is unsigned long, so it needs %lu; the line ends in "\n". */
        printk("In Event 2: Event Number is %lu\n", event);
        return 0;
}

/*
 * Callback of test_notifier3: logs the event number it was called with.
 * @this and @ptr are unused. Always returns 0.
 */
static int test_event3(struct notifier_block *this, unsigned long event, void *ptr)
{
        /* event is unsigned long, so it needs %lu; the line ends in "\n". */
        printk("In Event 3: Event Number is %lu\n", event);
        return 0;
}

/*
 * Notifier 1: the function this node runs is test_event1.
 */
static struct notifier_block test_notifier1 =
{
        .notifier_call = test_event1,
};

/*
 * Notifier 2: the function this node runs is test_event2.
 */
static struct notifier_block test_notifier2 =
{
        .notifier_call = test_event2,
};

/*
 * Notifier 3: the function this node runs is test_event3.
 */
static struct notifier_block test_notifier3 =
{
        .notifier_call = test_event3,
};

/*
 * Module init: register test_notifier1, test_notifier2 and test_notifier3
 * through register_test_notifier(), in that order.
 *
 * If a registration fails, the blocks registered so far are unregistered
 * again before returning. Otherwise they would stay on the chain while
 * pointing into a module that failed to load.
 *
 * Returns 0 on success, or the error code reported by the failing
 * register_test_notifier() call.
 */
static int __init reg_notifier(void)
{
        int err;

        printk("Begin to register:\n");

        err = register_test_notifier(&test_notifier1);
        if (err)
        {
                printk("register test_notifier1 error\n");
                goto out;
        }
        printk("register test_notifier1 completed\n");

        err = register_test_notifier(&test_notifier2);
        if (err)
        {
                printk("register test_notifier2 error\n");
                goto undo_notifier1;
        }
        printk("register test_notifier2 completed\n");

        err = register_test_notifier(&test_notifier3);
        if (err)
        {
                printk("register test_notifier3 error\n");
                goto undo_notifier2;
        }
        printk("register test_notifier3 completed\n");
        return 0;

        /* Error unwinding: undo the registrations in reverse order. */
undo_notifier2:
        unregister_test_notifier(&test_notifier2);
undo_notifier1:
        unregister_test_notifier(&test_notifier1);
out:
        return err;
}

/*
 * Module exit: unregister the three notifier blocks that reg_notifier()
 * registered. The return values of unregister_test_notifier() are not
 * checked.
 */
static void __exit unreg_notifier(void)
{
        /* "\n", not "/n": each log line must end in a real newline. */
        printk("Begin to unregister\n");
        unregister_test_notifier(&test_notifier1);
        unregister_test_notifier(&test_notifier2);
        unregister_test_notifier(&test_notifier3);
        printk("Unregister finished\n");
}
module_init(reg_notifier);
module_exit(unreg_notifier);

代碼3 notify.c
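該代碼的作用就是向test_chain 通知鏈中發送消息，讓鏈中的函數運行。（此處缺少 notify.c 的代碼清單；從下文的運行結果看，該模塊在加載時先打印 "Begin to notify:" ，然後以事件值 1 調用 buildchain.c 導出的 test_notifier_call_chain() 。）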

Makefile文件（我修改了）

注意，記得先檢查有沒有安裝當前linux 版本的內核頭文件：

# Build the three example modules as external kernel modules.
obj-m := buildchain.o regchain.o notify.o

# Directory holding the module sources (where make is invoked).
CURRENT_PATH := $(shell pwd)

# Release string of the running kernel.
LINUX_KERNEL := $(shell uname -r)

# Header tree of the running kernel; this path must exist before building.
KERNELDIR := /usr/src/linux-headers-$(LINUX_KERNEL)

.PHONY: all clean

# Recipe lines must start with a TAB. $(MAKE) hands make's flags on to the
# sub-make.
all:
	$(MAKE) -C $(KERNELDIR) M=$(CURRENT_PATH) modules

clean:
	$(MAKE) -C $(KERNELDIR) M=$(CURRENT_PATH) clean

運行（注意insmod 要 root 權限）

make

insmod buildchain.ko
insmod regchain.ko
insmod notify.ko

這樣就可以看到通知鏈運行的效果了

下面是我在自己的機器上面運行得到的結果（dmesg 命令） :

init_notifier
Begin to register:
register test_notifier1 completed
register test_notifier2 completed
register test_notifier3 completed
Begin to notify:
==============================
In Event 1: Event Number is 1
In Event 2: Event Number is 1
In Event 3: Event Number is 1
==============================