參考:http://blog.csdn.net/luoshengyang/article/details/6595744
內核中的日誌文件:
kernel/common/drivers/staging/android/logger.h
kernel/common/drivers/staging/android/logger.c
一、Logger的數據結構:
/*
 * Interface definitions for the logger driver: the header stored in front
 * of every log entry, the names of the log devices, the entry size limits
 * and the ioctl command codes.
 */
#ifndef _LINUX_LOGGER_H
#define _LINUX_LOGGER_H
#include <linux/types.h>
#include <linux/ioctl.h>
/*
 * struct logger_entry - fixed-size header that precedes each entry's payload
 *
 * 'msg' is a zero-length trailing array, so sizeof(struct logger_entry)
 * covers the header only; the 'len' payload bytes follow it directly.
 */
struct logger_entry {
__u16 len; /* length of the payload in bytes, header not included */
__u16 __pad; /* no matter what, we get 2 bytes of padding */
__s32 pid; /* generating process's pid */
__s32 tid; /* generating process's tid */
__s32 sec; /* seconds since Epoch */
__s32 nsec; /* nanoseconds part of the timestamp */
char msg[0]; /* the entry's payload */
};
/* Names under which the log devices are registered. */
#define LOGGER_LOG_RADIO "log_radio" /* radio-related messages */
#define LOGGER_LOG_EVENTS "log_events" /* system/hardware events */
#define LOGGER_LOG_MAIN "log_main" /* everything else */
/* Upper bound, in bytes, on one whole entry (header plus payload). */
#define LOGGER_ENTRY_MAX_LEN (4*1024)
/* Largest payload that still fits in LOGGER_ENTRY_MAX_LEN with its header. */
#define LOGGER_ENTRY_MAX_PAYLOAD \
(LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry))
/* ioctl commands; all are built with _IO() on the __LOGGERIO magic number. */
#define __LOGGERIO 0xAE
#define LOGGER_GET_LOG_BUF_SIZE _IO(__LOGGERIO, 1) /* size of log */
#define LOGGER_GET_LOG_LEN _IO(__LOGGERIO, 2) /* used log len */
#define LOGGER_GET_NEXT_ENTRY_LEN _IO(__LOGGERIO, 3) /* next entry len */
#define LOGGER_FLUSH_LOG _IO(__LOGGERIO, 4) /* flush log */
#endif /* _LINUX_LOGGER_H */
struct logger_entry是一個用於描述一條Log記錄的結構體。
len成員變量:記錄了這條記錄的有效負載的長度,有效負載指的是日誌記錄本身的長度,但是不包括用於描述這個記錄的struct logger_entry結構體。回憶一下我們調用android.util.Log接口來使用日誌系統時,會指定日誌的優先級別Priority、Tag字符串以及Msg字符串,Priority + Tag + Msg三者內容的長度加起來就是記錄的有效負載長度了。
__pad成員變量:是用來對齊結構體的。
pid和tid成員變量:分別用來記錄寫入這條記錄的進程ID和線程ID(在logger_aio_write中分別取自current->tgid和current->pid)。
sec和nsec成員變量:記錄日誌寫的時間。
msg成員變量:記錄的就是有效負載的內容了,它的大小由len成員變量來確定。
接着定義兩個宏:
#define LOGGER_ENTRY_MAX_LEN (4*1024)
#define LOGGER_ENTRY_MAX_PAYLOAD \
(LOGGER_ENTRY_MAX_LEN - sizeof(struct logger_entry))
從這兩個宏可以看出,每條日誌記錄的有效負載長度加上結構體logger_entry的長度不能超過4K個字節。
再看logger.c
/*
 * struct logger_log - represents a specific log, such as 'main' or 'radio'
 *
 * This structure lives from module insertion until module removal, so it does
 * not need additional reference counting. The structure is protected by the
 * mutex 'mutex'.
 *
 * 'buffer' is used as a ring: offsets into it are wrapped by logger_offset(),
 * which masks with (size - 1), so 'size' has to be a power of two.
 */
struct logger_log {
unsigned char * buffer; /* the ring buffer itself */
struct miscdevice misc; /* misc device representing the log */
wait_queue_head_t wq; /* wait queue for readers */
struct list_head readers; /* this log's readers */
struct mutex mutex; /* mutex protecting buffer */
size_t w_off; /* current write head offset (where the next entry goes) */
size_t head; /* new readers start here */
size_t size; /* size of the log (of 'buffer'), in bytes */
};
/*
 * struct logger_reader - a logging device open for reading
 *
 * This object lives from open to release, so we don't need additional
 * reference counting. The structure is protected by log->mutex.
 *
 * One instance is allocated per open-for-read in logger_open() and stored
 * in file->private_data.
 */
struct logger_reader {
struct logger_log * log; /* associated log */
struct list_head list; /* entry in logger_log's list */
size_t r_off; /* current read head offset (start of the next entry to read) */
};
/*
 * logger_offset - returns index 'n' into the log via (optimized) modulus
 *
 * The mask only equals 'n % size' when the log size is a power of two.
 * Note that the macro does not take the log as an argument: it expands to
 * an expression that uses a variable named 'log' from the caller's scope.
 */
#define logger_offset(n) ((n) & (log->size - 1))
結構體struct logger_log就是真正用來保存日誌的地方了。
buffer成員變量:是用來保存日誌信息的內存緩衝區,它的大小由size成員變量確定。
misc成員變量:可以看出,logger驅動程序使用的設備屬於misc類型的設備,通過在Android模擬器上執行cat /proc/devices命令可以看出,misc類型設備的主設備號是10。
wq成員變量:是一個等待隊列,用於保存正在等待讀取日誌的進程。
readers成員變量:用來保存當前正在讀取日誌的進程,正在讀取日誌的進程由結構體logger_reader來描述。
mutex成員變量:是一個互斥量,用來保護log的併發訪問。可以看出,這裏的日誌系統的讀寫問題,其實是一個生產者-消費者的問題,因此,需要互斥量來保護log的併發訪問。
w_off成員變量:用來記錄下一條日誌應該從哪裏開始寫。
head成員變量:用來表示新打開日誌設備文件時,應該從哪一個位置開始讀取日誌。
結構體struct logger_reader用來表示一個讀取日誌的進程,log成員變量指向要讀取的日誌緩衝區。list成員變量用來連接其它讀者進程。r_off成員變量表示當前要讀取的日誌在緩衝區中的位置。
struct logger_log結構體中用於保存日誌信息的內存緩衝區buffer是一個循環使用的環形緩衝區,緩衝區中保存的內容是以struct logger_entry爲單位的,每個單位的組成爲:
struct logger_entry | priority | tag | msg
由於內存緩衝區buffer是一個循環使用的環形緩衝區,給定一個偏移值,它在buffer中的位置由下面的宏logger_offset來確定:
#define logger_offset(n) ((n) & (log->size - 1))
這是環形緩衝區很常用的一種取模方法:由於size是2的冪,(n) & (size - 1)的結果與n % size相同。
二. Logger驅動程序模塊的初始化過程分析
繼續看logger.c文件,定義了三個日誌設備:
/*
 * Defines a log structure with name 'NAME' and a size of 'SIZE' bytes, which
 * must be a power of two, greater than LOGGER_ENTRY_MAX_LEN, and less than
 * LONG_MAX minus LOGGER_ENTRY_MAX_LEN.
 *
 * The macro emits two file-scope objects: a static backing array
 * '_buf_<VAR>' of SIZE bytes and a static struct logger_log named VAR that
 * points at it. The misc device asks for a dynamically assigned minor and
 * uses logger_fops; the write offset and the head both start at 0.
 */
#define DEFINE_LOGGER_DEVICE(VAR, NAME, SIZE) \
static unsigned char _buf_ ## VAR[SIZE]; \
static struct logger_log VAR = { \
.buffer = _buf_ ## VAR, \
.misc = { \
.minor = MISC_DYNAMIC_MINOR, \
.name = NAME, \
.fops = &logger_fops, \
.parent = NULL, \
}, \
.wq = __WAIT_QUEUE_HEAD_INITIALIZER(VAR .wq), \
.readers = LIST_HEAD_INIT(VAR .readers), \
.mutex = __MUTEX_INITIALIZER(VAR .mutex), \
.w_off = 0, \
.head = 0, \
.size = SIZE, \
};
/* The three logs: 64 KiB for main and radio, 256 KiB for events. */
DEFINE_LOGGER_DEVICE(log_main, LOGGER_LOG_MAIN, 64*1024)
DEFINE_LOGGER_DEVICE(log_events, LOGGER_LOG_EVENTS, 256*1024)
DEFINE_LOGGER_DEVICE(log_radio, LOGGER_LOG_RADIO, 64*1024)
分別是log_main、log_events和log_radio,名稱分別爲LOGGER_LOG_MAIN、LOGGER_LOG_EVENTS和LOGGER_LOG_RADIO,它們的次設備號爲MISC_DYNAMIC_MINOR,即在註冊時動態分配。在logger.h文件中,有這三個宏的定義:
#define LOGGER_LOG_RADIO "log_radio"/* radio-related messages */
#define LOGGER_LOG_EVENTS "log_events"/* system/hardware events */
#define LOGGER_LOG_MAIN "log_main"/* everything else */
/*
 * File operations shared by all log devices. There is no plain .write
 * hook: writes arrive through .aio_write (logger_aio_write). The same
 * handler, logger_ioctl, is installed for both the unlocked and the
 * compat ioctl entry points.
 */
static struct file_operations logger_fops = {
.owner = THIS_MODULE,
.read = logger_read,
.aio_write = logger_aio_write,
.poll = logger_poll,
.unlocked_ioctl = logger_ioctl,
.compat_ioctl = logger_ioctl,
.open = logger_open,
.release = logger_release,
};
日誌驅動程序模塊的初始化函數爲logger_init:
/*
 * logger_init - register the main, events and radio log devices, in that
 * order.
 *
 * Stops at the first init_log() failure and returns its error code; logs
 * registered before the failure are left registered. Returns 0 when all
 * three registrations succeed.
 */
static int __init logger_init(void)
{
	int err;

	err = init_log(&log_main);
	if (unlikely(err))
		return err;

	err = init_log(&log_events);
	if (unlikely(err))
		return err;

	/* the last result, success or failure, is the function's result */
	return init_log(&log_radio);
}
device_initcall(logger_init);
logger_init函數通過調用init_log函數來初始化了上述提到的三個日誌設備:
/*
 * init_log - register the misc device backing 'log' and report the outcome
 * on the kernel log.
 *
 * Returns 0 on success, otherwise the error code from misc_register().
 */
static int __init init_log(struct logger_log *log)
{
	int err = misc_register(&log->misc);

	if (unlikely(err))
		printk(KERN_ERR "logger: failed to register misc "
		       "device for log '%s'!\n", log->misc.name);
	else
		/* size is reported in KiB, hence the shift by 10 */
		printk(KERN_INFO "logger: created %luK log '%s'\n",
		       (unsigned long) log->size >> 10, log->misc.name);

	return err;
}
init_log函數主要調用了misc_register函數來註冊misc設備,misc_register函數定義在kernel/common/drivers/char/misc.c文件中:
/**
* misc_register - register a miscellaneous device
* @misc: device structure
*
* Register a miscellaneous device with the kernel. If the minor
* number is set to %MISC_DYNAMIC_MINOR a minor number is assigned
* and placed in the minor field of the structure. For other cases
* the minor number requested is used.
*
* The structure passed is linked into the kernel and may not be
* destroyed until it has been unregistered.
*
* A zero is returned on success and a negative errno code for
* failure.
*/
int misc_register(struct miscdevice * misc)
{
struct miscdevice *c;
dev_t dev;
int err = 0;
INIT_LIST_HEAD(&misc->list);
mutex_lock(&misc_mtx);
list_for_each_entry(c, &misc_list, list) {
if (c->minor == misc->minor) {
mutex_unlock(&misc_mtx);
return -EBUSY;
}
}
if (misc->minor == MISC_DYNAMIC_MINOR) {
int i = DYNAMIC_MINORS;
while (--i >= 0)
if ( (misc_minors[i>>3] & (1 << (i&7))) == 0)
break;
if (i<0) {
mutex_unlock(&misc_mtx);
return -EBUSY;
}
misc->minor = i;
}
if (misc->minor < DYNAMIC_MINORS)
misc_minors[misc->minor >> 3] |= 1 << (misc->minor & 7);
dev = MKDEV(MISC_MAJOR, misc->minor);
misc->this_device = device_create(misc_class, misc->parent, dev, NULL,
"%s", misc->name);
if (IS_ERR(misc->this_device)) {
err = PTR_ERR(misc->this_device);
goto out;
}
/*
* Add it to the front, so that later devices can "override"
* earlier defaults
*/
list_add(&misc->list, &misc_list);
out:
mutex_unlock(&misc_mtx);
return err;
}
註冊完成後,通過device_create創建設備文件節點。這裏,將創建/dev/log/main、/dev/log/events和/dev/log/radio三個設備文件,這樣,用戶空間就可以通過讀寫這三個文件和驅動程序進行交互。
三. Logger驅動程序的日誌記錄讀取過程分析。
繼續看logger.c 文件,註冊的讀取日誌設備文件的方法爲logger_read:
/*
 * logger_read - our log's read() method
 *
 * Behavior:
 *
 * - O_NONBLOCK works
 * - If there are no log entries to read, blocks until log is written to
 * - Atomically reads exactly one log entry
 *
 * Optimal read size is LOGGER_ENTRY_MAX_LEN. Will set errno to EINVAL if read
 * buffer is insufficient to hold next entry.
 *
 * Returns the number of bytes copied to 'buf' (one whole entry, header
 * included), -EAGAIN when the log is empty and the file is non-blocking,
 * -EINTR when a signal is pending while waiting, -EINVAL when 'count' is
 * smaller than the next entry, or the error from do_read_log_to_user().
 */
static ssize_t logger_read(struct file *file, char __user *buf,
			   size_t count, loff_t *pos)
{
	struct logger_reader *reader = file->private_data;
	struct logger_log *log = reader->log;
	ssize_t ret;
	DEFINE_WAIT(wait);

start:
	while (1) {
		/*
		 * Take the mutex before queueing ourselves. mutex_lock() may
		 * sleep and comes back in TASK_RUNNING, so calling it after
		 * prepare_to_wait() would undo the TASK_INTERRUPTIBLE state
		 * and turn the schedule() below into a busy loop.
		 */
		mutex_lock(&log->mutex);
		prepare_to_wait(&log->wq, &wait, TASK_INTERRUPTIBLE);

		/* ret != 0 means the reader has caught up with the writer */
		ret = (log->w_off == reader->r_off);
		mutex_unlock(&log->mutex);
		if (!ret)
			break;

		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			ret = -EINTR;
			break;
		}

		schedule();
	}

	finish_wait(&log->wq, &wait);
	if (ret)
		return ret;

	mutex_lock(&log->mutex);

	/* is there still something to read or did we race? */
	if (unlikely(log->w_off == reader->r_off)) {
		mutex_unlock(&log->mutex);
		goto start;
	}

	/* get the size of the next entry */
	ret = get_entry_len(log, reader->r_off);
	if (count < ret) {
		ret = -EINVAL;
		goto out;
	}

	/* get exactly one entry from the log */
	ret = do_read_log_to_user(log, reader, buf, ret);

out:
	mutex_unlock(&log->mutex);

	return ret;
}
注意,在函數開始的地方,表示讀取日誌上下文的struct logger_reader是保存在文件指針的private_data成員變量裏面的,這是在打開設備文件時設置的,設備文件打開方法爲logger_open:
/*
* logger_open - the log's open() file operation
*
* Note how near a no-op this is in the write-only case. Keep it that way!
*/
static int logger_open(struct inode *inode, struct file *file)
{
	struct logger_reader *rd;
	struct logger_log *log;
	int err;

	err = nonseekable_open(inode, file);
	if (err)
		return err;

	log = get_log_from_minor(MINOR(inode->i_rdev));
	if (!log)
		return -ENODEV;

	/* files not opened for reading carry the log itself, no reader state */
	if (!(file->f_mode & FMODE_READ)) {
		file->private_data = log;
		return 0;
	}

	rd = kmalloc(sizeof(*rd), GFP_KERNEL);
	if (!rd)
		return -ENOMEM;

	rd->log = log;
	INIT_LIST_HEAD(&rd->list);

	/* start at the log's head and join its reader list, under the mutex */
	mutex_lock(&log->mutex);
	rd->r_off = log->head;
	list_add_tail(&rd->list, &log->readers);
	mutex_unlock(&log->mutex);

	file->private_data = rd;
	return 0;
}
新打開日誌設備文件時,是從log->head位置開始讀取日誌的,保存在struct logger_reader的成員變量r_off中。
start標號處的while循環是在等待日誌可讀,如果已經沒有新的日誌可讀了,那麼讀進程就要進入休眠狀態,等待新的日誌寫入後再喚醒,這是通過prepare_to_wait和schedule兩個調用來實現的。如果沒有新的日誌可讀,並且設備文件是以非阻塞O_NONBLOCK的方式打開(返回-EAGAIN),或者這時有信號要處理(signal_pending(current),返回-EINTR),那麼就直接返回,不再等待新的日誌寫入。判斷當前是否有新的日誌可讀的方法是:
ret = (log->w_off == reader->r_off);
即判斷當前緩衝區的寫入位置和當前讀進程的讀取位置是否相等,如果不相等,則說明有新的日誌可讀。
w_off : 當前緩衝區的寫入位置
r_off : 當前讀進程的讀取位置
繼續向下看,如果有新的日誌可讀,那麼首先通過get_entry_len來獲取下一條可讀的日誌記錄的長度,從這裏可以看出,日誌讀取進程是以日誌記錄爲單位進行讀取的,一次只讀取一條記錄。get_entry_len的函數實現如下:
/*
 * get_entry_len - total length (header plus payload) of the entry that
 * starts at offset 'off'.
 *
 * The 16-bit payload length is the first field of the entry header. Its two
 * bytes may straddle the end of the ring, in which case the second byte is
 * taken from the start of the buffer.
 *
 * Caller needs to hold log->mutex.
 */
static __u32 get_entry_len(struct logger_log *log, size_t off)
{
	__u16 val;
	unsigned char *raw = (unsigned char *) &val;

	if (log->size - off == 1) {
		/* only one byte left before the wrap: stitch the halves */
		raw[0] = log->buffer[off];
		raw[1] = log->buffer[0];
	} else {
		memcpy(&val, log->buffer + off, sizeof(val));
	}

	return sizeof(struct logger_entry) + val;
}
上面我們提到,每一條日誌記錄是由兩大部分組成的,一個用於描述這條日誌記錄的結構體struct logger_entry,另一個是記錄體本身,即有效負載。結構體struct logger_entry的長度是固定的,只要知道有效負載的長度,就可以知道整條日誌記錄的長度了。而有效負載的長度是記錄在結構體struct logger_entry的成員變量len中,而len成員變量的地址與struct logger_entry的地址相同,因此,只需要讀取記錄的開始位置的兩個字節就可以了。又由於日誌記錄緩衝區是循環使用的,這兩個字節有可能是第一個字節存放在緩衝區的最後一個字節,而第二個字節存放在緩衝區的第一個字節,除此之外,這兩個字節都是連在一起的。因此,分兩種情況來考慮,對於前者,分別通過讀取緩衝區最後一個字節和第一個字節來得到日誌記錄的有效負載長度到本地變量val中,對於後者,直接讀取連續兩個字節的值到本地變量val中。這兩種情況是通過判斷日誌緩衝區的大小和要讀取的日誌記錄在緩衝區中的位置的差值來區別的,如果相差1,就說明是前一種情況了。最後,把有效負載的長度val加上struct logger_entry的長度就得到了要讀取的日誌記錄的總長度了。
接着往下看,得到了要讀取的記錄的長度,就調用do_read_log_to_user函數來執行真正的讀取動作:
/*
 * do_read_log_to_user - copy 'count' bytes of the log, starting at the
 * reader's current offset, into the user buffer 'buf', then advance the
 * reader's offset past them.
 *
 * Returns 'count' on success or -EFAULT when a copy to user space fails
 * (the reader's offset is then left unchanged).
 */
static ssize_t do_read_log_to_user(struct logger_log *log,
				   struct logger_reader *reader,
				   char __user *buf,
				   size_t count)
{
	/* bytes available before the end of the ring, capped at 'count' */
	size_t until_end = min(count, log->size - reader->r_off);
	/* whatever is left continues at the start of the ring */
	size_t wrapped = count - until_end;

	if (copy_to_user(buf, log->buffer + reader->r_off, until_end))
		return -EFAULT;

	if (wrapped && copy_to_user(buf + until_end, log->buffer, wrapped))
		return -EFAULT;

	reader->r_off = logger_offset(reader->r_off + count);

	return count;
}
這個函數簡單地調用copy_to_user函數來把位於內核空間的日誌緩衝區指定的內容拷貝到用戶空間的內存緩衝區就可以了,同時,把當前讀取日誌進程的上下文信息中的讀偏移r_off前進到下一條日誌記錄的開始的位置上。
四. Logger驅動程序的日誌記錄寫入過程分析。
繼續看logger.c 文件,註冊的寫入日誌設備文件的方法爲logger_aio_write:
/*
 * logger_aio_write - our write method, implementing support for write(),
 * writev(), and aio_write(). Writes are our fast path, and we try to optimize
 * them above all else.
 *
 * Builds the entry header in the kernel (pid, tid, timestamp, payload
 * length), stores it in the log and then appends the payload taken from the
 * 'nr_segs' user segments in 'iov'. The payload is truncated to
 * LOGGER_ENTRY_MAX_PAYLOAD bytes.
 *
 * Returns the number of payload bytes written, 0 for an empty write, or the
 * negative error from do_write_log_from_user(), in which case the write
 * offset is rolled back to where it was before this entry.
 */
ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
			 unsigned long nr_segs, loff_t ppos)
{
	struct logger_log *log = file_get_log(iocb->ki_filp);
	size_t orig;
	struct logger_entry header;
	struct timespec now;
	ssize_t ret = 0;

	now = current_kernel_time();

	/*
	 * The whole header is copied into the log and from there to readers
	 * in user space, so the padding must not carry stale stack bytes.
	 */
	header.__pad = 0;
	header.pid = current->tgid;
	header.tid = current->pid;
	header.sec = now.tv_sec;
	header.nsec = now.tv_nsec;
	header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD);

	/* null writes succeed, return zero */
	if (unlikely(!header.len))
		return 0;

	mutex_lock(&log->mutex);

	/*
	 * Sample the rollback point only while holding the mutex: read any
	 * earlier, another writer could move w_off in between and the error
	 * path below would rewind the log to a stale offset.
	 */
	orig = log->w_off;

	/*
	 * Fix up any readers, pulling them forward to the first readable
	 * entry after (what will be) the new write offset. We do this now
	 * because if we partially fail, we can end up with clobbered log
	 * entries that encroach on readable buffer.
	 */
	fix_up_readers(log, sizeof(struct logger_entry) + header.len);

	do_write_log(log, &header, sizeof(struct logger_entry));

	while (nr_segs-- > 0) {
		size_t len;
		ssize_t nr;

		/* figure out how much of this vector we can keep */
		len = min_t(size_t, iov->iov_len, header.len - ret);

		/* write out this segment's payload */
		nr = do_write_log_from_user(log, iov->iov_base, len);
		if (unlikely(nr < 0)) {
			/* drop the partially written entry */
			log->w_off = orig;
			mutex_unlock(&log->mutex);
			return nr;
		}

		iov++;
		ret += nr;
	}

	mutex_unlock(&log->mutex);

	/* wake up any blocked readers */
	wake_up_interruptible(&log->wq);

	return ret;
}
輸入的參數:
iocb: 表示io上下文
iov: 表示要寫入的內容
nr_segs: 長度,表示有nr_segs個段的內容要寫入。我們知道,每個要寫入的日誌的結構形式爲:
struct logger_entry | priority | tag | msg
其中, priority、tag和msg這三個段的內容是由iov參數從用戶空間傳遞下來的,分別對應iov裏面的三個元素。
而logger_entry是由內核空間來構造的:
struct logger_entry header;
struct timespec now;
now = current_kernel_time();
header.pid = current->tgid;
header.tid = current->pid;
header.sec = now.tv_sec;
header.nsec = now.tv_nsec;
header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD);
然後調用do_write_log首先把logger_entry結構體寫入到日誌緩衝區中:
/*
 * do_write_log - copy 'count' bytes from the kernel buffer 'buf' into 'log'
 * at the current write offset, continuing at the start of the ring when the
 * end is reached, and advance log->w_off past the written bytes.
 *
 * The caller needs to hold log->mutex.
 */
static void do_write_log(struct logger_log *log, const void *buf, size_t count)
{
	/* part that fits before the end of the ring */
	size_t until_end = min(count, log->size - log->w_off);
	/* part that has to go to the start of the ring */
	size_t wrapped = count - until_end;

	memcpy(log->buffer + log->w_off, buf, until_end);

	if (wrapped)
		memcpy(log->buffer, buf + until_end, wrapped);

	log->w_off = logger_offset(log->w_off + count);
}
由於logger_entry是內核堆棧空間分配的,直接用memcpy拷貝就可以了。
接着,通過一個while循環把iov的內容寫入到日誌緩衝區中,也就是日誌的優先級別priority、日誌Tag和日誌主體Msg:
while (nr_segs-- > 0) {
size_t len;
ssize_t nr;
/* figure out how much of this vector we can keep */
len = min_t(size_t, iov->iov_len, header.len - ret);
/* write out this segment's payload */
nr = do_write_log_from_user(log, iov->iov_base, len);
if (unlikely(nr < 0)) {
log->w_off = orig;
mutex_unlock(&log->mutex);
return nr;
}
iov++;
ret += nr;
}
由於iov的內容是由用戶空間傳下來的,需要調用do_write_log_from_user來寫入:
/*
 * do_write_log_from_user - copy 'count' bytes from the user buffer 'buf'
 * into 'log' at the current write offset, wrapping around the end of the
 * ring, and advance log->w_off.
 *
 * Returns 'count' on success or -EFAULT when a copy from user space fails;
 * log->w_off is not advanced in that case.
 */
static ssize_t do_write_log_from_user(struct logger_log *log,
				      const void __user *buf, size_t count)
{
	size_t until_end = min(count, log->size - log->w_off);
	size_t wrapped = count - until_end;

	if (until_end &&
	    copy_from_user(log->buffer + log->w_off, buf, until_end))
		return -EFAULT;

	/* remainder, if any, lands at the start of the ring */
	if (wrapped && copy_from_user(log->buffer, buf + until_end, wrapped))
		return -EFAULT;

	log->w_off = logger_offset(log->w_off + count);

	return count;
}
這裏,我們還漏了一個重要的步驟:
/*
* Fix up any readers, pulling them forward to the first readable
* entry after (what will be) the new write offset. We do this now
* because if we partially fail, we can end up with clobbered log
* entries that encroach on readable buffer.
*/
fix_up_readers(log, sizeof(struct logger_entry) + header.len);
爲什麼要調用fix_up_readers這個函數呢?這個函數又是作什麼用的呢?是這樣的,由於日誌緩衝區是循環使用的,即舊的日誌記錄如果沒有及時讀取,而緩衝區的空間又已經用完時,就需要覆蓋舊的記錄來容納新的記錄。而這部分將要被覆蓋的內容,有可能是某些reader的下一次要讀取的日誌所在的位置,以及爲新的reader準備的日誌開始讀取位置head所在的位置。因此,需要調整這些位置,使它們能夠指向一個新的有效的位置。我們來看一下fix_up_readers函數的實現:
/*
 * fix_up_readers - make room for an upcoming write of 'len' bytes.
 *
 * Any position that the write is about to overrun - the default start
 * position log->head as well as the read offset of every open reader - is
 * moved forward to the start of an entry that survives the write.
 *
 * The caller needs to hold log->mutex.
 */
static void fix_up_readers(struct logger_log *log, size_t len)
{
	const size_t start = log->w_off;
	const size_t end = logger_offset(start + len);
	struct logger_reader *rd;

	if (clock_interval(start, end, log->head))
		log->head = get_next_entry(log, log->head, len);

	list_for_each_entry(rd, &log->readers, list) {
		/* readers outside the region being overwritten stay put */
		if (!clock_interval(start, end, rd->r_off))
			continue;
		rd->r_off = get_next_entry(log, rd->r_off, len);
	}
}
判斷log->head和所有讀者reader的當前讀偏移reader->r_off是否在被覆蓋的區域內,如果是,就需要調用get_next_entry來取得下一個有效的記錄的起始位置來調整當前位置:
/*
* get_next_entry - return the offset of the first valid entry at least 'len'
* bytes after 'off'.
*
* Caller must hold log->mutex.
*/
static size_t get_next_entry(struct logger_log *log, size_t off, size_t len)
{
	size_t skipped = 0;

	/* hop entry by entry; at least one entry is always skipped */
	for (;;) {
		size_t entry_len = get_entry_len(log, off);

		off = logger_offset(off + entry_len);
		skipped += entry_len;
		if (skipped >= len)
			break;
	}

	return off;
}
而判斷log->head和所有讀者reader的當前讀偏移reader->r_off是否在被覆蓋的區域內,是通過clock_interval函數來實現的:
/*
* clock_interval - is a < c < b in mod-space? Put another way, does the line
* from a to b cross c?
*/
static inline int clock_interval(size_t a, size_t b, size_t c)
{
	const int past_a = a < c;	/* c lies strictly after a */
	const int upto_b = c <= b;	/* c lies at or before b */

	/*
	 * b < a means the span from a to b wraps around the end of the
	 * buffer, so c is inside when it is on either side of the wrap;
	 * otherwise c must lie in the plain range (a, b].
	 */
	return (b < a) ? (past_a || upto_b) : (past_a && upto_b);
}
最後,日誌寫入完畢,還需要喚醒正在等待新日誌的reader進程:
/* wake up any blocked readers */
wake_up_interruptible(&log->wq);