proc添加模塊和systemtap添加tracepoint

實現過程其實和編寫一個驅動很像:只需調用proc文件系統和seq_file提供的部分接口即可。

  1. start()函數完成讀數據前的一些預先操作,通常如加鎖、定位數據記錄位置等;該函數的返回值就是show()函數的第二個參數
  2. show()函數實現讀數據過程,將要輸出的數據直接用seq_printf()函數打印到seq流緩衝區中,再由seq_read()把緩衝區中的內容拷貝到用戶空間

轉載一個圖片,內容是關於proc和seq_file的基本聯繫。
在這裏插入圖片描述

============
fs/jbd2/journal.c

@@ -1050,12 +1050,14 @@ static void jbd2_stats_proc_init(journal_t *journal)
 	if (journal->j_proc_entry) {
 		proc_create_data("info", S_IRUGO, journal->j_proc_entry,
 				 &jbd2_seq_info_fops, journal);
+		jbd2_mon_proc_init(journal->j_proc_entry, journal);
 	}
 }
  
 static void jbd2_stats_proc_exit(journal_t *journal)
 {
 	remove_proc_entry("info", journal->j_proc_entry);
+	jbd2_mon_proc_exit(journal->j_proc_entry);
 	remove_proc_entry(journal->j_devname, proc_jbd2_stats);
 }
  
@@ -1139,6 +1141,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 	struct buffer_head *bh;
 	char *p;
 	int n;
+	int i = 0, j = 0;
  
 	if (!journal)
 		return NULL;
@@ -1172,11 +1175,22 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+	for (i = 0; i < MAX_JBD2_MONITOR_BUFFER; i++) {
+		journal->j_monitor_buffer[i] = kmalloc(sizeof(struct jbd2_delay_stat), GFP_KERNEL);
+		if (!journal->j_monitor_buffer[i]) {
+			printk(KERN_ERR "%s: Cannot allocate buffer for memory\n",
+				__func__);
+			goto out_err;
+		}
+		jbd2_mon_buffer_init(journal->j_monitor_buffer[i]);
+	}
  
 	return journal;
 out_err:
 	kfree(journal->j_wbuf);
 	jbd2_stats_proc_exit(journal);
+	for (j = 0; j < i; j++)
+		kfree(journal->j_monitor_buffer[j]);
 	kfree(journal);
 	return NULL;
 }
@@ -1197,6 +1211,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	int err;
 	int n;
 	unsigned long long blocknr;
+	int i = 0, j = 0;
  
 	if (!journal)
 		return NULL;
@@ -1246,10 +1261,21 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
 	}
 	journal->j_sb_buffer = bh;
 	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+	for (i = 0; i < MAX_JBD2_MONITOR_BUFFER; i++) {
+		journal->j_monitor_buffer[i] = kmalloc(sizeof(struct jbd2_delay_stat), GFP_KERNEL);
+		if (!journal->j_monitor_buffer[i]) {
+			printk(KERN_ERR "%s: Cannot allocate buffer for memory\n",
+				__func__);
+			goto out_err;
+		}
+		jbd2_mon_buffer_init(journal->j_monitor_buffer[i]);
+	}
  
 	return journal;
 out_err:
 	kfree(journal->j_wbuf);
+	for (j = 0; j < i; j++)
+		kfree(journal->j_monitor_buffer[j]);
 	jbd2_stats_proc_exit(journal);
 	kfree(journal);
 	return NULL;
@@ -1677,6 +1703,7 @@ recovery_error:
 int jbd2_journal_destroy(journal_t *journal)
 {
 	int err = 0;
+	int i;
  
 	/* Wait for the commit thread to wake up and die. */
 	journal_kill_thread(journal);
@@ -1720,6 +1747,8 @@ int jbd2_journal_destroy(journal_t *journal)
 	if (journal->j_chksum_driver)
 		crypto_free_shash(journal->j_chksum_driver);
 	kfree(journal->j_wbuf);
+	for (i = 0; i < MAX_JBD2_MONITOR_BUFFER; i++)
+		kfree(journal->j_monitor_buffer[i]);
 	kfree(journal);
  
 	return err;

============
fs/jbd2/transaction.c

@@ -29,6 +29,9 @@
 #include <linux/backing-dev.h>
 #include <linux/bug.h>
 #include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
  
 #include <trace/events/jbd2.h>
  
@@ -181,6 +184,7 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 	transaction_t *t = journal->j_running_transaction;
 	int needed;
 	int total = blocks + rsv_blocks;
+	u64 calltime, time_monitor, rettime;
  
 	/*
 	 * If the current transaction is locked down for commit, wait
@@ -220,12 +224,17 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 	 * in the new transaction.
 	 */
 	if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) {
+		calltime = sched_clock();
 		atomic_sub(total, &t->t_outstanding_credits);
 		read_unlock(&journal->j_state_lock);
 		write_lock(&journal->j_state_lock);
 		if (jbd2_log_space_left(journal) < jbd2_space_needed(journal))
 			__jbd2_log_wait_for_space(journal);
 		write_unlock(&journal->j_state_lock);
+		rettime = sched_clock();
+		time_monitor = rettime - calltime;
+		/* We're going to collect some data to monitor */
+		jbd2_mon_collect_atc_stat(journal, time_monitor);
 		return 1;
 	}
  
@@ -786,11 +795,14 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	char *frozen_buffer = NULL;
 	int need_copy = 0;
 	unsigned long start_lock, time_lock;
+	u64 calltime, time_monitor, rettime;
  
 	if (is_handle_aborted(handle))
 		return -EROFS;
 	journal = transaction->t_journal;
  
+	calltime = sched_clock();
+
 	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
  
 	JBUFFER_TRACE(jh, "entry");
@@ -993,6 +1005,13 @@ done:
 	jbd2_journal_cancel_revoke(handle, jh);
  
 out:
+	/* Compute the time-delay */
+	rettime = sched_clock();
+	time_monitor = rettime - calltime;
+	/* This is the trace point for systemtap scripts */
+	trace_jbd2_write_access_delay(jh2bh(jh), current, time_monitor);
+	/* We're going to collect some data to monitor */
+	jbd2_mon_collect_dgwa_stat(journal, jh2bh(jh), current, time_monitor);
 	if (unlikely(frozen_buffer))	/* It's usually NULL */
 		jbd2_free(frozen_buffer, bh->b_size);
  
  
============
include/linux/jbd2.h

@@ -684,6 +684,26 @@ jbd2_time_diff(unsigned long start, unsigned long end)
  
 #define JBD2_NR_BATCH	64
  
+/* Monitoring the fs-delay for per process in the manner of jbd2-operations*/
+struct jbd2_delay_stat {
+#define MAX_NUM_LEVEL 9 
+	dev_t dev;
+	pid_t pid;
+	long long time_delay;
+	spinlock_t init_lock;
+	spinlock_t show_lock;
+	u64 count[MAX_NUM_LEVEL];
+};
+
+extern struct jbd2_delay_stat jbd2_write_access_buffer;
+
+extern void jbd2_mon_proc_init(struct proc_dir_entry *parent, journal_t *journal);
+extern void jbd2_mon_proc_exit(struct proc_dir_entry *parent);
+extern void jbd2_mon_buffer_init(struct jbd2_delay_stat *buffer);
+extern void jbd2_mon_collect_dgwa_stat(struct journal_s *journal, struct buffer_head *bh, 
+				struct task_struct *ts, u64 time_monitor);
+extern void jbd2_mon_collect_atc_stat(struct journal_s *journal, u64 time_monitor);
+
 /**
  * struct journal_s - The journal_s type is the concrete type associated with
  *     journal_t.
@@ -994,6 +1014,10 @@ struct journal_s
  
 	/* Precomputed journal UUID checksum for seeding other checksums */
 	__u32 j_csum_seed;
+
+	/* Storing statistics to print monitor result */
+#define MAX_JBD2_MONITOR_BUFFER 2
+	struct jbd2_delay_stat *j_monitor_buffer[MAX_JBD2_MONITOR_BUFFER];
 };
  
 /*

============
fs/jbd2/time_delay_monitor.c 

#include <linux/jbd2.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>

/* Slot of each monitored function in journal->j_monitor_buffer[]. */
enum jbd2_mon_func_map {
	atc = 0,	/* add_transaction_credits() statistics */
	dgwa = 1,	/* do_get_write_access() statistics */
};

/*
 * Histogram bucket indices for a measured delay.  The names give the bucket
 * range in microseconds (k = 1000); T_COUNT is the number of buckets.
 */
enum time_count_t {
	T_0_50 = 0,
	T_50_100 = 1,
	T_100_200 = 2,
	T_200_500 = 3,
	T_500_1k = 4,
	T_1k_5k = 5,
	T_5k_10k = 6,
	T_10k_100k = 7,
	T_100k_BIGGER = 8,
	T_COUNT = 9,
};

/*
 * jbd2_mon_seq_show() - print the delay histogram held in @buffer into @s.
 * @s:      seq_file receiving the text
 * @buffer: statistics buffer whose count[] array is reported
 *
 * The counters are copied to the stack under buffer->init_lock, which is the
 * lock jbd2_mon_collect_stat() holds while it updates them, so the snapshot
 * cannot interleave with an update.  buffer->show_lock is never taken on the
 * update path and therefore would not exclude concurrent increments.
 * Printing happens after the lock is dropped.
 *
 * Return: always 0.
 */
static int jbd2_mon_seq_show(struct seq_file *s, struct jbd2_delay_stat *buffer)
{
	u64 count[T_COUNT];

	spin_lock(&buffer->init_lock);
	memcpy(count, buffer->count, sizeof(count));
	spin_unlock(&buffer->init_lock);

	seq_printf(s, "0-50us: \t%llu\n", count[T_0_50]);
	seq_printf(s, "50-100us: \t%llu\n", count[T_50_100]);
	seq_printf(s, "100us-200us: \t%llu\n", count[T_100_200]);
	seq_printf(s, "200-500us: \t%llu\n", count[T_200_500]);
	seq_printf(s, "500-1000us: \t%llu\n", count[T_500_1k]);
	seq_printf(s, "1-5ms: \t\t%llu\n", count[T_1k_5k]);
	seq_printf(s, "5-10ms: \t%llu\n", count[T_5k_10k]);
	seq_printf(s, "10-100ms: \t%llu\n", count[T_10k_100k]);
	seq_printf(s, "100ms-bigger: \t%llu\n", count[T_100k_BIGGER]);
	seq_putc(s, '\n');
	return 0;
}

/*
 * jbd2_mon_do_init_count() - account one delay sample in @buffer's histogram.
 * @time:   measured delay; the bucket limits below treat it as nanoseconds
 *          (the visible call sites pass a difference of two sched_clock()
 *          readings)
 * @buffer: statistics buffer whose matching count[] slot is incremented
 *
 * The limits are expressed in nanoseconds so that no 64-bit division is
 * needed: for integers, (time / 1000) < k is the same test as
 * time < k * 1000.  An open-coded u64 division can fail to link on 32-bit
 * kernel builds.
 *
 * No locking is done here; the caller is expected to serialise updates.
 */
static void jbd2_mon_do_init_count(u64 time,
				struct jbd2_delay_stat *buffer)
{
	/* Exclusive upper bound, in ns, of every bucket except the last one. */
	static const u64 upper_ns[T_COUNT - 1] = {
		[T_0_50]	= 50ULL * 1000,
		[T_50_100]	= 100ULL * 1000,
		[T_100_200]	= 200ULL * 1000,
		[T_200_500]	= 500ULL * 1000,
		[T_500_1k]	= 1000ULL * 1000,
		[T_1k_5k]	= 5ULL * 1000 * 1000,
		[T_5k_10k]	= 10ULL * 1000 * 1000,
		[T_10k_100k]	= 100ULL * 1000 * 1000,
	};
	enum time_count_t index;

	/* Falls through to T_100k_BIGGER when no bound is larger than @time. */
	for (index = T_0_50; index < T_100k_BIGGER; index++) {
		if (time < upper_ns[index])
			break;
	}
	buffer->count[index]++;
}

/*
 * Interface:
 * jbd2_mon_buffer_count_init() - classify one delay sample into @buffer.
 * @time:   measured delay, passed through unchanged
 * @buffer: statistics buffer to update
 *
 * Thin wrapper around jbd2_mon_do_init_count().  The callee returns void,
 * so it is called as a plain statement: "return <expression>;" is not
 * permitted in a function whose return type is void.
 */
static void jbd2_mon_buffer_count_init(u64 time,
			struct jbd2_delay_stat *buffer)
{
	jbd2_mon_do_init_count(time, buffer);
}

/*
 * Interface:
 * jbd2_mon_buffer_process_init() - record the origin of the latest sample.
 * @bh:           buffer head; the device number of its block device is stored
 * @ts:           task whose pid is stored
 * @time_monitor: measured delay, stored as-is in time_delay
 * @buffer:       statistics buffer to fill in
 *
 * Overwrites the previous dev/pid/time_delay values; the histogram counters
 * are not touched here.
 */
static void jbd2_mon_buffer_process_init(struct buffer_head *bh,
			struct task_struct *ts, u64 time_monitor,
			struct jbd2_delay_stat *buffer)
{
	buffer->dev = bh->b_bdev->bd_dev;
	buffer->time_delay = time_monitor;
	buffer->pid = ts->pid;
}

/*
 * jbd2_mon_collect_stat() - fold one delay sample into @buffer.
 * @bh:           buffer head the delay was measured for, or NULL
 * @ts:           task the delay was measured in, or NULL
 * @time_monitor: measured delay
 * @buffer:       statistics buffer to update
 *
 * Everything is done under buffer->init_lock.  The dev/pid/time_delay
 * fields are refreshed only when both @bh and @ts are supplied; the
 * histogram counter is bumped in every case.
 */
static void jbd2_mon_collect_stat(struct buffer_head *bh, struct task_struct *ts,
	   		u64 time_monitor, struct jbd2_delay_stat *buffer)
{
	spin_lock(&buffer->init_lock);

	if (bh && ts)
		jbd2_mon_buffer_process_init(bh, ts, time_monitor, buffer);

	jbd2_mon_buffer_count_init(time_monitor, buffer);

	spin_unlock(&buffer->init_lock);
}

/*
 * jbd2_mon_seq_open() - common open handler for the monitor proc files.
 * @inode: proc inode; its PDE data is read back as the journal_t
 * @file:  file being opened
 * @index: slot of journal->j_monitor_buffer[] this file reports
 * @ops:   seq_file operations to attach; only forwarded to seq_open(), so it
 *         is const-qualified
 *
 * On success the selected statistics buffer is stored in the seq_file's
 * ->private so the show callback can reach it.
 *
 * NOTE(review): the buffer pointer is not validated here; confirm that the
 * proc entries cannot be opened before j_monitor_buffer[] is populated.
 *
 * Return: the result of seq_open() (0 on success).
 */
static int jbd2_mon_seq_open(struct inode *inode, struct file *file,
				enum jbd2_mon_func_map index,
				const struct seq_operations *ops)
{
	journal_t *journal = PDE_DATA(inode);
	struct seq_file *m;
	int rc;

	rc = seq_open(file, ops);
	if (rc)
		return rc;

	/* seq_open() succeeded, so file->private_data is the seq_file. */
	m = file->private_data;
	m->private = journal->j_monitor_buffer[index];
	return 0;
}

/*
 * The first monitoring point:
 * delay histogram of add_transaction_credits().
 *
 * seq_file start callback.  The file consists of a single record: position 0
 * yields SEQ_START_TOKEN, any other position ends the sequence.
 */
static void *jbd2_mon_seq_atc_start(struct seq_file *s, loff_t *pos)
{
	if (*pos)
		return NULL;

	return SEQ_START_TOKEN;
}

/* seq_file stop callback: intentionally empty, start() acquires nothing. */
static void jbd2_mon_seq_atc_stop(struct seq_file *s, void *pos)
{
}

/*
 * seq_file next callback.
 *
 * The sequence holds a single record, so there is never a following one and
 * NULL is always returned.  *pos is still advanced: the seq_file iterator
 * contract requires next() to move the position, and a later start() call
 * must see a non-zero position so that it ends the sequence instead of
 * emitting the record again.
 */
static void *jbd2_mon_seq_atc_next(struct seq_file *s, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

/*
 * seq_file show callback: prints the histogram stored in s->private when
 * handed the start token and prints nothing for any other record.
 *
 * Return: always 0.
 */
static int jbd2_mon_seq_atc_show(struct seq_file *s, void *v)
{
	struct jbd2_delay_stat *stat = s->private;

	if (v == SEQ_START_TOKEN)
		jbd2_mon_seq_show(s, stat);

	return 0;
}

/* seq_file iterator for the add_transaction_credits() histogram file. */
static struct seq_operations jbd2_mon_seq_atc_ops = {
	.start	= jbd2_mon_seq_atc_start,
	.next	= jbd2_mon_seq_atc_next,
	.stop	= jbd2_mon_seq_atc_stop,
	.show	= jbd2_mon_seq_atc_show,
};

/*
 * open handler of the add_transaction_credits() histogram file: binds the
 * "atc" statistics slot and the matching seq operations.
 */
static int jbd2_mon_seq_atc_open(struct inode *inode, struct file *file)
{
	return jbd2_mon_seq_open(inode, file, atc, &jbd2_mon_seq_atc_ops);
}

/* File operations of the add_transaction_credits() histogram proc file. */
static const struct file_operations jbd2_mon_seq_atc_fops = {
	.open		= jbd2_mon_seq_atc_open,
	.release	= seq_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};

/*
 * jbd2_mon_collect_atc_stat() - record one add_transaction_credits() delay.
 * @journal:      journal whose "atc" statistics slot is updated
 * @time_monitor: measured delay
 *
 * Called from add_transaction_credits() in transaction.c.  No buffer head or
 * task is passed along, so only the histogram counter is updated.
 */
void jbd2_mon_collect_atc_stat(struct journal_s *journal, u64 time_monitor)
{
	jbd2_mon_collect_stat(NULL, NULL, time_monitor,
			      journal->j_monitor_buffer[atc]);
}

/*
 * The second monitoring point:
 * delay histogram of do_get_write_access().
 *
 * seq_file start callback.  The file consists of a single record: position 0
 * yields SEQ_START_TOKEN, any other position ends the sequence.
 */
static void *jbd2_mon_seq_dgwa_start(struct seq_file *s, loff_t *pos)
{
	if (*pos)
		return NULL;

	return SEQ_START_TOKEN;
}

/* seq_file stop callback: intentionally empty, start() acquires nothing. */
static void jbd2_mon_seq_dgwa_stop(struct seq_file *s, void *pos)
{
}

/*
 * seq_file next callback.
 *
 * The sequence holds a single record, so there is never a following one and
 * NULL is always returned.  *pos is still advanced: the seq_file iterator
 * contract requires next() to move the position, and a later start() call
 * must see a non-zero position so that it ends the sequence instead of
 * emitting the record again.
 */
static void *jbd2_mon_seq_dgwa_next(struct seq_file *s, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

/*
 * seq_file show callback: prints the histogram stored in s->private when
 * handed the start token and prints nothing for any other record.
 *
 * Return: always 0.
 */
static int jbd2_mon_seq_dgwa_show(struct seq_file *s, void *v)
{
	struct jbd2_delay_stat *stat = s->private;

	if (v == SEQ_START_TOKEN)
		jbd2_mon_seq_show(s, stat);

	return 0;
}

/* seq_file iterator for the do_get_write_access() histogram file. */
static struct seq_operations jbd2_mon_seq_dgwa_ops = {
	.start	= jbd2_mon_seq_dgwa_start,
	.next	= jbd2_mon_seq_dgwa_next,
	.stop	= jbd2_mon_seq_dgwa_stop,
	.show	= jbd2_mon_seq_dgwa_show,
};

/*
 * open handler of the do_get_write_access() histogram file: binds the
 * "dgwa" statistics slot and the matching seq operations.
 */
static int jbd2_mon_seq_dgwa_open(struct inode *inode, struct file *file)
{
	return jbd2_mon_seq_open(inode, file, dgwa, &jbd2_mon_seq_dgwa_ops);
}

/* File operations of the do_get_write_access() histogram proc file. */
static const struct file_operations jbd2_mon_seq_dgwa_fops = {
	.open		= jbd2_mon_seq_dgwa_open,
	.release	= seq_release,
	.read		= seq_read,
	.llseek		= seq_lseek,
};

/*
 * jbd2_mon_collect_dgwa_stat() - record one do_get_write_access() delay.
 * @journal:      journal whose "dgwa" statistics slot is updated
 * @bh:           buffer head the delay was measured for
 * @ts:           task the delay was measured in
 * @time_monitor: measured delay
 *
 * Called from do_get_write_access() in transaction.c.  When both @bh and @ts
 * are non-NULL the device, pid and delay of this sample are stored as well.
 */
void jbd2_mon_collect_dgwa_stat(struct journal_s *journal, struct buffer_head *bh,
			struct task_struct *ts, u64 time_monitor)
{
	jbd2_mon_collect_stat(bh, ts, time_monitor,
			      journal->j_monitor_buffer[dgwa]);
}

/*
 * jbd2_mon_buffer_init() - bring a freshly allocated statistics buffer into
 * a defined state.
 * @buffer: buffer to initialise
 *
 * Initialises both spinlocks and clears every data field.  The buffers are
 * obtained with kmalloc(), which does not zero memory, so dev, pid and
 * time_delay are cleared here too rather than left indeterminate until the
 * first sample arrives.
 */
void jbd2_mon_buffer_init(struct jbd2_delay_stat *buffer)
{
	int i;

	spin_lock_init(&buffer->init_lock);
	spin_lock_init(&buffer->show_lock);

	buffer->dev = 0;
	buffer->pid = 0;
	buffer->time_delay = 0;
	for (i = 0; i < MAX_NUM_LEVEL; i++)
		buffer->count[i] = 0;
}

/*
 * jbd2_mon_proc_init() - create the two delay-monitor proc files in @parent.
 * @parent:  proc directory receiving the entries
 * @journal: journal attached to each entry as private data; the open
 *           handlers read it back with PDE_DATA()
 *
 * Called from jbd2_stats_proc_init() in journal.c.  The results of
 * proc_create_data() are not checked.
 * NOTE(review): mode 0 is passed; presumably procfs then applies its default
 * permissions - confirm.
 */
void jbd2_mon_proc_init(struct proc_dir_entry *parent, journal_t *journal)
{
	proc_create_data("fsdelay_do_get_write_access",
			 0, parent, &jbd2_mon_seq_dgwa_fops, journal);

	proc_create_data("fsdelay_add_transaction_credits",
			 0, parent, &jbd2_mon_seq_atc_fops, journal);
}

/*
 * jbd2_mon_proc_exit() - remove the two delay-monitor proc files from @parent.
 * @parent: proc directory the entries were created in
 *
 * Counterpart of jbd2_mon_proc_init(); the entries are removed in the order
 * they were created.
 */
void jbd2_mon_proc_exit(struct proc_dir_entry *parent)
{
	remove_proc_entry("fsdelay_do_get_write_access",
			  parent);

	remove_proc_entry("fsdelay_add_transaction_credits",
			  parent);
}

===systemtap部分====
include/trace/events/jbd2.h

@@ -10,6 +10,28 @@
 struct transaction_chp_stats_s;
 struct transaction_run_stats_s;
  
+TRACE_EVENT(jbd2_write_access_delay,
+
+    TP_PROTO(struct buffer_head *bh, struct task_struct *thread, long long time_monitor),
+
+	TP_ARGS(bh, thread, time_monitor),
+
+    TP_STRUCT__entry(
+        __field(        dev_t,  dev                     )
+        __field(        pid_t,  pid)
+        __field(        long long,  time_monitor)
+    ),
+
+    TP_fast_assign(
+        __entry->dev            = bh->b_bdev->bd_dev;
+        __entry->pid            = thread->pid;
+		__entry->time_monitor	= time_monitor;
+    ),
+
+    TP_printk("dev %d,%d pid %d time %lld",
+          MAJOR(__entry->dev), MINOR(__entry->dev), __entry->pid, __entry->time_monitor)
+);
+
 TRACE_EVENT(jbd2_checkpoint,
  
 	TP_PROTO(journal_t *journal, int result),

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章