(写得不好,欢迎善意拍砖)
转载请注明出处:http://blog.csdn.net/fengningning/article/details/8214450
Emacs的历史悠久和强大自不必多说,它内建了Elisp(Lisp的一种方言)的解释引擎。它对很多后来的语言如Ruby(参见Ruby之父的How Emacs changed my life)产生了重大的影响。诸多计算机界的大拿对它青睐有加,如Donald Ervin Knuth。Coders at Work一书中有多处提到了Emacs。
Emacs是一个伟大的软件!
我在这里进行源码剖析的是GNU Emacs,而不是Emacs的其他实现(如XEmacs,它们一样很强悍)。GNU Emacs的体系架构初探,可以参见 《程序员杂志》 2003.9.1中的'GNU Emacs体系架构评论'一文,洪峰老师的评论非常犀利,见解独到。
与硬件直接作用的GNU Emacs模块(如显示模块)采用C语言编写,而绝大多数文本编辑模块则统统是利用Lisp语言来编写,据统计,Lisp占的代码量有75%之多(不是太准确的数字)。从Lisp中可以调用C语言编写的内建函数,我觉得所有C语言内建函数中最神秘的当属'eval',不管认同与否,因为eval几乎就是Elisp的解释引擎(解释引擎先进行read,再进行eval)。因此,首先窥探一下eval函数,应该是意义重大的。
时间有限,我只能先提及eval的冰山一角,它的具体逻辑还得慢慢分析。不管怎么样,先将那些神秘的宏展开,露出它的本色,再品味一下涉及到的数据结构,也是一件很爽快的事。
(注:其实我应该先说一下Lisp_Object的,但~~先来eval,我喜欢这个函数!)
// File: emacs-24.2/src/eval.c
/* Feval: the C primitive registered under the Lisp name "eval"
   (minargs 1, maxargs 2).  It evaluates FORM through eval_sub while
   Qinternal_interpreter_environment is bound according to LEXICAL, and
   returns whatever unbind_to returns.  */
DEFUN ("eval", Feval, Seval, 1, 2, 0,
doc: /* Evaluate FORM and return its value.
If LEXICAL is t, evaluate using lexical scoping. */)
(Lisp_Object form, Lisp_Object lexical)
{
/* Taken before specbind runs; this is the value later handed to unbind_to.  */
int count = SPECPDL_INDEX ();
/* The test is NILP, so any non-nil LEXICAL (not only t) selects
   Fcons (Qt, Qnil); a nil LEXICAL binds the variable to Qnil.  */
specbind (Qinternal_interpreter_environment,
NILP (lexical) ? Qnil : Fcons (Qt, Qnil));
/* eval_sub (form) is evaluated as an argument, then unbind_to receives
   COUNT and that value.
   NOTE(review): presumably unbind_to undoes the specbind above and passes
   the value through -- confirm against its definition.  */
return unbind_to (count, eval_sub (form));
}
// 先来看一下DEFUN的定义
/* DEFUN expands to three pieces, in this order:
     1. a prototype for FNNAME, whose parameter list comes from the
        DEFUN_ARGS_<maxargs> macro;
     2. a static, DECL_ALIGN-ed `struct Lisp_Subr' named SNAME, initialized
        positionally with PVEC_SUBR, the function pointer (stored in the
        union member a<maxargs>), MINARGS, MAXARGS, LNAME, INTSPEC and 0;
     3. the tokens `Lisp_Object FNNAME', which begin the function
        definition; the caller supplies the parameter list and body.
   The DOC parameter does not appear in the expansion; the last
   initializer is the literal 0.
   (The unmatched `#endif' that followed this excerpt belonged to a
   conditional whose opening directive is not part of the excerpt, so it
   is not reproduced here.)  */
#define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc) \
  Lisp_Object fnname DEFUN_ARGS_ ## maxargs ; \
  static DECL_ALIGN (struct Lisp_Subr, sname) = \
    { PVEC_SUBR, \
      { .a ## maxargs = fnname }, \
      minargs, maxargs, lname, intspec, 0}; \
  Lisp_Object fnname
// 再看一下DEFUN是怎么被调用的
DEFUN ("eval", Feval, Seval, 1, 2, 0,
doc: /* Evaluate FORM and return its value.
If LEXICAL is t, evaluate using lexical scoping. */)
// 展开之后得到
Lisp_Object Feval DEFUN_ARGS_2 ;
static DECL_ALIGN (struct Lisp_Subr, Seval) =
{
PVEC_SUBR,
{ .a2 = Feval},
1, 2, "eval", 0, 0
};
Lisp_Object Feval
// 后面紧跟着函数体,由此可见这个宏虽然做了很多其他工作,但最基本的,它定义了以 fnname 为名称的函数
// 展开后的代码的第一行,究竟做了什么?
Lisp_Object Feval DEFUN_ARGS_2 ;
// 还是先看一下 DEFUN_ARGS_2 这个宏
#define DEFUN_ARGS_2 (Lisp_Object, Lisp_Object)
// 继续展开
Lisp_Object Feval (Lisp_Object, Lisp_Object) ;
// 终于明白了,原来这行只是做了函数的声明而已
// 下面这个语句块似乎有点复杂,不急,我们先把不太直观的DECL_ALIGN给解决掉
static DECL_ALIGN (struct Lisp_Subr, Seval) =
{
PVEC_SUBR,
{ .a2 = Feval},
1, 2, "eval", 0, 0
};
/* Declare VAR with type TYPE, attaching GCC's __aligned__ attribute with
   the value 1 << GCTYPEBITS.  GCTYPEBITS is defined elsewhere and is not
   shown in this excerpt.  */
# define DECL_ALIGN(type, var) \
type __attribute__ ((__aligned__ (1 << GCTYPEBITS))) var
// 我想,对于宏的展开我们已经轻车熟路了,只需要懂得语法,小心行事即可
static struct Lisp_Subr __attribute__ ((__aligned__ (1 << GCTYPEBITS))) Seval
{
PVEC_SUBR,
{ .a2 = Feval},
1, 2, "eval", 0, 0
};
// 原来,DECL_ALIGN真的只是做了字节对齐的操作,虽然字节对齐在C编程中非常重要,如果不注意会惹出许多乱子,但我们现在太关注细节只会让自己迷失方向。忽略它!
static struct Lisp_Subr Seval
{
PVEC_SUBR,
{ .a2 = Feval},
1, 2, "eval", 0, 0
};
// 现在清爽多了,这个语句的作用是定义一个变量,这个变量是struct Lisp_Subr类型
// PVEC_SUBR 是一个枚举常量,其值为 0x4000,类似的还有PVEC_CHAR_TABLE,暂且把它当做是一个flag,丢在一边先不管。
// 还有一个叫做 Lisp_Subr的结构体没弄清楚
/* This structure describes a built-in function.
It is generated by the DEFUN macro only.
defsubr makes it into a Lisp object.
This type is treated in most respects as a pseudovector,
but since we never dynamically allocate or free them,
we don't need a struct vectorlike_header and its 'next' field. */
/* Descriptor for one built-in function.  DEFUN initializes an instance
   positionally, so the field order below must match the initializer
   order used in that macro.  */
struct Lisp_Subr
{
/* First field; DEFUN stores PVEC_SUBR here.  */
EMACS_INT size;
/* Pointer to the C implementation.  Exactly one member is meaningful for
   a given function; DEFUN selects it by pasting its MAXARGS argument
   onto `a' (a0 ... a8, aUNEVALLED, aMANY).  */
union {
Lisp_Object (*a0) (void);
Lisp_Object (*a1) (Lisp_Object);
Lisp_Object (*a2) (Lisp_Object, Lisp_Object);
Lisp_Object (*a3) (Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a4) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a5) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a6) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a7) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
Lisp_Object (*a8) (Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object, Lisp_Object);
/* Used when MAXARGS is UNEVALLED: receives a single Lisp_Object, the
   list of unevaluated arguments.  */
Lisp_Object (*aUNEVALLED) (Lisp_Object args);
/* Used when MAXARGS is MANY: receives the argument count followed by
   the address of a vector of Lisp_Objects holding the values.  */
Lisp_Object (*aMANY) (ptrdiff_t, Lisp_Object *);
} function;
/* Filled from DEFUN's MINARGS and MAXARGS.  */
short min_args, max_args;
/* Filled from DEFUN's LNAME, the function's name in Lisp.  */
const char *symbol_name;
/* Filled from DEFUN's INTSPEC.  */
const char *intspec;
/* DEFUN initializes this field to 0; its DOC argument is not used in
   the macro expansion.  */
const char *doc;
};
// 一堆的注释,一堆放在联合体里面的函数指针,还有几个貌似眼熟的字段。
// 从注释中可以看出,这个结构体代表了一个Emacs中用C语言实现的内部函数,里面的信息包含了Lisp调用时需要的信息(如果不是全部的话)
// 回过头来看一下DEFUN的定义
#define DEFUN(lname, fnname, sname, minargs, maxargs, intspec, doc)
// 可以找到如下的对应关系
/*
sname -> struct Lisp_Subr结构体变量名
lname -> Lisp_Subr.symbol_name
fnname -> Lisp_Subr.function
minargs -> Lisp_Subr.min_args
maxargs -> Lisp_Subr.max_args
intspec -> Lisp_Subr.intspec
doc -> Lisp_Subr.doc(注意:在上面的宏定义中该字段被初始化为0,doc参数本身并未出现在展开结果里)
*/
// 看来各个参数都已经归位了
// 最后,我们似乎犯了一个严重的错误,没有仔细阅读 DEFUN这个宏的代码注释。其实Emacs的源代码是有着非常好的注释的。不过现在看还来得及:
/* Define a built-in function for calling from Lisp.
`lname' should be the name to give the function in Lisp,
as a null-terminated C string.
`fnname' should be the name of the function in C.
By convention, it starts with F.
`sname' should be the name for the C constant structure
that records information on this function for internal use.
By convention, it should be the same as `fnname' but with S instead of F.
It's too bad that C macros can't compute this from `fnname'.
`minargs' should be a number, the minimum number of arguments allowed.
`maxargs' should be a number, the maximum number of arguments allowed,
or else MANY or UNEVALLED.
MANY means pass a vector of evaluated arguments,
in the form of an integer number-of-arguments
followed by the address of a vector of Lisp_Objects
which contains the argument values.
UNEVALLED means pass the list of unevaluated arguments
`intspec' says how interactive arguments are to be fetched.
If the string starts with a `(', `intspec' is evaluated and the resulting
list is the list of arguments.
If it's a string that doesn't start with `(', the value should follow
the one of the doc string for `interactive'.
A null string means call interactively with no arguments.
`doc' is documentation for the user. */
/* This version of DEFUN declares a function prototype with the right
arguments, so we can catch errors with maxargs at compile-time. */
未完待续