代碼是林樂的,當然我不知道全部都是他寫的還是他修改的。我看了這份代碼,的確是我所找到的裏面最好的一份了。分析表的建立都由程序完成,這纔是這個分析器所本來應該有的樣子啊。
基本的流程我是看懂了,不過還需要詳細分析。代碼是用C寫的,但是有些東西我沒遇到過,比如vector,set之類的,我可以先找點C語言的資料瞭解一下,找不到的話當面找林樂問問。這些對於這個程序而言是個小節,只是我以前沒想到C語言可以寫出這個樣子。
以下是代碼:
LL.cpp:
#include <iostream>
#include <vector>
#include <string>
#include <sstream>
#include <set>
#include "LL.h"
using namespace std;
#define GRAMMARFILE "Grammar.txt"
#define CODEFILE "Code2.txt"
//#define PRINTFIRST "d://first.txt"
//#define PRINTFOLLOW "d://follow.txt"
//#define PRINTTABLE "d://1.txt"
#define PRINTERROR
//#define PRINTPROGRESS
int IsLetter(char c)
{
return ( (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') );
}
int IsNumber(char c)
{
return (c >= '0' && c <= '9');
}
int IsBlank(char c)
{
return (c == ' ' || c == '/t' || c == '/n' );
}
string Keyword[] = {"program","var","array","of","integer","real","function",
"procedure","begin","end","if","then","else","while",
"do","not","or","div","mod","and"};
const int KEYWORD_NUM = 20;
vector<GRAMMAR> Grammar;
vector<string> IdToName;
vector<int> IsTerminal;
vector< set<int> > First;
vector< set<int> > Follow;
vector< vector<int> > Table;
char Code[10000];
int AtLine = 1;
int Error = 0;
static int p = 0;
int HashName(string str)
{
int i;
set<int> e;
for(i=0;i<IdToName.size();i++)
{
if( str == IdToName[i] )
return i;
}
IdToName.push_back(str);
IsTerminal.push_back( YES );
First.push_back(e);
Follow.push_back(e);
return IdToName.size() - 1;
}
int NextToken(string &ret)
{
int flag;
#define GOTOSTATUS(x) {status=(x);ret+=Code[p];p++;}
do
{
flag = 0;
while( IsBlank(Code[p]) ) //忽略空白符
{
if( Code[p] == '/n' ) AtLine++;
flag = 1;
p++;
}
if( Code[p] == '{' ) //忽略註釋
{
while( Code[p]!='/0' && Code[p]!='}' )
{
if( Code[p] == '/n' ) AtLine++;
p++;
}
if( Code[p] == '}' ) p++;
flag = 1;
}
}while(flag);
if( Code[p] == '/0' ) return EOF; //文件結束
int status = 0;
ret = "";
while(1)
{
switch( status )
{
case 0:
switch( Code[p] )
{
case '(':
GOTOSTATUS(1);
break;
case ')':
GOTOSTATUS(2);
break;
case '[':
GOTOSTATUS(3);
break;
case ']':
GOTOSTATUS(4);
break;
case '=':
GOTOSTATUS(5);
break;
case '*':
GOTOSTATUS(6);
break;
case '/':
GOTOSTATUS(7);
break;
case ';':
GOTOSTATUS(8);
break;
case ':':
GOTOSTATUS(9);
break;
case '.':
GOTOSTATUS(11);
break;
case '<':
GOTOSTATUS(13);
break;
case '>':
GOTOSTATUS(16);
break;
case '+':
GOTOSTATUS(19);
break;
case '-':
GOTOSTATUS(20);
break;
case '$':
GOTOSTATUS(27);
break;
case ',':
GOTOSTATUS(28);
break;
default:
if( IsLetter(Code[p]) )
{
GOTOSTATUS(18);
}
else if( IsNumber(Code[p]) )
{
GOTOSTATUS(21);
}
else
{
char t[100];
sprintf(t,"0x%x",Code[p]);
ret = t;
p ++;
return ERROR;
}
}
break;
case 1: // (
case 2: // )
case 3: // [
case 4: // ]
case 5: // =
case 6: // *
case 7: // /
case 8: // ;
case 19: // +
case 20: // -
case 27: // $
case 10: // :=
case 12: // ..
case 14: // <=
case 15: // <>
case 17: // >=
case 28: // ,
return HashName(ret); // 返回相應代碼
break;
case 9:
if( Code[p] == '=' )
{
GOTOSTATUS(10);
}
else
{
return HashName(":"); // :
}
break;
case 11:
if( Code[p] == '.' )
{
GOTOSTATUS(12);
}
else
{
return HashName("."); // .
}
break;
case 13:
if( Code[p] == '=' )
{
GOTOSTATUS(14);
}
else if( Code[p] == '>' )
{
GOTOSTATUS(15);
}
else
{
return HashName("<"); // <
}
break;
case 16:
if( Code[p] == '=' )
{
GOTOSTATUS(17);
}
else
{
return HashName(">"); // >
}
break;
case 18:
if( IsLetter(Code[p]) || IsNumber(Code[p]) )
{
GOTOSTATUS(18);
}
else
{
int i;
for(i=0;i<KEYWORD_NUM;i++) //判斷是否爲關鍵字
{
if( Keyword[i] == ret )
{
return HashName(ret);
}
}
return HashName("id"); // ID;
}
break;
case 21:
if( IsNumber(Code[p]) )
{
GOTOSTATUS(21);
}
else if( Code[p] == '.' )
{
GOTOSTATUS(22);
}
else if( Code[p] == 'E' || Code[p] == 'e' )
{
GOTOSTATUS(24);
}
else
{
return HashName("num"); // NUM;
}
break;
case 22:
if( IsNumber(Code[p]) )
{
GOTOSTATUS(23);
}
else
{
ret = "讀取浮點數失敗,小數點後應跟有數字";
return ERRNUM;
}
break;
case 23:
if( IsNumber(Code[p]) )
{
GOTOSTATUS(23);
}
else if( Code[p] == 'e' || Code[p] == 'E' )
{
GOTOSTATUS(24);
}
else
{
return HashName("num"); // NUM
}
break;
case 24:
if( Code[p] == '+' || Code[p] == '-' )
{
GOTOSTATUS(25);
}
else if( IsNumber(Code[p]) )
{
GOTOSTATUS(26);
}
else
{
ret = "讀取浮點數失敗,指數符號E後不正確";
return ERRNUM;
}
break;
case 25:
if( IsNumber(Code[p]) )
{
GOTOSTATUS(26);
}
else
{
ret = "讀取浮點數失敗,指數部分正負號後應跟有數字";
return ERRNUM;
}
break;
case 26:
if( IsNumber(Code[p]) )
{
GOTOSTATUS(26);
}
else
{
return HashName("num"); //NUM
}
break;
default:
ret = "系統錯誤";
return ERROR;
}
}
return OK;
}
void PrintError(string str)
{
#ifdef PRINTERROR
Error ++;
printf("Error(%d):%s/n",Error,str.data());
#endif
}
int GetToken(string &ret)
{
int v;
char tmp[1000];
while(1)
{
v = NextToken(ret);
if( v == ERROR )
{
sprintf(tmp,"在第%d行,發現未識別符號 %s ",AtLine,ret.data());
PrintError(tmp);
continue;
}
else if( v == ERRNUM )
{
sprintf(tmp,"在第%d行,%s",AtLine,ret.data());
PrintError(tmp);
return HashName("num");
}
else
{
return v;
}
}
return OK;
}
int LLAnalyze()
{
int stack[MAX_STACK];
int ns = 0;
int type;
int acc = HashName("$");
int id = 1;
#define GETTOKEN { type = GetToken(ret); if( type == EOF ){ PrintError("編譯失敗,文件提早結束");return ERROR;} }
string ret;
stack[ns++] = acc;
stack[ns++] = Grammar[0].Left;
GETTOKEN;
while(1)
{
if( ns == 0 )
{
printf("發生錯誤:分析棧爲空,語法分析意外結束/n");
return ERROR;
}
int top = stack[--ns];
if( top == acc && type == acc )
{
break;
}
else if( IsTerminal[top] )
{
if( top == type )
{
GETTOKEN;
}
else
{
char tmp[100];
sprintf(tmp,"在第%d行,發生錯誤,輸入期望爲%s 但讀入%s/n",AtLine,IdToName[top].data(),ret.data());
PrintError(tmp);
}
}
else
{
if( Table[top][type] == -1 )
{
int i;
char tmp[100];
sprintf(tmp,"在第%d行,發生錯誤,非期望的符號 %s/n",AtLine,ret.data());
PrintError(tmp);
ns ++;
GETTOKEN;
}
else if( Table[top][type] == SYNC )
{
char tmp[100];
sprintf(tmp,"在第%d行,發生錯誤,非期望的符號 %s ,採用下一條產生式/n",AtLine,ret.data());
PrintError(tmp);
}
else
{
int g = Table[top][type] ;
int i;
#ifdef PRINTPROGRESS
printf("%d.使用產生式 %d : %s ->",id++, g , IdToName[Grammar[g].Left].data());
for(i=0;i<Grammar[g].Right.size();i++)
printf(" %s",IdToName[Grammar[g].Right[i]].data());
printf("/n");
#endif
if( Grammar[g].Right.size() != 1 || Grammar[g].Right[0] != HashName("@") )
{
for(i=Grammar[g].Right.size()-1;i>=0;i--)
{
stack[ns++] = Grammar[g].Right[i];
}
}
}
}
}
if( Error == 0 )
{
printf("語法分析成功結束/n");
return OK;
}
else
{
printf("語法分析失敗/n");
return ERROR;
}
}
int AddSet(set<int> &to,set<int> &from,int WithEmpty = 1) //返回to集合是否被修改,WithEmpty爲1時也加入空,否則不加入
{
int oldsize = to.size();
int empty = HashName("@");
set<int>::iterator it;
for(it=from.begin();it!=from.end();it++)
{
if( WithEmpty == 0 && (*it) == empty )
continue;
to.insert(*it);
}
if( oldsize == to.size() )
return 0;
return 1;
}
int HasEmpty(set<int> &s) //判斷集合是否包含空
{
set<int>::iterator it;
it = s.find( HashName("@") ); //查找@
if( it == s.end() )
return 0;
return 1;
}
int MakeFirst()
{
int flag;
int i,j;
for(i=0;i<IsTerminal.size();i++) // 非終結符自身即爲First
{
if( IsTerminal[i] )
First[i].insert( i );
}
do
{
flag = 0;
for(i=0;i<Grammar.size();i++)
{
for(j=0;j<Grammar[i].Right.size();j++)
{
if(AddSet(First[Grammar[i].Left],First[Grammar[i].Right[j]]))
{
flag = 1;
}
if( !HasEmpty( First[ Grammar[i].Right[j]] ) ) //若不包含空則跳出
break;
}
}
}while(flag);
#ifdef PRINTFIRST
FILE *fp;
if( strcmp(PRINTFIRST,"stdout") == 0 )
fp = stdout;
else
fp = fopen(PRINTFIRST,"w");
if( fp != NULL )
{
fprintf(fp,"=============First=============/n");
for(i=0;i<IsTerminal.size();i++)
{
fprintf(fp,"%s/t-",IdToName[i].data());
set<int>::iterator it;
for(it=First[i].begin();it!=First[i].end();it++)
{
fprintf(fp," %s",IdToName[ *it ].data());
}
fprintf(fp,"/n");
}
if( fp != stdout )
fclose(fp);
}
#endif
return OK;
}
int MakeFollow() //生成Follow集合
{
int flag;
int i,j,k;
Follow[ Grammar[0].Left ].insert( HashName("$") ); //插入 $
do
{
flag = 0;
for(i=0;i<Grammar.size();i++)
{
for(j=0;j<Grammar[i].Right.size();j++)
{
if( !IsTerminal[ Grammar[i].Right[j] ] )
{
for(k=j+1;k<Grammar[i].Right.size();k++)
{
if( AddSet(Follow[ Grammar[i].Right[j] ], First[ Grammar[i].Right[k] ],0) )
flag = 1;
if( !HasEmpty(First[ Grammar[i].Right[k] ]) ) //若不包含終結符則跳出
break;
}
if( k >= Grammar[i].Right.size() ) //如果右部都能推出空
{
if( AddSet(Follow[ Grammar[i].Right[j] ],Follow[ Grammar[i].Left ],0) )
flag = 1;
}
}
}
}
}while(flag);
#ifdef PRINTFOLLOW
FILE *fp;
if( strcmp(PRINTFOLLOW,"stdout") == 0 )
fp = stdout;
else
fp = fopen(PRINTFOLLOW,"w");
if( fp != NULL )
{
fprintf(fp,"=============Follow=============/n");
for(i=0;i<IsTerminal.size();i++)
{
if( !IsTerminal[i] )
{
fprintf(fp,"%s/t-",IdToName[i].data());
set<int>::iterator it;
for(it=Follow[i].begin();it!=Follow[i].end();it++)
{
fprintf(fp," %s",IdToName[*it].data());
}
fprintf(fp,"/n");
}
}
if( fp != stdout )
fclose(fp);
}
#endif
return OK;
}
void PrintTable() //輸出產生式和轉換表
{
#ifdef PRINTTABLE
FILE *fp;
if( strcmp(PRINTTABLE,"stdout") == 0 )
fp = stdout;
else fp = fopen(PRINTTABLE,"w");
if( fp != NULL )
{
int i,j;
fprintf(fp,"產生式列表爲:/n");
for(i=0;i<Grammar.size();i++)
{
fprintf(fp,"%d:/t%s ->",i,IdToName[Grammar[i].Left].data());
for(j=0;j<Grammar[i].Right.size();j++)
fprintf(fp," %s",IdToName[Grammar[i].Right[j]].data());
fprintf(fp,"/n");
}
fprintf(fp,"/n/n轉換表格爲:/n");
fprintf(fp," ,");
for(i=1;i<IsTerminal.size();i++)
{
if( IsTerminal[i] )
{
if( IdToName[i] == "," )
fprintf(fp,"%-6s,","逗號");
else
fprintf(fp,"%-6s,",IdToName[i].data());
}
}
fprintf(fp,"/n");
for(i=0;i<IsTerminal.size();i++)
{
if( !IsTerminal[i] )
{
fprintf(fp,"%-6s,",IdToName[i].data());
for(j=1;j<IsTerminal.size();j++)
{
if( IsTerminal[j] )
{
if( Table[i][j] == -2 )
fprintf(fp,"%-6s,","sync");
else if( Table[i][j] != -1 )
fprintf(fp,"%-6d,",Table[i][j]);
else fprintf(fp," ,");
}
}
fprintf(fp,"/n");
}
}
if( fp != stdout )
fclose(fp);
}
#endif
}
int MakeSync() //在表中填入同步符號,對於非終結符 A 則它的同步符號爲 Follow{A}
{
int i;
for(i=0;i<IsTerminal.size();i++)
{
if( IsTerminal[i] == 0 )
{
set<int>::iterator it;
for(it=Follow[i].begin();it!=Follow[i].end();it++)
{
int k = *it;
if( Table[i][k] == -1 )
{
Table[i][k] = SYNC;
}
}
}
}
return OK;
}
int MakeTable()
{
int i,j;
vector<int> v(IsTerminal.size(),-1);
for(i=0;i<IsTerminal.size();i++)
{
Table.push_back(v);
}
for(i=0;i<Grammar.size();i++)
{
int x = Grammar[i].Left;
set<int>::iterator it;
for(j=0;j<Grammar[i].Right.size();j++)
{
int k = Grammar[i].Right[j];
for(it=First[k].begin();it!=First[k].end();it++)
{
int y = *it;
if( Table[x][y] != -1 && Table[x][y] != i )
{
printf("Conflict %s - %s Table[x][y] = %d /n",IdToName[x].data(),IdToName[y].data(),Table[x][y]);
}
Table[x][y] = i;
}
if( !HasEmpty(First[k]) ) //如果這部分不包含空,則找下部分,否則退出
break;
}
if( j >= Grammar[i].Right.size() ) //是否整個產生式都可能包含空
{
for(it=Follow[x].begin();it!=Follow[x].end();it++)
{
int y = *it;
if( Table[x][y] != -1 && Table[x][y] != i)
{
printf("Conflict %s - %s/n",IdToName[x].data(),IdToName[y].data());
}
Table[x][y] = i;
}
}
}
MakeSync();
PrintTable();
return OK;
}
int ReadGrammar(char *filename)
{
char tmp[1000];
FILE *fp = fopen(filename,"r");
if( fp == NULL ) return ERROR;
HashName("@"); // @爲 0
HashName("$"); // $爲 1
while( fgets(tmp,1000,fp) )
{
if( tmp[0] == '/' )
continue;
if( strlen(tmp) < 3 )
continue;
string t = tmp;
istringstream strcin(t);
GRAMMAR g;
strcin >> t;
g.Left = HashName(t);
IsTerminal[ g.Left ] = 0;
strcin >> t; //讀掉 ->
while( strcin >> t )
{
g.Right.push_back( HashName(t) );
}
Grammar.push_back(g);
}
fclose(fp);
//int i,j;
//for(i=0;i<Grammar.size();i++)
//{
// printf("%s ->",IdToName[Grammar[i].Left].data());
// for(j=0;j<Grammar[i].Right.size();j++)
// {
// printf(" %s",IdToName[Grammar[i].Right[j]].data());
// }
// printf("/n");
//}
//for(i=0;i<IdToName.size();i++)
//{
// printf("%s====%d/n",IdToName[i].data(),IsTerminal[i]);
//}
return OK;
}
int ReadInput()
{
int i = 0;
char c;
while( (c=getchar()) != EOF )
Code[i++] = c;
Code[i++] = '$';
Code[i++] = '/0';
return OK;
}
int main()
{
freopen(CODEFILE,"r",stdin);
ReadGrammar(GRAMMARFILE);
MakeFirst();
MakeFollow();
MakeTable();
ReadInput();
LLAnalyze();
return 0;
}
LL.h:
#ifndef __const_h__
#define __const_h__
#include <string>
#include <vector>
using namespace std;
#define MAX_STACK 1000
#define ERROR -2
#define ERRNUM -3
#define OK 1
#define SYNC (-2)
#define YES 1
#define NO 0
typedef struct
{
int Left;
vector<int> Right;
}GRAMMAR;
#endif
Grammar.txt:
PP -> P
P -> program id ( IL ) ; DE SDS CS .
//IL -> id
//IL -> IL , id
IL -> id IL1
IL1 -> , id IL1
IL1 -> @
//DE -> DE var IL : T ;
//DE -> @
DE -> DE1
DE1 -> var IL : T ; DE1
DE1 -> @
T -> ST
T -> array [ num .. num ] of ST
ST -> integer
ST -> real
//SDS -> SDS SD ;
//SDS -> @
SDS -> SDS1
SDS1 -> SD ; SDS1
SDS1 -> @
SD -> SH DE CS
SH -> function id ARG : ST ;
SH -> procedure id ARG ;
ARG -> ( PL )
ARG -> @
//PL -> IL : T
//PL -> PL ; IL : T
PL -> IL : T PL1
PL1 -> ; IL : T PL1
PL1 -> @
CS -> begin OS end
OS -> SL
OS -> @
//SL -> STA
//SL -> SL ; STA
SL -> STA SL1
SL1 -> ; STA SL1
SL1 -> @
//STA -> if EXP then STA else STA
//STA -> if EXP then STA
STA -> if EXP then STA STA3
STA3 -> @
STA3 -> else STA
//STA -> @
STA -> CS
STA -> while EXP do STA
//STA -> VAR ASSOP EXP
//STA -> PS
//VAR -> id
//VAR -> id [ EXP ]
//PS -> id
//PS -> id ( EL )
STA -> id STA2
STA2 -> PS2
STA2 -> VAR2 ASSOP EXP
VAR -> id VAR2
VAR2 -> [ EXP ]
VAR2 -> @
//PS -> id PS2
PS2 -> ( EL )
PS2 -> @
//EL -> EXP
//EL -> EL , EXP
EL -> EXP EL1
EL1 -> , EXP EL1
EL1 -> @
//EXP -> SE
//EXP -> SE REOP SE
EXP -> SE EXP2
EXP2 -> REOP SE
EXP2 -> @
//SE -> TERM
//SE -> SI TERM
//SE -> SE ADDOP TERM
SE -> TERM SE1
SE -> SI TERM SE1
SE1 -> ADDOP TERM SE1
SE1 -> @
//TERM -> FAC
//TERM -> TERM MULOP FAC
TERM -> FAC TERM1
TERM1 -> MULOP FAC TERM1
TERM1 -> @
//FAC -> id
//FAC -> id ( EL )
FAC -> id FAC2
FAC2 -> ( EL )
FAC2 -> @
FAC -> num
FAC -> ( EXP )
FAC -> not FAC
SI -> +
SI -> -
ASSOP -> :=
ADDOP -> +
ADDOP -> -
ADDOP -> or
REOP -> =
REOP -> <>
REOP -> <
REOP -> <=
REOP -> >=
REOP -> >
MULOP -> *
MULOP -> /
MULOP -> div
MULOP -> mod
MULOP -> and
code2.txt:
program example(input,output);
var n,i,j,p,s:integer;
procedure TFSetLyrics( NewLyrics:real);
begin
FLyrics:=TrimRight(NewLyrics)
end;
begin
while not eof do
begin
s := (i+j) / 2 / 3;
s[1] := 1;
readlnn(3,6);
if (n=(1-3)) then break else break;
s:=0;
while n>gdc(1) do
begin
s:=s+n mod 10;
n:=n div 10
end;
p:=0;
while s>9 do
begin
p:=s;
s:=0;
while p>0 do
begin
s:=s+p mod 10;
p:=p div 10
end
end;
writeln(s)
end
end.