編譯原理課程即將結束,開始了在校中最麻煩的實驗,編譯實驗......同在一個系,其他班的編譯實驗分成好幾塊,簡短的文法,
完成一些小功能,就我們班的老師,撂下一句話:參考書本,把編譯器實現了,可以一組兩人分工合作。
源代碼鏈接:http://download.csdn.net/download/supersmart_dong/10224159
詞法分析流程圖:
首先要做的第一步就是寫數據結構以及完成單詞表。詞法分析的任務就是將一段程序代碼,分割單詞,把單詞信息寫出來。
例如在代碼: while A<2 do A:=A+1; 中進行詞法分析得出來結果,(while,關鍵字) (A,標識符) (<,算符)(2,整數) (do,關鍵字) (A,標識符)
(:=,算符) (A,標識符)(+,算符) (1,整數) (;,界符) 將代碼中單詞一個個的取出來進行分析便是詞法分析的任務。輸出內容是符號表文件和
token文件。
單詞表如下圖:
單詞 | 編碼 | 單詞 | 編碼 | 單詞 | 編碼 | 單詞 | 編碼 |
and | 1 | or | 11 | ( | 21 | := | 31 |
begin | 2 | program | 12 | ) | 22 | = | 32 |
bool | 3 | real | 13 | + | 23 | <= | 33 |
do | 4 | then | 14 | - | 24 | < | 34 |
else | 5 | true | 15 | * | 25 | <> | 35 |
end | 6 | var | 16 | / | 26 | > | 36 |
false | 7 | while | 17 | . | 27 | >= | 37 |
if | 8 | 標識符 | 18 | , | 28 | | |
integer | 9 | 整數 | 19 | : | 29 | | |
not | 10 | 實數 | 20 | ; | 30 | | |
單詞表可以用一維對象數組實現,根據編碼或者數組索引來判斷該單詞是關鍵字、算符還是界符。
之後寫一些判斷的函數,判斷是否是關鍵字,是否是數字,是否是算符,是否是界符
然後按照流程來,讀取文件,一個字符一個字符地讀。如果讀到的當前字符是字母,則繼續往後讀,一直讀到不是字母或數字爲止,
將讀到的這些字符構成一個單詞,查詢是不是關鍵字,如果不是則爲標識符。如果讀到的當前字符是數字,則繼續往後讀,一直讀到不是
字母、數字或小數點爲止,再判斷這個單詞是整數還是浮點數,其中不合法的部分報錯。如果當前字符既不是字母也不是數字,則取當前字符(記爲變量A)以及
當前字符加上下一個字符組成的雙字符串(記爲變量B),先判斷B是不是算符,再判斷A是不是界符或算符,如果都不是則爲非法字符。像這樣讀完整個程序後輸出符號表文件。
#include<iostream>
#include<string>
#include<fstream>
#include <cassert>
using namespace std;
// One entry of a fixed word table: a word's spelling paired with its numeric token code.
// Field order matters: the tables below brace-initialise entries as { name, code }.
struct WordToken
{
string name; // spelling of the word as it appears in source text
int code; // numeric code associated with the word
};
// One token produced by the lexer.
// Every numeric field has a default so that an instance created anywhere
// (not only in zero-initialised static storage) starts in a defined state.
struct WordSymble
{
    string name;      // token text
    int code = 0;     // token code from the word table (100 is used for illegal characters)
    string type;      // category label, e.g. "標識符" or "關鍵字"
    int addr = -1;    // index of the matching symbol-table entry, -1 when the token has none
    int linenum = 0;  // zero-based index of the source line the token was read from
};
// One symbol-table entry (identifier or numeric constant).
struct symble
{
    int number = 0;  // sequence number of the entry; defaulted so it is never indeterminate
    string type;     // category label of the entry
    string name;     // spelling of the identifier or constant
};
#pragma region 單詞表
WordToken keyword[] = { { "and", 1 }, { "begin", 2 }, { "bool", 3 }, { "do", 4 },
{ "else", 5 }, { "end", 6 }, { "false", 7 }, { "if", 8 },
{ "integer", 9 }, { "not", 10 }, { "or", 11 }, { "program", 12 }, { "real", 13 }, { "then", 14 }, { "true", 15 }, { "var", 16 }, { "while", 17 } };
WordToken operatorword[] = { { "+", 23 }, { "-", 24 }, { "*", 25 }, { "/", 26 }, { ">", 31 }, { ":=", 38 }, { "=", 32 }, { "<=", 33 }, { "<", 34 }, { "<>", 35 }, { ">", 36 }, { ">=", 37 } };
WordToken delimeter[] = { { "(", 21 }, { ")", 22 }, { ".", 27 }, { ",", 28 }, { ":", 29 }, { ";", 30 } };
#pragma endregion
int iskeyword(string s)//關鍵字
{
int i = 0;
if (s != "") {
if (((s[0] >= 'A') && (s[0] <= 'Z')) || ((s[0] >= 'a') && (s[0] <= 'z')))
{
while (i<17)
{
if (keyword[i].name == s)
{
return keyword[i].code;
}
i++;
}
return 18; //標識符
}
}
return -1;
}
int isoperator(string s)//算符
{
int i = 0;
if (s != "")
{
while (i<12)
{
if (s == operatorword[i].name)
{
return operatorword[i].code;
break;
}
i++;
}
}
return -1;
}
int isdelimeter(string s)//界符
{
int i = 0;
if (s != "")
{
while (i<6)
{
if (s == delimeter[i].name)
{
return delimeter[i].code;
break;
}
i++;
}
}
return -1;
}
// Classifies a numeric word and strips any invalid tail from it.
//
// s  - in/out: the candidate word. On return it holds only the valid numeric
//      prefix, i.e. everything before the first ASCII letter or the second
//      decimal point.
// n  - zero-based line index, used only for the error message (printed as n + 1).
//
// Returns 19 when the valid prefix has no decimal point (integer), 20 when it
// has one (real number), and -1 when no valid prefix remains (empty input or
// the word starts with a letter). When a tail was stripped, an error line
// naming the tail is written to standard output.
int isdight(string &s, int n)
{
    string::size_type stop = s.length(); // index of the first invalid character
    bool hasDot = false;
    for (string::size_type i = 0; i < s.length(); i++)
    {
        const char c = s[i];
        const bool isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        if (isLetter || (c == '.' && hasDot))
        {
            stop = i;
            break;
        }
        if (c == '.')
        {
            hasDot = true;
        }
    }
    if (stop < s.length())
    {
        // Split once, after the scan, so the tail is taken from the intact word.
        const string wrong = s.substr(stop);
        s.erase(stop);
        cout << "錯誤行號爲" << n + 1 << " ";
        cout << "錯誤內容爲" << wrong << " ";
        cout << "錯誤類型爲" << "錯誤單詞" << endl;
    }
    if (s.empty())
    {
        return -1;
    }
    return hasDot ? 20 : 19;
}
// Shared lexer state.
int length = 0;           // length of the line currently being scanned (set by get_token)
int line = 0;             // zero-based index of the line being scanned; a plain definition, since
                          // 'extern' combined with an initializer is still a definition and only draws a warning
string word;              // text of the token currently being assembled
string text;              // copy of the line currently being scanned
int k = 0;                // number of tokens stored in wss, i.e. the next free index (starts at 0)
int l = 0;                // number of entries stored in fuhaobiao, i.e. the next free index
string alltext[100];      // source lines; the first empty string marks the end of the program
WordSymble wss[1000];     // token list; the first entry with an empty name marks the end
symble fuhaobiao[1000];   // symbol table; the first entry with an empty name marks the end
// True when c is an ASCII letter.
static bool isLetterChar(char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// True when c is an ASCII decimal digit.
static bool isDigitChar(char c)
{
    return c >= '0' && c <= '9';
}

// Appends one token to wss and, when inSymbolTable is set, a matching entry to
// fuhaobiao. The last slot of each array is left unused so that an empty-name
// entry always terminates the table; when a table is full the token is
// reported and dropped instead of being written out of bounds.
static void recordToken(const string &name, int code, const string &type, bool inSymbolTable)
{
    const int tokenCapacity = static_cast<int>(sizeof(wss) / sizeof(wss[0]));
    const int symbolCapacity = static_cast<int>(sizeof(fuhaobiao) / sizeof(fuhaobiao[0]));
    if (k >= tokenCapacity - 1 || (inSymbolTable && l >= symbolCapacity - 1))
    {
        cout << "錯誤行號爲" << line + 1 << " ";
        cout << "錯誤內容爲" << name << " ";
        cout << "錯誤類型爲" << "單詞表或符號表已滿" << endl;
        return;
    }
    wss[k].name = name;
    wss[k].code = code;
    wss[k].type = type;
    wss[k].linenum = line;
    if (inSymbolTable)
    {
        wss[k].addr = l;
        fuhaobiao[l].name = name;
        fuhaobiao[l].type = type;
        fuhaobiao[l].number = l;
        l++;
    }
    k++;
}

// Builds the token list (wss) and the symbol table (fuhaobiao) from alltext.
//
// Scanning starts at the current value of the global `line` and stops at the
// first empty line or at the end of alltext. Blanks, tabs and carriage returns
// separate tokens. Each line is split into:
//   - words starting with a letter: keyword, or identifier (code 18);
//   - words starting with a digit: integer (19) or real number (20), validated
//     by isdight, which may shorten the word and report an error;
//   - otherwise a two-character operator if the current and next character
//     form one, else a delimiter, else a one-character operator, else an
//     illegal character (code 100).
// Identifiers and numbers get a new symbol-table entry on every occurrence.
void get_token() // generates the symbol table and the token list
{
    const int lineCapacity = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
    for (; line < lineCapacity && alltext[line] != ""; line++)
    {
        text = alltext[line];
        length = static_cast<int>(text.length());
        for (int i = 0; i < length; i++)
        {
            const char current = text[i];
            if (current == ' ' || current == '\t' || current == '\r')
            {
                continue; // whitespace only separates tokens
            }
            word = "";
            if (isLetterChar(current))
            {
                while (i < length && (isLetterChar(text[i]) || isDigitChar(text[i])))
                {
                    word += text[i];
                    i++;
                }
                i--; // step back so the outer loop's i++ lands on the first unread character
                const int code = iskeyword(word);
                if (code == 18)
                {
                    recordToken(word, code, "標識符", true);
                }
                else if (code != -1)
                {
                    recordToken(word, code, "關鍵字", false);
                }
            }
            else if (isDigitChar(current))
            {
                // Letters are swallowed too so that a malformed number such as "12ab"
                // is reported as one bad word by isdight.
                while (i < length && (isDigitChar(text[i]) || text[i] == '.' || isLetterChar(text[i])))
                {
                    word += text[i];
                    i++;
                }
                i--;
                const int code = isdight(word, line);
                if (code == 19)
                {
                    recordToken(word, 19, "整數", true);
                }
                else if (code == 20)
                {
                    recordToken(word, 20, "浮點數", true);
                }
            }
            else
            {
                word += current;
                // Try the two-character operators (":=", "<=", "<>", ">=") first.
                int pairCode = -1;
                if (i + 1 < length)
                {
                    string pair = word;
                    pair += text[i + 1];
                    pairCode = isoperator(pair);
                }
                if (pairCode != -1)
                {
                    word += text[i + 1];
                    i = i + 1;
                    recordToken(word, pairCode, "算符", false);
                }
                else if (isdelimeter(word) != -1)
                {
                    recordToken(word, isdelimeter(word), "界符", false);
                }
                else if (isoperator(word) != -1)
                {
                    recordToken(word, isoperator(word), "算符", false);
                }
                else
                {
                    recordToken(word, 100, "非法字符", false);
                }
            }
        }
    }
}
// Loads the source program from `file` into alltext, one line per element.
//
// alltext is cleared first, so repeated calls do not concatenate or leave
// stale lines behind. The last element is never filled, which guarantees the
// empty-string terminator that readers of alltext rely on; extra lines are
// reported and ignored. A blank source line is stored as a single space so
// that it is not mistaken for the end of the program and line numbering stays
// aligned with the file.
void readtext(string file)
{
    const int lineCapacity = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
    for (int i = 0; i < lineCapacity; i++)
    {
        alltext[i].clear();
    }

    ifstream infile;
    infile.open(file.c_str()); // attach the stream to the file
    assert(infile.is_open());  // debug builds: print a diagnostic and abort on failure
    if (!infile.is_open())
    {
        return; // release builds (NDEBUG): nothing to read
    }

    int i = 0;
    string s;
    while (i < lineCapacity - 1 && getline(infile, s))
    {
        alltext[i] = s.empty() ? string(" ") : s;
        i++;
    }
    if (i == lineCapacity - 1 && getline(infile, s))
    {
        cout << "錯誤行號爲" << i + 1 << " ";
        cout << "錯誤類型爲" << "程序行數超出上限" << endl;
    }
    infile.close();
}
// Reads the program at `url`, echoes it to standard output and then runs the
// lexer (get_token) on it.
//
// url - path of the source file. The default is "D:/a.txt"; the earlier
//       spelling contained stray spaces and could not name that file.
void printReadtext(string url = "D:/a.txt")
{
    const int lineCapacity = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
    readtext(url);
    cout << "程序如下:" << endl;
    // Stop at the first empty line, but never walk past the end of the array.
    for (int i = 0; i < lineCapacity && alltext[i] != ""; i++)
    {
        cout << alltext[i] << endl;
    }
    cout << endl;
    cout << "下面進行詞法分析" << endl;
    cout << endl;
    get_token();
}
void printTokenResult()
{
int j = 0;
cout << endl;
while (wss[j].name != "")
{
cout << "(" << wss[j].name << "," << wss[j].code << "," << wss[j].type << "," << wss[j].addr << "," << wss[j].linenum << ")" << endl;
j++;
}
j = 0;
cout << endl;
cout << "符號表爲" << endl;
while (fuhaobiao[j].name != "")
{
cout << "(" << fuhaobiao[j].number << "," << fuhaobiao[j].name << "," << fuhaobiao[j].type << ")" << endl;
j++;
}
cout << endl;
}
部分輸出結果如圖所示: