編譯實驗(一)詞法分析

 編譯原理課程即將結束,開始了在校中最麻煩的實驗,編譯實驗......同在一個系,其他班的編譯實驗分成好幾塊,簡短的文法,
完成一些小功能,就我們班的老師,撂下一句話:參考書本,把編譯器實現了,可以一組兩人分工合作。
源代碼鏈接:http://download.csdn.net/download/supersmart_dong/10224159
詞法分析流程圖:

首先要做的第一步就是寫數據結構以及完成單詞表。詞法分析的任務就是將一段程序代碼,分割單詞,把單詞信息寫出來。
例如在代碼: while A<2 do A:=A+1;  中進行詞法分析得出來結果,(while,關鍵字) (A,標識符) (<,算符)(2,整數) (do,關鍵字) (A,標識符)
(:=,算符) (A,標識符)(+,算符) (1,整數) (;,界符) 將代碼中單詞一個個的取出來進行分析便是詞法分析的任務。輸出內容是符號表文件和
token文件。
單詞表如下圖:

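| 單詞 | 編碼 | 單詞 | 編碼 | 單詞 | 編碼 | 單詞 | 編碼 |
| --- | --- | --- | --- | --- | --- | --- | --- |
| and | 1 | or | 11 | `(` | 21 | `:=` | 31 |
| begin | 2 | program | 12 | `)` | 22 | `=` | 32 |
| bool | 3 | real | 13 | `+` | 23 | `<=` | 33 |
| do | 4 | then | 14 | `-` | 24 | `<` | 34 |
| else | 5 | true | 15 | `*` | 25 | `<>` | 35 |
| end | 6 | var | 16 | `/` | 26 | `>` | 36 |
| false | 7 | while | 17 | `.` | 27 | `>=` | 37 |
| if | 8 | 標識符 | 18 | `,` | 28 |  |  |
| integer | 9 | 整數 | 19 | `:` | 29 |  |  |
| not | 10 | 實數 | 20 | `;` | 30 |  |  |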

單詞表可以用一維對象數組實現,根據編碼或者數組索引來判斷該單詞是關鍵字、算符還是界符。
之後寫一些判斷的函數,判斷是否是關鍵字,是否是數字,是否是算符,是否是界符
然後按照流程來:讀取文件,逐個字符掃描。如果當前字符是字母,則繼續讀後面的字符,一直讀到不是字母或數字爲止,
將這些字符構成一個單詞,查詢是不是關鍵字,如果不是則爲標識符。如果當前字符是數字,則繼續讀後面的字符,一直讀到不是
字母、數字或小數點爲止,再判斷這個單詞是整數還是浮點數,含有字母或多個小數點的則報告爲錯誤單詞。如果當前字符既不是字母也不是數字,
則取當前字符(記爲變量A)以及當前字符加上下一個字符組成的雙字符串(記爲變量B):先判斷B是不是算符,再判斷A是不是界符或算符,如果都不是則爲非法字符。像這樣讀完整個程序後輸出token文件和符號表文件。
#include<iostream>
#include<string>
#include<fstream>
#include <cassert>
using namespace std;
// One row of a lexical table: the spelling of a word and its numeric code.
// Kept as a plain aggregate so tables can be written as { "spelling", code }.
struct WordToken
{
	std::string name; // spelling of the word, e.g. "begin" or ":="
	int code;         // numeric code assigned to the word
};
// One recognised token as produced by the lexer.
// Every member has a default so an instance is fully initialised wherever it
// is created (previously only addr had one).
struct WordSymble
{
	string name;      // spelling of the token
	int code = 0;     // numeric word code of the token
	string type;      // category label of the token
	int addr = -1;    // index into the symbol table; -1 when the token has no entry
	int linenum = 0;  // zero-based source line the token was read from
};
// One row of the symbol table.
struct symble
{
	int number;       // sequence number of the entry
	std::string type; // category label of the entry
	std::string name; // spelling of the entry
};
#pragma region 單詞表

WordToken keyword[] = { { "and", 1 }, { "begin", 2 }, { "bool", 3 }, { "do", 4 },
{ "else", 5 }, { "end", 6 }, { "false", 7 }, { "if", 8 },
{ "integer", 9 }, { "not", 10 }, { "or", 11 }, { "program", 12 }, { "real", 13 }, { "then", 14 }, { "true", 15 }, { "var", 16 }, { "while", 17 } };
WordToken operatorword[] = { { "+", 23 }, { "-", 24 }, { "*", 25 }, { "/", 26 }, { ">", 31 }, { ":=", 38 }, { "=", 32 }, { "<=", 33 }, { "<", 34 }, { "<>", 35 }, { ">", 36 }, { ">=", 37 } };
WordToken delimeter[] = { { "(", 21 }, { ")", 22 }, { ".", 27 }, { ",", 28 }, { ":", 29 }, { ";", 30 } };
#pragma endregion

int iskeyword(string s)//關鍵字
{
	int i = 0;
	if (s != "") {
		if (((s[0] >= 'A') && (s[0] <= 'Z')) || ((s[0] >= 'a') && (s[0] <= 'z')))
		{
			while (i<17)
			{
				if (keyword[i].name == s)
				{
					return  keyword[i].code;
				}
				i++;
			}

			return 18;	//標識符
		}

	}
	return -1;
}
int isoperator(string s)//算符
{
	int i = 0;
	if (s != "")
	{
		while (i<12)
		{
			if (s == operatorword[i].name)
			{
				return operatorword[i].code;
				break;
			}
			i++;
		}
	}
	return -1;
}
int isdelimeter(string s)//界符
{
	int i = 0;
	if (s != "")
	{
		while (i<6)
		{
			if (s == delimeter[i].name)
			{
				return delimeter[i].code;
				break;
			}
			i++;
		}
	}
	return -1;
}
// Classifies a numeric word and strips any invalid tail from it.
//
// s : the word to check; it is modified in place. When it contains a letter or
//     a second '.', s is cut back to the part in front of that character.
// n : zero-based line number, used only for the error message (printed as n + 1).
//
// Returns 19 (integer) when no '.' was seen, 20 (real) when at least one '.'
// was seen, and -1 for an empty word. When an invalid tail is found, one error
// line naming the removed text is written to cout.
int isdight(string &s, int n)
{
	if (s.empty())
	{
		return -1;
	}

	int dots = 0;
	string::size_type bad = string::npos; // index of the first offending character
	for (string::size_type i = 0; i < s.length(); ++i)
	{
		const char c = s[i];
		const bool isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
		if (c == '.')
		{
			++dots;
		}
		// A letter, or the second decimal point, ends the valid part of the number.
		if (isLetter || dots == 2)
		{
			bad = i;
			break;
		}
	}

	if (bad != string::npos)
	{
		// Take the tail before truncating, so the message shows what was removed.
		const string wrong = s.substr(bad);
		s.erase(bad);
		cout << "錯誤行號爲" << n + 1 << "   ";
		cout << "錯誤內容爲" << wrong << "   ";
		cout << "錯誤類型爲" << "錯誤單詞" << endl;
	}

	return dots == 0 ? 19 : 20;
}
// Shared lexer state.
int length = 0;          // length of the line currently being scanned
int line = 0;            // zero-based index of the line currently being scanned
                         // (was "extern int line = 0;": an initialised declaration is
                         //  already a definition, so the extern only caused a warning)
string word;             // lexeme currently being assembled
string text;             // copy of the line currently being scanned
int k = 0;               // number of tokens stored in wss (next free index, starts at 0)
int l = 0;               // number of entries stored in fuhaobiao (next free index)
string alltext[100];     // source program, one element per line
WordSymble wss[1000];    // token table
symble fuhaobiao[1000];  // symbol table
// True for ASCII letters A-Z / a-z.
static bool lexIsLetter(char c)
{
	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

// True for ASCII digits 0-9.
static bool lexIsDigit(char c)
{
	return c >= '0' && c <= '9';
}

// True for characters that only separate tokens (space, tab, CR, VT, FF).
static bool lexIsBlank(char c)
{
	return c == ' ' || c == '\t' || c == '\r' || c == '\v' || c == '\f';
}

// Appends one token to wss and, when inSymbolTable is set, a matching entry to
// fuhaobiao. Returns false without storing anything when a table is full.
static bool appendToken(const string &name, int code, const char *type, bool inSymbolTable)
{
	const int tokenCap = static_cast<int>(sizeof(wss) / sizeof(wss[0]));
	const int symbolCap = static_cast<int>(sizeof(fuhaobiao) / sizeof(fuhaobiao[0]));
	// The last slot of each table is left empty so that a scan looking for an
	// empty-name terminator always finds one inside the array.
	if (k >= tokenCap - 1 || (inSymbolTable && l >= symbolCap - 1))
	{
		return false;
	}
	wss[k].name = name;
	wss[k].code = code;
	wss[k].type = type;
	wss[k].linenum = line;
	if (inSymbolTable)
	{
		wss[k].addr = l;
		fuhaobiao[l].name = name;
		fuhaobiao[l].type = type;
		fuhaobiao[l].number = l;
		l++;
	}
	k++;
	return true;
}

// Scans the lines held in alltext and fills the token table (wss) and the
// symbol table (fuhaobiao).
//
// - A word starting with a letter runs over letters and digits and becomes a
//   keyword or an identifier.
// - A word starting with a digit runs over digits, '.' and letters and is
//   classified (and, if malformed, trimmed) by isdight().
// - Any other character is tried as a two-character operator first, then as a
//   delimiter, then as a one-character operator; otherwise it is stored as an
//   illegal character with code 100.
//
// Identifiers and numbers also get a symbol-table entry.
// NOTE(review): every occurrence gets its own entry, i.e. repeated identifiers
// are not merged — confirm this is what later phases expect.
//
// Scanning starts at the global line index and covers everything up to the
// last non-empty line, so blank lines inside the program no longer end the
// analysis. Scanning stops with a message on cerr when a table is full.
void get_token()
{
	const int lineCap = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
	int lineCount = lineCap;
	while (lineCount > 0 && alltext[lineCount - 1].empty())
	{
		lineCount--;
	}

	for (; line < lineCount; line++)
	{
		text = alltext[line];
		length = static_cast<int>(text.length());
		for (int i = 0; i < length; i++)
		{
			const char current = text[i];
			if (lexIsBlank(current))
			{
				continue;
			}

			word = "";
			bool stored = true;
			if (lexIsLetter(current))
			{
				while (i < length && (lexIsLetter(text[i]) || lexIsDigit(text[i])))
				{
					word += text[i];
					i++;
				}
				i--; // step back so the loop's i++ lands on the first unread character
				const int code = iskeyword(word);
				if (code == 18)
				{
					stored = appendToken(word, code, "標識符", true);
				}
				else if (code != -1)
				{
					stored = appendToken(word, code, "關鍵字", false);
				}
			}
			else if (lexIsDigit(current))
			{
				// Letters are collected too so that isdight() can report e.g. "12ab" as one bad word.
				while (i < length && (lexIsDigit(text[i]) || text[i] == '.' || lexIsLetter(text[i])))
				{
					word += text[i];
					i++;
				}
				i--;
				const int code = isdight(word, line);
				if (code == 19)
				{
					stored = appendToken(word, 19, "整數", true);
				}
				else if (code == 20)
				{
					stored = appendToken(word, 20, "浮點數", true);
				}
			}
			else
			{
				word += current;
				int code = -1;
				if (i + 1 < length)
				{
					// Longest match first: try the two-character operator.
					string twoChars = word;
					twoChars += text[i + 1];
					code = isoperator(twoChars);
					if (code != -1)
					{
						word = twoChars;
						i = i + 1;
						stored = appendToken(word, code, "算符", false);
					}
				}
				if (code == -1)
				{
					if ((code = isdelimeter(word)) != -1)
					{
						stored = appendToken(word, code, "界符", false);
					}
					else if ((code = isoperator(word)) != -1)
					{
						stored = appendToken(word, code, "算符", false);
					}
					else
					{
						stored = appendToken(word, 100, "非法字符", false);
					}
				}
			}

			if (!stored)
			{
				cerr << "get_token: token or symbol table is full, stopping at line " << line + 1 << endl;
				return;
			}
		}
	}
}
// Loads the text file `file` into alltext, one line per element.
//
// - Opening failure still trips the assert in debug builds; in builds where
//   assert is compiled out it is reported on cerr and alltext is left untouched.
// - Lines are assigned (not appended), and slots after the last line read are
//   cleared, so a second call does not mix in content from an earlier one.
// - A trailing '\r' (CRLF files) is removed from each line.
// - At most capacity - 1 lines are stored: the final slot stays empty so a scan
//   that looks for an empty terminating entry always finds one inside the
//   array. Extra lines are dropped with a message on cerr.
void readtext(string file)
{
	ifstream infile;
	infile.open(file.c_str());
	assert(infile.is_open());
	if (!infile.is_open())
	{
		cerr << "readtext: cannot open " << file << endl;
		return;
	}

	const int capacity = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
	int count = 0;
	string s;
	while (count < capacity - 1 && getline(infile, s))
	{
		if (!s.empty() && s[s.length() - 1] == '\r')
		{
			s.erase(s.length() - 1);
		}
		alltext[count] = s;
		count++;
	}
	if (count == capacity - 1 && getline(infile, s))
	{
		cerr << "readtext: input has more than " << capacity - 1 << " lines, the rest is ignored" << endl;
	}
	for (int i = count; i < capacity; i++)
	{
		alltext[i].clear();
	}
	infile.close();
}
// Reads the program at `url`, echoes it to cout and then runs the lexer.
//
// The echo covers every line up to the last non-empty one, so blank lines
// inside the program are shown instead of ending the listing, and the scan
// never leaves the alltext array.
//
// NOTE(review): the default path "D: / a.txt" contains spaces around the slash
// and looks like a mangled "D:/a.txt"; it is kept unchanged here — confirm and
// correct it if callers rely on the default.
void printReadtext(string url = "D: / a.txt")
{
	readtext(url);
	cout << "程序如下:" << endl;

	const int capacity = static_cast<int>(sizeof(alltext) / sizeof(alltext[0]));
	int lineCount = capacity;
	while (lineCount > 0 && alltext[lineCount - 1].empty())
	{
		lineCount--;
	}
	for (int i = 0; i < lineCount; i++)
	{
		cout << alltext[i] << endl;
	}

	cout << endl;
	cout << "下面進行詞法分析" << endl;
	cout << endl;
	get_token();
}
// Prints the token table and then the symbol table to cout.
//
// Token lines have the form (name,code,type,addr,linenum); symbol lines have
// the form (number,name,type). Each listing ends at the first entry whose name
// is empty; the scans are additionally limited to the array sizes so a
// completely filled table cannot be read past its end.
void printTokenResult()
{
	const int tokenCap = static_cast<int>(sizeof(wss) / sizeof(wss[0]));
	const int symbolCap = static_cast<int>(sizeof(fuhaobiao) / sizeof(fuhaobiao[0]));

	cout << endl;
	for (int j = 0; j < tokenCap && wss[j].name != ""; j++)
	{
		cout << "(" << wss[j].name << "," << wss[j].code << "," << wss[j].type << "," << wss[j].addr << "," << wss[j].linenum << ")" << endl;
	}

	cout << endl;
	cout << "符號表爲" << endl;
	for (int j = 0; j < symbolCap && fuhaobiao[j].name != ""; j++)
	{
		cout << "(" << fuhaobiao[j].number << "," << fuhaobiao[j].name << "," << fuhaobiao[j].type << ")" << endl;
	}
	cout << endl;
}


部分輸出結果如圖所示:

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章