編譯原理詞法分析

編譯原理詞法分析

要求:對如下工作進行展開描述
(1) 給出語言的詞法規則描述
• 標識符、關鍵字、整常數、字符常數、浮點常數
• 單界符:+,-,×,:,…
• 雙界符:/*,:=,…
• 註釋
(2) 針對上述各類單詞,給出狀態轉換圖和程序框圖
(3) 核心數據結構的設計
如符號表、關鍵字表等
(4) 錯誤處理
錯誤的位置及類型等

#include<iostream>
#include<fstream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cstdlib>

using namespace std;

int aa;// scratch slot: scan() stores here the status of its latest one-character step back in the input (0 on success)
string  word = "";// text of the token scan() is assembling; main() clears it after reporting the token
string  reserved_word[28];// reserved-word table; set_reserve() fills slots 1..25
char buffer;// the most recently read input character
int num = 0;// position of the current character within a word (not referenced by the code visible in this file)
int line = 1; // current line number, starting at 1
int row = 1; // current column within the line, starting at 1
bool flag; // true while scanning should continue; cleared at end of file or when the file cannot be opened
int flag2;// token type code returned by the latest scan() call

// Fills the global reserved-word table.
// Entries occupy slots 1..25 of reserved_word; slot 0 is left untouched.
void set_reserve()
{
	static const char* const kKeywords[] = {
		"procedure", "def", "if", "else", "while",
		"call", "begin", "end", "and", "or",
		"return", "float", "double", "void", "cin",
		"cout", "char", "for", "true", "false",
		"int", "bool", "main", "#", "include",
	};
	const int keyword_count = static_cast<int>(sizeof(kKeywords) / sizeof(kKeywords[0]));

	// The table is 1-based, so keyword k lands in slot k + 1.
	for (int k = 0; k < keyword_count; ++k)
	{
		reserved_word[k + 1] = kKeywords[k];
	}
}

// Returns true when x is an ASCII letter (a-z or A-Z).
bool judge_word(char x)
{
	const bool is_lower = ('a' <= x) && (x <= 'z');
	const bool is_upper = ('A' <= x) && (x <= 'Z');
	return is_lower || is_upper;
}

// Returns true when x is an ASCII decimal digit (0-9).
bool judge_number(char x)
{
	return ('0' <= x) && (x <= '9');
}

// Returns true when x is one of the delimiter characters: ( ) , ; { } " '
bool judge_jiefu(char x)
{
	switch (x)
	{
	case '(':
	case ')':
	case ',':
	case ';':
	case '{':
	case '}':
	case '"':
	case '\'':
		return true;
	default:
		return false;
	}
}


// Returns true when x is one of the arithmetic operators + - *
bool judge_yunsuanfu1(char x)
{
	if (x == '+') return true;
	if (x == '-') return true;
	return x == '*';
}

// Returns true when x is '=', '>' or '<' -- the characters that can start
// the operators =, ==, >, >=, <, <= and <>.
bool judge_yunsuannfu2(char x)
{
	switch (x)
	{
	case '=':
	case '>':
	case '<':
		return true;
	default:
		return false;
	}
}


// Reads the next character from fp, mirrors it into the global `buffer`, and
// returns it as an int so that EOF stays distinguishable from a real byte.
static int read_char(FILE* fp)
{
	const int ch = fgetc(fp);
	buffer = static_cast<char>(ch);
	return ch;
}

// Disposes of the one-character look-ahead `ch` once a token is complete.
// At end of input the global `flag` is cleared so the caller stops scanning;
// otherwise the character is pushed back for the next scan() call and the
// push-back status is recorded in the global `aa` (0 on success).
static void release_lookahead(int ch, FILE* fp)
{
	if (ch == EOF)
	{
		flag = 0;
		return;
	}
	// ungetc is used instead of fseek(fp, -1, SEEK_CUR): relative seeks on a
	// text-mode stream are not portable, and seeking back after hitting EOF
	// re-reads the last character forever.
	aa = (ungetc(ch, fp) == EOF) ? -1 : 0;
}

// Reads one lexical unit from fp.
//
// The token text is appended to the global `word`; the globals `line` and
// `row` track the position, and `flag` is cleared once the input is exhausted.
//
// Returns the token type:
//    0  nothing to report (whitespace, a comment, or end of input)
//    1  identifier
//    2  integer constant
//    3  reserved word (compared case-insensitively)
//    4  single-character operator: + - * / = < >
//    5  two-character operator: <= >= <> ==
//    6  delimiter
//    7  floating-point constant
//   -1  illegal character
int scan(FILE* fp)
{
	int ch = read_char(fp);
	if (ch == EOF)
	{
		flag = 0;
		return 0;
	}

	// Blanks; tab and carriage return count as one column each.
	if (buffer == ' ' || buffer == '\t' || buffer == '\r')
	{
		row++;
		return 0;
	}
	if (buffer == '\n')
	{
		line++;
		row = 1;
		return 0;
	}

	// A letter or '_' starts an identifier or a reserved word.
	if (judge_word(buffer) || buffer == '_')
	{
		word += buffer; row++;
		while ((ch = read_char(fp)) != EOF
			&& (judge_word(buffer) || judge_number(buffer) || buffer == '_'))
		{
			word += buffer; row++;
		}
		release_lookahead(ch, fp);

		// Reserved words are case-insensitive: compare a lower-cased copy.
		// This runs even when the word ends at EOF, so a trailing keyword
		// is still classified as a keyword.
		string temp = word;
		for (size_t j = 0; j < temp.length(); j++)
		{
			if (temp[j] >= 'A' && temp[j] <= 'Z')
			{
				temp[j] = static_cast<char>(temp[j] - 'A' + 'a');
			}
		}
		for (int i = 1; i <= 25; i++)
		{
			if (temp == reserved_word[i])
			{
				return 3;
			}
		}
		return 1;
	}

	// + - * are always single-character operators.
	if (judge_yunsuanfu1(buffer))
	{
		word += buffer; row++;
		return 4;
	}

	// A digit starts a numeric constant; seeing a '.' makes it a float.
	// NOTE(review): several dots (e.g. "1.2.3") are still accepted as one
	// float token, exactly as before.
	if (judge_number(buffer))
	{
		bool has_dot = false;
		word += buffer; row++;
		while ((ch = read_char(fp)) != EOF
			&& (judge_number(buffer) || buffer == '.'))
		{
			word += buffer; row++;
			if (buffer == '.')
			{
				has_dot = true;
			}
		}
		release_lookahead(ch, fp);
		return has_dot ? 7 : 2;
	}

	// Delimiters.
	if (judge_jiefu(buffer))
	{
		word += buffer; row++;
		return 6;
	}

	// = < > possibly followed by a second character: <= >= <> ==
	if (judge_yunsuannfu2(buffer))
	{
		const char first = buffer;
		word += first; row++;

		ch = read_char(fp);
		const bool two_chars = (ch == '=') || (first == '<' && ch == '>');
		if (two_chars)
		{
			word += buffer; row++;
			return 5;
		}
		release_lookahead(ch, fp);
		return 4;
	}

	// '/' is either the division operator or the start of a comment.
	if (buffer == '/')
	{
		word += buffer; row++;
		ch = read_char(fp);

		// Division operator (also when '/' is the last character of the file).
		if (ch != '*' && ch != '/')
		{
			release_lookahead(ch, fp);
			return 4;
		}

		// Either kind of comment produces no token text.
		word.clear();

		// Line comment: skip to the end of the line. The newline is pushed
		// back so the next call does the line/column bookkeeping.
		if (ch == '/')
		{
			while ((ch = read_char(fp)) != EOF && ch != '\n')
			{
				// skip comment text
			}
			release_lookahead(ch, fp);
			return 0;
		}

		// Block comment: skip up to and including the closing "*/".
		row++; // the opening '*'
		bool prev_star = false;
		for (;;)
		{
			ch = read_char(fp);
			if (ch == EOF)
			{
				// Unterminated comment: stop instead of spinning on EOF.
				flag = 0;
				return 0;
			}
			if (ch == '\n') { line++; row = 1; }
			else { row++; }

			if (prev_star && ch == '/')
			{
				return 0;
			}
			// Remember a '*' so that sequences such as "**/" still close.
			prev_star = (ch == '*');
		}
	}

	// Anything else is reported as an illegal character.
	word += buffer;
	row++;
	return -1;
}

int main()
{
	set_reserve();//設置保留字
	cout << "introduction" << endl;
	cout << "open " << "code.txt" << endl;

	cout << "press any key" << endl;
	system("pause");

	flag = 1;
	//ifstream a("需要解析的源代碼.txt");
	FILE* fp;
	if (!(fp = fopen("code.txt", "r")))
	{
		cout << "not found the file or other error " << endl;
		flag = 0;
	}

	while (flag == 1)
	{
		//flag2 返回的類型
		flag2 = scan(fp);//反覆調用函數提取單詞

		if (flag2 == 1)
		{
			cout << "type:1 標識符       " << "line " << line << " row " << row - word.length() << "  " << word << endl;
			if (word.length() > 20)
				cout << "ERROR Identifier length cannot exceed 20 characters" << endl;
			word.clear();
		}
		else if (flag2 == 3)
		{
			cout << "type:3 關鍵字       " << "line " << line << " row " << row - word.length() << "  " << word << endl;
			word.clear();
		}
		else if (flag2 == 4)
		{
			cout << "type:4 操作數       " << "line " << line << " row " << row - 1 << "  " << word << endl;
			word.clear();
		}
		else if (flag2 == 2)
		{
			cout << "type:2 常整形       " << "line " << line << " row " << row - word.length() << "  " << word << endl;
			//if (word[0] == '0')
			//	cout << "ERROR: The first digit cannot be 0!" << endl;
			word.clear();
		}
		else if (flag2 == 7)
		{
			cout << "type:7 浮點數       " << "line " << line << " row " << row - word.length() << "  " << word << endl;

			word.clear();
		}
		else if (flag2 == 6)
		{
			cout << "type:6 分隔符       " << "line " << line << " row " << row - 1 << "  " << word << endl;
			word.clear();
		}
		else if (flag2 == 5)
		{
			cout << "type:5 二元操作數   " << "line " << line << " row " << row - 2 << "  " << word << endl;
			word.clear();
		}
		//非法字符
		else if (flag2 == -1)
		{
			cout << "Illegal character      " << "line " << line << " row " << row - 1 << "  " << word << endl;
			word.clear();
		}
	}

	int a = fclose(fp);
	cout << "press e to close" << endl;
	char end;
	while (cin >> end && end != 'e') {
		cout << "只有e可以關閉" << endl;
	}
	return 0;
}
發佈了52 篇原創文章 · 獲贊 21 · 訪問量 8232
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章