編譯原理詞法分析
要求:對如下工作進行展開描述
(1) 給出語言的詞法規則描述
• 標識符、關鍵字、整常數、字符常數、浮點常數
• 單界符:+,-,×,:,…
• 雙界符:/*,:=,…
• 註釋
(2) 針對這種單詞的狀態轉換圖和程序框圖
(3) 核心數據結構的設計
如符號表、關鍵字等
(4) 錯誤處理
錯誤的位置及類型等
#include<iostream>
#include<fstream>
#include<cstdio>
#include<cstring>
#include<string>
#include<cstdlib>
using namespace std;
int aa;// receives the result of the one-character rewind of the input performed in scan()
string word = "";// text of the token currently being assembled; main() clears it after printing
string reserved_word[28];// reserved-word table; set_reserve() fills slots 1..25
char buffer;// the character most recently read from the input file
int num = 0;// meant as the position of the current character inside a token; not referenced by the visible code
int line = 1; // current line number, starting at 1
int row = 1; // current column, i.e. the position within the current line
bool flag; // true while scanning should continue; cleared at end of file or when the file cannot be opened
int flag2;// token type code returned by scan()
// Populates the reserved-word table.
// Slots 1..25 of reserved_word receive the keywords below, in this order;
// slot 0 and the slots after 25 are left untouched.
void set_reserve()
{
    static const char* const kKeywords[] = {
        "procedure", "def",    "if",     "else",  "while",
        "call",      "begin",  "end",    "and",   "or",
        "return",    "float",  "double", "void",  "cin",
        "cout",      "char",   "for",    "true",  "false",
        "int",       "bool",   "main",   "#",     "include"
    };
    const int keyword_count = static_cast<int>(sizeof(kKeywords) / sizeof(kKeywords[0]));

    // The table is 1-based, so keyword k lands in slot k + 1.
    for (int k = 0; k < keyword_count; ++k)
    {
        reserved_word[k + 1] = kKeywords[k];
    }
}
// Reports whether x is an ASCII letter (a-z or A-Z).
bool judge_word(char x)
{
    const bool is_lower = ('a' <= x) && (x <= 'z');
    const bool is_upper = ('A' <= x) && (x <= 'Z');
    return is_lower || is_upper;
}
// Reports whether x is an ASCII decimal digit (0-9).
bool judge_number(char x)
{
    return ('0' <= x) && (x <= '9');
}
// Reports whether x is one of the delimiter characters:
// ( ) , ; { } " '
bool judge_jiefu(char x)
{
    switch (x)
    {
    case '(':
    case ')':
    case ',':
    case ';':
    case '{':
    case '}':
    case '"':
    case '\'':
        return true;
    default:
        return false;
    }
}
// Reports whether x is one of the single-character arithmetic operators
// handled here: plus, minus or multiply.
bool judge_yunsuanfu1(char x)
{
    switch (x)
    {
    case '+':
    case '-':
    case '*':
        return true;
    default:
        return false;
    }
}
// Reports whether x can start an assignment/relational operator,
// i.e. whether it is '=', '>' or '<'.
bool judge_yunsuannfu2(char x)
{
    const bool is_equals = (x == '=');
    const bool is_angle = (x == '>') || (x == '<');
    return is_equals || is_angle;
}
// Reads one character from fp, mirrors it into the global `buffer`, and
// returns the untruncated fgetc() result so that EOF (end of file or read
// error) stays distinguishable from every data byte.
static int read_char(FILE* fp)
{
    const int ch = fgetc(fp);
    buffer = static_cast<char>(ch);
    return ch;
}

// Disposes of a look-ahead character that does not belong to the current token.
// A real character is handed back to the stream with ungetc(); EOF instead
// marks the input as finished (flag = 0). Nothing is pushed back at EOF,
// because doing so would make the last character of the file be read again.
// `aa` keeps a 0-on-success / non-zero-on-failure status of the push-back.
static void finish_lookahead(int ch, FILE* fp)
{
    if (ch == EOF)
    {
        flag = 0;
        return;
    }
    aa = (ungetc(ch, fp) == EOF) ? -1 : 0;
}

// Reports whether ch may continue an identifier: a letter, a digit or '_'.
static bool is_ident_part(int ch)
{
    if (ch == EOF)
    {
        return false;
    }
    const char c = static_cast<char>(ch);
    return judge_word(c) || judge_number(c) || c == '_';
}

// Reports whether text is a reserved word. Reserved words are matched
// case-insensitively, so text is lowered (ASCII only) before it is compared
// with slots 1..25 of reserved_word.
static bool is_reserved(const std::string& text)
{
    std::string lowered = text;
    for (std::string::size_type i = 0; i < lowered.length(); ++i)
    {
        if (lowered[i] >= 'A' && lowered[i] <= 'Z')
        {
            lowered[i] = static_cast<char>(lowered[i] - 'A' + 'a');
        }
    }
    for (int i = 1; i <= 25; ++i)
    {
        if (lowered == reserved_word[i])
        {
            return true;
        }
    }
    return false;
}

// Consumes the rest of a "//" comment. The terminating newline is handed back
// so that the next scan() call performs the line/column bookkeeping for it.
static int skip_line_comment(FILE* fp)
{
    word.clear();
    int ch = read_char(fp);
    while (ch != EOF && ch != '\n')
    {
        ch = read_char(fp);
    }
    finish_lookahead(ch, fp);
    return 0;
}

// Consumes the body of a "/* ... */" comment, keeping line/row up to date.
// Reaching the end of the input before "*/" ends scanning (flag = 0).
static int skip_block_comment(FILE* fp)
{
    word.clear();
    int previous = 0; // not '*', so that the input "/*/" does not close the comment
    for (;;)
    {
        const int ch = read_char(fp);
        if (ch == EOF)
        {
            flag = 0;
            return 0;
        }
        row++;
        if (ch == '\n')
        {
            line++;
            row = 1;
        }
        if (previous == '*' && ch == '/')
        {
            return 0;
        }
        previous = ch;
    }
}

// Extracts at most one token from fp.
//
// The token text is appended to the global `word`; `row` and `line` are
// advanced for every character consumed; `flag` is cleared once the end of
// the input (or a read error) has been reached.
//
// Return value:
//    0  nothing to report (whitespace, newline, a comment, or end of input)
//    1  identifier
//    2  integer constant
//    3  reserved word
//    4  single-character operator (+ - * / = < >)
//    5  two-character operator (<= >= <> ==)
//    6  delimiter
//    7  floating-point constant
//   -1  illegal character (stored in `word`)
int scan(FILE* fp)
{
    const int first = read_char(fp);
    if (first == EOF)
    {
        flag = 0;
        return 0;
    }
    const char c = buffer;

    // Blanks; tab and carriage return are skipped like a space.
    if (c == ' ' || c == '\t' || c == '\r')
    {
        row++;
        return 0;
    }
    if (c == '\n')
    {
        line++;
        row = 1;
        return 0;
    }

    // A letter or '_' starts an identifier or a reserved word.
    if (judge_word(c) || c == '_')
    {
        word += c;
        row++;
        int next = read_char(fp);
        while (is_ident_part(next))
        {
            word += buffer;
            row++;
            next = read_char(fp);
        }
        finish_lookahead(next, fp);
        // Classify after the look-ahead is settled, so that a reserved word
        // that ends the file is still reported as one.
        return is_reserved(word) ? 3 : 1;
    }

    // + - * are always single-character operators.
    if (judge_yunsuanfu1(c))
    {
        word += c;
        row++;
        return 4;
    }

    // A digit starts a numeric constant; a '.' anywhere makes it a float.
    // NOTE: any mix of digits and dots is accepted, so a malformed literal
    // such as 1.2.3 is still reported as type 7.
    if (judge_number(c))
    {
        bool has_point = false;
        word += c;
        row++;
        int next = read_char(fp);
        while (next != EOF && (judge_number(buffer) || next == '.'))
        {
            word += buffer;
            row++;
            if (next == '.')
            {
                has_point = true;
            }
            next = read_char(fp);
        }
        finish_lookahead(next, fp);
        return has_point ? 7 : 2;
    }

    if (judge_jiefu(c))
    {
        word += c;
        row++;
        return 6;
    }

    // '=' '<' '>' alone, or the pairs "<=", ">=", "<>", "==".
    if (judge_yunsuannfu2(c))
    {
        row++;
        word += c;
        const int next = read_char(fp);
        const bool is_pair = (next == '=') || (c == '<' && next == '>');
        if (is_pair)
        {
            word += buffer;
            row++;
            return 5;
        }
        finish_lookahead(next, fp);
        return 4;
    }

    // '/' is either the division operator or the start of a comment.
    if (c == '/')
    {
        row++;
        word += c;
        const int next = read_char(fp);
        if (next == '/')
        {
            return skip_line_comment(fp);
        }
        if (next == '*')
        {
            row++; // the '*' occupies a column too
            return skip_block_comment(fp);
        }
        finish_lookahead(next, fp);
        return 4;
    }

    // Anything else is not part of the language.
    word += c;
    row++;
    return -1;
}
// Driver: opens code.txt, calls scan() repeatedly and prints one line per
// token with its type, line, starting column and text. It then waits for the
// user to type 'e' before exiting.
int main()
{
    set_reserve(); // the reserved-word table must be filled before scanning
    cout << "introduction" << endl;
    cout << "open " << "code.txt" << endl;
    cout << "press any key" << endl;
    system("pause");
    flag = 1;
    FILE* fp = fopen("code.txt", "r");
    if (!fp)
    {
        cout << "not found the file or other error " << endl;
        flag = 0;
    }
    while (flag == 1)
    {
        // flag2 is the type code of the token just read; 0 means nothing to print.
        flag2 = scan(fp);
        // row already points past the token, so the token length is
        // subtracted to obtain its starting column.
        switch (flag2)
        {
        case 1:
            cout << "type:1 標識符 " << "line " << line << " row " << row - word.length() << " " << word << endl;
            if (word.length() > 20)
                cout << "ERROR Identifier length cannot exceed 20 characters" << endl;
            word.clear();
            break;
        case 3:
            cout << "type:3 關鍵字 " << "line " << line << " row " << row - word.length() << " " << word << endl;
            word.clear();
            break;
        case 4:
            cout << "type:4 操作數 " << "line " << line << " row " << row - 1 << " " << word << endl;
            word.clear();
            break;
        case 2:
            cout << "type:2 常整形 " << "line " << line << " row " << row - word.length() << " " << word << endl;
            word.clear();
            break;
        case 7:
            cout << "type:7 浮點數 " << "line " << line << " row " << row - word.length() << " " << word << endl;
            word.clear();
            break;
        case 6:
            cout << "type:6 分隔符 " << "line " << line << " row " << row - 1 << " " << word << endl;
            word.clear();
            break;
        case 5:
            cout << "type:5 二元操作數 " << "line " << line << " row " << row - 2 << " " << word << endl;
            word.clear();
            break;
        case -1: // illegal character
            cout << "Illegal character " << "line " << line << " row " << row - 1 << " " << word << endl;
            word.clear();
            break;
        default:
            break;
        }
    }
    // fp is null when the file could not be opened; fclose must not be
    // called on a null stream.
    if (fp)
    {
        fclose(fp);
    }
    cout << "press e to close" << endl;
    char end;
    while (cin >> end && end != 'e') {
        cout << "只有e可以關閉" << endl;
    }
    return 0;
}