現在在學編譯原理,做了個簡單的詞法分析器。可以自定義關鍵字、界符以及運算符。
效果如下:
有一點不滿意的地方是每一項設置都必須輸入兩次 Ctrl+Z 才能結束輸入,原來打算的是按下回車就結束輸入的。
還有一個想法是不再手動輸入這些設置,而是讀取一個配置文件,從中識別關鍵字、界符以及運算符。
代碼如下:
#include <cctype>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using namespace std;
// Keyword table: filled by main() from user input and searched by isKeyWord().
vector <string> keywords;
// Operator table. isSymble() compares a one-character token against .first
// and hands back .second, which analyse() prints after the operator tag.
vector <pair<string,string> > symble;
// Delimiter table, same layout as the operator table; consulted by isBound().
vector <pair<string,string> > bound;
// File-scope character storage; analyse() keeps the input character it is
// currently classifying here.
char ch;
/**
 * Looks s up in the global keyword table.
 *
 * @param s  candidate word.
 * @param n  output: always reset on entry. On a match it holds the
 *           zero-based position of s in `keywords`; when nothing matches it
 *           holds the number of keywords; when the table is empty it is 0.
 * @return   1 when s is a keyword, 0 when the keyword table is empty,
 *           -1 when the table is non-empty but does not contain s.
 */
int isKeyWord(const string& s,int& n)
{
    // Start counting from zero here instead of trusting the caller to have
    // zeroed n; otherwise a stale value would be added to the reported index.
    n = 0;
    if(keywords.empty())
    {
        return 0;
    }
    const int total = static_cast<int>(keywords.size());
    for(; n < total; ++n)
    {
        if(keywords[n] == s)
        {
            return 1;
        }
    }
    return -1;
}
/**
 * Searches the global operator table for an entry whose first member equals s.
 *
 * @param s       text to look up.
 * @param result  receives the second member of the first matching entry;
 *                left untouched when there is no match.
 * @return        1 when an entry matched, 0 otherwise.
 */
int isSymble(const string& s, string& result)
{
    typedef vector<pair<string,string> >::size_type Index;
    for(Index i = 0; i < symble.size(); ++i)
    {
        const pair<string,string>& entry = symble[i];
        if(entry.first != s)
        {
            continue;
        }
        result = entry.second;
        return 1;
    }
    return 0;
}
/**
 * Searches the global delimiter table for an entry whose first member equals s.
 *
 * @param s       text to look up.
 * @param result  receives the second member of the first matching entry;
 *                left untouched when there is no match.
 * @return        1 when an entry matched, 0 otherwise.
 */
int isBound(const string& s, string& result)
{
    typedef vector<pair<string,string> >::const_iterator Cursor;
    const Cursor last = bound.end();
    Cursor pos = bound.begin();
    // Advance to the first entry keyed by s, or to the end if there is none.
    while(pos != last && pos->first != s)
    {
        ++pos;
    }
    if(pos == last)
    {
        return 0;
    }
    result = pos->second;
    return 1;
}
void analyse(FILE *fp)
{
string temp = "";
string str = "";
string result = "";
int id = 0;
while((ch = fgetc(fp)) != EOF)
{
temp = "";
str = ch;
id = 0;
if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
{
while(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
{
ch = fgetc(fp);
}
fseek(fp,-1L,SEEK_CUR);
}
else if(isalpha(ch))
{
while(isalpha(ch) || isdigit(ch))
{
temp = temp + ch;
ch = fgetc(fp);
}
fseek(fp,-1L,SEEK_CUR);
if(isKeyWord(temp,id) == 1)
{
cout << temp << "\t$關鍵字 , " << id << endl;
}
else
{
cout << temp << "\t$標識符" << endl;
}
}
else if(isdigit(ch))
{
while(isdigit(ch))
{
temp = temp + ch;
ch = fgetc(fp);
}
fseek(fp,-1L,SEEK_CUR);
cout << temp << "\t$整型" << endl;
}
else if(isSymble(str,result))
{
cout << ch << "\t$" << "運算符" << result << endl;
/*case '+':cout << ch << "\t$ADD" << endl;break;
case '-':cout << ch << "\t$SUBTRACT" << endl;break;
case '*':cout << ch << "\t$MULTIPLY" << endl;break;
case '/' :cout << ch << "\t$DIVIDE" << endl;break;
case '=' :cout << ch << "\t$ASSIGN" << endl;break;
case '(' :cout << ch << "\t$LPAR" << endl;break;
case ')' :cout << ch << "\t$RPAR" << endl;break;
case '[' :cout << ch << "\t$LSB" << endl;break;
case ']' :cout << ch << "\t$RSB" << endl;break;
case ';' :cout << ch << "\t$SEMICOLON" << endl;break;
case '.' :cout << ch << "\t$DOT" << endl;break;
case ',' :cout << ch << "\t$COMMA" << endl;break;
case '{' :cout << ch << "\t$LBRACE" << endl; break;
case '}' :cout << ch << "\t$RBRACE" << endl;break;
default :cout << ch << "\t$UnKnow" << endl;*/
}
else if(isBound(str,result))
{
cout << ch << "\t$" << "界符" << result << endl;
}
else
{
cout << ch << "\t$未知" << endl;
}
}
}
int main()
{
string line, symbelLine,boundLine,word,filename,symbleName,symbleId,boundName,boundId;
cout << "請輸入要解析的文件名" << endl;
cin >> filename;
cout << "請輸入該編程語言的關鍵字" << endl;
while(getline(cin,line))
{
istringstream stream(line);
while(stream >> word)
{
keywords.push_back(word);
}
}
cin.clear();
cout << "請輸入該編程語言的運算符,格式爲 符號名稱 符號" << endl;
while(getline(cin,symbelLine))
{
istringstream stream(symbelLine);
while(stream >> symbleName >> symbleId)
{
symble.push_back(pair<string,string>(symbleName,symbleId));
}
}
cin.clear();
cout << "請輸入該編程語言的界符,格式爲 符號名稱 符號" << endl;
while(getline(cin,boundLine))
{
istringstream stream(boundLine);
while(stream >> boundName >> boundId)
{
bound.push_back(pair<string,string>(boundName,boundId));
}
}
FILE *fp;
fp = fopen(filename.c_str(),"r");
if(!fp)
{
cout << "文件操作錯誤,請檢查後重試" << endl;
return -1;
}
analyse(fp);
fclose(fp);
cout << "按任意鍵退出" << endl;
getchar();
return 0;
}
實現起來很簡單,就不註釋了。呵呵。