C++ 簡單詞法分析器的實現

      現在在學編譯原理,做了個簡單的詞法分析器。可以自定義關鍵字、界符以及運算符。

      效果如下

有一點不滿意的地方是必須輸入兩次 Ctrl+Z 才能結束輸入,原來打算的是按下回車就結束輸入的。

還有一點想法是不輸入這些設置,而是讀取一個配置文件,從中識別關鍵字、界符以及運算符。

代碼如下:

#include <cctype>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>
using namespace std;

// Keyword list of the language being analysed; filled in by main() and
// searched by isKeyWord().
vector <string> keywords;
// Operator table filled in by main(); isSymble() compares a lexeme with
// .first and hands back .second as the description.
vector <pair<string,string> > symble;
// Delimiter table filled in by main(); isBound() compares a lexeme with
// .first and hands back .second as the description.
vector <pair<string,string> > bound;
// Input character currently being examined; assigned by analyse().
char ch;
int isKeyWord(const string& s,int& n)
{
    if(keywords.empty())
    {
        return 0;
    }
    for(vector<string>::iterator it = keywords.begin();
        it != keywords.end();it++,n++)
    {
       if(s == *it)
       {
            return 1;
       }
    }
    return -1;
}
int isSymble(const string& s, string& result)
{
    for(vector<pair<string,string> >::iterator it = symble.begin();
        it != symble.end();it++)
    {
       if(s == (*it).first)
       {
            result = (*it).second;
            return 1;
       }
    }
    return 0;
}
int isBound(const string& s, string& result)
{
    for(vector<pair<string,string> >::iterator it = bound.begin();
        it != bound.end();it++)
    {
       if(s == (*it).first)
       {
            result = (*it).second;
            return 1;
       }
    }
    return 0;
}
void analyse(FILE *fp)
{
    string temp = "";
    string str = "";
    string result = "";
    int id = 0;
    while((ch = fgetc(fp)) != EOF)
    {
        temp = "";
        str = ch;
        id = 0;
        if(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
        {
            while(ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r')
            {
                ch = fgetc(fp);
            }
            fseek(fp,-1L,SEEK_CUR);
        }
        else if(isalpha(ch))
        {
            while(isalpha(ch) || isdigit(ch))
            {
                temp = temp + ch;
                ch = fgetc(fp);
            }
            fseek(fp,-1L,SEEK_CUR);
            if(isKeyWord(temp,id) == 1)
            {
                cout << temp << "\t$關鍵字 , " << id << endl;
            }
            else
            {
                cout << temp << "\t$標識符" << endl;
            }
        }
        else if(isdigit(ch))
        {
            while(isdigit(ch))
            {
                temp = temp + ch;
                ch = fgetc(fp);
            }
            fseek(fp,-1L,SEEK_CUR);
            cout << temp << "\t$整型" << endl;
        }
        else if(isSymble(str,result))
        {
            cout << ch << "\t$" << "運算符" << result << endl;
            /*case '+':cout << ch << "\t$ADD" << endl;break;
            case '-':cout << ch << "\t$SUBTRACT" << endl;break;
            case '*':cout << ch << "\t$MULTIPLY" << endl;break;
            case '/' :cout << ch << "\t$DIVIDE" << endl;break;
            case '=' :cout << ch << "\t$ASSIGN" << endl;break;
            case '(' :cout << ch << "\t$LPAR" << endl;break;
            case ')' :cout << ch << "\t$RPAR" << endl;break;
            case '[' :cout << ch << "\t$LSB" << endl;break;
            case ']' :cout << ch << "\t$RSB" << endl;break;
            case ';' :cout << ch << "\t$SEMICOLON" << endl;break;
            case '.' :cout << ch << "\t$DOT" << endl;break;
            case ',' :cout << ch << "\t$COMMA" << endl;break;
            case '{' :cout << ch << "\t$LBRACE" << endl; break;
            case '}' :cout << ch << "\t$RBRACE" << endl;break;
            default :cout << ch << "\t$UnKnow" << endl;*/
        }
        else if(isBound(str,result))
        {
            cout << ch << "\t$" << "界符" << result << endl;
        }
        else
        {
            cout << ch << "\t$未知" << endl;
        }
    }
}
int main()
{
    string line, symbelLine,boundLine,word,filename,symbleName,symbleId,boundName,boundId;
    cout << "請輸入要解析的文件名" << endl;
    cin >> filename;
    cout << "請輸入該編程語言的關鍵字" << endl;
    while(getline(cin,line))
    {
        istringstream stream(line);
        while(stream >> word)
        {
            keywords.push_back(word);
        }
    }
    cin.clear();
    cout << "請輸入該編程語言的運算符,格式爲 符號名稱  符號" << endl;
    while(getline(cin,symbelLine))
    {
        istringstream stream(symbelLine);
        while(stream >> symbleName >> symbleId)
        {
            symble.push_back(pair<string,string>(symbleName,symbleId));
        }    
    }
    cin.clear();
    cout << "請輸入該編程語言的界符,格式爲 符號名稱  符號" << endl;
    while(getline(cin,boundLine))
    {
        istringstream stream(boundLine);
        while(stream >> boundName >> boundId)
        {
            bound.push_back(pair<string,string>(boundName,boundId));
        }    
    }
    FILE *fp;
    fp = fopen(filename.c_str(),"r");
    if(!fp)
    {
        cout << "文件操作錯誤,請檢查後重試" << endl;
        return -1;
    }
    analyse(fp);
    fclose(fp);
    cout << "按任意鍵退出" << endl;
    getchar();
    return 0;
}

實現起來很簡單,就不註釋了。呵呵。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章