詞法分析(C++實現)不使用狀態機

不用自動機

尚未充分測試,只用 2 個文件測試過,結果都沒有問題。

首先是頭文件(CompliExam.h):


#include<string>
#include<set>
#include<fstream>
using namespace std;
// Hand-written lexical analyser: reads a source file one character at a time
// and prints each recognised token together with its category.
class Analyse{
public:
	// Resets the state flags and pointers; no file is opened here.
	Analyse();
	// Returns the most recently stored current character.
	char getNowChar();
	// Reads the next character from the input, stores it as the current
	// character and returns it; sets isEnd when the input is exhausted.
	char nextChar();
	// Overwrites the stored current character.
	void setNowChar(char);
	// Set to true by nextChar() once the end of the input has been reached.
	bool isEnd;
	// Entry point: reads the first character and starts the analysis.
	void start();

private:
	// Dispatcher: skips whitespace and picks a step by the first character.
	void step0(char);
	// Scans a double-quoted string literal.
	void stringStep();
	// Scans a single-quoted character literal.
	void charStep();
	// Scans a line comment or a block comment.
	void noteStep();
	// Returns the rest of a block comment, up to and including "*/".
	string getRemainBlock();
	// Returns the rest of the current line (used for line comments).
	string getRemainLine();
	// Scans a number with at most one decimal point.
	void digitStep();
	// Scans an identifier or keyword.
	void letterStep();
	// Scans an operator or special symbol, or reports an illegal character.
	void signStepOrIllegal();

	// Current character, updated by nextChar() / setNowChar().
	char nowChar;
	// Input stream; allocated by nextChar() on first use.
	ifstream *reader;
	// Text of the token currently being assembled by a step function.
	string *token;


	// Raw string tables used to build the lookup sets below.
	static const string keyWorkStr[];
	static const string operaStr[];
	static const string specialStr[];

	// Lookup sets for keywords, special symbols and operators.
	static const set<string> *keyWork;
	static const set<string> *special;
	static const set<string> *opera;

};



然後是CPP文件



#include"CompliExam.h"
#include<iostream>
#include<ctype.h>
// Table of words reported as "keyword" by letterStep(). It is copied into the
// keyWork set, so the duplicated entries ("ctype.h" and "new" each appear
// twice) collapse to a single set element.
const string Analyse::keyWorkStr[] = {
		"asm","auto","bool",
		"break","delete","case",
		"catch","char","const",
		"class","continue","default",
		"do","double","else",
		"enum","dynamic_cast","explicit",
		"extern","float","for",
		"goto","if","friend",
		"inline","mutable","int",
		"namespace","long","operator",
		"new","register","private",
		"protected","public","return",
		"short","static","signed",
		"static_cast","switch","sizeof",
		"this","struct","template",
		"throw","try","void",
		"true","while","const_cast",
		"typedef","typeid","typename",
		"union","unsigned","using",
		"virtual","volatile","wchar_t",
		"export","reinterpret_cast","false",
		// The entries above are language keywords.

		"include","main","cin",
		"cout","define","elif",
		"endif","ifdef","ifndef",
		"undef","line","error","pragma",
		"endl","ends",
		// The entries above are preprocessor directive names and common operations.

		"assert.h","ctype.h","errno.h",
		"float.h","iso646.h","limits.h",
		"locale.h","math.h","setjmp.h",
		"signal.h","stdarg.h","stddef.h",
		"stdio.h","stdlib.h","string.h",
		"time.h","wchar.h","wctype.h",
		"fstream.h","iomanip.h","iostream.h",
		"vector","complex.h","queue",
		"stack","set","istream",
		"iostream","ctype.h","algorithm",
		"bitset","iomanip","ios",
		"string","ostream","iterator",
		"utility","cmath","complex",
		"memory","map","list",
		"fstream","exception","deque",
		"cstring","ctime","new"
		// The entries above are commonly used library headers. The list is
		// incomplete; the original author notes a string-length limit as the reason.
	};// keyword strings

// Table of operator spellings, copied into the opera lookup set.
//
// signStepOrIllegal() grows a symbol one character at a time and only keeps
// extending while the text read so far is itself a known operator or special
// symbol. A multi-character operator is therefore reachable only when its
// one-character prefix is also listed. "|" and "^" were missing, which made
// "||", "|=" and "^=" unreachable; they are appended at the end.
const string Analyse::operaStr[] = {
		"+","-","*",
		"/","++","--",
		"<<",">>","<",
		">",">=","<=",
		"==","=","*=",
		"+=","-=","/=",
		"%=","&=","|=",
		"^=","&","&&",
		"||","!=","~",
		"<<=",">>=","%",
		"|","^"
	};// operator strings

// Table of punctuation reported as "special symbol"; copied into the special
// lookup set. Element order is unchanged, only the layout differs.
const string Analyse::specialStr[] = {
	"(", ")",
	"[", "]",
	"!", ":", ".", ",",
	"{", "}",
	"#", ";", "@", "?"
};

char Analyse::getNowChar(){
	return nowChar;
};

void Analyse::setNowChar(char c){
	nowChar = c;
}

	/**
	 * Constructor.
	 * Puts every data member into a defined state and prints a start-up
	 * message. The input stream is not opened here; nextChar() creates it
	 * on first use.
	 *
	 * nowChar used to be left uninitialised, so calling getNowChar() before
	 * the first nextChar() read an indeterminate value. It now starts as '\0'.
	 */
Analyse::Analyse()
	: isEnd(false), nowChar('\0'), reader(NULL), token(NULL)
{
	cout<<"生成分析實例:"<<endl;
}


// Lookup sets, each built from the matching string table via an iterator
// range [first element, one past the last element). The element count is
// derived from the array size, so the tables can grow without edits here.
// The sets are heap-allocated; no matching delete is visible in this file.
const set<string>* Analyse::keyWork =
	new set<string>(&keyWorkStr[0], &keyWorkStr[0] + sizeof keyWorkStr / sizeof *keyWorkStr);
const set<string>* Analyse::special =
	new set<string>(&specialStr[0], &specialStr[0] + sizeof specialStr / sizeof *specialStr);
const set<string>* Analyse::opera =
	new set<string>(&operaStr[0], &operaStr[0] + sizeof operaStr / sizeof *operaStr);


	/**
	 * Reads the next character and makes it the current character.
	 * On the first call the input file "Knight.cpp" is opened.
	 * When the input is exhausted (or could not be read) isEnd is set to
	 * true, the current character is left unchanged and '\0' is returned.
	 *
	 * @return the character read, or '\0' at end of input
	 */
char Analyse::nextChar(){
	if(reader == NULL){
		reader = new ifstream("Knight.cpp");
	}
	// get() returns an int so that EOF can be told apart from every valid
	// byte. Narrowing to char before the comparison either mistakes byte
	// 0xFF for EOF (signed char) or never sees EOF at all (unsigned char).
	const int next = reader->get();
	if(next != EOF){
		const char c = static_cast<char>(next);
		setNowChar(c);
		return c;
	}
	isEnd = true;
	return '\0';
}

	/**
	 * Dispatcher: receives one character and decides which step handles it.
	 * Newlines, spaces, tabs and carriage returns are skipped first.
	 * The remaining input is split into six categories, chosen by the first
	 * character:
	 * 1.	string: text enclosed in double quotes
	 * 2.	single character: text enclosed in single quotes
	 * 3.	comment: line comment or block comment
	 * 4.	number: natural numbers and decimals
	 * 5.	letter: identifiers and keywords
	 * 6.	sign: operators, special symbols or illegal characters
	 *
	 * Every step function calls step0() again when it finishes, so the
	 * recursion depth grows with the number of tokens in the input.
	 *
	 * @param c the character to classify; it is expected to be the current
	 *          character, because the step functions read getNowChar()
	 */
void Analyse::step0(char c){
	// Skip whitespace, but stop as soon as the input runs out. Otherwise
	// trailing whitespace would fall through to the dispatch below and be
	// reported as an illegal character.
	while(!isEnd && (c == '\n' || c == ' ' || c == '\t' || c == '\r')){
		c = nextChar();
	}
	if(isEnd){
		cout<<"分析結束"<<endl;
		if(reader != NULL){
			reader->close();
		}
		return;
	}

	// The <ctype.h> classifiers require a value representable as unsigned
	// char; passing a negative char (any byte >= 0x80 where char is signed)
	// is undefined behaviour.
	const unsigned char uc = static_cast<unsigned char>(c);

	if(c == '\"'){
		stringStep();
	}else if(c == '\''){
		charStep();
	}else if(c == '/'){
		noteStep();
	}else if(isdigit(uc)){
		digitStep();
	}else if(isalpha(uc) || c == '_'){
		letterStep();
	}else{
		signStepOrIllegal();
	}
}


	/**
	 * 進入字符串步驟
	 * 過濾第一種缺陷情況就是"\"",但是不能過濾第二重轉義字符出現的缺陷.
	 * 因爲這種情況不多,所以忽略
	 */
void Analyse::stringStep(){
	token = new string("\"");
	
		char c;
		do{
			c = nextChar();
			token->append(1,c);
			if(c == '\"' && !(token->compare("\"\\\"") == 0 )){
				break;
			}
		}while(true);
		cout<<token->c_str()<<" :字符串"<<endl;
		step0(nextChar());
}

	/**
	 * 進入單字符步驟
	 * 可能出現轉義字符打印錯誤,故提供一重修復.
	 * 鑑於深層情況少見,忽略
	 */
void Analyse::charStep(){
	token = new string("\'");
		
		char c;
		do{
			c = nextChar();
			token->append(1,c);
			if(c == '\'' && !(token->compare("'\\'") == 0)){
				break;
			}
		}while(true);
		cout<<token->c_str()<<" :單字符"<<endl;
		step0(nextChar());
}

	/**
	 * 進入註釋步驟
	 * 行註釋以及塊註釋
	 */
void Analyse::noteStep(){
	token = new string("/");
		char c = nextChar();
		
		if(c == '/'){//行註釋
			token->append(1,'/');
			token->append(getRemainLine());
		}else if(c == '*'){//塊註釋
			token->append(1,'*');
			token->append(getRemainBlock());
		}else{
			cout<<"註釋代碼未知情況"<<endl;
		}
		cout<<token->c_str()<<" :註釋"<<endl;
		step0(nextChar());
}


	/**
	 * Returns the remainder of a block comment.
	 * Scans until a '*' immediately followed by '/' has been read; both
	 * characters are included in the result. Tab characters are left out of
	 * the returned text, as before.
	 *
	 * Tracking the previous character (rather than reading a pair at a time)
	 * makes terminators of the form "**" + "/" end the comment correctly.
	 * The scan also stops at end of input, so an unterminated comment no
	 * longer loops forever.
	 *
	 * @return the comment text following the opening marker
	 */
string Analyse::getRemainBlock(){
	string buffer;
	char previous = '\0';
	while(true){
		const char c = nextChar();
		if(isEnd){
			break;//unterminated comment
		}
		if(c == '\t'){
			previous = c;//a tab between '*' and '/' must not form a terminator
			continue;
		}
		buffer.append(1,c);
		if(previous == '*' && c == '/'){
			break;
		}
		previous = c;
	}
	return buffer;
}

	/**
	 * Returns the remainder of the current line (used for line comments).
	 * Scans up to, but not including, the next '\n' or '\r'. The line
	 * terminator itself is consumed. The scan also stops at end of input, so
	 * a comment on a last line without a newline no longer loops forever.
	 *
	 * @return the characters read before the line terminator
	 */
string Analyse::getRemainLine(){
	string buffer;
	while(true){
		const char c = nextChar();
		if(isEnd || c == '\n' || c == '\r'){
			break;
		}
		buffer.append(1,c);
	}
	return buffer;
}

	/**
	 * Number step.
	 * Starts from the current digit and collects further digits and at most
	 * one decimal point. A sign is not part of the number. A second '.' ends
	 * the number and is left for the next token.
	 */
void Analyse::digitStep(){
	// Reuse one buffer instead of allocating a new string for every token.
	if(token == NULL){
		token = new string();
	}
	token->assign(1,getNowChar());//start with the current digit

	bool dot = false;//true once a decimal point has been taken
	char c = nextChar();
	// isdigit() needs an unsigned char value; a negative char is undefined behaviour.
	while(c == '.' || isdigit(static_cast<unsigned char>(c))){
		if(c == '.'){
			if(dot){//a decimal point was already seen
				break;
			}
			dot = true;
		}
		token->append(1,c);//digits and the first point both join the token
		c = nextChar();
	}
	cout<<token->c_str()<<" :數字"<<endl;

	// Back to step 0 with the character that ended the number.
	step0(getNowChar());
}

	/**
	 * Letter step.
	 * Starts from the current character and collects letters, digits,
	 * underscores and at most one '.'. The '.' lets names such as "stdio.h"
	 * match the keyword table as one token. The result is printed as a
	 * keyword when it is found in keyWork, otherwise as an identifier.
	 */
void Analyse::letterStep(){
	// Reuse one buffer instead of allocating a new string for every token.
	if(token == NULL){
		token = new string();
	}
	token->assign(1,getNowChar());//start with the current character

	bool dot = false;//true once a '.' has been taken
	char c = nextChar();
	while(true){
		// The <ctype.h> classifiers need an unsigned char value; a negative
		// char is undefined behaviour.
		const unsigned char uc = static_cast<unsigned char>(c);
		if(c == '.'){
			if(dot){//a '.' was already seen
				break;
			}
			dot = true;
		}else if(!(c == '_' || isdigit(uc) || isalpha(uc))){
			break;
		}
		token->append(1,c);
		c = nextChar();
	}

	if(keyWork->find(*token) != keyWork->end()){
		cout<<token->c_str()<<" :關鍵字"<<endl;
	}else{
		cout<<token->c_str()<<" :標識符"<<endl;
	}

	// Back to step 0 with the character that ended the word.
	step0(getNowChar());
}

	/**
	 * 進入符號階段
	 * 在這裏分別區分非法字符
	 * 操作符,特殊字符
	 */
void Analyse::signStepOrIllegal(){
	token = new string();
		char c = getNowChar();
		string buffer(1,getNowChar());
		while((opera->find(buffer) != opera->end()) || (special->find(buffer) != special->end())){
			token->append(1,c);
			c = nextChar();
			buffer.append(1,c);
		}
		if(token->size() != 0){//非法字符
			if(special->find(buffer) != special->end()){
				cout<<token->c_str()<<" :特殊字符"<<endl;
			}else{
				cout<<token->c_str()<<" :操作符"<<endl;
			}
		}else{
			cout<<token->c_str()<<" :非法字符"<<endl;
		}
		
		step0(getNowChar());
}

	/**
	 * Starts the analysis: fetches the first character of the input and
	 * passes it to the dispatcher.
	 */
void Analyse::start()
{
	const char first = nextChar();
	step0(first);
}


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章