不用自動機實現的詞法分析。
沒怎麼測試,只用 2 個文件測試過,都沒有問題。= =
先是頭文件
#include<string>
#include<set>
#include<fstream>
using namespace std;
/**
 * Hand-written lexical scanner that works without an automaton.
 *
 * Characters are read one at a time from an input file stream; the step
 * functions group them into tokens and print each token with its category.
 *
 * The instance owns the stream (`reader`) and the token buffer (`token`) it
 * allocates, so it releases them in its destructor and cannot be copied.
 */
class Analyse{
public:
    Analyse();
    /** Releases the input stream and the token buffer; either may be null. */
    ~Analyse(){
        delete reader;
        delete token;
    }
    /** Returns the current character. */
    char getNowChar();
    /** Reads the next character from the input and makes it the current one. */
    char nextChar();
    /** Overwrites the current character. */
    void setNowChar(char);
    /** Set to true once the end of the input has been reached. */
    bool isEnd;
    /** Starts the analysis. */
    void start();
private:
    // Copying is forbidden: two instances would otherwise delete the same
    // stream and token buffer. Declared but intentionally never defined.
    Analyse(const Analyse&);
    Analyse& operator=(const Analyse&);

    // Dispatcher and the per-category scanning steps.
    void step0(char);
    void stringStep();
    void charStep();
    void noteStep();
    string getRemainBlock();
    string getRemainLine();
    void digitStep();
    void letterStep();
    void signStepOrIllegal();

    char nowChar;        // current character
    ifstream *reader;    // input stream, owned
    string *token;       // text of the token being built, owned

    // Raw string tables and the lookup sets built from them.
    static const string keyWorkStr[];
    static const string operaStr[];
    static const string specialStr[];
    static const set<string> *keyWork;
    static const set<string> *special;
    static const set<string> *opera;
};
然後是CPP文件
#include"CompliExam.h"
#include<iostream>
#include<ctype.h>
// Words reported as keywords by the scanner.
const string Analyse::keyWorkStr[] = {
    // Language keywords.
    "asm", "auto", "bool", "break", "delete", "case",
    "catch", "char", "const", "class", "continue", "default",
    "do", "double", "else", "enum", "dynamic_cast", "explicit",
    "extern", "float", "for", "goto", "if", "friend",
    "inline", "mutable", "int", "namespace", "long", "operator",
    "new", "register", "private", "protected", "public", "return",
    "short", "static", "signed", "static_cast", "switch", "sizeof",
    "this", "struct", "template", "throw", "try", "void",
    "true", "while", "const_cast", "typedef", "typeid", "typename",
    "union", "unsigned", "using", "virtual", "volatile", "wchar_t",
    "export", "reinterpret_cast", "false",

    // Preprocessor directives and frequently used names.
    "include", "main", "cin", "cout", "define", "elif",
    "endif", "ifdef", "ifndef", "undef", "line", "error",
    "pragma", "endl", "ends",

    // Common library/header names. The original author notes that this list
    // is incomplete because of a string length limit.
    "assert.h", "ctype.h", "errno.h", "float.h", "iso646.h", "limits.h",
    "locale.h", "math.h", "setjmp.h", "signal.h", "stdarg.h", "stddef.h",
    "stdio.h", "stdlib.h", "string.h", "time.h", "wchar.h", "wctype.h",
    "fstream.h", "iomanip.h", "iostream.h", "vector", "complex.h", "queue",
    "stack", "set", "istream", "iostream", "ctype.h", "algorithm",
    "bitset", "iomanip", "ios", "string", "ostream", "iterator",
    "utility", "cmath", "complex", "memory", "map", "list",
    "fstream", "exception", "deque", "cstring", "ctime", "new"
};
// Strings reported as operators by the scanner.
const string Analyse::operaStr[] = {
    "+", "-", "*", "/", "++", "--",
    "<<", ">>", "<", ">", ">=", "<=",
    "==", "=", "*=", "+=", "-=", "/=",
    "%=", "&=", "|=", "^=", "&", "&&",
    "||", "!=", "~", "<<=", ">>=", "%"
};
// Strings reported as special symbols by the scanner.
const string Analyse::specialStr[] = {
    "(", ")", "[", "]", "!", ":", ".",
    ",", "{", "}", "#", ";", "@", "?"
};
char Analyse::getNowChar(){
return nowChar;
};
void Analyse::setNowChar(char c){
nowChar = c;
}
/**
 * Constructor.
 * Puts the scanner in its initial state: not at end of input, no current
 * character, no input stream opened and no token buffer allocated. It then
 * announces the new instance on standard output.
 */
Analyse::Analyse()
    : isEnd(false),
      nowChar('\0'),   // give getNowChar() a defined value before the first read
      reader(NULL),
      token(NULL){
    cout<<"生成分析實例:"<<endl;
}
// Lookup sets, each built during static initialisation from the whole of its
// string table (element count = total array size / size of one element).
const set<string>* Analyse::keyWork =
    new set<string>(&keyWorkStr[0],
                    &keyWorkStr[0] + sizeof(Analyse::keyWorkStr) / sizeof(*keyWorkStr));
const set<string>* Analyse::special =
    new set<string>(&specialStr[0],
                    &specialStr[0] + sizeof(specialStr) / sizeof(*specialStr));
const set<string>* Analyse::opera =
    new set<string>(&operaStr[0],
                    &operaStr[0] + sizeof(operaStr) / sizeof(*operaStr));
/**
 * Reads the next character from the input and makes it the current character.
 *
 * The input stream is opened lazily on the first call, using the default file
 * "Knight.cpp". When no character can be read (end of file, or the file could
 * not be opened), isEnd is set to true, the current character is left
 * unchanged and '\0' is returned.
 *
 * @return the character read, or '\0' when the input is exhausted
 */
char Analyse::nextChar(){
    if(reader == NULL){
        reader = new ifstream("Knight.cpp");
    }
    // Keep the result as an int: narrowing to char before the comparison makes
    // EOF indistinguishable from byte 0xFF (signed char) or undetectable
    // (unsigned char).
    const ifstream::int_type next = reader->get();
    if(next == ifstream::traits_type::eof()){
        isEnd = true;
        return '\0';
    }
    const char c = ifstream::traits_type::to_char_type(next);
    setNowChar(c);
    return c;
}
/**
 * Dispatcher: takes one character and decides which scanning step handles it.
 *
 * Newlines, spaces, tabs and carriage returns are skipped first. When the
 * input is exhausted (on entry or while skipping), the end of the analysis is
 * reported and the stream is closed. Otherwise the first non-blank character
 * selects one of six categories:
 *  1. string:      text enclosed in double quotes
 *  2. character:   text enclosed in single quotes
 *  3. comment:     line comment or block comment (anything starting with '/')
 *  4. number:      natural numbers and decimals
 *  5. letter:      identifiers and keywords
 *  6. sign:        operators, special symbols or illegal characters
 *
 * @param c the character to classify
 */
void Analyse::step0(char c){
    // Skip blanks, but stop as soon as the input runs out so that the '\0'
    // returned at end of file is never dispatched as a token.
    while(!isEnd && (c == '\n' || c == ' ' || c == '\t' || c == '\r')){
        c = nextChar();
    }
    if(isEnd){
        cout<<"分析結束"<<endl;
        if(reader != NULL){   // isEnd is public and may be set before any read
            reader->close();
        }
        return;
    }
    // The <ctype.h> classifiers require a value representable as unsigned char.
    const unsigned char uc = static_cast<unsigned char>(c);
    if(c == '\"'){
        stringStep();
    }else if(c == '\''){
        charStep();
    }else if(c == '/'){
        noteStep();
    }else if(isdigit(uc)){
        digitStep();
    }else if(isalpha(uc) || c == '_'){
        letterStep();
    }else{
        signStepOrIllegal();
    }
}
/**
* 進入字符串步驟
* 過濾第一種缺陷情況就是"\"",但是不能過濾第二重轉義字符出現的缺陷.
* 因爲這種情況不多,所以忽略
*/
void Analyse::stringStep(){
token = new string("\"");
char c;
do{
c = nextChar();
token->append(1,c);
if(c == '\"' && !(token->compare("\"\\\"") == 0 )){
break;
}
}while(true);
cout<<token->c_str()<<" :字符串"<<endl;
step0(nextChar());
}
/**
* 進入單字符步驟
* 可能出現轉義字符打印錯誤,故提供一重修復.
* 鑑於深層情況少見,忽略
*/
void Analyse::charStep(){
token = new string("\'");
char c;
do{
c = nextChar();
token->append(1,c);
if(c == '\'' && !(token->compare("'\\'") == 0)){
break;
}
}while(true);
cout<<token->c_str()<<" :單字符"<<endl;
step0(nextChar());
}
/**
* 進入註釋步驟
* 行註釋以及塊註釋
*/
void Analyse::noteStep(){
token = new string("/");
char c = nextChar();
if(c == '/'){//行註釋
token->append(1,'/');
token->append(getRemainLine());
}else if(c == '*'){//塊註釋
token->append(1,'*');
token->append(getRemainBlock());
}else{
cout<<"註釋代碼未知情況"<<endl;
}
cout<<token->c_str()<<" :註釋"<<endl;
step0(nextChar());
}
/**
* 返回註釋塊字符串
* 策略是一直掃描直到掃描到*和/符號
*
* @return String
*/
string Analyse::getRemainBlock(){
string buffer("");
char c;
char c2;
while(true){
c = nextChar();
if(c == '\t')
continue;
buffer.append(1,c);
if(c == '*'){
c2 = nextChar();
if(c2 == '\t')
continue;
if(c2 == '/'){//如果繼*後的符號是斜槓,那麼就退出循環
buffer.append(1,c2);
break;
}
buffer.append(1,c2);
}
}
return buffer;
}
/**
* 返回行註釋的字符串
* 掃描策略是直接掃描直到換行符
* @return
*/
string Analyse::getRemainLine(){
string buffer("");
char c;
while(true){
c = nextChar();
if(c == '\n' || c == '\r'){
break;
}
buffer.append(1,c);
}
return buffer;
}
/**
 * Number step: collects an unsigned number starting at the current character.
 *
 * Digits are accepted together with at most one '.'; a second '.' ends the
 * number. A leading sign is not part of the token. The token is printed and
 * step0 continues with the character that ended the number.
 */
void Analyse::digitStep(){
    // Reuse the token buffer instead of allocating (and leaking) one per token.
    if(token == NULL){
        token = new string();
    }
    token->assign(1,getNowChar());   // the digit that brought us here
    bool dot = false;                // a '.' has already been accepted
    char c = nextChar();
    // isdigit requires a value representable as unsigned char.
    while(c == '.' || isdigit(static_cast<unsigned char>(c))){
        if(c == '.'){
            if(dot){
                break;
            }
            dot = true;
        }
        token->append(1,c);
        c = nextChar();
    }
    cout<<token->c_str()<<" :數字"<<endl;
    // Back to step 0 with the character that ended the number.
    step0(getNowChar());
}
/**
 * Letter step: collects a word starting at the current character.
 *
 * Letters, digits and '_' are accepted together with at most one '.'
 * (presumably so that header names such as "stdio.h" in the keyword table can
 * match). The word is printed as a keyword when it is found in the keyword
 * set and as an identifier otherwise; step0 then continues with the character
 * that ended the word.
 */
void Analyse::letterStep(){
    // Reuse the token buffer instead of allocating (and leaking) one per token.
    if(token == NULL){
        token = new string();
    }
    token->assign(1,getNowChar());   // the character that brought us here
    bool dot = false;                // a '.' has already been accepted
    char c = nextChar();
    while(true){
        // The <ctype.h> classifiers require a value representable as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(c);
        if(!(c == '_' || isdigit(uc) || isalpha(uc) || c == '.')){
            break;
        }
        if(c == '.'){
            if(dot){
                break;
            }
            dot = true;
        }
        token->append(1,c);
        c = nextChar();
    }
    if(keyWork->find(*token) != keyWork->end()){
        cout<<token->c_str()<<" :關鍵字"<<endl;
    }else{
        cout<<token->c_str()<<" :標識符"<<endl;
    }
    step0(getNowChar());
}
/**
* 進入符號階段
* 在這裏分別區分非法字符
* 操作符,特殊字符
*/
void Analyse::signStepOrIllegal(){
token = new string();
char c = getNowChar();
string buffer(1,getNowChar());
while((opera->find(buffer) != opera->end()) || (special->find(buffer) != special->end())){
token->append(1,c);
c = nextChar();
buffer.append(1,c);
}
if(token->size() != 0){//非法字符
if(special->find(buffer) != special->end()){
cout<<token->c_str()<<" :特殊字符"<<endl;
}else{
cout<<token->c_str()<<" :操作符"<<endl;
}
}else{
cout<<token->c_str()<<" :非法字符"<<endl;
}
step0(getNowChar());
}
/**
 * Entry point of the analysis: reads the first character of the input and
 * passes it to the dispatcher.
 */
void Analyse::start(){
    const char first = nextChar();
    step0(first);
}