編譯器-詞法分析

要求：單詞符號及種別表

單詞符號	種別編碼	單詞值
main	1
int	2
float	3
double	4
char	5
if	6
else	7
do	8
while	9
l(l\|d)*	10	內部字符串
( +\|-\|ε ) dd*(.dd* \| ε)( e ( +\|-\|ε ) dd*\|ε)	20	二進制數值表示
=	21
+	22
-	23
*	24
/	25
(	26
)	27
{	28
}	29
,	30
;	31
>	32
>=	33
<	34
<=	35
==	36
!=	37
#	0

1. 總體設計思想

首先將指定語言的所有出現的單詞（可以是一類也可以是特定的）構造其正規式，然後根據正規式構造NFA，最後將NFA確定化爲DFA，此DFA即爲遇到此類單詞時的狀態轉換圖，也就是程序的流程分支圖。每一種單詞的狀態轉換圖又是整個詞法分析程序的分支，組合到一塊就可以畫出整個分析程序的狀態轉換圖。

2. 詳細算法設計

下面給出關鍵單詞的NFA：

科學計數法： ( +|-|ε )dd*(.dd* | ε)( e ( +|-|ε ) dd*|ε)

標識符：l(l|d)*

下面給出程序的僞代碼：

ch=getch();//從源碼緩衝區取一個字符

while(isBlank(ch))

{

         ch=getch();//從源碼緩衝區取一個字符

}

switch(ch)

{

         根據ch的類型按照狀態圖流程進行判斷；

         return type;

}

3. 流程框圖

4. 函數相關說明

scaner(const char codeBuffer[], int &startPosition, char token[])：掃描指定緩衝區的字符串，識別出從startPosition開始的該語言的單詞；codeBuffer爲字符串緩衝區指針，startPosition爲識別的起始位置，token爲存放識別出的單詞的數組指針。

isBlank(const char ch)：判斷ch是否爲空白字符，包括製表符、空格、換行符。

isLetter(const char ch)：判斷ch是否爲字母。

isDigi(const char ch)：判斷ch是否爲數字。

getTypeCode(const char token[])：如果識別出的是字符串，就查表給出該字符串的類型碼。

judgeEe(char ch, const char codeBuffer[], int &startPosition, char token[], int &m)：如果爲科學計數法的形式，找出E/e後面的部分。

5. 輸入與輸出

輸入：以#號結束所給文法的源程序字符串

輸出：二元組（returnCode,token或sum）構成的序列

例如：輸入：abc+123#

輸出：(10,’abc’)

(13,’+’)

(11,123)

出錯處理：

如果出現不符合該語言的構造都要提示錯誤

例如：123e+就不符合該語言構造規則，爲錯誤語言

6. 程序運行結果

程序源碼：

/********************************************************
*文件名：Morphology.h詞法分析器相關函數的聲明
*功能:實現詞法分析功能
*時間：2013.9.28
*作者:KDF5000
*/
#include "common.h"
#pragma once
/**
 * Lexical analyzer: splits a source buffer into tokens, one per scaner() call.
 *
 * The numeric type codes returned by the member functions are constants that
 * are not declared in this class (presumably they come from common.h).
 */
class Morphology
{
public:
	Morphology(void);
	~Morphology(void);

	//char *keyWordTable[KEYWORD_NUMBER];
	// Scans the code buffer starting at startPosition and returns the type code
	// of the recognized token; the token text (word or number) is stored in the
	// token array and startPosition is advanced.
	int scaner(const char codeBuffer[],int &startPosition,char token[]); 
	bool isBlank(const char ch);// whether ch is a blank character (space, etc.)
	
	static bool isLetter(const char ch);  // whether the input character is a letter
	static bool isDigi(const char ch);    // whether the input character is a digit
	int getTypeCode(const char token[]);  // look up the type code that corresponds to a string
	bool judgeEe(char ch,const char codeBuffer[],int &startPosition,char token[],int &m);// handles what follows an E/e inside a number
	void setPreIsOp(bool isPreOP);   // set whether the previous token was an operator
private:
	bool preIsOp;   // whether the previous token was an operator
};

/*******************************************************
*Morphology.cpp文件，主要函數的實現
********************************************************/
#include "Morphology.h"
#include <string.h>
// Keyword table. getTypeCode() returns (index + 1) for a word found here.
// The elements point at string literals, which are arrays of const char, so
// the element type must be `const char *` (the implicit conversion to
// `char *` is not valid C++ since C++11, and writing through it is undefined).
#ifndef KEYWORDTABLE
#define KEYWORDTABLE    
const char *keyWordTable[KEYWORD_NUMBER] = {"begin","if","then","while","do","end"};
#endif

// Starts with preIsOp set: scaner() only folds a leading '+'/'-' into a
// number while this flag is true.
Morphology::Morphology(void)
	: preIsOp(true)
{
}


// Nothing to release: the only data member is a plain bool.
Morphology::~Morphology(void) {}

/**
 * Records whether the token preceding the next scan was an operator.
 *
 * scaner() consults this flag to decide whether a '+'/'-' followed by digits
 * is the sign of a number (flag true) or a stand-alone operator (flag false).
 *
 * @param isPreOp  new value of the flag. It used to be ignored and the flag
 *                 was unconditionally set to true, so callers could never
 *                 clear it through this setter.
 */
void Morphology::setPreIsOp(bool isPreOp)
{
	preIsOp = isPreOp;
}
/**
 * Recognizes the next token in codeBuffer.
 *
 * @param codeBuffer     source text; '#' is treated as the end-of-input marker.
 * @param startPosition  in: index at which scanning starts;
 *                       out: index of the first character after the token.
 * @param token          out: NUL-terminated text of the token. No bound check
 *                       is done here, so the caller must supply a buffer
 *                       large enough for the longest token (comments included).
 * @return the type code of the token: the value of getTypeCode() for words,
 *         otherwise one of the code constants used below (DIGIT, PLUS, ...,
 *         ERROR), which are declared outside this file.
 *
 * Side effect: updates preIsOp, which decides whether a following '+'/'-'
 * directly in front of digits is read as the sign of a number.
 */
int Morphology::scaner(const char codeBuffer[],int &startPosition,char token[])
{
	// Start from an empty token. `token` is a pointer here (array parameters
	// decay), so sizeof(token) is the pointer size and cannot be used to
	// clear the buffer; every return path below terminates the string itself.
	token[0] = '\0';
	int m = 0; // write index into token
	char preCh = '\0'; // sign character consumed below, if any
	char ch = codeBuffer[startPosition++];
	// skip leading spaces, tabs and newlines
	while(isBlank(ch))
	{
		ch = codeBuffer[startPosition++];
	}
	
	// Dispatch on the first character, following the state-transition diagram.
	// First character is a letter: keyword or identifier, l(l|d)*
	if(isLetter(ch))
	{
		while(isLetter(ch) || isDigi(ch) )
		{
			token[m++] = ch;
			ch = codeBuffer[startPosition++];
		}
		token[m] = '\0';
		// put back the character that is neither a letter nor a digit
		startPosition--;

		// keyword code, or IDENTIFIER when the word is not in the table
		int stringCode = getTypeCode(token);
		preIsOp = false;
		return stringCode;
	}
	// '+', '-' or a digit: a number, or a stand-alone operator
	else if(ch=='+' || ch == '-'|| isDigi(ch))
	{
		
		if(isDigi(ch))
		{
			// a token that starts with a digit is always read as a number
			preIsOp=true;
		}
		else
		{
			// consume the sign and look at the character behind it
			preCh = ch;
			token[m++]= ch;
			ch = codeBuffer[startPosition++];
		}

		if(isDigi(ch)&&preIsOp==true)
		{
			// integer part
			while(isDigi(ch))
			{
				token[m++] = ch;
				ch = codeBuffer[startPosition++];
			}
			// exponent directly after the integer part
			if(ch == 'E' || ch == 'e')
			{
				if(!judgeEe(ch,codeBuffer,startPosition,token,m))
				{
					// judgeEe has already terminated token on failure
					preIsOp = false;
					startPosition--;
					return ERROR;
				}
				// terminate here: judgeEe is not relied upon to do it on success
				token[m] = '\0';
				preIsOp = false;
				startPosition--;
				return DIGIT;
			}
			// fractional part
			if(ch == '.')
			{
				token[m++] = ch;
				ch = codeBuffer[startPosition++];
				// a '.' must be followed by at least one digit
				if(!isDigi(ch))
				{
					startPosition--;
					token[m] = '\0';
					preIsOp = false;
					return ERROR;
				}
				while(isDigi(ch))
				{
					token[m++] = ch;
					ch = codeBuffer[startPosition++];
				}
				// optional exponent after the fraction
				if(ch == 'E' || ch == 'e')
				{
					if(!judgeEe(ch,codeBuffer,startPosition,token,m))
					{
						preIsOp = false;
						startPosition--;
						return ERROR;
					}
				}
				token[m] = '\0';
				startPosition--;
				preIsOp = false;
				return DIGIT;
			}
			// plain integer
			token[m] = '\0';
			startPosition--;
			preIsOp = false;
			return DIGIT;
		}
		else
		{
			// Only reachable after a sign was consumed (a leading digit always
			// takes the branch above), so preCh holds that sign. The character
			// read after the sign is put back.
			if(preCh=='+')
			{
				token[m] = '\0';
				startPosition--;
				preIsOp=true;
				return PLUS;				
			}
			else
			{
				token[m] = '\0';
				startPosition--;
				preIsOp=true;
				return SUB;
			}

		}
	}
	// '*'
	else if(ch == '*')
	{
		token[m++] = '*';
		token[m] = '\0';
		preIsOp=true;
		return STAR;
	}
	// '/' : division operator, or the start of a /* ... */ comment
	else if(ch == '/')
	{
		bool endNode= false;
		char nextCh;
		token[m++] = '/';
		ch = codeBuffer[startPosition++];
		if(ch=='*')
		{
			token[m++]= ch;
			while(!endNode)
			{
				ch = codeBuffer[startPosition++];
				// End marker (or end of the C string) inside a comment: the
				// comment is unterminated. Stop here instead of reading past
				// the end of the buffer, and leave the marker unread.
				if(ch=='#' || ch=='\0')
				{
					startPosition--;
					token[m] = '\0';
					return ERROR;
				}
				nextCh = codeBuffer[startPosition];
				if(ch=='*' && nextCh=='/')
				{
					token[m++]= ch;
					token[m++]= nextCh;
					startPosition++;
					endNode = true;
				}
				else
				{
					token[m++]= ch;
				}
			}
			//startPosition--;
			token[m] = '\0';
			return NOTE;
		}
		else
		{
			startPosition--;
			token[m] = '\0';
			preIsOp=true;
			return SLASH;
		}
		
	}
	else if(ch == '=')
	{
		token[m++] = '=';
		token[m] = '\0';
		preIsOp = true;
		return EQUAL;
	}
	// ':' or ':='
	else if(ch == ':')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '=')
		{
			token[m++] = ch;
			token[m] = '\0';
			return MAOHAO_DENGHAO;
		}
		startPosition--;
		token[m] = '\0';
		return MAOHAO;
	}
	// '<', '<>' or '<='
	else if(ch == '<')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '>')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return SMALL_BIGGER;
		}
		if(ch == '=')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return SMALLER_EQUAL;
		}
		startPosition--;
		token[m] = '\0';
		return SMALLER;
	}
	// '>' or '>='
	else if(ch == '>')
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(ch == '=')
		{
			token[m++] = ch ;
			token[m] = '\0';
			return BIGGER_EQUAL;
		}
		startPosition--;
		token[m] = '\0';
		return BIGGER;
	}
	// ';'
	else if(ch == ';')
	{
		token[m++] = ch;
		token[m] = '\0';
		return FENHAO;
	}
	else if(ch == '(')
	{
		token[m++] = ch;
		token[m] = '\0';
		preIsOp=true;
		return KUOHAO_L;
	}
	else if(ch == ')')
	{
		token[m++] = ch;
		token[m] = '\0';
		preIsOp=false;
		return KUOHAO_R;
	}
	// end-of-input marker '#'
	else if(ch =='#')
	{
		token[m++] = '#';
		token[m] = '\0';
		return END_JINGHAO;
	}
	// any other character is not part of the language
	token[m++] = ch;
	token[m] = '\0';
	return ERROR;
}
// Reports whether ch is a space, a newline or a tab.
bool Morphology::isBlank(const char ch)
{
	return ch == ' ' || ch == '\n' || ch == '\t';
}
// Reports whether ch is an ASCII letter, upper or lower case.
bool Morphology::isLetter(const char ch)
{
	const bool upperCase = ('A' <= ch && ch <= 'Z');
	const bool lowerCase = ('a' <= ch && ch <= 'z');
	return upperCase || lowerCase;
}
// Reports whether ch is a decimal digit '0'..'9'.
bool Morphology::isDigi(const char ch)
{
	return '0' <= ch && ch <= '9';
}
// Returns the type code of the given word: its 1-based position in
// keyWordTable when it is a keyword, IDENTIFIER otherwise.
int Morphology::getTypeCode(const char token[])
{
	int index = 0;
	while(index < KEYWORD_NUMBER)
	{
		const bool isKeyword = (strcmp(token,keyWordTable[index]) == 0);
		if(isKeyword)
		{
			return index + 1;
		}
		++index;
	}
	return IDENTIFIER;
}

/**
 * Consumes the exponent part of a number after the caller has seen 'E'/'e'.
 *
 * Accepts: E/e, an optional '+'/'-', one or more digits, and then optionally
 * a '.' followed by one or more digits.
 *
 * @param ch             the 'E'/'e' character the caller stopped on.
 * @param codeBuffer     source text being scanned.
 * @param startPosition  in: index of the character after E/e; out: one past
 *                       the first character that was read but not consumed
 *                       (callers step back by one to put it back).
 * @param token          token text being built; always NUL-terminated at
 *                       index m when this function returns.
 * @param m              write index into token, updated in place.
 * @return true if a well-formed exponent was consumed, false otherwise.
 */
bool Morphology::judgeEe(char ch,const char codeBuffer[],int &startPosition,char token[],int &m)
{
	// store the E/e itself
	token[m++] = ch;
	ch = codeBuffer[startPosition++];
	if(ch == '+' || ch == '-')
	{
		// signed exponent: at least one digit must follow the sign
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(!isDigi(ch))
		{
			token[m] = '\0';
			return false;
		}
	}
	else
	{
		if(!isDigi(ch))
		{
			// no digit after E/e: drop the E/e from the token text again
			token[--m] = '\0';
			return false;
		}
	}
	// exponent digits
	while(isDigi(ch))
	{
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
	}
	if(ch=='.')
	{
		// a '.' after the exponent digits must be followed by a digit
		token[m++] = ch;
		ch = codeBuffer[startPosition++];
		if(!isDigi(ch))
		{
			token[m] = '\0';
			return false;
		}
		while(isDigi(ch))
		{
			token[m++] = ch;
			ch = codeBuffer[startPosition++];
		}
	}
	// Terminate on every successful path; the path without a '.' used to
	// return with the token left unterminated.
	token[m] = '\0';
	return true;
}

編譯器-詞法分析

1. 總體設計思想

2. 詳細算法設計

3. 流程框圖

4. 函數相關說明

5. 輸入與輸出

6. 程序運行結果

Python實現大麥網搶票的四大關鍵技術點解析

salesforce零基礎學習（一百三十八）零碎知識點小總結（十）

窮屌絲的樹莓派無顯示屏安裝，上網方法

上市之後Twitter的入職面試問題

《C++編程實戰》學習筆記

PHP五種常用的設計模式——工廠模式

QT安裝配置

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結