關鍵字詞法識別

本人想做一個自己的腳本語言,但是又不想使用lex之類的詞法分析器,又不想自己畫狀態轉換圖,所以,寫了下面的一段程序,它的功能是:

有如下若干個關鍵字:

// Keywords and operators the scanner has to recognise.
// Only the first 20 slots are filled in; the remaining slots are null
// pointers, so the first null entry marks the end of the list.
char * KeyWords[1024] = {
	"var",
	// single-character operators
	"+", "-", "*", "/", "%",
	// word keywords
	"if", "else", "switch", "case",
	"while", "do", "for", "continue", "break",
	"class", "struct", "union", "enum",
	"new",
	0	// explicit end marker (same value the unused slots already have)
};

通過循環調用

// Adds the character sequence reg to the transition table as a path that
// leads from state nStart to state nFinal.
// Returns true on success; returns false when reg is NULL or empty, or when
// nStart is not a state of the table.
bool AddFSM(char * reg, int nStart, int nFinal);

會自動產生一個可以識別以上關鍵字的狀態轉換表。

具體代碼如下:

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <list>

// One entry of the transition table.
//
// Every state owns a singly linked chain of nodes. The first node of a chain
// is the head of the state (its m_cLetter and m_nNextState are 0); every
// following node describes one edge that leaves the state.
struct node
{
	int m_nState;		// state the entry belongs to (source state of an edge)
	char m_cLetter;		// character consumed by the edge; 0 in a head node
	int m_nNextState;	// state reached through the edge; 0 in a head node
	node * m_pNext;		// next edge of the same state, NULL at the end of the chain

};

// Recogniser for a set of literal words, stored as a table of states and
// character-labelled edges.
//
// A new object contains the start state (0) and the final state (1). Words
// are added with AddFSM() and looked up with Match(). Words that share a
// prefix share the intermediate states of that prefix. Several edges with the
// same letter may leave one state (for example when one word is a prefix of
// another), therefore Match() follows all of them.
class FSM
{
public:
	// Creates the table with the two predefined states.
	FSM();
	// Releases every node of the table.
	~FSM();
	// Adds the word reg as a path from state nStart to state nFinal.
	bool AddFSM(const char * reg, int nStart, int nFinal);
	// Tells whether str leads from the start state to the final state.
	bool Match(const char * str);
	int m_nStartState;				// number of the start state (0)
	int m_nFinalState;				// number of the accepting state (1)
	int m_nNextState;				// initialised to 2; not used by the class itself
	std::list<node *> m_pTable;		// head node of every state, in creation order

private:
	// The table owns raw node pointers, so a member-wise copy would free
	// them twice. Copying is disabled: declared private, never defined.
	FSM(const FSM &);
	FSM & operator=(const FSM &);

	// Returns the head node of state nState, or NULL if there is none.
	node * FindState(int nState);
	// Creates state nState, appends it to the table and returns its head.
	node * AppendState(int nState);
	// Frees all nodes and empties the table.
	void Clear();
};

// Returns the edge behind pHead that consumes cLetter and leads to nTarget,
// or NULL when the state has no such edge.
static node * FsmFindEdge(node * pHead, char cLetter, int nTarget)
{
	for (node * pEdge = pHead->m_pNext; pEdge; pEdge = pEdge->m_pNext)
	{
		if (pEdge->m_cLetter == cLetter && pEdge->m_nNextState == nTarget)
			return pEdge;
	}

	return NULL;
}

// Links a new edge nFrom --cLetter--> nTo directly behind the head node pHead.
static void FsmInsertEdge(node * pHead, int nFrom, char cLetter, int nTo)
{
	node * pEdge = new node;
	pEdge->m_nState = nFrom;
	pEdge->m_cLetter = cLetter;
	pEdge->m_nNextState = nTo;
	pEdge->m_pNext = pHead->m_pNext;
	pHead->m_pNext = pEdge;
}

// Tells whether nState is already an element of states.
static bool FsmContainsState(const std::list<int> & states, int nState)
{
	for (std::list<int>::const_iterator it = states.begin(); it != states.end(); ++it)
	{
		if (*it == nState)
			return true;
	}

	return false;
}

FSM::FSM()
	: m_nStartState(0), m_nFinalState(1), m_nNextState(2)
{
	// Plain `new` reports failure by throwing, it never returns NULL. The
	// destructor does not run for a half-built object, so clean up here.
	try
	{
		AppendState(m_nStartState);
		AppendState(m_nFinalState);
	}
	catch (...)
	{
		Clear();
		throw;
	}
}

FSM::~FSM()
{
	Clear();
}

void FSM::Clear()
{
	for (std::list<node *>::iterator it = m_pTable.begin(); it != m_pTable.end(); ++it)
	{
		// Free the head node and every edge chained behind it.
		node * pCur = *it;
		while (pCur)
		{
			node * pFollowing = pCur->m_pNext;
			delete pCur;
			pCur = pFollowing;
		}
	}

	m_pTable.clear();
}

node * FSM::FindState(int nState)
{
	for (std::list<node *>::iterator it = m_pTable.begin(); it != m_pTable.end(); ++it)
	{
		if (*it && (*it)->m_nState == nState)
			return *it;
	}

	return NULL;
}

node * FSM::AppendState(int nState)
{
	node * pHead = new node;
	pHead->m_nState = nState;
	pHead->m_cLetter = 0;
	pHead->m_nNextState = 0;
	pHead->m_pNext = NULL;

	try
	{
		m_pTable.push_back(pHead);
	}
	catch (...)
	{
		delete pHead;
		throw;
	}

	return pHead;
}

// Adds the word reg as a path from state nStart to state nFinal.
//
// reg    - zero-terminated word of any length; NULL and "" are rejected.
// nStart - state the path starts in; must already exist in the table.
// nFinal - state the last character of reg leads to.
//
// Returns true when the path exists afterwards (adding a word twice is not
// an error), false for a NULL/empty word or an unknown state on the path.
// Running out of memory is reported by the exception thrown by `new`.
bool FSM::AddFSM(const char * reg, int nStart, int nFinal)
{
	// The empty word is not supported.
	if (!reg || *reg == 0)
		return false;

	node * pHead = FindState(nStart);
	if (!pHead)
		return false;

	int nCur = nStart;
	const char * p = reg;

	// Every character except the last one leads to an intermediate state.
	for (; p[1] != 0; ++p)
	{
		// Reuse the state of a word with the same prefix. Edges that enter a
		// start or final state are never reused: continuing a word from such
		// a state would also extend every other word that passes through it.
		node * pShared = NULL;
		for (node * pEdge = pHead->m_pNext; pEdge; pEdge = pEdge->m_pNext)
		{
			const int nTo = pEdge->m_nNextState;
			if (pEdge->m_cLetter == *p
				&& nTo != nFinal && nTo != m_nFinalState
				&& nTo != nStart && nTo != m_nStartState)
			{
				pShared = pEdge;
				break;
			}
		}

		if (pShared)
		{
			nCur = pShared->m_nNextState;
			pHead = FindState(nCur);
			if (!pHead)
				return false;
		}
		else
		{
			// New states are numbered by their position in the table.
			const int nNewState = static_cast<int>(m_pTable.size());
			node * pNewHead = AppendState(nNewState);
			FsmInsertEdge(pHead, nCur, *p, nNewState);
			nCur = nNewState;
			pHead = pNewHead;
		}
	}

	// The last character enters nFinal; add the edge unless it is there already.
	if (!FsmFindEdge(pHead, *p, nFinal))
		FsmInsertEdge(pHead, nCur, *p, nFinal);

	return true;
}

// Tells whether str is one of the words added between the start state and
// the final state.
//
// str - zero-terminated text; NULL and "" never match.
//
// All edges that fit the current character are followed at the same time, so
// the result does not depend on the order in which the words were added.
bool FSM::Match(const char * str)
{
	if (!str || *str == 0)
		return false;

	// States that can be reached with the characters consumed so far.
	std::list<int> active;
	active.push_back(m_nStartState);

	for (const char * p = str; *p != 0; ++p)
	{
		std::list<int> reached;

		for (std::list<int>::const_iterator it = active.begin(); it != active.end(); ++it)
		{
			// The lookup always searches the whole table, whatever the
			// position of the state inside it.
			node * pHead = FindState(*it);
			if (!pHead)
				continue;

			for (node * pEdge = pHead->m_pNext; pEdge; pEdge = pEdge->m_pNext)
			{
				if (pEdge->m_cLetter == *p && !FsmContainsState(reached, pEdge->m_nNextState))
					reached.push_back(pEdge->m_nNextState);
			}
		}

		// No edge fits this character: the text cannot be a known word.
		if (reached.empty())
			return false;

		active.swap(reached);
	}

	return FsmContainsState(active, m_nFinalState);
}


// Keywords and operators the scanner has to recognise.
// Only the first 20 slots are filled in; the remaining slots are null
// pointers, so the first null entry marks the end of the list.
char * KeyWords[1024] = {
	"var",
	// single-character operators
	"+", "-", "*", "/", "%",
	// word keywords
	"if", "else", "switch", "case",
	"while", "do", "for", "continue", "break",
	"class", "struct", "union", "enum",
	"new",
	0	// explicit end marker (same value the unused slots already have)
};


int main()
{
	FSM obj;

	char * pTmp = NULL;

	int i = 0;

	pTmp = *KeyWords;

	while (pTmp)
	{
		if (obj.AddFSM(pTmp, obj.m_nStartState, obj.m_nFinalState))
		{
			printf("Add %s suc!\n", pTmp);
		}
		else
		{
			printf("Add %s failed.\n", pTmp);
		}

		i++;

		pTmp = KeyWords[i];
	}

	FILE * fSrc = fopen("E:/a.cs", "rb+");

	char buf[1024] = { 0 };

	char str[1024] = { 0 };

	if (fSrc)
	{
		while (!feof(fSrc))
		{
			int nSize = fread(buf, 1, 1024, fSrc);

			int i = 0;

			int m = 0;

			while (i < nSize)
			{
				if (buf[i] != ' ' && buf[i] != '\t' && buf[i] != '\n' && buf[i] != '\r' && buf[i] != 0)
				{
					str[m++] = buf[i++];
				}
				else
				{
					if (strcmp(str, ""))
					{
						if (obj.Match(str))
						{
							printf("%s match suc\n", str);
						}
						else
						{
							printf("%s match failed.\n", str);
						}
					}

					i++;

					m = 0;

					memset(str, 0, 1024);
				}				
			}
		}
		
		fclose(fSrc);
	}

	system("pause");
	
	return 0;
}

掃描文件內容如下:

if var == else
	
do while for

+ - * /

class break abc

enum

struct

case

break continue def  new 

hello china for

 new

程序運行效果如下:

 

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章