pl/0詞法分析器
下面是這個分析器的功能:
1、 待分析的簡單語言的詞法
(1) 關鍵字:
begin if then while do end
所有關鍵字都是小寫。
(2) 運算符和界符:
:= + - * / < <= <> > >= = ; ( ) #
(3) 其他單詞是標識符(ID)和整型常數(NUM),通過以下正規式定義:
ID=letter(letter| digit)*
NUM=digit digit *
(4) 空格由空白、製表符和換行符組成。空格一般用來分隔ID、NUM,運算符、界符和關鍵字,詞法分析階段通常被忽略。
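2、 各種單詞符號對應的種別碼(依據下面程序中的實際取值):
begin 1、if 2、then 3、while 4、do 5、end 6
const 31、var 32、procedure 33、call 34
ID 10、NUM 11
* 13、/ 14、+ 15、- 16、: 17、:= 18
> 20、<> 21、<= 22、< 23、>= 24、= 25
; 26、( 27、) 28、, 29、! 30、. 0(源程序結束)
換行符 -2(只用於行號計數)、出錯 -1
3、 詞法分析程序的功能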
輸入:所給文法的源程序字符串。
輸出:二元組(syn,token或sum)構成的序列。
其中:syn爲單詞種別碼;
token爲存放的單詞自身字符串;
sum爲整型常數。
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <windows.h>
using namespace std;
#define CODE "E:\\code\\code.txt"
#define RESULT "E:\\code\\result.txt"
// Shared scanner state, read and written by scaner().

// Text of the keyword, identifier, operator or delimiter just scanned.
// The 10-byte buffer is sized for names of at most 9 characters plus the NUL.
char token[10];
// Source text to be scanned (allocated and filled by read()).
char *prog;
// Character most recently fetched from prog.
char ch;
// Numeric category code of the token just scanned.
int syn;
// Index into prog of the next character to fetch.
int p;
// Write position inside token while a token is being collected.
int m = 0;
// Scratch loop counter.
int n;
// Source line counter.
int line;
// Value of the integer constant just scanned.
int sum = 0;
// First keyword table; entry i is reported with category code i + 1.
char *rwtab1[10] = { "begin","if","then","while","do","end" };
// Second keyword table; entry i is reported with category code i + 31.
char *rwtab2[4] = { "const","var","procedure","call" };
// Returns true when c is an ASCII letter (the only characters that may start an identifier).
static bool scanerIsLetter(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

// Returns true when c is an ASCII decimal digit.
static bool scanerIsDigit(char c)
{
    return c >= '0' && c <= '9';
}

/*
 * Scans the next token from the global source buffer `prog`, starting at the
 * global cursor `p`, and leaves the result in the globals:
 *   syn   - category code of the token
 *   token - text of a keyword, identifier, operator or delimiter
 *   sum   - value of an integer constant (when syn == 11)
 *
 * Category codes produced:
 *   1..6    begin if then while do end        31..34  const var procedure call
 *   10      identifier                        11      integer constant
 *   13 *  14 /  15 +  16 -  17 :  18 :=  20 >  21 <>  22 <=  23 <  24 >=
 *   25 =  26 ;  27 (  28 )  29 ,  30 !
 *   0       '.' (end of program) or the NUL that ends the buffer
 *   -2      newline (lets the caller count lines)
 *   -1      error: unknown character, constant above 32767, or an identifier
 *           too long for token[]
 *
 * NOTE(review): '#' is used by PL/0 sources as an operator but no category
 * code for it is defined here, so it is still reported as an error (-1).
 */
void scaner()
{
    // Longest name that fits in token[] together with its terminating NUL.
    const int maxTokenLen = static_cast<int>(sizeof(token)) - 1;

    // Start from an empty token so short tokens are always NUL-terminated.
    for (int i = 0; i <= maxTokenLen; i++)
        token[i] = '\0';

    // Skip blanks, tabs and carriage returns. Newlines are NOT skipped: they
    // are returned as syn == -2 so the caller can keep a line count.
    ch = prog[p++];
    while (ch == ' ' || ch == '\t' || ch == '\r')
        ch = prog[p++];

    if (scanerIsLetter(ch))
    {
        // Keyword or identifier: a letter followed by letters and digits.
        bool tooLong = false;
        m = 0;
        while (scanerIsLetter(ch) || scanerIsDigit(ch))
        {
            if (m < maxTokenLen)
                token[m++] = ch;
            else
                tooLong = true;   // keep consuming, but never write past token[]
            ch = prog[p++];
        }
        token[m++] = '\0';
        p--;                      // give back the character that ended the name

        if (tooLong)
        {
            // The whole name has been consumed, so scanning resumes after it.
            syn = -1;
        }
        else
        {
            syn = 10;             // plain identifier unless a keyword matches
            bool matched = false;
            // Keywords are lower case only; strcmp is case sensitive.
            for (int i = 0; i < 6 && !matched; i++)
            {
                if (strcmp(token, rwtab1[i]) == 0)
                {
                    syn = i + 1;
                    matched = true;
                }
            }
            for (int i = 0; i < 4 && !matched; i++)
            {
                if (strcmp(token, rwtab2[i]) == 0)
                {
                    syn = i + 31;
                    matched = true;
                }
            }
        }
    }
    else if (scanerIsDigit(ch))
    {
        // Decimal integer constant; values above 32767 are rejected.
        bool overflow = false;
        sum = 0;
        while (scanerIsDigit(ch))
        {
            if (!overflow)
            {
                sum = sum * 10 + (ch - '0');
                // Stop accumulating once out of range so a long digit run
                // cannot overflow the int.
                if (sum > 32767)
                    overflow = true;
            }
            ch = prog[p++];
        }
        p--;                      // give back the character that ended the number
        syn = overflow ? -1 : 11;
    }
    else switch (ch)
    {
    case '<':
        m = 0;
        token[m++] = ch;
        ch = prog[p++];
        if (ch == '>')
        {
            syn = 21;             // <>
            token[m++] = ch;
        }
        else if (ch == '=')
        {
            syn = 22;             // <=
            token[m++] = ch;
        }
        else
        {
            syn = 23;             // a lone <
            p--;                  // the look-ahead character belongs to the next token
        }
        break;
    case '>':
        m = 0;
        token[m++] = ch;
        ch = prog[p++];
        if (ch == '=')
        {
            syn = 24;             // >=
            token[m++] = ch;
        }
        else
        {
            syn = 20;             // a lone >
            p--;
        }
        break;
    case ':':
        m = 0;
        token[m++] = ch;
        ch = prog[p++];
        if (ch == '=')
        {
            syn = 18;             // := assignment
            token[m++] = ch;
        }
        else
        {
            syn = 17;             // a lone :
            p--;
        }
        break;
    case '*':
        syn = 13;
        token[0] = ch;
        break;
    case '/':
        syn = 14;
        token[0] = ch;
        break;
    case '+':
        syn = 15;
        token[0] = ch;
        break;
    case '-':
        syn = 16;
        token[0] = ch;
        break;
    case '=':
        syn = 25;
        token[0] = ch;
        break;
    case ';':
        syn = 26;
        token[0] = ch;
        break;
    case '(':
        syn = 27;
        token[0] = ch;
        break;
    case ')':
        syn = 28;
        token[0] = ch;
        break;
    case ',':
        syn = 29;
        token[0] = ch;
        break;
    case '!':
        syn = 30;
        token[0] = ch;
        break;
    case '.':
        // '.' closes the program: syn == 0 tells the caller to stop.
        syn = 0;
        token[0] = ch;
        break;
    case '\0':
        // A NUL byte ends a NUL-terminated source buffer. Treat it as end of
        // input so a program missing its final '.' cannot drive the scanner
        // past the buffer, and stay on the terminator for any further call.
        syn = 0;
        p--;
        break;
    case '\n':
        // Newline: reported separately so the caller can advance its line count.
        syn = -2;
        break;
    default:
        // Any other character is not part of the language.
        syn = -1;
        break;
    }
}
/*
 * Loads the source file named by CODE into a newly malloc'ed, NUL-terminated
 * buffer and stores its address in the global `prog`.
 *
 * The file is opened in text mode, so the number of characters actually read
 * may be smaller than the size reported by ftell(); the terminator is placed
 * after the characters fread() really delivered.
 *
 * If the file cannot be opened, measured or buffered, a message is written to
 * stderr and the program exits with EXIT_FAILURE, because there is nothing to
 * scan in that case.
 */
void read()
{
    FILE *fp = fopen(CODE, "r");
    if (fp == NULL)
    {
        perror(CODE);
        exit(EXIT_FAILURE);
    }

    // Measure the file by seeking to its end, then rewind for reading.
    long file_size = -1;
    if (fseek(fp, 0, SEEK_END) == 0)
        file_size = ftell(fp);
    if (file_size < 0 || fseek(fp, 0, SEEK_SET) != 0)
    {
        perror(CODE);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    // One extra byte for the terminating NUL.
    prog = (char *)malloc((size_t)file_size + 1);
    if (prog == NULL)
    {
        fprintf(stderr, "out of memory while loading %s\n", CODE);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    // Element size 1 and count file_size, so the return value is the number
    // of characters read even when it is fewer than file_size.
    size_t got = fread(prog, sizeof(char), (size_t)file_size, fp);
    prog[got] = '\0';

    fclose(fp);
}
int main()
{
int p = 0;
int line = 1;
ofstream outfile(RESULT);
cout<< "加載代碼文件中......" << endl;
Sleep(3000);
//讀取源代碼文件
read();
p = 0;
outfile << "詞法分析的結果爲:" << endl;
do
{
scaner();
switch (syn)
{
case 11:
//cout << "(" << syn << "," << sum << ")" << endl;
outfile << "(" << syn << "," << sum << ")" << endl;
break;
case -1:
//cout << "Error in line " << line << "!" << endl;
outfile << "Error in line" << line << "!" << endl;
break;
case -2:
line = line++;
break;
default:
//cout << "(" << syn << "," << token << ")" << endl;
outfile << "(" << syn << "," << token << ")" << endl;
break;
}
} while (syn != 0);
outfile.close();
cout << "詞法分析完畢,請在result.txt中查看" << endl;
system("pause");
return 0;
}
pl/0 程序
var m, n, r, q;
procedure gcd;
begin
while r#0 do
begin
q := m / n;
r := m - q * n;
m := n;
n := r;
end;
end;
begin
read(m);
read(n);
if m < n then
begin
r := m;
m := n;
n := r;
end;
begin
r:=1;
call gcd;
write(m);
end;
end.
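輸出結果(注意:以下列表採用「單詞---->種別碼」的格式,內容是一段 C++ 源碼的單詞,與上面程序寫入 result.txt 的 (syn,token) 二元組格式及種別碼並不一致,看起來是另一個詞法分析器的輸出)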
#
#
using---->25
namespace---->25
std---->25
;------->41
int---->7
main---->1
(------->42
)------->43
const---->18
string---->25
str---->25
=------->38
hello---->25
;------->41
const---->18
string---->25
str2---->25
=------->38
world---->25
;------->41
string---->25
n_str---->25
;------->41
n_str---->25
=------->38
str---->25
;------->41
n_str---->25
+------->27
=------->38
str2---->25
;------->41
cout---->25
<------>33
<------>33
n_str---->25
<------>33
<------>33
endl---->25
;------->41
return---->17
0------>26
;------->41
————————————————
版權聲明:本文爲博主原創文章,遵循 CC 4.0 BY-SA 版權協議,轉載請附上原文出處鏈接和本聲明。
原文鏈接:https://blog.csdn.net/judyge/article/details/52274690