#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <vector>
#include <map>
#include <string.h>
using namespace std;
//Convention: every line of the program ends with \n
//Convention: the maximum identifier length is 64
//NOTE(review): no length limit of 64 is enforced anywhere in the visible code
//About comments: only the "//" form is supported for now
//Reserved words (keywords); KEYS_COUNT is the number of rows in the table below
const int KEYS_COUNT = 32;
static char static_key_words[32][20] = {
"auto", "break", "case", "char", "const", "continue",
"default", "do", "double", "else", "enum", "extern",
"float", "for", "goto", "if", "int", "long",
"register", "return", "short", "signed", "sizeof", "static",
"struct", "switch", "typedef", "union", "unsigned", "void",
"volatile", "while"
};
//Constant rule: constants appear on the right-hand side of assignment statements
//An operator consists of at most 2 characters
//Leading characters of multi-character operators
//NOTE(review): both_operator_com is not referenced anywhere in the visible code
static char both_operator_com[10][2] = {
">", "<", "=", "-", "+",
"!", "&", "|", "/", "*"
};
//Comment detection logic
//NOTE(review): this holds a single backslash (not '/') and is not referenced in the visible code
static char annotation_char[2] = "\\";
//Constants: the characters that open a string literal or a character literal
//(looked up by is_const_char)
const int CONST_CHARS_COUNT = 2;
static char const_chars[2][2] = {
"\"", "'"
};
//Return code definitions
//SUCC is 0 and FAIL is 1, so results must be compared against these names
//explicitly; using a result directly as a boolean inverts its meaning.
const int SUCC = 0;
const int FAIL = 1;
const int ERROR = -1;
//Returned by read_sour when the input file cannot be opened
const int FILE_NOT_EXIT = 10;
//Delimiters. Original note: '/' is not added because '/' and comments are handled separately
//NOTE(review): "/" is nevertheless present in the table below
//Only the first character of each row is compared (see is_not_division_char)
const int DIVISION_CHARS_COUNT = 21;
static char division_chars[21][2] = {
" ", ">", "<", "=", "-",
"+", "!", "&", "|", "%",
"*", ";", "(", ",", "/",
")", "{", "}", "[", "]",
"."
};
//Two-character delimiters, compared as whole strings by is_multi_division_chars
//NOTE(review): "*=" appears twice in this table
const int DIVISION_MULTI_CHARS_COUNT = 13;
static char division_multi_chars[13][3] = {
">>", "<<", "<=", ">=", "+=",
"-=", "*=", "/=", "!=", "&&",
"*=", "||", "=="
};
//Two-character check: the characters looked up by is_not_multi_division_char
//(division_str passes it the character that follows a delimiter)
const int DIVISION_MULTI_CHAR_COUNT = 5;
static char division_multi_char[5][2] = {
">", "<", "=", "&", "|"
};
//Single-character operators; only the first character of each row is compared (see is_op)
const int SINGLE_OP_COUNT = 14;
static char single_operator[14][2] = {
"+", "-", "*", "/", "<",
">", "=", "^", ",", "&",
"|", "%", "~", "!"
};
//Two-character operators, compared as whole strings by is_multi_op
//NOTE(review): "*=" appears twice in this table
const int MULTI_OP_COUNT = 13;
static char multi_operator[13][3] = {
">>", "<<", "<=", ">=", "+=",
"-=", "*=", "/=", "!=", "&&",
"==", "*=", "||"
};
//Boundary characters; only the first character of each row is compared (see is_limit)
const int LIMIT_COUNT = 8;
static char limit_[8][2] = {
"(", ")", "{", "}", ".",
"[", "]", ";"
};
//Current scan state; original note: meaningful for assignment statements
//NOTE(review): status is not read or written anywhere else in the visible code
int status = 0;
//Category labels stored as the first element of each token pair and printed by word_analize
static char TAG[4] = "tag"; //identifier
static char KEY[4] = "key"; //reserved word
static char CONST[6] = "const"; //constant
static char OP[3] = "op"; //operator
static char LIMIT[6] = "limit"; //boundary
static char ERR[6] = "error"; //error
vector< pair<char*, char*> > tokens; //tokens: (category label, word) pairs, filled by word_analize from main
vector<char*> anno; //comments: comment text collected by division_str
// Memory initialisation: stores the byte `c` into the first `length` bytes of
// `chrs`. A zero or negative `length` leaves the buffer untouched.
void memset_(char* chrs, char c, int length)
{
    char* cursor = chrs;
    int remaining = length;
    while (remaining > 0) {
        *cursor = c;
        ++cursor;
        --remaining;
    }
}
// Tells whether `c` is absent from the single-character delimiter table.
// Returns SUCC when `c` is NOT the first character of any division_chars row,
// and FAIL when it is (i.e. when `c` is a delimiter).
int is_not_division_char(char c)
{
    int row = 0;
    while (row < DIVISION_CHARS_COUNT && division_chars[row][0] != c) {
        ++row;
    }
    return (row == DIVISION_CHARS_COUNT) ? SUCC : FAIL;
}
// Tells whether `c` is absent from division_multi_char, the table consulted
// when deciding if a two-character delimiter may be present.
// Returns SUCC when `c` is NOT listed and FAIL when it is.
int is_not_multi_division_char(char c)
{
    bool listed = false;
    for (int row = 0; row < DIVISION_MULTI_CHAR_COUNT && !listed; ++row) {
        listed = (division_multi_char[row][0] == c);
    }
    return listed ? FAIL : SUCC;
}
// Checks whether the NUL-terminated string `chrs` is exactly one of the
// two-character delimiters in division_multi_chars.
// Returns SUCC on a match; FAIL when the length is not 2 or nothing matches.
int is_multi_division_chars(char* chrs)
{
    if (strlen(chrs) != 2) {
        return FAIL;
    }
    int verdict = FAIL;
    for (int row = 0; verdict == FAIL && row < DIVISION_MULTI_CHARS_COUNT; ++row) {
        if (strcmp(chrs, division_multi_chars[row]) == 0) {
            verdict = SUCC;
        }
    }
    return verdict;
}
// Checks whether `c` is a decimal digit character ('0'..'9').
// Returns SUCC for a digit, FAIL otherwise.
int is_dig(char c)
{
    const bool in_range = (c >= '0') && (c <= '9');
    return in_range ? SUCC : FAIL;
}
// Checks whether `c` is one of the characters in const_chars, i.e. a character
// that opens a string or character literal. Callers pass the first character
// of a word. Returns SUCC when listed, FAIL otherwise.
int is_const_char(char c)
{
    int row = CONST_CHARS_COUNT;
    while (row-- > 0) {
        if (const_chars[row][0] == c) {
            return SUCC;
        }
    }
    return FAIL;
}
// Allocates a NUL-terminated copy of the `length` characters starting at
// `text`. The buffer is obtained with new[]; ownership passes to the caller
// (in this file the pointers are stored in vectors and never released).
static char* copy_token(const char* text, int length)
{
    char* token = new char[length + 1];
    memcpy(token, text, length);
    token[length] = '\0';
    return token;
}

/** Splits one source line into words and delimiter tokens.
 *
 * Characters that are not delimiters are grouped into words. Spaces, tabs and
 * carriage returns separate words and are discarded. Every other delimiter
 * becomes a token of its own: a two-character token when the pair is listed
 * in division_multi_chars (for example "<=" or "/="), otherwise a
 * one-character token. A "//" sequence ends the scan; the rest of the line,
 * starting at the second '/', is appended to the global `anno` list.
 *
 * Fixes relative to the previous implementation:
 *  - the two-character candidate is NUL-terminated before it is compared;
 *  - a delimiter whose following character does not form a valid pair is
 *    emitted as a single token instead of being dropped (e.g. "(" in "f(&x)");
 *  - words are copied straight from `line`, so there is no fixed-size scratch
 *    buffer to overflow on long words or long comments;
 *  - a word directly followed by "//" is emitted before the comment is stored;
 *  - tabs and carriage returns separate words instead of being deleted.
 *
 * Every token is a heap buffer allocated with new[]; this function does not
 * free them.
 *
 * @param line  NUL-terminated string to split; a null pointer is ignored
 * @param words output list that receives the tokens in source order
 * @param line_ line number (currently unused)
 */
void division_str(char* line, vector<char*> &words, int line_)
{
    (void)line_;
    if(!line) {
        return;
    }
    const int len = static_cast<int>(strlen(line));
    int word_start = -1; // index where the pending word begins, -1 when there is none
    for(int i = 0; i < len; i++) {
        const char c = line[i];
        const bool blank = (c == ' ' || c == '\t' || c == '\r');
        if(!blank && is_not_division_char(c) == SUCC) {
            // Ordinary character: it starts or extends the pending word.
            if(word_start < 0) {
                word_start = i;
            }
            continue;
        }
        // Any delimiter ends the pending word.
        if(word_start >= 0) {
            words.push_back(copy_token(line + word_start, i - word_start));
            word_start = -1;
        }
        if(blank) {
            continue;
        }
        // Reading line[i + 1] is safe: i < len, so at worst it is the terminating NUL.
        const char next = line[i + 1];
        if(c == '/' && next == '/') {
            // Comment: keep the text from the second '/' to the end of the line.
            anno.push_back(copy_token(line + i + 1, len - i - 1));
            return;
        }
        if(is_not_multi_division_char(next) == FAIL) {
            // The next character may complete a two-character delimiter.
            char two[3] = { c, next, '\0' };
            if(is_multi_division_chars(two) == SUCC) {
                words.push_back(copy_token(two, 2));
                i++; // the second character has been consumed as well
                continue;
            }
        }
        // Single-character delimiter.
        words.push_back(copy_token(&c, 1));
    }
    // Emit the word that runs up to the end of the line, if any.
    if(word_start >= 0) {
        words.push_back(copy_token(line + word_start, len - word_start));
    }
}
// Checks whether `chrs` is a valid identifier: the first character must be a
// letter or '_', and every following character must be a letter, a digit or '_'.
// Returns SUCC for a valid identifier; FAIL otherwise, including for an empty
// string or a null pointer.
//
// The previous loop re-tested chrs[0] instead of chrs[i] and never rejected
// characters below '0', so words such as "a#b" were accepted.
int is_tag(char* chrs)
{
    if(!chrs) {
        return FAIL;
    }
    const int len = static_cast<int>(strlen(chrs));
    if(len < 1) {
        return FAIL;
    }
    for(int i = 0; i < len; i++) {
        const char c = chrs[i];
        const bool letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        const bool digit = (c >= '0' && c <= '9');
        if(c == '_' || letter) {
            continue;
        }
        // Digits are allowed everywhere except in the first position.
        if(digit && i > 0) {
            continue;
        }
        return FAIL;
    }
    return SUCC;
}
//是否爲常量
int _is_const(char* chrs)
{
//從開始字符進行分流
int len = strlen(chrs);
if(len < 1) {
//空字符情況排除
return FAIL;
}
if(is_const_char(chrs[0]) == SUCC && chrs[len-1] == chrs[0]) {
int i = 1;
for(; i < len; i++) {
if(chrs[i] == chrs[0] && chrs[i-1] != '\\') {
break;
}
}
if(
i == 2 //考慮到空字符串的可能
&& i != len //並且字符串終結符並不再結尾
) {
return ERROR;
}
return SUCC;
} else if(is_dig(chrs[0]) == SUCC) {
int i = 1;
for(; i < len; i++) {
if(is_dig(chrs[i]) != SUCC) {
break;
}
}
if(i != len) {
return ERROR;
}
return SUCC;
}
return FAIL;
}
// Checks whether `c` is the first character of a row in single_operator.
// Returns SUCC when it is, FAIL otherwise.
int is_op(char c)
{
    int remaining = SINGLE_OP_COUNT;
    while (remaining-- > 0) {
        if (single_operator[remaining][0] == c) {
            return SUCC;
        }
    }
    return FAIL;
}
// Checks whether the NUL-terminated string `chrs` is exactly one of the
// two-character operators in multi_operator.
// Returns SUCC on a match; FAIL when the length is not 2 or nothing matches.
int is_multi_op(char* chrs)
{
    if (strlen(chrs) != 2) {
        return FAIL;
    }
    int row = 0;
    while (row < MULTI_OP_COUNT && strcmp(chrs, multi_operator[row]) != 0) {
        ++row;
    }
    return (row < MULTI_OP_COUNT) ? SUCC : FAIL;
}
// Checks whether `chrs` is a reserved word (keyword), i.e. exactly equal to
// one of the entries of static_key_words.
// Returns SUCC on a match; FAIL otherwise, including for an empty string or a
// null pointer.
//
// The previous version had no return statement on the no-match path, so the
// result for every non-keyword was undefined.
int is_keys(char* chrs)
{
    if(!chrs || chrs[0] == '\0') {
        return FAIL;
    }
    for(int i = 0; i < KEYS_COUNT; i++) {
        if(strcmp(chrs, static_key_words[i]) == 0) {
            return SUCC;
        }
    }
    return FAIL;
}
// Checks whether `c` is the first character of a row in limit_ (the boundary
// characters). Returns SUCC when it is, FAIL otherwise.
int is_limit(char c)
{
    int verdict = FAIL;
    for (int slot = 0; slot != LIMIT_COUNT; ++slot) {
        if (limit_[slot][0] == c) {
            verdict = SUCC;
            break;
        }
    }
    return verdict;
}
// Decides whether `chrs` is a "//" comment. Original notes on what marks a comment:
//   1. it starts a word or a sentence
//   2. it takes all of the text that follows
// Only the first character is inspected here: the result is SUCC when the
// string is non-empty and begins with '/', FAIL otherwise.
int is_anno(char* chrs)
{
    const bool starts_with_slash = (strlen(chrs) >= 1) && (chrs[0] == '/');
    return starts_with_slash ? SUCC : FAIL;
}
// Reads the text file `file_name` line by line and appends a copy of every
// line (without its trailing '\n') to `output`.
//
// Each copy is a NUL-terminated buffer allocated with new[]; this function
// does not free them.
//
// Returns FILE_NOT_EXIT when the file cannot be opened, SUCC otherwise.
//
// The loop is driven by the result of getline instead of eof(): testing eof()
// before reading appended a phantom empty line after the last real line and
// could spin forever if the stream failed without reaching end-of-file.
int read_sour(char* file_name, vector<char*> &output)
{
    ifstream fin(file_name);
    if(!fin) {
        return FILE_NOT_EXIT;
    }
    string line;
    while(getline(fin, line)) {
        const string::size_type length = line.length();
        char* copy = new char[length + 1];
        // c_str() is NUL-terminated, so length + 1 bytes includes the terminator.
        memcpy(copy, line.c_str(), length + 1);
        output.push_back(copy);
    }
    // fin is closed by its destructor.
    return SUCC;
}
// Runs division_str over every line in `input`, in order, appending the
// resulting words to `output`. The index of the line within `input` is passed
// as the line number.
// Returns FAIL when `input` is empty, SUCC otherwise.
int collect_words(vector<char*> &input, vector<char*> &output)
{
    const int line_total = input.size();
    if (line_total < 1) {
        return FAIL;
    }
    int line_no = 0;
    while (line_no < line_total) {
        division_str(input[line_no], output, line_no);
        ++line_no;
    }
    return SUCC;
}
//單詞分類--詞法分析
void word_analize(vector<char*> words, vector< pair<char*, char*> > &result)
{
//int op_type = 0; //0開始 1接收了一個字符 2接收了兩個字符
char multi_op[3];
int size_ = words.size();
//printf("size: %d\n", size_);
int ret;
for(int i = 0; i < size_; i++) {
//printf("-%d\n", i);
ret = is_limit(words[i][0]);
if(ret == SUCC) {
printf("<%s, %s>\n", LIMIT, words[i]);
result.push_back(pair<char*, char*>(LIMIT, words[i]));
continue;
}
ret = is_keys(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", KEY, words[i]);
result.push_back(pair<char*, char*>(KEY, words[i]));
continue;
}
ret = is_op(words[i][0]);
if(ret == SUCC) { //第一層,單運算符
printf("<%s, %s>\n", OP, words[i]);
result.push_back(pair<char*, char*>(OP, words[i]));
continue;
}
ret = is_multi_op(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", OP, words[i]);
result.push_back(pair<char*, char*>(OP, words[i]));
continue;
}
ret = _is_const(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", CONST, words[i]);
result.push_back(pair<char*, char*>(CONST, words[i]));
continue;
} else if(ret == ERROR) {
printf("<%s, %s>\n", ERR, words[i]);
result.push_back(pair<char*, char*>(ERR, words[i]));
continue;
}
ret = is_tag(words[i]);
if(ret == SUCC) {
printf("<%s, %s>\n", TAG, words[i]);
result.push_back(pair<char*, char*>(TAG, words[i]));
continue;
}
printf("<%s, %s>\n", ERR, words[i]);
result.push_back(pair<char*, char*>(ERR, words[i]));
}
}
// For testing: prints every entry of `words` to stdout together with its
// index, one entry per line.
void scan_words(vector<char*> words)
{
    int index = 0;
    for (vector<char*>::iterator it = words.begin(); it != words.end(); ++it, ++index) {
        printf("line: %d, content: %s\n", index, *it);
    }
}
// Program entry point: reads "sour.code", splits its lines into words and
// classifies them into the global `tokens` list (word_analize also prints
// each token). Returns -1 when the file cannot be opened.
int main()
{
    static char source_path[32] = {"sour.code"};
    vector<char*> source_lines;
    const int read_result = read_sour(source_path, source_lines);
    if (read_result != FILE_NOT_EXIT) {
        vector<char*> raw_words;
        collect_words(source_lines, raw_words);
        word_analize(raw_words, tokens);
        return 0;
    }
    printf("file[%s] not exits\n", source_path);
    return -1;
}
// A small amount of analysis is given in the code comments ~~ writing C++ is too tiring...