任務
你將使用圖轉移算法手工實現一個小型的詞法分析器。
- 分析器的輸入:存儲在文本文件中的字符序列,字符取自ASCII字符集。文件中可能包括下面幾種記號:關鍵字if、符合C語言標準的標識符、無符號整型數字、空格符、回車符\n。
- 分析器的輸出:打印出所識別的記號的種類、及記號開始行號、開始列號信息。
注意:1. 忽略空格及回車符;2. 對于標識符和數字,要輸出符號的具體詞法單元(見下面的示例)。
【示例】對于下面的文本文件:
ifx if iif if 234
iff if
你的輸出應該是(注意,因為文本顯示的原因,列號信息可能不一定準確):
ID(ifx) (1, 1)
IF (1, 4)
ID(iif) (1, 8)
IF (1, 13)
NUM(234) (1, 16)
ID(iff) (2, 1)
IF (2, 8)
程序
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_LINE 1024 // maximum number of bytes per line
/* Categories of token. */
enum Kind {
    IF = 0, /* the word "if" */
    ID,     /* any word that is neither "if" nor all digits */
    NUM     /* a word made up only of decimal digits */
};
/* Information recorded for each word: its category, text and position. */
struct Token {
    enum Kind kind; /* token category */
    char *value;    /* token text */
    int row;        /* row number supplied when the token was created */
    int col;        /* column number supplied when the token was created */
};
/*
 * Creates a heap-allocated Token that holds its own copy of `value`.
 *
 * kind     - token category
 * value    - NUL-terminated token text; it is copied, so the caller's buffer
 *            may be reused afterwards
 * row, col - position information, stored verbatim
 *
 * Returns the new token. Both the token and token->value come from malloc.
 * If memory cannot be allocated, a message is written to stderr and the
 * program exits with EXIT_FAILURE.
 */
struct Token *Token_new(enum Kind kind, char *value, int row, int col) {
    /* Room for every character of the text plus the terminating NUL. */
    size_t size = strlen(value) + 1;
    struct Token *token = malloc(sizeof(*token));
    char *copy = malloc(size);
    if (token == NULL || copy == NULL) {
        fprintf(stderr, "Token_new: out of memory\n");
        exit(EXIT_FAILURE);
    }
    /* Copies the terminator too, so the text can be printed with %s. */
    memcpy(copy, value, size);
    token->value = copy;
    token->kind = kind;
    token->row = row;
    token->col = col;
    return token;
}
/* Node of the singly linked list used to store all the words. */
struct List {
    struct Token *token; /* token held by this node */
    struct List *next;   /* next node in the list, or NULL at the end */
};
/* Head of the global token list; Process_word pushes each new token onto the front. */
struct List *all_word;
/*
 * Creates a list node holding `t` and links it in front of `list`.
 *
 * t    - token to store in the node (the pointer is stored, not copied)
 * list - current head of the list; may be NULL for an empty list
 *
 * Returns the new node, which is the new head of the list. If memory cannot
 * be allocated, a message is written to stderr and the program exits with
 * EXIT_FAILURE.
 */
struct List* List_new(struct Token* t, struct List* list) {
    struct List *node = malloc(sizeof(*node));
    if (node == NULL) {
        fprintf(stderr, "List_new: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->token = t;
    node->next = list;
    return node;
}
void List_print(struct List* list) {
if (!list) {
return;
}
List_print(list->next);
switch (list->token->kind) {
case IF: {
printf("IF (%d, %d)\n", list->token->row, list->token->col);
break;
}
case ID: {
printf("ID(%s) (%d, %d)\n", list->token->value, list->token->row, list->token->col);
break;
}
case NUM: {
printf("NUM(%s) (%d, %d)\n", list->token->value, list->token->row, list->token->col);
break;
}
default:
break;
}
}
/*
 * Is_num is defined further down in this file. It is declared here because
 * Process_word calls it, and calling an undeclared function is not valid C
 * since C99.
 */
int Is_num(char* word, int len);

/*
 * Processes one word: classifies it and pushes the resulting token onto the
 * front of the global list all_word.
 *
 * word - NUL-terminated text of the word
 * len  - number of characters in word
 * row  - row number stored in the token
 * col  - column number stored in the token
 *
 * The word "if" becomes an IF token, a word made only of digits becomes NUM,
 * and anything else becomes ID.
 */
void Process_word(char *word, int len, int row, int col) {
    struct Token *token;
    if (strcmp("if", word) == 0) {
        token = Token_new(IF, word, row, col);
    } else if (Is_num(word, len) == 1) {
        token = Token_new(NUM, word, row, col);
    } else {
        token = Token_new(ID, word, row, col);
    }
    all_word = List_new(token, all_word);
}
/*
 * Checks whether a word is a number.
 *
 * Returns 1 when each of the first `len` characters of `word` is a decimal
 * digit ('0'..'9'), and 0 as soon as any other character is found. When
 * `len` is 0 or negative nothing is examined and the result is 1.
 */
int Is_num(char* word, int len) {
    int pos = 0;
    while (pos < len) {
        char ch = word[pos];
        int is_digit = (ch >= '0' && ch <= '9');
        if (!is_digit) {
            return 0;
        }
        pos++;
    }
    return 1;
}
int main() {
FILE *file;
char strLine[MAX_LINE];
char str[MAX_LINE];
if ((file = fopen("/home/hz/a.txt", "r")) == NULL) {
printf("Open Failed!");
return -1;
}
int curr_row = 0; //當(dāng)前的行數(shù)
while (!feof(file)) {
curr_row++;
if(fgets(strLine, MAX_LINE, file)){
printf("%s", strLine);
int len = strlen(strLine);
int str_save_len = 0;
for (int i = 0; i < len; i++) {
char c = strLine[i];
switch (c) {
case ' ': {
if (str_save_len > 0) {
str[str_save_len] = '\0';
Process_word(str, str_save_len, curr_row, i + 1);
str_save_len = 0;
}
break;
}
case '\n': {
if (str_save_len > 0) {
str[str_save_len] = '\0';
Process_word(str, str_save_len, curr_row, i + 1);
str_save_len = 0;
}
break;
}
default: {
str[str_save_len++] = c;
}
}
}
}
}
List_print(all_word);
fclose(file);
return 0;
}