前言
這個是腳本語言的起點,我用的工具是bison和flex,PHP使用的是bison和re2c,沒有太多差異。
詞法解析器規則
%{
#include "skl_core.h"
#include "skl_variable.h"
#include "skl_function.h"
#include "skl_compiler.h"
#include "skl_language_parser.h"
/* Prototype so scanner actions can call yyerror() before its definition below. */
void yyerror(char *);

/*
 * End-of-input hook required by the flex-generated scanner.
 * Always returns 1, i.e. there is no further input file to switch to.
 */
int yywrap(void)
{
    return 1;
}

/*
 * Report an error on stdout as "[Error] <file>:<message>(<line>)".
 * The file name and line number are read from the global compiler_info.
 */
void yyerror(char *msg)
{
    printf("[Error] %s:%s(%d)\n",
           compiler_info.scanner_filename,
           msg,
           compiler_info.scanner_line);
}
%}
/* Declare the COMMENT start condition; INITIAL is flex's built-in default state. */
/* The keyword rules below must stay ahead of the identifier rule: when two     */
/* patterns match the same length, flex picks the one listed first.             */
%start COMMENT
%%
<INITIAL>"function" return T_FUNCTION; /* keywords */
<INITIAL>"global" return T_GLOBAL;
<INITIAL>"for" return T_FOR;
<INITIAL>"if" return T_IF;
<INITIAL>"else" return T_ELSE;
<INITIAL>"+" return T_ADD; /* arithmetic operators */
<INITIAL>"-" return T_SUB;
<INITIAL>"*" return T_MUL;
<INITIAL>"/" return T_DIV;
<INITIAL>"=" return T_ASSIGN; /* assignment; "==" below wins by longest match */
<INITIAL>"==" return T_EQ; /* comparison operators */
<INITIAL>"!=" return T_NE;
<INITIAL>">" return T_GT;
<INITIAL>">=" return T_GE;
<INITIAL>"<" return T_LT;
<INITIAL>"<=" return T_LE;
<INITIAL>"(" return T_LP; /* punctuation */
<INITIAL>")" return T_RP;
<INITIAL>"{" return T_LC;
<INITIAL>"}" return T_RC;
<INITIAL>";" return T_SEMICOLON;
<INITIAL>"break" return T_BREAK; /* more keywords */
<INITIAL>"continue" return T_CONTINUE;
<INITIAL>"return" return T_RETURN;
<INITIAL>"include" return T_INCLUDE;
<INITIAL>"," return T_COMMA;
<INITIAL>[A-Za-z_][A-Za-z_0-9]* {
    /* Identifier: letter or underscore, then letters, digits or underscores.
       The lexeme is handed to malloc_string() (presumably a heap copy -
       confirm in skl_core) and the result becomes the token value. */
    yylval.identifier = malloc_string(yytext);
    return T_IDENTIFIER;
}
<INITIAL>([1-9][0-9]*)|"0" {
    /* Integer literal: decimal digits with no leading zero, or a lone "0".
       Parsed directly into the int member of the semantic value. */
    sscanf(yytext, "%d", &yylval.integer);
    return T_INT_LITERAL;
}
<INITIAL>[0-9]+\.[0-9]+ {
    /* Floating-point literal: at least one digit on each side of the '.'.
       Parsed directly into the double member of the semantic value. */
    sscanf(yytext, "%lf", &yylval.db);
    return T_DOUBLE_LITERAL;
}
<INITIAL>[ \t\r] ;
<INITIAL>"\n" {
    /* Newlines are the only place (outside comments) where the line counter advances. */
    compiler_info.scanner_line++;
}
<INITIAL>("#"|"//")[^\n]* {
    /* Line comment introduced by '#' or '//'. The pattern swallows the rest of
       the line; the COMMENT state then consumes the terminating newline.
       (The introducer was previously the literal text "http://", so '//'
       comments were tokenised as two T_DIV operators.) */
    BEGIN COMMENT;
}
<INITIAL>L?\"(\\.|[^\\"])*\" {
    /* String literal, optionally prefixed with L, with backslash escapes.
       Listed before the catch-all rule for readability; flex prefers the
       longest match, so it always beat the single-character rule anyway.
       NOTE(review): a literal may span lines ([^\\"] matches '\n') and such
       newlines are not added to scanner_line. */
    yylval.identifier = malloc_string_trim(yytext);
    return T_STRING_LITERAL;
}
<INITIAL>. {
    /* Any other character is a lexical error. The line counter is NOT touched
       here: the offending character is not a newline, and bumping the counter
       would shift every line number reported afterwards. */
    yyerror(yytext);
}
<COMMENT>\n {
    /* End of a line comment: count the line and resume normal scanning. */
    compiler_info.scanner_line++;
    BEGIN INITIAL;
}
<COMMENT>. ;
%%
標識解釋
- INITIAL:解析程序語句標識
- COMMENT:程序注釋標識
INITIAL開頭就是對程序進行token解析,而COMMENT則是沒有卵用的(注釋內容會被直接丟棄),至于為什么這么寫就需要讀者自己去查看資料。
要注意這些還只是詞法解析格式
執行命令
flex --outfile=skl_language_scanner.c skl_language_scanner.l
生成的skl_language_scanner.c才是詞法解析器源碼。
語法解析器規則
%{
#include "skl_core.h"
#include "skl_variable.h"
#include "skl_function.h"
#include "skl_compiler.h"
#define YYDEBUG 1
%}
/* Semantic value type shared by the scanner (yylval) and the parser ($$ / $n). */
%union {
char *identifier; /* text of identifiers and string literals, set by the scanner */
int integer; /* value carried by T_INT_LITERAL */
double db; /* value carried by T_DOUBLE_LITERAL */
variable_t *variable; /* NOTE(review): no %token/%type visible in this grammar uses this member */
expression_t *expression; /* expression nodes (all *_expression nonterminals) */
expression_list_t *expression_list; /* call argument list (call_param_list) */
statement_t *statement; /* single statement nodes */
statement_list_t *statement_list; /* statement_list and statement_block */
function_t *function; /* declared type of function_definition */
param_list_t *param_list; /* formal parameter list (param_list) */
}
/* Terminal symbols. Only the two numeric literals have their own value type;  */
/* every other token is declared <identifier>, although the scanner assigns    */
/* yylval.identifier only for T_IDENTIFIER and T_STRING_LITERAL.               */
%token <integer> T_INT_LITERAL
%token <db> T_DOUBLE_LITERAL
%token <identifier>T_FUNCTION T_GLOBAL T_FOR T_IF T_ELSE T_ADD T_SUB T_MUL T_DIV T_ASSIGN
T_EQ T_NE T_GT T_GE T_LT T_LE T_LP T_RP T_LC T_RC T_SEMICOLON T_IDENTIFIER
T_BREAK T_CONTINUE T_RETURN T_COMMA T_STRING_LITERAL
T_INCLUDE
/* Nonterminal value types: which %union member each rule's $$ uses. */
%type <param_list> param_list
%type <expression_list> call_param_list
%type <function>function_definition
%type <expression> equality_expression relational_expression additive_expression multiplicative_expression primary_expression expression option_expression
%type <statement_list> statement_block statement_list
%type <statement> all_statement expression_statement return_statement continue_statement break_statement
for_statement if_statement global_variable_declaration_statement
cannot_top_statement can_top_statement include_statement
%%
// Translation unit: the start symbol (first rule, no %start directive is given).
// A program is a sequence of function definitions and statements. Every
// statement reduced here is handed to set_global_statement_list().
// The first item must be a function definition or a can_top_statement;
// break/continue (cannot_top_statement) are only accepted after that.
translation_unit: function_definition
| can_top_statement {
set_global_statement_list($1);
}
| translation_unit can_top_statement
{
set_global_statement_list($2);
}
| translation_unit cannot_top_statement{
set_global_statement_list($2);
}
| translation_unit function_definition
;
// Function definition: function NAME ( [params] ) { ... }
// Passes the name, the parameter list (NULL when there are no parameters)
// and the body block to create_function().
// NOTE(review): neither action assigns $$ although this nonterminal is
// declared <function>; translation_unit does not read the value.
function_definition: T_FUNCTION T_IDENTIFIER T_LP param_list T_RP statement_block
{
create_function($2 ,$4 ,$6);
}
| T_FUNCTION T_IDENTIFIER T_LP T_RP statement_block
{
create_function($2 ,NULL ,$5);
}
;
// Statements that may open a translation unit.
// No explicit actions: bison's default action ($$ = $1) forwards the node.
can_top_statement: expression_statement
| global_variable_declaration_statement
| for_statement
| if_statement
| return_statement
| include_statement
;
// Statements that may not open a translation unit (break / continue).
cannot_top_statement: break_statement
| continue_statement
;
// Any statement; this is what statement_list is built from.
all_statement: can_top_statement
| cannot_top_statement
;
// Include statement: include "file";
// $2 is the string-literal value produced by the scanner.
include_statement: T_INCLUDE T_STRING_LITERAL T_SEMICOLON
{
$$ = create_include_statment($2);
}
;
// Expression statement: an expression terminated by ';'.
// Example: 1+2+3; (accepted, although the result is unused)
expression_statement: expression T_SEMICOLON
{
$$ = create_expression_statement($1);
}
;
// for statement: for ( [init] ; [condition] ; [step] ) { ... }
// Each of the three header expressions is optional (option_expression yields
// NULL when omitted); the body must be a braced statement_block.
for_statement: T_FOR T_LP option_expression T_SEMICOLON option_expression T_SEMICOLON option_expression T_RP statement_block
{
$$ = create_for_statement($3 ,$5 ,$7 ,$9);
}
;
// break statement: break;
break_statement: T_BREAK T_SEMICOLON
{
$$ = create_break_statement();
}
;
// if statement: if ( expr ) { ... } [ else { ... } ]
// The else block is passed as NULL when absent. Both branches must be braced
// blocks, so there is no dangling-else ambiguity (and no brace-less "else if").
if_statement: T_IF T_LP expression T_RP statement_block
{
$$ = create_if_statement($3 ,$5 ,NULL);
}
| T_IF T_LP expression T_RP statement_block T_ELSE statement_block
{
$$ = create_if_statement($3 ,$5 ,$7);
}
;
// return statement: return [expr];
// $2 is NULL when no expression is given.
return_statement: T_RETURN option_expression T_SEMICOLON
{
$$ = create_return_statement($2);
}
;
// Global variable declaration, with or without an initializer.
// Examples: global var1 = 4;  |  global var1;
// The initializer is passed as NULL when omitted.
global_variable_declaration_statement: T_GLOBAL T_IDENTIFIER T_SEMICOLON
{
$$ = create_global_variable_statement($2, NULL);
}
|T_GLOBAL T_IDENTIFIER T_ASSIGN expression T_SEMICOLON
{
$$ = create_global_variable_statement($2, $4);
}
;
// Statement list: one or more statements.
// Left-recursive: the first statement creates the list, each following one is
// added through insert_statement_list().
statement_list: all_statement
{
$$ = create_statement_list($1);
}
| statement_list all_statement
{
$$ = insert_statement_list($1 ,$2);
}
;
// Statement block: { ... }
// An empty block yields NULL rather than an empty list.
statement_block: T_LC T_RC
{
$$ = NULL;
}
| T_LC statement_list T_RC
{
$$ = $2;
}
;
// continue statement: continue;
continue_statement: T_CONTINUE T_SEMICOLON
{
$$ = create_continue_statement();
}
;
// Top-level expression: an equality expression or an assignment.
// Assignment is right-recursive, so a = b = 1 groups as a = (b = 1).
// Only a plain identifier may appear on the left-hand side.
expression: equality_expression
| T_IDENTIFIER T_ASSIGN expression
{
$$ = create_assign_expression($1 ,$3);
}
;
// Equality expression: == and != (left-associative, lowest binary precedence).
equality_expression: relational_expression
| equality_expression T_EQ relational_expression
{
$$ = create_binary_expression(expression_action_eq ,$1,$3);
}
| equality_expression T_NE relational_expression
{
$$ = create_binary_expression(expression_action_ne ,$1,$3);
}
;
// Relational expression: <, >, <=, >= (left-associative; binds tighter than
// equality and looser than addition).
relational_expression: additive_expression
| relational_expression T_LT additive_expression
{
$$ = create_binary_expression(expression_action_lt ,$1,$3);
}
| relational_expression T_GT additive_expression
{
$$ = create_binary_expression(expression_action_gt ,$1,$3);
}
| relational_expression T_LE additive_expression
{
$$ = create_binary_expression(expression_action_le ,$1,$3);
}
| relational_expression T_GE additive_expression
{
$$ = create_binary_expression(expression_action_ge ,$1,$3);
}
;
// Additive expression: binary + and - (left-associative).
additive_expression: multiplicative_expression
| additive_expression T_ADD multiplicative_expression
{
$$ = create_binary_expression(expression_action_add ,$1,$3);
}
| additive_expression T_SUB multiplicative_expression
{
$$ = create_binary_expression(expression_action_sub ,$1,$3);
}
;
// Multiplicative expression: * and / (left-associative; binds tighter than + and -).
multiplicative_expression: primary_expression
| multiplicative_expression T_DIV primary_expression
{
$$ = create_binary_expression(expression_action_div ,$1,$3);
}
| multiplicative_expression T_MUL primary_expression
{
$$ = create_binary_expression(expression_action_mul ,$1,$3);
}
;
// Primary expression: the operands of the binary operators.
//   -expr               unary minus
//   ( expr )            parenthesised expression
//   name                variable reference
//   "text"              string literal
//   123 / 1.5           integer / floating-point literal
//   name() / name(a,b)  function call (argument list is NULL when empty)
primary_expression: T_SUB primary_expression
    {
        /* Unary minus. Previously this was `$$ = $2`, which silently dropped
           the sign so that -x evaluated as x. Negation is expressed as
           0 - operand using the constructors this grammar already relies on.
           NOTE(review): assumes expression_action_sub accepts an integer left
           operand with a double right operand - confirm in the evaluator. */
        $$ = create_binary_expression(expression_action_sub ,create_integer_expression(0) ,$2);
    }
    | T_LP expression T_RP
    {
        /* Parentheses only group; the inner node is forwarded unchanged. */
        $$ = $2;
    }
    | T_IDENTIFIER
    {
        $$ = create_identifier_expression($1);
    }
    | T_STRING_LITERAL
    {
        $$ = create_string_expression($1);
    }
    | T_INT_LITERAL
    {
        $$ = create_integer_expression($1);
    }
    | T_DOUBLE_LITERAL
    {
        $$ = create_double_expression($1);
    }
    | T_IDENTIFIER T_LP T_RP
    {
        $$ = create_call_function_expression($1 ,NULL);
    }
    | T_IDENTIFIER T_LP call_param_list T_RP
    {
        $$ = create_call_function_expression($1 ,$3);
    }
    ;
// Optional expression: an expression, or nothing at all (yields NULL).
// Used by the for header and by return.
option_expression: expression
| {$$=NULL;}
;
// Formal parameter list of a function definition: one or more identifiers
// separated by commas (left-recursive).
param_list: T_IDENTIFIER
{
$$ = create_param_list($1);
}
| param_list T_COMMA T_IDENTIFIER
{
$$ = insert_param_list($1 ,$3);
}
;
// Argument list of a function call: one or more expressions separated by
// commas (left-recursive).
call_param_list: expression
{
$$ = create_call_param_list($1);
}
| call_param_list T_COMMA expression
{
$$ = insert_call_param_list($1 ,$3);
}
;
這個規則很長,之前我也看過PHP,更長,幾千行,看著煩,但是作為語法特性實現,就是需要這么長。
這個規則也是我歷經一個月一邊學習一邊調整而來,著實不易,撒花。
同樣,這個只是一個規則
執行命令
bison --yacc -dv --defines=skl_language_parser.h --output=skl_language_parser.c skl_language_parser.y
這樣生成的skl_language_parser.h和skl_language_parser.c才是語法解析器源碼。
當然要看懂上述的內容和知識,也要花一番功夫。
結語
做成一件事,沒有隨隨便便,既然做了,就要達成初心