【编译原理课程设计】词法分析程序设计
【实验目的】
(1)理解词法分析在编译程序中的作用
(2)加深对有穷自动机模型的理解
(3)掌握词法分析程序的实现方法和技术
【实验内容】
对一个简单语言的子集编制一个一遍扫描的词法分析程序。
【实验要求】
(1)待分析的简单语言的词法
1) 关键字
begin if then while do end
2) 运算符和界符
:= + - * / < <= > >= <> = ; ( ) #
3) 其他单词是标识符(ID)和整型常数(NUM),通过以下正规式定义:
ID=letter(letter|digit)*
NUM=digit digit*
4) 空格由空白、制表符和换行符组成。空格一般用来分隔ID、NUM、运算符、界符和关键字,词法分析阶段通常被忽略。
(2)各种单词符号对应的种别编码
单词符号 | 种别码 | 单词符号 | 种别码 |
begin | 1 | : | 17 |
if | 2 | := | 18 |
then | 3 | < | 20 |
while | 4 | <> | 21 |
do | 5 | <= | 22 |
end | 6 | > | 23 |
letter(letter|digit)* | 10 | >= | 24 |
digit digit* | 11 | = | 25 |
+ | 13 | ; | 26 |
- | 14 | ( | 27 |
* | 15 | ) | 28 |
/ | 16 | # | 0 |
(3)词法分析程序的功能
输入:所给文法的源程序字符串
输出:二元组(syn,token或sum)构成的序列。
syn为单词种别码;
token为存放的单词自身字符串;
sum 为整型常数的值。
例如:对源程序 begin x:=9;if x>0 then x:=2*x+1/3;end# 经词法分析后输出如下序列:(1,begin) (10,x) (18,:=) (11,9) (26,;) (2,if) ……
直接上代码
// One-pass lexical analyzer for a small Pascal-like language subset.
//
// Input : source text read from stdin, terminated by '#' (or by end of input).
// Output: one "(syn,token)" or "(syn,sum)" pair per line on stdout, where
//         syn   is the token category code,
//         token is the lexeme text,
//         sum   is the numeric value of an integer constant (syn == 11).
//
// Category codes:
//   begin 1, if 2, then 3, while 4, do 5, end 6, identifier 10, number 11,
//   + 13, - 14, * 15, / 16, : 17, := 18, < 20, <> 21, <= 22, > 23, >= 24,
//   = 25, ; 26, ( 27, ) 28, # 0 (end of input), unrecognised character -1.
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

bool is_digit(char ch);
bool is_letter(char ch);

const int SOURCE_CAPACITY = 10000;  // size of the source buffer
const int TOKEN_CAPACITY = 64;      // size of the lexeme buffer (incl. NUL)
const int KEYWORD_COUNT = 6;        // number of populated keyword entries

char example[SOURCE_CAPACITY];  // source buffer
char token[TOKEN_CAPACITY];     // text of the most recently scanned token
int syn, sum;                   // category code / value of the last number
const char* keyword[10] = { "begin", "if", "then", "while", "do", "end" };  // keywords
int example_p;  // index of the current character in the source buffer
int token_p;    // number of characters stored in token
char ch;        // current (not yet consumed) character

// Moves to the next character of the source buffer.
static void next_char() {
    ++example_p;
    ch = example[example_p];
}

// Appends the current character to token (dropping it if token is full, so
// over-long lexemes are truncated instead of overflowing) and moves on.
static void take_char() {
    if (token_p < TOKEN_CAPACITY - 1) {
        token[token_p] = ch;
        ++token_p;
    }
    next_char();
}

// Scans one token starting at the current character `ch`.
//
// On return `syn` holds the category code; `sum` holds the value when
// syn == 11, otherwise `token` holds the NUL-terminated lexeme. The current
// character is left on the first character after the token, except for the
// end marker ('#' or NUL), which is never consumed so the scanner cannot run
// past the end of the buffer.
void scan() {
    std::memset(token, 0, sizeof(token));  // also guarantees NUL termination
    token_p = 0;

    // Whitespace (blank, tab, newline) only separates tokens.
    while (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') {
        next_char();
    }

    // Integer constant: digit digit*
    if (is_digit(ch)) {
        syn = 11;
        sum = 0;
        while (is_digit(ch)) {
            const int digit = ch - '0';
            // Saturate at INT_MAX instead of overflowing (signed overflow is UB).
            if (sum > (INT_MAX - digit) / 10) {
                sum = INT_MAX;
            } else {
                sum = sum * 10 + digit;
            }
            next_char();
        }
        return;
    }

    // Identifier or keyword: letter (letter | digit)*
    if (is_letter(ch)) {
        while (is_letter(ch) || is_digit(ch)) {
            take_char();
        }
        syn = 10;
        for (int i = 0; i < KEYWORD_COUNT; ++i) {
            if (std::strcmp(token, keyword[i]) == 0) {
                syn = i + 1;  // keyword codes are 1-based table positions
                break;
            }
        }
        return;
    }

    // Operators, delimiters, end marker and unknown characters.
    switch (ch) {
    case '#':
    case '\0':  // a NUL is treated as end of input as well
        syn = 0;
        token[0] = '#';
        token_p = 1;
        break;
    case '+': syn = 13; take_char(); break;
    case '-': syn = 14; take_char(); break;
    case '*': syn = 15; take_char(); break;
    case '/': syn = 16; take_char(); break;
    case ':':
        syn = 17;
        take_char();
        if (ch == '=') {
            syn = 18;
            take_char();
        }
        break;
    case '<':
        syn = 20;
        take_char();
        if (ch == '>') {
            syn = 21;
            take_char();
        } else if (ch == '=') {  // else-if: "<>=" must not collapse into one token
            syn = 22;
            take_char();
        }
        break;
    case '>':
        syn = 23;
        take_char();
        if (ch == '=') {
            syn = 24;
            take_char();
        }
        break;
    case '=': syn = 25; take_char(); break;
    case ';': syn = 26; take_char(); break;
    case '(': syn = 27; take_char(); break;
    case ')': syn = 28; take_char(); break;
    default:
        // Unrecognised character: report it with code -1 and consume it so the
        // caller's loop always makes progress.
        syn = -1;
        take_char();
        break;
    }
}

// Returns true when ch is a decimal digit.
bool is_digit(char ch) {
    return ch >= '0' && ch <= '9';
}

// Returns true when ch is an ASCII letter.
bool is_letter(char ch) {
    return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
}

// Reads the source text from stdin up to and including the first '#', then
// prints every token as "(syn,token)" or, for numbers, "(syn,sum)".
int main() {
    int length = 0;
    bool terminated = false;
    int c = 0;  // int, not char, so EOF can be told apart from real characters

    // Keep one slot free so a '#' end marker can always be stored.
    while (length < SOURCE_CAPACITY - 1 && (c = std::getchar()) != EOF) {
        example[length] = static_cast<char>(c);
        ++length;
        if (c == '#') {
            terminated = true;
            break;
        }
    }
    if (!terminated) {
        // Input ended (or the buffer filled up) without '#': add the end marker
        // ourselves so the scan loop below is guaranteed to stop.
        example[length] = '#';
        ++length;
    }

    example_p = 0;
    ch = example[example_p];
    do {
        scan();
        if (syn == 11) {
            std::cout << "(" << syn << "," << sum << ")" << '\n';
        } else {
            std::cout << "(" << syn << "," << token << ")" << '\n';
        }
    } while (syn != 0);

    std::cout.flush();
#ifdef _WIN32
    std::system("pause");  // keep the console window open when run from an IDE
#endif
    return 0;
}