完整的代码 材料 见文章末尾 以下为核心内容和部分结果
项目介绍
function.cpp 实现了共有的函数
lexer.cpp 词法分析器
get_predict_table.cpp获取预测分析表
LR.cpp语法分析
generate.cpp 语义分析 中间代码生成
to_asm.cpp目标代码生成
部分核心代码
LR分析
#include"compile.h"
node::node(const string& type, const string& text) {
this->type = type;
this->text = text;
}
node::~node() {
/*
for (node* t : this->child) {
delete t;
}
*/
}
string node::str() {
string out= "<" + this->type + ", " + this->text + ">";
return out;
}
vector<string> stack_text(const vector<node*>& stack) {
vector<string> ss;
for (const auto* s : stack) {
ss.push_back(s->type);
}
return ss;
}
node* analysis(const vector<unordered_map<string, string>>& word_table,
bool show)
{
auto predict_table = create_predict_table();
vector<node*> stack;
node* root = new node("Program");
node* end = new node("#");
stack.push_back(end);
stack.push_back(root);
int index = 0;
bool success = false;
/*
分析预测表的三个状态
1. cur = # 解析完成
2. cur = w 输入的字符表与符号栈中节点匹配
3. cur 为非终结符,继续生成子节点
4. error
*/
while (!stack.empty()) {
node* cur = stack.back();
stack.pop_back();
// 状态 1
if (cur->type == "#" && stack.empty()) {
cout << "分析完成!" << endl;
success = true;
return root;
}// 状态 2
else if (cur->type == word_table[index].at("type")) {
if (show) {
cout << "符号栈:";
for (auto& s : stack_text(stack)) {
cout << s << " ";
}
cout << std::endl;
cout << "匹配字符: " << word_table[index].at("word") << std::endl;
}
cur->text = word_table[index].at("word");
index++;
}
// 状态 3
else {
string w = word_table[index].at("type");
if (predict_table[cur->type].count(w) > 0) {
if (predict_table[cur->type][w] == "null") {
continue;
}
auto next_pr = split(predict_table[cur->type][w], ' ');
if (show) {
cout << "\n符号栈:";
for (auto& s : stack_text(stack)) {
cout << s << " ";
}
cout << "\n";
cout << "产生式: " << cur->type << " -> " << predict_table[cur->type][w] << "\n";
}
vector<node*> node_list;
/*
产生式右部符号入栈
子节点入栈
注意:子节点入栈顺序应该与产生式符号相反
*/
for (auto np : next_pr) {
node_list.push_back( new node(np));
//cout << node_list.back() << endl;
}
for (auto nl : node_list) {
//cout << nl << endl;
cur->child.push_back(nl);
}
for (int i = node_list.size() - 1; i >= 0; i--) {
stack.push_back(node_list[i]);
}
}
// 状态 4 错误
else {
cout << "error" << std::endl;
success = false;
return nullptr;
}
}
}
}
转换GCC汇编
#include"compile.h"
string global_head = "This is a simple compiler for C ;";
string code_head =
"\t.cfi_startproc\n"
"\tpushq\t%rbp\n"
"\t.cfi_def_cfa_offset 16\n"
"\t.cfi_offset 6, -16\n"
"\tmovq\t%rsp, %rbp\n"
"\t.cfi_def_cfa_register 6\n";
string code_footer =
"\tmovl\t$0, %eax\n"
"\tleave\n"
"\t.cfi_def_cfa 7, 8\n"
"\tret\n"
"\t.cfi_endproc\n"
".LFE6:\n"
"\t.size\tmain, .-main\n"
"\t.ident\t\"SCC: 1.0.0\"\n";
int LC = 0;
string re = "";
/*
对于数组给予相应长度地址空间
返回值[re, len]
re为变量名地址对照表, len为需要数据栈的高度(这里规定为12的倍数)
*/
tuple<unordered_map<string, vector<string>>,int> init_data(
vector<unordered_map<string, string>> name_list,
unordered_map<string, vector<string>> arrs)
{
//cout << arrs["arr"][0] << "sss " << arrs["arr"][1] << endl;
unordered_map<string, vector<string>>re;
int i = 0;
for (auto n : name_list) {
if (n["name"] != "main") {
if (n["flag"] == "int") {
i += 4;
re[n["name"]] = { to_string(i), "int" };
}
else if (n["flag"] == "char") {
i += 1;
re[n["name"]] = { to_string(i), "char" };
}
}
}
for (auto a : arrs) {
if (a.second[1] == "int") {
i += stoi(a.second[0]) * 4;
re[a.first] = { to_string(i), "int" };
}
else if (a.second[1] == "char") {
i += stoi(a.second[0]);
re[a.first] = { to_string(i), "char" };
}
}
for (auto rt : re) {
cout << rt.first << " " << rt.second[0] << " " << rt.second[1] << endl;
}
return { re, (i / 12 + 1) * 12 };
}
string init_string(vector<string> strings) {
string re;
for (int i = 0; i < strings.size(); i++) {
re += ".LC" + to_string(i) + ":\n\t.string \"" + strings[i] + "\"\n";
}
return re;
}
string args(string n, unordered_map<string, vector<string>>& name) {
if (name.count(n)) {
return "-" + name[n][0] + "(%rbp)";
}
else if (n.find("[]") != string::npos) {
vector<string> ags;
ags.push_back(n.substr(0, n.find("[]")));
ags.push_back(n.substr(n.find("[]") + 2));
if (if_num(ags[1])) {
int index = stoi(ags[1]);
if (name[ags[0]][1] == "char") {
return "-" + to_string(stoi(name[ags[0]][0]) - index) + "(%rbp)";
}
else if (name[ags[0]][1] == "int") {
return "-" + to_string(stoi(name[ags[0]][0]) - index * 4) + "(%rbp)";
}
}
else {
re += "\tmovl\t" + args(ags[1], name) + ", %eax\n\tcltq\n";
if (name[ags[0]][1] == "char") {
return "-" + name[ags[0]][0] + "(%rbp, %rax, 1)";
}
else if (name[ags[0]][1] == "int") {
return "-" + name[ags[0]][0] + "(%rbp, %rax, 4)";
}
}
}
else if (n.find("T") != string::npos) {
return n + "(%rip)";
}
else if (if_num(n)) {
return "$" + n;
}
else {
return n;
}
}
/*
传入参数:
1. midcode中间代码(四元式)
2. name变量地址参照表
可解析的汇编语句有
1. 赋值语句(op,=)
2. 四则运算(op,+-* /)
3. 跳转语句(op, j)
4. 输出语句(op, print)
*/
string generate_code(vector<m_node>& mid_code,
unordered_map<string, vector<string>>& name) {
re = "";
for (auto& m : mid_code) {
string a1 = args(m.arg1, name);
string a2 = args(m.arg2, name);
string r = args(m.result, name);
if (m.op == "=") {
if (name.count(m.result) && name[m.result][1] == "char") {
re += "\tmovb\t$" + to_string(int(m.arg1[0])) + ", " + r + "\n";
}
else if ((name.find(m.arg1)!= name.end()) ||
m.arg1.find("T")!=string::npos ||
m.arg1.find("[]") != string::npos) {
re += "\tmovl\t" + a1 + ", %ecx\n";
re += "\tmovl\t%ecx, " + r + "\n";
}
else {
re += "\tmovl\t" + a1 + ", " + r + "\n";
}
}
else if (m.op == "code_block") {
re += "." + m.result + ":\n";
continue;
}
else if (m.op.find("j") != string::npos) {
if (m.op == "j") {
re += "\tjmp\t." + m.result + "\n";
}
else {
re += "\tmovl\t" + a1 + ", %eax\n";
re += "\tcmpl\t" + a2 + ", %eax\n";
if (m.op.find(">") != string::npos) {
re += "\tjg\t." + m.result + "\n";
}
else if (m.op.find("<") != string::npos) {
re += "\tjle\t." + m.result + "\n";
}
else if (m.op.find("=") != string::npos) {
re += "\tje\t." + m.result + "\n";
}
}
}
else if (m.op=="+"||m.op=="-") {
re += "\tmovl\t" + a1 + ", %edx\n";
re += "\tmovl\t" + a2 + ", %eax\n";
if (m.op == "+") {
re += "\taddl\t%edx, %eax\n";
}
else {
re += "\tsubl\t%edx, %eax\n";
}
re += "\tmovl\t%eax, " + r + "\n";
}
else if (m.op == "*" || m.op == "/") {
if (name.count(m.arg1)) {
re += "\tmovl\t" + a2 + ", %eax\n";
re += "\timull\t" + a1 + ", %eax\n";
re += "\tmovl\t%eax, " + r + "\n";
}
else if (name.count(m.arg2) && name.count(m.arg1) == 0) {
re += "\tmovl\t" + a2 + ", %eax\n";
re += "\timull\t" + a1 + ", %eax, %eax\n";
re += "\tmovl\t%eax, " + r + "\n";
}
else if (name.count(m.arg2) == 0 && name.count(m.arg1) == 0) {
int num = stoi(m.arg2) * stoi(m.arg1);
re += "\tmovl\t$" + to_string(num) + ", " + r + "\n";
}
}
else if (m.op == "print") {
if (m.arg1 != "-1") {
if (name.count(m.arg1) && name[m.arg1][1] == "char") {
re += "\tmovsbl\t" + a1 + ", %eax\n";
}
else {
re += "\tmovl\t" + a1 + ", %eax\n";
}
}
if (m.arg2 != "-1") {
if ( name.count(m.arg2) && name[m.arg2][1] == "char") {
re += "\tmovsbl\t" + a2 + ", %edx\n";
}
else {
re += "\tmovl\t" + a2 + ", %edx\n";
}
}
if (m.result != "-1") {
if (name.count(m.result) && name[m.result][1] == "char") {
re += "\tmovsbl\t" + r + ", %ecx\n";
}
else {
re += "\tmovl\t" + r + ", %ecx\n";
}
}
re += "\tmovl\t%eax, %esi\n";
re+="\tleaq\t.LC" + to_string(LC) + "(%rip), %rdi\n";
LC += 1;
re += "\tmovl\t$0, %eax\n\tcall\tprintf@PLT\n";
}
}
return re;
}
/*
将生成的临时变量,汇编代码,头部,结束部分等一些内容拼接在一起
传入参数:
1. tmp 临时变量(其实在代码里作为全局变量)
2. strs 字符串变量
3. code 主函数汇编代码
4. subq 数据栈高度
*/
string connect(int tmp,string strs,string code,int subq) {
string data = "";
for (int i = 0; i < tmp; i++) {
data += "\t.comm\tT" + to_string(i) + ",4,4\n";
}
string re = "\t.text\n\t.section\t.rodata\n" + data + strs + \
"\t.text\n\t.globl\tmain\n\t.type\tmain, @function\nmain:\n" + code_head + \
"\tsubq\t$" + to_string(subq) + ", %rsp\n" + code + code_footer;
return re;
}
部分输出
LR分析输出语法树
输出语法分析树
第1层
<Program, > 第2层
<type, int> <M, > <C, > <Pro, > 第3层
<name, main> <(, (> <cc, > <), )> <{, {> <Pr, > <}, }> 第4层
<P, > <Pr, > 第5层
<type, int> <L, > <;, ;> <P, > <Pr, > 第6层
<M, > <LM, > <type, int> <L, > <;, ;> <P, > <Pr, > 第7层
<name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第8层
<[, [> <E, > <], ]> <name, index> <=, => <FE, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第9层
<T, > <ET, > <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第10层
<F, > <TT, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <Pan, > <P, > <Pr, > 第11层
<number, 25> <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <Ptype, > <P_block, > <Pro, > <printf, printf> <OUT, > <;, ;> 第12层
<number, 0> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <while, while> <(, (> <Pbc, > <), )> <{, {> <Pr, > <}, }> <(, (> <TXT, > <V, > <), )> 第13层
<number, 0> <number, 1> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <E, > <PM, > <P, > <Pr, > <TEXT, finish\n> 第14层
<number, 1> <number, 2> <F, > <TT, > <F, > <TT, > <T, > <ET, > <Cmp, <> <E, > <type, int> <L, > <;, ;> <P, > <Pr, > 第15层
<number, 2> <number, 3> <F, > <TT, > <T, > <ET, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第16层
<M, > <MS, > <F, > <TT, > <name, b> <=, => <FE, > <M, > <LM, > <printf, printf> <OUT, > <;, ;> <P, > <Pr, > 第17层
<name, index> <number, 10> <*, *> <F, > <TT, > <E, > <name, arr> <Size, > <AM, > <(, (> <TXT, > <V, > <), )> <L, > <;, ;> 第18层
<number, 2> <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <TEXT, f(%d)=%d\n> <,, ,> <E, > <VV, > <M, > <LM, > 第19层
<F, > <TT, > <T, > <ET, > <T, > <ET, > <T, > <ET, > <,, ,> <E, > <VV, > <name, index> <=, => <FE, > 第20层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <T, > <ET, > <E, > 第21层
<name, arr> <Size, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <T, > <ET, > 第22层
<[, [> <E, > <], ]> <name, index> <number, 2> <name, arr> <Size, > <M, > <MS, > <name, index> <M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第23层
<T, > <ET, > <[, [> <E, > <], ]> <name, b> <name, b> <M, > <MS, > <F, > <TT, > 第24层
<F, > <TT, > <T, > <ET, > <name, index> <number, 1> 第25层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第26层
<name, index> <M, > <MS, > <F, > <TT, > 第27层
<name, index> <number, 1>
输出预测表
输出预测分析表
L
name: M LM
Program
type: type M C Pro
LM
=: = FE
;: null
[: Size AM
C
(: ( cc )
Pro
{: { Pr }
Pr
type: P Pr
while: P Pr
if: P Pr
name: P Pr
printf: P Pr
}: null
P
type: type L ;
while: Pan
if: Pan
name: L ;
printf: printf OUT ;
TXT
TEXT: TEXT
FE
number: E
(: E
TEXT: TEXT
name: E
CHAR: CHAR
M
name: name
E
number: T ET
(: T ET
name: T ET
Size
[: [ E ]
ET
;: null
+: + T ET
]: null
-: - T ET
): null
,: null
Cmp: null
T
number: F TT
(: F TT
name: F TT
TT
*: * F TT
/: / F TT
): null
,: null
Cmp: null
]: null
+: null
;: null
-: null
F
number: number
(: BRA
name: M MS
MS
+: null
;: null
[: Size
): null
,: null
Cmp: null
-: null
]: null
*: null
/: null
Pbc
number: E PM
(: E PM
name: E PM
BRA
(: ( E )
OUT
(: ( TXT V )
V
,: , E VV
): null
VV
,: , E VV
): null
Pan
while: Ptype P_block Pro
if: Ptype P_block Pro
Ptype
while: while
if: if
P_block
(: ( Pbc )
PM
Cmp: Cmp E
): null
AM
=: = E
;: null
cc
): null
结果展示
输入文件为:
int main(){
int arr[25];
int index = 0;
// 求0~20的斐波那契数列
arr[0] = 1;
arr[1] = 2;
arr[2] = 3;
while(index < 10*2 ){
int b = arr[index];
arr[index+2]=arr[index+1] + b;
printf("f(%d)=%d\n",index,b);
index = index +1;
}
printf("finish\n");
}
输出的gcc汇编为
.text
.section .rodata
.comm T0,4,4
.comm T1,4,4
.comm T2,4,4
.comm T3,4,4
.LC0:
.string "f(%d)=%d\n"
.LC1:
.string "fnish\n"
.text
.globl main
.type main, @function
main:
.cfi_startproc
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
subq $120, %rsp
movl $0, -8(%rbp)
movl $1, -112(%rbp)
movl $2, -108(%rbp)
movl $3, -104(%rbp)
.W4:
movl -8(%rbp), %eax
cmpl $20, %eax
jle .code6
jmp .block6
.code6:
movl -8(%rbp), %eax
cltq
movl -112(%rbp, %rax, 4), %ecx
movl %ecx, -12(%rbp)
movl -8(%rbp), %edx
movl $1, %eax
addl %edx, %eax
movl %eax, T0(%rip)
movl T0(%rip), %eax
cltq
movl -112(%rbp, %rax, 4), %edx
movl -12(%rbp), %eax
addl %edx, %eax
movl %eax, T1(%rip)
movl -8(%rbp), %edx
movl $2, %eax
addl %edx, %eax
movl %eax, T2(%rip)
movl T2(%rip), %eax
cltq
movl T1(%rip), %ecx
movl %ecx, -112(%rbp, %rax, 4)
movl -8(%rbp), %eax
movl -12(%rbp), %edx
movl %eax, %esi
leaq .LC0(%rip), %rdi
movl $0, %eax
call printf@PLT
movl -8(%rbp), %edx
movl $1, %eax
addl %edx, %eax
movl %eax, T3(%rip)
movl T3(%rip), %ecx
movl %ecx, -8(%rbp)
jmp .W4
.block6:
movl %eax, %esi
leaq .LC1(%rip), %rdi
movl $0, %eax
call printf@PLT
movl $0, %eax
leave
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE6:
.size main, .-main
.ident "PCC: 1.0.0"
生成汇编代码.s文件,后续的链接等工作将交给gcc,可以调用gcc将汇编编译为可执行文件
数组下标可以用变量表示,并且print语句中可以有表达式存在
While 语句也可以循环嵌套
获取方式
点这里 只需要一点点辛苦费,不需要你写材料 报告 ppt
标签:gcc,string,eax,movl,C++,re,编译器,null,name From: https://blog.csdn.net/weixin_45605341/article/details/140086276