编译原理项目——C++实现C语言编译器输出为gcc级汇编（代码/报告材料）

标签：gcc string eax movl C++ re 编译器 null name

完整的代码材料见文章末尾以下为核心内容和部分结果

项目介绍

在这里插入图片描述

function.cpp 实现了共有的函数
lexer.cpp 词法分析器
get_predict_table.cpp获取预测分析表
LR.cpp语法分析
generate.cpp 语义分析中间代码生成
to_asm.cpp目标代码生成

部分核心代码

LR分析

#include"compile.h"
node::node(const string& type, const string& text) {
	this->type = type;
	this->text = text;
}
node::~node() {
	/*
	for (node* t : this->child) {
		delete t;
	}
	*/
}
string node::str() {
	string out= "<" + this->type + ", " + this->text + ">";
	return out;
}
vector<string> stack_text(const vector<node*>& stack) {
	vector<string> ss;
	for (const auto* s : stack) {
		ss.push_back(s->type);
	}
	return ss;
}

node* analysis(const vector<unordered_map<string, string>>& word_table,
	bool show)
{
	auto predict_table = create_predict_table();
	vector<node*> stack;
	node* root = new node("Program");
	node* end = new node("#");
	stack.push_back(end);
	stack.push_back(root);
	int index = 0;
	bool success = false;
	/*
	分析预测表的三个状态
	1. cur = #  解析完成
	2. cur = w  输入的字符表与符号栈中节点匹配
	3. cur 为非终结符，继续生成子节点
	4. error
	*/
	while (!stack.empty()) {
		node* cur = stack.back();
		stack.pop_back();
		// 状态 1
		if (cur->type == "#" && stack.empty()) {
			cout << "分析完成!" << endl;
			success = true;
			return root;
		}// 状态 2
		else if (cur->type == word_table[index].at("type")) {
			if (show) {
				cout << "符号栈：";
				for (auto& s : stack_text(stack)) {
					cout << s << " ";
				}
				cout << std::endl;
				cout << "匹配字符: " << word_table[index].at("word") << std::endl;
			}
			cur->text = word_table[index].at("word");
			index++;
		}
		// 状态 3
		else {
			string w = word_table[index].at("type");
			if (predict_table[cur->type].count(w) > 0) {
				if (predict_table[cur->type][w] == "null") {
					continue;
				}
				auto next_pr = split(predict_table[cur->type][w], ' ');
				if (show) {
					cout << "\n符号栈：";
					for (auto& s : stack_text(stack)) {
						cout << s << " ";
					}
					cout << "\n";
					cout << "产生式: " << cur->type << " -> " << predict_table[cur->type][w] << "\n";
				}
				vector<node*> node_list;
				/*
				产生式右部符号入栈
				子节点入栈
				注意：子节点入栈顺序应该与产生式符号相反
				*/
				for (auto np : next_pr) {
					node_list.push_back( new node(np));
					//cout << node_list.back() << endl;
				}
				for (auto nl : node_list) {
					//cout << nl << endl;
					cur->child.push_back(nl);
				}
				for (int i = node_list.size() - 1; i >= 0; i--) {
					stack.push_back(node_list[i]);
				}
			}
			// 状态 4 错误
			else {
				cout << "error" << std::endl;
				success = false;
				return nullptr;
			}
		}
	}
}

转换GCC汇编

#include"compile.h"
string global_head = "This is a simple compiler for C ;";

string code_head =
"\t.cfi_startproc\n"
"\tpushq\t%rbp\n"
"\t.cfi_def_cfa_offset 16\n"
"\t.cfi_offset 6, -16\n"
"\tmovq\t%rsp, %rbp\n"
"\t.cfi_def_cfa_register 6\n";

string code_footer =
"\tmovl\t$0, %eax\n"
"\tleave\n"
"\t.cfi_def_cfa 7, 8\n"
"\tret\n"
"\t.cfi_endproc\n"
".LFE6:\n"
"\t.size\tmain, .-main\n"
"\t.ident\t\"SCC: 1.0.0\"\n";

int LC = 0;
string re = "";

/*
对于数组给予相应长度地址空间
返回值[re, len]
re为变量名地址对照表, len为需要数据栈的高度（这里规定为12的倍数）
*/

tuple<unordered_map<string, vector<string>>,int> init_data(
	vector<unordered_map<string, string>> name_list,
	 unordered_map<string, vector<string>> arrs) 
{
	//cout << arrs["arr"][0] << "sss " << arrs["arr"][1] << endl;
	 unordered_map<string, vector<string>>re;
	int i = 0;
	for (auto n : name_list) {
		if (n["name"] != "main") {
			if (n["flag"] == "int") {
				i += 4;
				re[n["name"]] = { to_string(i), "int" };
			}
			else if (n["flag"] == "char") {
				i += 1;
				re[n["name"]] = { to_string(i), "char" };
			}
		}
	}
	for (auto a : arrs) {
		if (a.second[1] == "int") {
			i += stoi(a.second[0]) * 4;
			re[a.first] = { to_string(i), "int" };
		}
		else if (a.second[1] == "char") {
			i += stoi(a.second[0]);
			re[a.first] = { to_string(i), "char" };
		}
	}
	for (auto rt : re) {
		cout << rt.first << " " << rt.second[0] << " " << rt.second[1] << endl;
	}
	return { re, (i / 12 + 1) * 12 };
}

string init_string(vector<string> strings) {
	string re;
	for (int i = 0; i < strings.size(); i++) {
		re += ".LC" + to_string(i) + ":\n\t.string \"" + strings[i] + "\"\n";
	}
	return re;
}
string args(string n, unordered_map<string, vector<string>>& name) {
	if (name.count(n)) {
		return "-" + name[n][0] + "(%rbp)";
	}
	else if (n.find("[]") != string::npos) {
		vector<string> ags;
		ags.push_back(n.substr(0, n.find("[]")));
		ags.push_back(n.substr(n.find("[]") + 2));

		if (if_num(ags[1])) {
			int index = stoi(ags[1]);
			if (name[ags[0]][1] == "char") {
				return "-" + to_string(stoi(name[ags[0]][0]) - index) + "(%rbp)";
			}
			else if (name[ags[0]][1] == "int") {
				return "-" + to_string(stoi(name[ags[0]][0]) - index * 4) + "(%rbp)";
			}
		}
		else {
			re += "\tmovl\t" + args(ags[1], name) + ", %eax\n\tcltq\n";
			if (name[ags[0]][1] == "char") {
				return "-" + name[ags[0]][0] + "(%rbp, %rax, 1)";
			}
			else if (name[ags[0]][1] == "int") {
				return "-" + name[ags[0]][0] + "(%rbp, %rax, 4)";
			}
		}
	}
	else if (n.find("T") != string::npos) {
		return n + "(%rip)";
	}
	else if (if_num(n)) {
		return "$" + n;
	}
	else {
		return n;
	}
}
/*
传入参数：
	1. midcode中间代码（四元式）
	2. name变量地址参照表
可解析的汇编语句有
	1. 赋值语句（op,=）
	2. 四则运算（op,+-*  /)
	3. 跳转语句（op, j）
	4. 输出语句（op, print）
*/
string generate_code(vector<m_node>& mid_code,
	unordered_map<string, vector<string>>& name) {
	re = "";
	for (auto& m : mid_code) {
		string a1 = args(m.arg1, name);
		string a2 = args(m.arg2, name);
		string r = args(m.result, name);
		if (m.op == "=") {
			if (name.count(m.result) && name[m.result][1] == "char") {
				re += "\tmovb\t$" + to_string(int(m.arg1[0])) + ", " + r + "\n";
			}
			else if ((name.find(m.arg1)!= name.end()) || 
				m.arg1.find("T")!=string::npos || 
				m.arg1.find("[]") != string::npos) {
				re += "\tmovl\t" + a1 + ", %ecx\n";
				re += "\tmovl\t%ecx, " + r + "\n";
			}
			else {
				re += "\tmovl\t" + a1 + ", " + r + "\n";
			}
		}
		else if (m.op == "code_block") {
			re += "." + m.result + ":\n";
			continue;
		}
		else if (m.op.find("j") != string::npos) {
			if (m.op == "j") {
				re += "\tjmp\t." + m.result + "\n";
			}
			else {
				re += "\tmovl\t" + a1 + ", %eax\n";
				re += "\tcmpl\t" + a2 + ", %eax\n";
				if (m.op.find(">") != string::npos) {
					re += "\tjg\t." + m.result + "\n";
				}
				else if (m.op.find("<") != string::npos) {
					re += "\tjle\t." + m.result + "\n";
				}
				else if (m.op.find("=") != string::npos) {
					re += "\tje\t." + m.result + "\n";
				}
			}
		}
		else if (m.op=="+"||m.op=="-") {
			re += "\tmovl\t" + a1 + ", %edx\n";
			re += "\tmovl\t" + a2 + ", %eax\n";
			if (m.op == "+") {
				re += "\taddl\t%edx, %eax\n";
			}
			else {
				re += "\tsubl\t%edx, %eax\n";
			}
			re += "\tmovl\t%eax, " + r + "\n";
		}
		else if (m.op == "*" || m.op == "/") {
			if (name.count(m.arg1)) {
				re += "\tmovl\t" + a2 + ", %eax\n";
				re += "\timull\t" + a1 + ", %eax\n";
				re += "\tmovl\t%eax, " + r + "\n";
			}
			else if (name.count(m.arg2) && name.count(m.arg1) == 0) {
				re += "\tmovl\t" + a2 + ", %eax\n";
				re += "\timull\t" + a1 + ", %eax, %eax\n";
				re += "\tmovl\t%eax, " + r + "\n";
			}
			else if (name.count(m.arg2) == 0 && name.count(m.arg1) == 0) {
				int num = stoi(m.arg2) * stoi(m.arg1);
				re += "\tmovl\t$" + to_string(num) + ", " + r + "\n";
			}
		}
		else if (m.op == "print") {
			if (m.arg1 != "-1") {
				if (name.count(m.arg1) && name[m.arg1][1] == "char") {
					re += "\tmovsbl\t" + a1 + ", %eax\n";
				}
				else {
					re += "\tmovl\t" + a1 + ", %eax\n";
				}
			}
			if (m.arg2 != "-1") {
				if ( name.count(m.arg2) && name[m.arg2][1] == "char") {
					re += "\tmovsbl\t" + a2 + ", %edx\n";
				}
				else {
					re += "\tmovl\t" + a2 + ", %edx\n";
				}
			}
			if (m.result != "-1") {
				if (name.count(m.result) && name[m.result][1] == "char") {
					re += "\tmovsbl\t" + r + ", %ecx\n";
				}
				else {
					re += "\tmovl\t" + r + ", %ecx\n";
				}
			}
			re += "\tmovl\t%eax, %esi\n";
			re+="\tleaq\t.LC" + to_string(LC) + "(%rip), %rdi\n";
			LC += 1;
			re += "\tmovl\t$0, %eax\n\tcall\tprintf@PLT\n";
		}
	}
	return re;
}
/*
将生成的临时变量,汇编代码,头部,结束部分等一些内容拼接在一起
传入参数：
	1. tmp 临时变量（其实在代码里作为全局变量）
	2. strs 字符串变量
	3. code 主函数汇编代码
	4. subq 数据栈高度
*/
string connect(int tmp,string strs,string code,int subq) {
	string data = "";
	for (int i = 0; i < tmp; i++) {
		data += "\t.comm\tT" + to_string(i) + ",4,4\n";
	}
	string re = "\t.text\n\t.section\t.rodata\n" + data + strs + \
		"\t.text\n\t.globl\tmain\n\t.type\tmain, @function\nmain:\n" + code_head + \
		"\tsubq\t$" + to_string(subq) + ", %rsp\n" + code + code_footer;
	return re;
}

部分输出

LR分析输出语法树

输出语法分析树
第1层
<Program, > 第2层
<type, int> <M, > <C, > <Pro, > 第3层
<name, main> <(, (> <cc, > <), )> <{, {> <Pr, > <}, }> 第4层
<P, > <Pr, > 第5层
<type, int> <L, > <;, ;> <P, > <Pr, > 第6层
<M, > <LM, > <type, int> <L, > <;, ;> <P, > <Pr, > 第7层
<name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第8层
<[, [> <E, > <], ]> <name, index> <=, => <FE, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第9层
<T, > <ET, > <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第10层
<F, > <TT, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <M, > <LM, > <Pan, > <P, > <Pr, > 第11层
<number, 25> <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <name, arr> <Size, > <AM, > <Ptype, > <P_block, > <Pro, > <printf, printf> <OUT, > <;, ;> 第12层
<number, 0> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <while, while> <(, (> <Pbc, > <), )> <{, {> <Pr, > <}, }> <(, (> <TXT, > <V, > <), )> 第13层
<number, 0> <number, 1> <F, > <TT, > <F, > <TT, > <T, > <ET, > <T, > <ET, > <E, > <PM, > <P, > <Pr, > <TEXT, finish\n> 第14层
<number, 1> <number, 2> <F, > <TT, > <F, > <TT, > <T, > <ET, > <Cmp, <> <E, > <type, int> <L, > <;, ;> <P, > <Pr, > 第15层
<number, 2> <number, 3> <F, > <TT, > <T, > <ET, > <M, > <LM, > <L, > <;, ;> <P, > <Pr, > 第16层
<M, > <MS, > <F, > <TT, > <name, b> <=, => <FE, > <M, > <LM, > <printf, printf> <OUT, > <;, ;> <P, > <Pr, > 第17层
<name, index> <number, 10> <*, *> <F, > <TT, > <E, > <name, arr> <Size, > <AM, > <(, (> <TXT, > <V, > <), )> <L, > <;, ;> 第18层
<number, 2> <T, > <ET, > <[, [> <E, > <], ]> <=, => <E, > <TEXT, f(%d)=%d\n> <,, ,> <E, > <VV, > <M, > <LM, > 第19层
<F, > <TT, > <T, > <ET, > <T, > <ET, > <T, > <ET, > <,, ,> <E, > <VV, > <name, index> <=, => <FE, > 第20层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <+, +> <T, > <ET, > <F, > <TT, > <T, > <ET, > <E, > 第21层
<name, arr> <Size, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <M, > <MS, > <F, > <TT, > <T, > <ET, > 第22层
<[, [> <E, > <], ]> <name, index> <number, 2> <name, arr> <Size, > <M, > <MS, > <name, index> <M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第23层
<T, > <ET, > <[, [> <E, > <], ]> <name, b> <name, b> <M, > <MS, > <F, > <TT, > 第24层
<F, > <TT, > <T, > <ET, > <name, index> <number, 1> 第25层
<M, > <MS, > <F, > <TT, > <+, +> <T, > <ET, > 第26层
<name, index> <M, > <MS, > <F, > <TT, > 第27层
<name, index> <number, 1>

输出预测表

输出预测分析表
L
 name: M LM
Program
 type: type M C Pro
LM
 =: = FE
 ;: null
 [: Size AM
C
 (: ( cc )
Pro
 {: { Pr }
Pr
 type: P Pr
 while: P Pr
 if: P Pr
 name: P Pr
 printf: P Pr
 }: null
P
 type: type L ;
 while: Pan
 if: Pan
 name: L ;
 printf: printf OUT ;
TXT
 TEXT: TEXT
FE
 number: E
 (: E
 TEXT: TEXT
 name: E
 CHAR: CHAR
M
 name: name
E
 number: T ET
 (: T ET
 name: T ET
Size
 [: [ E ]
ET
 ;: null
 +: + T ET
 ]: null
 -: - T ET
 ): null
 ,: null
 Cmp: null
T
 number: F TT
 (: F TT
 name: F TT
TT
 *: * F TT
 /: / F TT
 ): null
 ,: null
 Cmp: null
 ]: null
 +: null
 ;: null
 -: null
F
 number: number
 (: BRA
 name: M MS
MS
 +: null
 ;: null
 [: Size
 ): null
 ,: null
 Cmp: null
 -: null
 ]: null
 *: null
 /: null
Pbc
 number: E PM
 (: E PM
 name: E PM
BRA
 (: ( E )
OUT
 (: ( TXT V )
V
 ,: , E VV
 ): null
VV
 ,: , E VV
 ): null
Pan
 while: Ptype P_block Pro
 if: Ptype P_block Pro
Ptype
 while: while
 if: if
P_block
 (: ( Pbc )
PM
 Cmp: Cmp E
 ): null
AM
 =: = E
 ;: null
cc
 ): null

结果展示

输入文件为：

int main(){
    int arr[25];
    int index = 0;
    // 求０～２０的斐波那契数列
    arr[0] = 1;
    arr[1] = 2;
    arr[2] = 3;
    while(index < 10*2 ){
        int b = arr[index];
        arr[index+2]=arr[index+1] + b;
        printf("f(%d)=%d\n",index,b);
        index = index +1;
    }
    printf("finish\n");
}

输出的gcc汇编为

	.text
	.section	.rodata
	.comm	T0,4,4
	.comm	T1,4,4
	.comm	T2,4,4
	.comm	T3,4,4
.LC0:
	.string "f(%d)=%d\n"
.LC1:
	.string "fnish\n"
	.text
	.globl	main
	.type	main, @function
main:

	.cfi_startproc
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	subq	$120, %rsp
	movl	$0, -8(%rbp)
	movl	$1, -112(%rbp)
	movl	$2, -108(%rbp)
	movl	$3, -104(%rbp)
.W4:
	movl	-8(%rbp), %eax
	cmpl	$20, %eax
	jle	.code6
	jmp	.block6
.code6:
	movl	-8(%rbp), %eax
	cltq
	movl	-112(%rbp, %rax, 4), %ecx
	movl	%ecx, -12(%rbp)
	movl	-8(%rbp), %edx
	movl	$1, %eax
	addl	%edx, %eax
	movl	%eax, T0(%rip)
	movl	T0(%rip), %eax
	cltq
	movl	-112(%rbp, %rax, 4), %edx
	movl	-12(%rbp), %eax
	addl	%edx, %eax
	movl	%eax, T1(%rip)
	movl	-8(%rbp), %edx
	movl	$2, %eax
	addl	%edx, %eax
	movl	%eax, T2(%rip)
	movl	T2(%rip), %eax
	cltq
	movl	T1(%rip), %ecx
	movl	%ecx, -112(%rbp, %rax, 4)
	movl	-8(%rbp), %eax
	movl	-12(%rbp), %edx
	movl	%eax, %esi
	leaq	.LC0(%rip), %rdi
	movl	$0, %eax
	call	printf@PLT
	movl	-8(%rbp), %edx
	movl	$1, %eax
	addl	%edx, %eax
	movl	%eax, T3(%rip)
	movl	T3(%rip), %ecx
	movl	%ecx, -8(%rbp)
	jmp	.W4
.block6:
	movl	%eax, %esi
	leaq	.LC1(%rip), %rdi
	movl	$0, %eax
	call	printf@PLT

	movl	$0, %eax
	leave
	.cfi_def_cfa 7, 8
	ret
	.cfi_endproc
.LFE6:
	.size	main, .-main
	.ident	"PCC: 1.0.0"

生成汇编代码.s文件，后续的链接等工作将交给gcc，可以调用gcc将汇编编译为可执行文件

数组下标可以用变量表示，并且print语句中可以有表达式存在

在这里插入图片描述