首页 > 编程语言 >【编译原理】正则式转NFA转DFA 代码实现(C/C++)

【编译原理】正则式转NFA转DFA 代码实现(C/C++)

时间:2024-04-16 20:11:22浏览次数:35  
标签:ch NFA nfa int C++ state op DFA size

直接上代码:

#include<bits/stdc++.h>

using namespace std;

//nfa结构定义
struct nst {
    vector<int> a[26], e; //接收a-z会到达的状态,接收eps会到达的状态
    bool f = 0; //=0为可接受态
};
vector<nst> nfa;
set<char>alp;
string str;
set<int>accepted;

struct dst {
    vector<int> a[26]; //int a[26] a[0] = 5
    bool f = 0; //1为可接受态
};
vector<dst> dfa;

stack<int> st;
int nfa_size = 0, dfa_size;
string dispregex;
struct nst init_nfa_state;
struct dst init_dfa_state;

//产生式定义
struct production{
    int a, b, c;
};

/***************************** regex to nfa ****************************/

//把单词隔开
string insert_concat(string regexp) {
    string ret = "";
    char c, c2;
    for (unsigned int i = 0; i < regexp.size(); i++) {
        c = regexp[i];
        if (i + 1 < regexp.size()) {
            c2 = regexp[i + 1];
            ret += c;
            if (c != '(' && c2 != ')' && c != '|' && c2 != '|' && c2 != '*') {
                ret += '.';
            }
        }
    }
    ret += regexp[regexp.size() - 1];
    return ret;
}

//输出nfa
void print_nfa() {
//    cout << "------------------------------------------------------------------------\n";
    cout << str << ":\n";
    cout << "正则式 转 NFA: \n";
    cout << "K = {";
    for (int i = 0; i < nfa.size(); i++) {
        if (i)  cout << ", ";
        cout << char(i + 'A');
    }
    cout << "};\n";
    cout << "Σ = {";
    auto it = alp.begin();
    cout << *it, it++;
    for (; it != alp.end(); it++) {
        cout << ", " << *it;
    }
    cout << "};\n";

    vector<production> ans;
    set<int> in; //记录有入度的点
    for (int i = 0; i < nfa.size(); i++) {
        bool &f = nfa[i].f;
        for (int j = 0; j < 26; j++) {
            if (nfa[i].a[j].size() == 0)    continue;
            f |= 1;
            for (auto k : nfa[i].a[j])  ans.push_back({i, j, k}), in.insert(k);
        }
        if (nfa[i].e.size() == 0)   continue;
        for (auto j : nfa[i].e)     ans.push_back({i, -1, j}), in.insert(j);
        f |= 1;
    }

    for (int i = 0; i < ans.size(); i++) {
        if (i)  cout << ", ";
        cout << "f(";
        cout << char(ans[i].a + 'A');
        cout << ", ";
        if (ans[i].b == -1) cout << "ε";
        else    cout << char(ans[i].b + 'a');
        cout << ") = ";
        cout << char(ans[i].c + 'A');
    }
    cout << ";\n";

    //没有入度就是起点
    for (int i = 0; i < nfa.size(); i++) {
        if (!in.count(i)) {
            cout << char(i + 'A') << ";\n";
            break;
        }
    }

    cout << "Z = {";
    vector<int> final;
    for (int i = 0; i < nfa.size(); i++) {
        if (!nfa[i].f)    final.push_back(i), accepted.insert(i);
    }
    for (int i = 0; i < final.size(); i++) {
        if (i)  cout << ", ";
        cout << char(final[i] + 'A');
    }
    cout << "};\n";
    cout << "..........................................\n";
}

//处理字母
void character(int i) {
    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);
    nfa[nfa_size].a[i].push_back(nfa_size + 1);
    st.push(nfa_size);
    //cout << char(i + 'a') << ' ' << nfa_size << ' ';
    nfa_size++;
    st.push(nfa_size);
    //cout << nfa_size << endl;
    nfa_size++;
    //print_nfa();
}

//处理'|'
void union_() {
    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);

    int d = st.top();
    st.pop();
    int c = st.top();
    st.pop();
    int b = st.top();
    st.pop();
    int a = st.top();
    st.pop();
    //cout << "| " << a << ' ' << b << ' ' << c << ' ' << d << endl;

    nfa[nfa_size].e.push_back(a);
    nfa[nfa_size].e.push_back(c);
    st.push(nfa_size);
    nfa_size++;
    nfa[b].e.push_back(nfa_size);
    nfa[d].e.push_back(nfa_size);
    st.push(nfa_size);
    nfa_size++;

    //print_nfa();
}

//处理'.'
void concatenation() {
    int d = st.top();
    st.pop();
    int c = st.top();
    st.pop();
    int b = st.top();
    st.pop();
    int a = st.top();
    st.pop();
    //cout << ". " << a << ' ' << b << ' ' << c << ' ' << d << endl;
    nfa[b].e.push_back(c);
    //nfa_size++;
    st.push(a);
    st.push(d);

    //print_nfa();
}

//处理'*'
void kleene_star() {
    //取出前两个
    //cout << st.size() << endl;
    int b = st.top();
    st.pop();
    int a = st.top();
    st.pop();
    //cout << "* " << a << ' ' << b << endl;
    //cout << nfa_size << endl;
    //再加三条边
    nfa.push_back(init_nfa_state);
    nfa.push_back(init_nfa_state);
    nfa[b].e.push_back(a);
    nfa[nfa_size].e.push_back(a);
    nfa[nfa_size].e.push_back(nfa_size + 1);
    nfa[b].e.push_back(nfa_size + 1);
    st.push(nfa_size);
    //cout << "** " << nfa_size << ' ';
    nfa_size++;
    st.push(nfa_size);
    //cout << nfa_size << endl;
    nfa_size++;
    //print_nfa();
    //cout << "------------------------------------\n";
}

//后缀转nfa
void postfix_to_nfa(string postfix) {
    for (unsigned int i = 0; i < postfix.size(); i++) {
        char ch = postfix[i];
        if (ch <= 'z' && ch >= 'a')     character(ch - 'a');
        else if (ch == '*')     kleene_star();
        else if (ch == '.')     concatenation();
        else if (ch == '|')     union_();
        else {
            cout << "输入为非法字符!读入只能是 a-z, |, *, ()" << endl;
        }
    }
}

//出入栈优先级
int priority(char c) {
    switch (c) {
        case '*':
            return 3;
        case '.':
            return 2;
        case '|':
            return 1;
        default:
            return 0;
    }
}

//正则式转后缀表达式
string regexp_to_postfix(string regexp) {
    string postfix = "";
    stack<char> op;
    char c;
    for (unsigned int i = 0; i < regexp.size(); i++) {
        char ch = regexp[i];
        if (ch <= 'z' && ch >= 'a')     postfix += ch;
        else if (ch == '(')     op.push(ch);
        else if (ch == ')') {
            while (op.top() != '(') {
                postfix += op.top();
                op.pop();
            }
            op.pop();
        }
        else {
            while (!op.empty()) {
                c = op.top();
                if (priority(c) >= priority(ch)) {
                    postfix += op.top();
                    op.pop();
                } else break;
            }
            op.push(regexp[i]);
        }
    }
    while (!op.empty()) {
        postfix += op.top();
        op.pop();
    }
    return postfix;
}

/***************************** nfa to dfa ****************************/

void print_dfa() {
    //cout << dfa_size << endl;
    dfa_size++;
    //cout << dfa_size << endl;
    cout << "NFA 转 DFA: " << endl;
    cout << "K = {";
    for (int i = 0; i < dfa_size; i++) {
        if (i)  cout << ", ";
        cout << i;
    }
    cout << "};\n";

    int fst = 0; //是否为第一次输出
    for (int i = 0; i < dfa_size; i++) {
        for (int j = 0; j < 26; j++) {
            if (dfa[i].a[j].size() == 0)    continue;
            for (auto k : dfa[i].a[j]) {
                if (fst)    cout << ", ";
                else    fst = 1;
                cout << "f(" << i << ", " << char(j + 'a') << ") = " << k;
            }
        }
    }
    cout << ";\n";

    cout << "Z = {";
    vector<int> final;
    for (int i = 0; i < dfa_size; i++) {
        if (dfa[i].f)   final.push_back(i);
    }
    for (int i = 0; i < final.size(); i++) {
        if (i)  cout << ", ";
        cout << final[i];
    }
    cout << "};\n";
    cout << "------------------------------------------------------------------------\n";
}

void epsilon_closure(int state, set<int> &si) {
    for (unsigned int i = 0; i < nfa[state].e.size(); i++) {
        if (si.count(nfa[state].e[i]) == 0) {
            si.insert(nfa[state].e[i]);
            epsilon_closure(nfa[state].e[i], si);
        }
    }
}

//打印闭包(调试)
void print_epsilon(int n, set<int>eps[]) {
    cout << "===========================\n";
    for (int i = 0; i < n; i++) {
        cout << i << ": ";
        for (auto j: eps[i]) cout << j << ' ';
        cout << endl;
    }
    cout << "===========================\n";
}

//nfa转dfa
void nfa_to_dfa(int start_state, int n) {
    dfa.resize(n);
    map<set<int>, int> mp, idx; //记录所有出现过的状态, state对应的状态数字
    set<int>si; //起始状态集

    //求所有状态的空闭包
    set<int> eps[n];
    for (int i = 0; i < n; i++)     epsilon_closure(i, eps[i]);
    si = eps[start_state];
    si.insert(start_state); //至少得有一个起始状态哦
    idx[si] = 0;
    //print_epsilon(n, eps);

    queue<set<int>> q;
    q.push(si);
    while (!q.empty()) {
        auto ss = q.front();
        q.pop();
        mp[ss]++;
        if (mp[ss] > 1) continue;

//        cout << "ss: ";
//        for (auto j : ss)   cout << j << ' ';
//        cout << endl;

        //接下来算第一行:起始状态先读入一个对应字符再做eps闭包
        for (auto ch: alp) { //第几个字符
            set<int> state; //记录状态
            int i = ch - 'a';
            for (auto st: ss) { //对应起点
                //st通过一个i边能到达的集合
                for (auto j: nfa[st].a[i]) {
                    state.insert(j); //首先是自己
                    //加上j的空闭包
                    for (auto k: eps[j]) state.insert(k);
                }
            }
            if (state.size() && mp[state] == 0) q.push(state);
            if (state.size()) {
                //cout << i << ": ";
                if (!idx.count(state))  dfa_size++, idx[state] = dfa_size;
                dfa[idx[ss]].a[i].push_back(idx[state]);
                //cout << "ATTENTION!!! " << idx[ss] << ' ' << idx[state] << endl;
                for (auto j : state) {
                    //cout << j << ' ';
                    if (accepted.count(j)) {
                        dfa[idx[state]].f = 1;
                        //cout << "& " << idx[state] << "\n";
                        break;
                    }
                }
                //cout << endl;
            }
        }
//        cout << "+++++++++++++++++++++++\n";
    }
}

/***************************** solve ****************************/

//每组处理前的清空
void clear() {
    nfa_size = dfa_size = 0;
    while(!st.empty())  st.pop();
    alp.clear();
    nfa.clear();
    dfa.clear();
}

//判断是否有非法输入
bool check (string postfix) {
    for (unsigned int i = 0; i < postfix.size(); i++) {
        char ch = postfix[i];
        if (ch <= 'z' && ch >= 'a') continue;
        else if (ch == '*')    continue;
        else if (ch == '.')    continue;
        else if (ch == '|')    continue;
        cout << "输入为非法字符!读入只能是: 小写英文字母, |, *, ()" << endl;
        return false; //检测到非法字符
    }
    return true;
}

void solve() {
    clear();
    string regexp, postfix;
    regexp = str;
    for (auto i : regexp) {
        if (i >= 'a' && i <= 'z')   alp.insert(i);
    }

    dispregex = regexp;
    regexp = insert_concat(regexp);
    cout << regexp << endl;
    postfix = regexp_to_postfix(regexp);
    cout << "Postfix Expression: " << postfix << endl;
    bool suc = check(postfix);
    if (suc) {
        postfix_to_nfa(postfix);
        print_nfa();
    }
    //cout << nfa_size << endl;
    //nfa_to_dfa(nfa_size);

    //开始转dfa
    int final_state = st.top();
    st.pop();
    int start_state = st.top();
    st.pop();
    //cout << start_state << ' ' << final_state << endl;
    nfa[final_state].f = 1;
    nfa_to_dfa(start_state, nfa.size());
    print_dfa();

    cout << endl << endl;

}

int main() {
    freopen("in3.txt", "r", stdin);
    freopen("out3.txt", "w", stdout);
    while (cin >> str) {
        solve();
    }
}

//正则式 -> 有空nfa -> 去空nfa -> dfa -> 极小化dfa

输入文件in3.txt

a*b
a(ab)*
a|b*
ab*
a*

输出文件out3.txt

a*.b
Postfix Expression: a*b.
a*b:
正则式 转 NFA: 
K = {A, B, C, D, E, F};
Σ = {a, b};
f(A, a) = B, f(B, ε) = A, f(B, ε) = D, f(C, ε) = A, f(C, ε) = D, f(D, ε) = E, f(E, b) = F;
C;
Z = {F};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(0, b) = 2, f(1, a) = 1, f(1, b) = 2;
Z = {2};
------------------------------------------------------------------------


a.(a.b)*
Postfix Expression: aab.*.
a(ab)*:
正则式 转 NFA: 
K = {A, B, C, D, E, F, G, H};
Σ = {a, b};
f(A, a) = B, f(B, ε) = G, f(C, a) = D, f(D, ε) = E, f(E, b) = F, f(F, ε) = C, f(F, ε) = H, f(G, ε) = C, f(G, ε) = H;
A;
Z = {H};
..........................................
NFA 转 DFA: 
K = {0, 1, 2, 3};
f(0, a) = 1, f(1, a) = 2, f(2, b) = 3, f(3, a) = 2;
Z = {1, 3};
------------------------------------------------------------------------


a|b*
Postfix Expression: ab*|
a|b*:
正则式 转 NFA: 
K = {A, B, C, D, E, F, G, H};
Σ = {a, b};
f(A, a) = B, f(B, ε) = H, f(C, b) = D, f(D, ε) = C, f(D, ε) = F, f(E, ε) = C, f(E, ε) = F, f(F, ε) = H, f(G, ε) = A, f(G, ε) = E;
G;
Z = {H};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(0, b) = 2, f(2, b) = 2;
Z = {1, 2};
------------------------------------------------------------------------


a.b*
Postfix Expression: ab*.
ab*:
正则式 转 NFA: 
K = {A, B, C, D, E, F};
Σ = {a, b};
f(A, a) = B, f(B, ε) = E, f(C, b) = D, f(D, ε) = C, f(D, ε) = F, f(E, ε) = C, f(E, ε) = F;
A;
Z = {F};
..........................................
NFA 转 DFA: 
K = {0, 1, 2};
f(0, a) = 1, f(1, b) = 2, f(2, b) = 2;
Z = {1, 2};
------------------------------------------------------------------------


a*
Postfix Expression: a*
a*:
正则式 转 NFA: 
K = {A, B, C, D};
Σ = {a};
f(A, a) = B, f(B, ε) = A, f(B, ε) = D, f(C, ε) = A, f(C, ε) = D;
C;
Z = {D};
..........................................
NFA 转 DFA: 
K = {0, 1};
f(0, a) = 1, f(1, a) = 1;
Z = {1};
------------------------------------------------------------------------



标签:ch,NFA,nfa,int,C++,state,op,DFA,size
From: https://www.cnblogs.com/CTing/p/18139091

相关文章

  • C++定义,继承和虚函数
    类定义方式一般有两种Baseb和Baseb(3);一种不带参数,一种带参数,这两种实例定义会在范围结束自动释放。Base*c=newBase;和Base*c=newBase(5);没有参数可不加括号。通过new申请的类,需要手动delete释放,否则需要关闭程序才会释放(说的内存泄漏是指程序一直运行期间不断产生......
  • C++ list erase
    原文:https://www.cnblogs.com/yelongsan/p/4050404.htmlSTL中的容器按存储方式分为两类,一类是按以数组形式存储的容器(如:vector、deque);另一类是以不连续的节点形式存储的容器(如:list、set、map)。在使用erase方法来删除元素时,需要注意一些问题。      在使用list、set或m......
  • 深度解读《深度探索C++对象模型》之默认构造函数
    接下来我将持续更新“深度解读《深度探索C++对象模型》”系列,敬请期待,欢迎关注!也可以关注公众号:iShare爱分享,主动获得推文。提到默认构造函数,很多文章和书籍里提到:“在需要的时候编译器会自动生成一个默认构造函数”。那么关键的问题来了,到底是什么时候需要?是谁需要?比如下面的......
  • 第十五届蓝桥杯大赛软件赛省赛 C/C++ 大学 A 组
    Preface答辩圈钱杯,去年省赛的题好歹还有些有意思的题(指杜教筛),今年变成煞笔题开会了是吧两个小时多一点就全写完了,然后开始给后面三题写对拍(结果发现其实都没写挂纯在浪费时间)考完感觉AK有望,结果后面发现最后一题漏看条件了,苦露西而且中间EF两题全偷懒开__int128了,反正用下发的......
  • 华为实习4.10机考第二题C++代码
    考的是简单的并查集这道题考法就是并查集,若两个图片相似度大于0,则将他们放到一个家族中,同时维护家族的相似度总和。注意M矩阵是对称矩阵,所以需要避免重复维护相似度,因此可以只针对M矩阵的下三角矩阵或上三角矩阵中的连接块,计算相似度总和;或考虑整个M矩阵,然后相似度总和除......
  • C++发票识别、发票查验接口示例,您的“发票管理专家”
    发票识别+发票查验接口。当财务人员在进行发票的数字化管理时,仅需一键上传发票图片,翔云发票识别接口即可快速、精准对发票的全票面信息进行提取,翔云发票查验接口可根据识别接口提取的发票信息实时联网进行真伪查验。助财务工作者从发票海洋中解脱出来,提升发票管理效率与准确率......
  • C++身份核验接口代码、身份证OCR、身份证实名认证API
    实名认证是什么意思呢?一般指的是对用户资料真实性进行的验证审核,这样有利于建立完善且可靠的互联网环境。如果交易双方使用的都是虚假信息,那么在诸多环节会存在很大的风险。另外,还有游戏平台对玩家进行实名认证,防止未成年人注册。实名认证有利于网络绿化,所以在互联网发展......
  • 结对编程 c++语言实现四则运算练习题
    结对同学:2252813程序要求:两个运算符,100以内的数字,不需要写答案。需要检查答案是否正确,并且保证答案在0-100之间通过阅读题目要求,我们决定使用c++语言完成编程,需要满足两个功能,首先生成一个包含两个运算符的算式,参与运算的数字在100之内。下一步检查答案是否正确,并且保证答......
  • C/C++项目中.h和.inc文件区别
    原问题:Differencebetween.hfilesand.incfilesincC/C++的标准惯例是将class、function的声明信息写在.h文件中。.c文件写class实现、function实现、变量定义等等。然而对于template来说,它既不是class也不是function,而是可以生成一组class或function的东西。编译器(compiler......
  • C++动态内存分配/malloc/new
    0前言这部分确实是面试老八股了,不过我还是记录一下1内存分区在C语言中,将内存分为程序代码区+数据区,其中数据区又分为静态存储区和动态存储区在C++中,分为五种:动态存储区:栈区:存放局部变量,由编译器自动分配释放,程序员不能操作堆:由程序员使用malloc/new申请,用free/delete......