正则表达式转DFA&NFA
显然,正则表达式、NFA、DFA的概念都很简单,所以直接上代码,注释应该解释地比较清楚,没有万能头文件的自行替换需求库,如果有疑问的可以留言。
网盘链接
[自行补全]/s/1pbGT_wpB662TwFrnukXgGQ?pwd=TSIT
提取码:TSIT
运行截图
原理可以参考这篇博客传送门文章来源:https://www.toymoban.com/news/detail-506258.html
本次程序由四个文件组成文章来源地址https://www.toymoban.com/news/detail-506258.html
文件名 | 描述 |
---|---|
Regular_Expression.cpp | 处理正则表达式 |
NFA.cpp | 正则表达式转NFA |
DFA.cpp | NFA转DFA |
MainProcess.cpp | 主要处理程序 |
//Regular_Expression.cpp
#ifndef __REGULAR_EXPRESSION__
#define __REGULAR_EXPRESSION__
#include<bits/stdc++.h>
using namespace std;
const int OPERANDS = 1; // 操作符
const int OPERATOR = 2; // 运算符
const int ILLEGAL_CHAR = -1; // 非法字符
// 正则表达式包含非法字符
class Regular_Expression_Input_Exception
{
public:
char c;
Regular_Expression_Input_Exception(char c)
{
this->c = c;
}
string what()const
{
string ret;
ret = c + " is not in the charset" ;
return ret;
}
};
// 正则表达式格式错误
class Regular_Expression_Format_Exceprion
{
public:
Regular_Expression_Format_Exceprion(){}
string what()const
{
string ret;
ret = "Format Error ";
return ret;
}
};
class Regular_Expression
{
public:
Regular_Expression(string & s)
{
expression = s;
expression = Regular_Expression_Pretreatment(expression);
expression = Regular_Expression_Infix2PostFix(expression);
}
string get_postfix()
{
return expression;
}
private:
string expression;
inline string Regular_Expression_Pretreatment(string&);
inline string Regular_Expression_Infix2PostFix(string&);
};
// 返回字符类型
int get_chartype(char c)
{
if ( ('a' <= c && c <= 'z')||
('A' <= c && c <= 'Z')||
('0' <= c && c <= '9')||
(c == '.')
) return OPERANDS;
if ( (c == '|') ||
(c == '*') ||
(c == '(') ||
(c == ')') ||
(c == '&')
) return OPERATOR;
return ILLEGAL_CHAR;
}
// 设置操作符优先级
int get_priorioty(char c)
{
switch (c)
{
case '*': return 3; break;
case '&': return 2; break;
case '|': return 1; break;
case '(': return 0; break;
default: return ILLEGAL_CHAR; break;
}
}
//检查并预处理正则表达式 添加&作为连接符
string Regular_Expression::Regular_Expression_Pretreatment(string & pre_expression)
{
assert(pre_expression.size() > 0);
string treated_expression;
try
{
char prechar ;
int pretype ;
char nchar = pre_expression[0];
int ntype = get_chartype(nchar);
if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);
treated_expression.push_back(nchar);
prechar = nchar;
pretype = ntype;
int len = pre_expression.length();
for(int i = 1 ; i<len ; ++i)
{
nchar = pre_expression[i];
ntype = get_chartype(nchar);
if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);
// 当第一位是操作数 , * , )且第二位为操作数或(
if( (pretype == OPERANDS || prechar == '*' || prechar == ')' ) &&
(ntype == OPERANDS || nchar == '(')
)
// 使用 & 作为连接符
treated_expression.push_back('&');
treated_expression.push_back(nchar);
pretype = ntype;
prechar = nchar;
}
return treated_expression;
}
catch(const Regular_Expression_Input_Exception& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
catch(const exception& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
return treated_expression;
}
// 正则表达式的中缀表达式转成后缀表达式
string Regular_Expression::Regular_Expression_Infix2PostFix(string & pre_expression)
{
assert(pre_expression.size() > 0);
string treated_expression;
try
{
vector<char> op;
int len = pre_expression.length();
int ntype ;
char nchar;
for(int i=0 ; i<len ; ++i)
{
nchar = pre_expression[i];
ntype = get_chartype(nchar);
if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);
// 遇到 ( 直接压入栈中;
if(nchar == '(')
{
op.push_back(nchar);
}
// 遇到 ) 将运算符出栈并输出,直到遇到 ( ,将 ( 出栈但不输出
else if(nchar == ')')
{
while(!op.empty() && op.back() != '(')
{
treated_expression.push_back(op.back());
op.pop_back();
}
// 没有左括号匹配
if(op.empty()) throw Regular_Expression_Format_Exceprion();
// 弹出左括号
op.pop_back();
}
/* 遇到 * 、 & 、 | 运算符:
如果栈为空,直接将运算符压入栈中;
如果栈不为空,弹出栈中优先级大于等于当前运算符的运算符并输出,再将当前运算符入栈。
*/
else if(ntype == OPERATOR)
{
int npriority = get_priorioty(nchar);
while(!op.empty() && get_priorioty(op.back()) >= npriority)
{
treated_expression.push_back(op.back());
op.pop_back();
}
op.push_back(nchar);
}
else if(ntype == OPERANDS)
{
treated_expression.push_back(nchar);
}
}
while(!op.empty())
{
nchar = op.back();
// 缺少右括号匹配
if(nchar == '(') throw Regular_Expression_Format_Exceprion();
treated_expression.push_back(nchar);
op.pop_back();
}
return treated_expression;
}
catch(const Regular_Expression_Input_Exception& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
catch(const Regular_Expression_Format_Exceprion& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
catch(const std::exception& e)
{
std::cerr << e.what() << '\n';
exit(0);
}
return treated_expression;
}
// #ifndef __MAINPROCESS__
// int main()
// {
// #ifdef __LOCAL
// freopen("usrin.txt", "r", stdin);
// freopen("usrout.txt", "w", stdout);
// #endif
// string str;
// cin >> str;
// Regular_Expression reg(str);
// cout << reg.get_postfix() << "\n";
// }
// #endif
#endif
//NFA.cpp
#ifndef __NFA__
#define __NFA__
#include "Regular_Expression.cpp"
#include<bits/stdc++.h>
using namespace std;
struct Nfastate
{
// '#' 视为空转移
Nfastate(char c = '#' , Nfastate* trans = NULL , vector<Nfastate*> vec = vector<Nfastate*>())
{
input = c;
chTrans = trans;
epTrans.clear();
for(auto x:vec) epTrans.insert(x);
}
void add_New_epTrans(Nfastate& n)
{
epTrans.insert(&n);
}
void add_New_epTrans(Nfastate* n)
{
epTrans.insert(n);
}
void add_new_epTrans(vector<Nfastate*> &vec)
{
for(auto x:vec) epTrans.insert(x);
}
void print();
set<Nfastate*> closure();
char input; // 转移字符
Nfastate* chTrans; // 转移到的状态
set<Nfastate*> epTrans; // 空转移状态集合
};
set<Nfastate*> Nfastate::closure()
{
set<Nfastate*> s;
s.insert(this);
bool hasnewnode = 1;
while(hasnewnode)
{
hasnewnode = 0;
for(auto x:epTrans)
{
if(s.find(x) == s.end())
{
hasnewnode = 1;
s.insert(x);
}
}
}
return s;
}
void Nfastate::print()
{
cerr << "------Nfastate---------\n";
cerr << "input = " << input << "\n";
cerr << "chTrans = " << chTrans << "\n";
cerr << "epTrans = {" ;
for(auto x:epTrans) cerr << x << " ";
cerr << "}\n---------------------------------\n";
}
struct subNFA
{
subNFA(Nfastate* phead = NULL , Nfastate* ptail = NULL)
{
head = phead;
tail = ptail;
}
subNFA(Nfastate& ohead , Nfastate & otail)
{
head = &ohead;
tail = &otail;
}
subNFA(char c)
{
Nfastate *ntail = new Nfastate;
Nfastate *nhead = new Nfastate(c , ntail);
// fix1
if(c == '#') nhead->chTrans = NULL;
head = nhead;
tail = ntail;
}
void operator=(const subNFA& ele)
{
head = ele.head;
tail = ele.tail;
}
void print();
Nfastate *head; // 头部指针
Nfastate *tail; // 尾部指针
};
void subNFA::print()
{
int nodecnt = 0;
map<Nfastate* , int> mp;
queue<Nfastate*> q;
q.push(head);
mp[head] = ++nodecnt;
auto addnode = [&](Nfastate* e)
{
if(mp.count(e)) return mp[e];
mp[e] = ++nodecnt;
q.push(e);
return mp[e];
};
while(!q.empty())
{
Nfastate* now = q.front();
q.pop();
std::cout << "node : " << mp[now] << "\n";
if(now->chTrans != NULL)
{
addnode(now->chTrans);
std::cout << " chTrans : " << now->input << "\n";
std::cout << " Transnode : " << mp[now->chTrans] << "\n";
}
if(!now->epTrans.empty())
std:: cout <<" epTrans : {";
bool first = 1;
for(auto x:now->epTrans)
{
addnode(x);
std::cout << (first?"":" , ") << mp[x] ;
first = 0;
}
if(!now->epTrans.empty())
std::cout << "}\n";
cout << "\n";
}
std::cout << "Start node: "<< mp[head] << "\n";
std::cout << "End node : " << mp[tail] << "\n";
}
class NFA
{
public:
NFA(string &expression):reg(expression),subnfa()
{
// cerr << reg.get_postfix() << "\n";
Build_NFA();
}
void print();
void Build_NFA();
Nfastate* gethead();
Nfastate* gettail();
private:
subNFA subnfa; // 最终的NFA
Regular_Expression reg; // 正则表达式转换
};
Nfastate* NFA::gethead(){return subnfa.head;}
Nfastate* NFA::gettail(){return subnfa.tail;}
void NFA::print()
{
//debug
// std::cout << nfa.head << " " << nfa.tail << "\n";
std::cout << reg.get_postfix() << "\n";
subnfa.print();
}
void NFA::Build_NFA()
{
string expression = reg.get_postfix();
vector<subNFA> sta_nfa; // nfa栈
try
{
int len = expression.length();
for(int i=0 ; i<len ; ++i)
{
char nchar = expression[i];
int ntype = get_chartype(nchar);
//如果遇到操作数a,则新建一个NFA,转换弧上的值为a,将这个NFA压入栈中
if(ntype == OPERANDS)
{
subNFA n(nchar);
sta_nfa.push_back(n);
//debug
// cerr << nchar << ' ' << n.head->input << "\n";
// cerr << n.head << " " << n.tail << "\n";
// n.head->print();
// n.tail->print();
}
else if(nchar == '*')
{
// 取不出一个元素
if(sta_nfa.size() < 1) throw Regular_Expression_Format_Exceprion();
subNFA n1 = sta_nfa.back();
sta_nfa.pop_back();
subNFA n('#');
(*n.head).add_New_epTrans(n1.head);
(*n1.tail).add_New_epTrans(n.tail);
(*n1.tail).add_New_epTrans(n.head);
sta_nfa.push_back(n);
}
else if(nchar == '|')
{
// 取不出两个元素
if(sta_nfa.size() < 2) throw Regular_Expression_Format_Exceprion();
subNFA n1 = sta_nfa.back();
sta_nfa.pop_back();
subNFA n2 = sta_nfa.back();
sta_nfa.pop_back();
subNFA n('#');
//debug
// n.head->print();
// n.tail->print();
(*n.head).add_New_epTrans(n1.head);
(*n.head).add_New_epTrans(n2.head);
(*n1.tail).add_New_epTrans(n.tail);
(*n2.tail).add_New_epTrans(n.tail);
//debug
// n.head->print();
// n.tail->print();
sta_nfa.push_back(n);
}
else if(nchar == '&')
{
// 取不出两个元素
if(sta_nfa.size() < 2) throw Regular_Expression_Format_Exceprion();
subNFA n2 = sta_nfa.back();
sta_nfa.pop_back();
subNFA n1 = sta_nfa.back();
sta_nfa.pop_back();
(*n1.tail).add_New_epTrans(n2.head);
n1.tail = n2.tail;
sta_nfa.push_back(n1);
}
else throw Regular_Expression_Format_Exceprion();
}
if(sta_nfa.size() != 1) throw Regular_Expression_Format_Exceprion();
subnfa = sta_nfa[0];
}
catch(const Regular_Expression_Input_Exception& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
catch(const Regular_Expression_Format_Exceprion& e)
{
std::cerr << e.what() << "\n";
exit(0);
}
catch(const std::exception& e)
{
std::cerr << e.what() << '\n';
exit(0);
}
}
// #ifndef __MAINPROCESS__
// int main()
// {
// #ifdef __LOCAL
// freopen("usrin.txt", "r", stdin);
// freopen("usrout.txt", "w", stdout);
// #endif
// string str;
// cin >> str;
// NFA nfa(str);
// nfa.print();
// }
// #endif
#endif
//DFA.cpp
#ifndef __DFA__
#define __DFA__
#include <bits/stdc++.h>
#include "NFA.cpp"
using namespace std;
struct Dfastate
{
// 获取对于字符c的闭包
Dfastate move(char c , Nfastate* tail)
{
Dfastate ret;
for(auto x:closure)
{
if(x->input == c) ret.closure.insert(x->chTrans);
}
ret.n_closure(tail);
return ret;
}
void n_closure(Nfastate*);
void upGrade_End_info(Nfastate*);
bool isEnd; //判断是否是结束状态
set<Nfastate*> closure; //闭包
void print();
bool operator<(const Dfastate& b) const;
bool operator>(const Dfastate& b) const;
bool operator==(const Dfastate& b) const;
};
void Dfastate::print()
{
cerr << "----------Dfastate-------------\n";
for(auto x:closure) cerr << x << " "; cerr << "\n";
cerr << (isEnd ? "YES" : "NO") << "\n";
cerr << "-------------------------------\n\n";
}
bool Dfastate::operator<(const Dfastate& b)const
{
//debug
// cerr << " < \n";
return closure < b.closure;
}
bool Dfastate::operator>(const Dfastate& b)const
{
//debug
// cerr << " > \n";
return closure > b.closure;
}
bool Dfastate::operator==(const Dfastate& b)const
{
//debug
// cerr << " == \n";
return closure == b.closure;
}
void Dfastate::upGrade_End_info(Nfastate* tail)
{
if(closure.find(tail) != closure.end()) isEnd = true;
else isEnd = false;
}
void Dfastate::n_closure(Nfastate * tail)
{
bool hasnewnode = 1;
while(hasnewnode)
{
hasnewnode = 0;
for(auto x:closure)
{
for(auto y:x->epTrans)
{
if(closure.find(y) == closure.end())
{
hasnewnode = 1;
closure.insert(y);
}
}
}
}
upGrade_End_info(tail);
}
struct subDFA
{
bool isEnd; //是否是结束状态
map<int , subDFA*> to; //转移图
};
class DFA
{
public:
DFA(string &expression)
{
// expression = str;
for(auto x:expression)
{
int ntype = get_chartype(x);
if(ntype == OPERANDS) charset.insert(x);
if(ntype == ILLEGAL_CHAR)
{
cerr << "ILLEGAL CHARACHTER\n";
exit(0);
}
}
Build_DFA(expression);
//debug
// for(auto x:charset)
// {
// cerr << x << " ";
// }
}
void print();
bool match(string&);
private:
void Build_DFA(string&);
set<char> charset; //用到的字符集
subDFA* dfahead; //DFA的初始状态
// string expression;
};
bool DFA::match(string& matchstr)
{
subDFA* nowp = dfahead;
if(nowp == NULL) return false;
for(auto ch:matchstr)
{
if(!nowp->to.count(ch)) return false;
nowp = nowp->to[ch];
}
return nowp->isEnd;
}
void DFA::print()
{
int nodecnt = 0;
map<subDFA* , int> mp;
queue<subDFA*> q;
q.push(dfahead);
mp[dfahead] = ++nodecnt;
while(!q.empty())
{
subDFA * now = q.front();
q.pop();
for(auto next:now->to)
{
if(mp.count(next.second)) continue;
else
{
mp[next.second] = ++nodecnt;
q.push(next.second);
}
}
}
for(auto x:mp)
{
cout << "node[" << x.second <<"] : ";
for(auto y:x.first->to)
{
cout << "(" << char(y.first) << " , " << mp[y.second] << ") ";
}
cout << "isEnd : " << ( (x.first->isEnd) ? "YES" : "NO" );
cout << "\n";
}
}
void DFA::Build_DFA(string & expression)
{
NFA nfa(expression);
Nfastate* head = nfa.gethead();
Nfastate* tail = nfa.gettail();
map<Dfastate , subDFA*> mp_Dfastate_PsubDFA;
queue<Dfastate> q_Dfastate;
// 获取初始状态的闭包
Dfastate T;
T.closure.insert(head);
//debug
// T.print();
T.n_closure(tail);
// T.print();
//初始化map queue 以及转移的初始状态
mp_Dfastate_PsubDFA[T] = new subDFA;
mp_Dfastate_PsubDFA[T]->isEnd = T.isEnd;
q_Dfastate.push(T);
dfahead = mp_Dfastate_PsubDFA[T];
while(!q_Dfastate.empty() )
{
T = q_Dfastate.front();
q_Dfastate.pop();
//debug
// T.print();
subDFA* now = mp_Dfastate_PsubDFA[T];
for(auto ch:charset)
{
Dfastate next = T.move(ch , tail);
//debug
// cerr << "char = " << ch << "\n";
// next.print();
if(next.closure.empty()) continue;
//状态没出现过就新建节点并加入队列
if(!mp_Dfastate_PsubDFA.count(next))
{
mp_Dfastate_PsubDFA[next] = new subDFA;
mp_Dfastate_PsubDFA[next]->isEnd = next.isEnd;
q_Dfastate.push(next);
}
now->to[ch] = mp_Dfastate_PsubDFA[next];
}
}
//debug
// cerr << mp_Dfastate_PsubDFA.size() << "\n";
}
const string Digit = "(0|1|2|3|4|5|6|7|8|9)" ;
const string Pos_Digit = "(1|2|3|4|5|6|7|8|9)";
// #ifndef __MAINPROCESS__
// int main()
// {
// #ifdef __LOCAL
// freopen("usrin.txt", "r", stdin);
// freopen("usrout.txt", "w", stdout);
// #endif
// string str;
// string interger = "(0|" + Pos_Digit + Digit + "*)";
// string decimal = "(0|" + Digit + "*" + Pos_Digit + ")";
// string pat = "(" + interger + ")|(" + interger + "." + decimal + ")";
// cout << pat << "\n";
// DFA dfa(pat);
// dfa.print();
// cin >> str;
// cout << dfa.match(str);
// }
// #endif
#endif
//Main_Process.cpp
#ifndef __MAINPRECESS__
#define __MAINPROCESS__
#include<bits/stdc++.h>
#include "DFA.cpp"
using namespace std;
int main()
{
#ifdef __LOCAL
freopen("usrin.txt", "r", stdin);
freopen("usrout.txt", "w", stdout);
#endif
string str;
string interger = "(0|" + Pos_Digit + Digit + "*)";
string decimal = "(0|" + Digit + "*" + Pos_Digit + ")";
string pat = "(" + interger + ")|(" + interger + "." + decimal + ")";
// pat = "(eedd";
// pat 为正规式
// str 为待检测的字符串
pat = "(a|b)*acd*(c|d)";
str = "aacdc";
cout <<"Regular_Expression : " << pat << "\n";
// cin >> str;
cout << "MatchString : " << str << "\n\n";
// 构建DFA
DFA dfa(pat);
cout << "DFA GRAPH EXPRESSION:\n";
// 输出DFA
dfa.print();
cout << "\n";
//判断是否符合
cout << (dfa.match(str) ? "Legal\n" : "Illegal\n");
}
#endif
到了这里,关于【编译原理】【词法分析】【正则表达式】【NFA】【DFA】【C++】正则表达式转DFA&NFA,判断字符串是否符合正则表达式的匹配算法的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!