今天来为大家分享一个编译原理中用正规表达式转NFA的小程序

正规表达式就是类似正则一样的式子,例如:(a|b)*abb,最后应该转化为:

正规表达式 转 NFA C++-LMLPHP

大致的处理流程为:

正规表达式 转 NFA C++-LMLPHP

例子中的表达式:(a|b)*abb,|和*都是运算法则,而且容易识别,但是处理abb就不是你那么方便了,所以我们在abb中间加上+号,就可以像|*那样识别了,所以处理后为(a|b)*a+b+b

我们识别出来之后,首先根据书中提供的运算符->NFA部件的图转化为NFA部件,之后再根据优先级和各个部件组建NFA

运算符对应NFA中的各个部件图为:

ε符:

        正规表达式 转 NFA C++-LMLPHP

φ符:

        正规表达式 转 NFA C++-LMLPHP

输入符号:

        正规表达式 转 NFA C++-LMLPHP

| 符:      

  正规表达式 转 NFA C++-LMLPHP

+符:

    正规表达式 转 NFA C++-LMLPHP

*符:

    正规表达式 转 NFA C++-LMLPHP

有一个问题,NFA的开始和终止都是状态集合,但是整改了好多次,没能设计出来,所以该程序产生的NFA均只有一个开始状态和一个终止状态

代码注释挺清楚的,我就不过多描述了

G[S]->NFA代码如下:

#ifndef _GNFA_
#define _GNFA_ /*
@author:Lv
@time:2018-11
@title:正规式转NFA
*/ #include <iostream>
#include <string>
#include <vector>
#include <map>
#include <set> namespace GNFAs
{
#define stds std:: /*
@brief 记录NFA每一个状态
@member 记录名称
*/
struct state
{
stds string _staName;
//state(const stds string& name = "#") :_staName(name) { }
bool operator<(const state& b)const { return _staName < b._staName; }
}; /*
@brief 记录状态之间转换的边信息
@member 起始状态、终止状态、状态转换输入符号
*/
struct edge
{
state _edgStart;
state _edgEnd;
char _edgSymbol;
}; /*
NFA-class
*/
class NFA
{
public:
using ostream = stds ostream;
using istream = stds istream;
using exptype = stds string;
using map_sta = stds vector<edge>;
using container_sta = stds set<state>;
using container_sym = stds set<char>;
public:
/*
@brief nfaunit 用于记录NFA数据集合--NFA基本数据类型--NFA单元
@member K 状态集合
Σ 字母表
f 状态映射
S 开始状态
Z 终止状态集合
*/
typedef struct NFAunit
{
container_sta K;
container_sym Σ;
map_sta f;
state S;
state Z; NFAunit()
{
f.clear();
K.clear();
Σ.clear();
Z = state();
S = state();
} NFAunit(const NFAunit& other)
:K(other.K)
,Σ(other.Σ)
,f(other.f)
,S(other.S)
,Z(other.Z)
{ } NFAunit& operator=(const NFAunit& other)
{
if (this != &other)
{
K = other.K;
Σ = other.Σ;
f = other.f;
S = other.S;
Z = other.Z;
}
return *this;
} NFAunit& operator+=(const NFAunit& other)
{
K.insert(other.K.begin(), other.K.end());
Σ.insert(other.Σ.begin(), other.Σ.end());
f.insert(f.end(), other.f.begin(), other.f.end());
return *this;
}
} _type_; public:
NFA(); NFA(const NFA&); NFA& operator=(const NFA&); _type_ getNFA()const { return _data; } exptype getExpression()const { return _expression; } public:
/*
@brief 输入正规式
*/
void input(); /*
@brief 转成NFA
*/
void toNFA(); /*
@brief 展示NFA
*/
void show()const; /*
@brief 刷新数据
*/
void update(); /*
@brief 运行
*/
void run(); private:
/*
@brief 检查正规式是否合法
@retur 是否合法
*/
bool _checkExp()const; /*
@brief 检查正规式字符是否合法
@retur 是否合法
*/
bool _checkSym()const; /*
@brief 检查正规式语法,如:括号匹配
@retur 是否合法
*/
bool _checkSync()const; /*
@brief 做一些处理便于表达式转换
*/
void change(); /*
@brief 中缀转后缀
*/
void postexp(); /*
@brief 栈内优先级
*/
int inpriority(const char)const; /*
@brief 栈外优先级
*/
int outpriority(const char)const; /*
@brief 整合a|b
*/
_type_ _or(_type_, _type_); /*
@brief 整合ab
*/
_type_ _mul(_type_, _type_); /*
@brief 整合a*
*/
_type_ _star(_type_); /*
@brief 整合单元
*/
_type_ _unit(const char); private:
int _staNum;
ostream& _out;
istream& _in;
NFAunit _data;
exptype _expression;
};
} #endif //_GNFA_
#include "GNFA.h"
using namespace GNFAs; #include <stack> using stack_unit = stds stack<NFA::NFAunit>;
using stack_char = stds stack<char>; #define enter stds endl NFA::NFA()
:_staNum()
, _out(std::cout)
, _in(std::cin)
{ } void NFA::input()
{
_out << "请输入正规式:" << enter; while (!_checkExp()) _in >> _expression;
} bool NFA::_checkExp()const
{
if (!_checkSym())
{
_out << "含有非法字符!" << enter;
return false;
}
if (!_checkSync())
{
_out << "含有语法错误!" << enter;
return false;
}
return _expression != exptype();
} bool NFA::_checkSym()const
{
for (int i = ; i < _expression.size(); ++i)
{
if (islower(_expression[i])) continue;
else if (_expression[i] == '(' || _expression[i] == ')' || _expression[i] == '*' || _expression[i] == '|')
continue;
else
return false;
}
return true;
} bool NFA::_checkSync()const
{
stack_char stack;
for (int i = ; i < _expression.size(); ++i)
{
if (_expression[i] == '(')
stack.push('(');
else if (_expression[i] == ')')
{
if (stack.size() && stack.top() == '(')
stack.pop();
else return false;
}
if (_expression[i] == '*')
{
if (i &&_expression[i - ] != '|')
continue;
else return false;
}
}
if (stack.size())return false;
return true;
} void NFA::change()
{
exptype t; char s, e; for (int i = ; i < _expression.size(); ++i)
{
s = _expression[i];
e = _expression[i + ];
t += s; if (s != '(' && s != '|' && islower(e)) t += '+';
else if (e == '(' && s != '|' && s != '(') t += '+';
}
t += e;
_expression = t;
} int NFA::inpriority(const char c)const
{
switch (c)
{
case '#': return ;
case '(': return ;
case '*': return ;
case '|': return ;
case '+': return ;
case ')': return ;
}
return -;
} int NFA::outpriority(const char c)const
{
switch (c)
{
case '#': return ;
case '(': return ;
case '*': return ;
case '|': return ;
case '+': return ;
case ')': return ;
}
return -;
} void NFA::postexp()
{
_expression += '#';
exptype t = "";
stack_char s;
char ch = '#', ch1, op;
s.push(ch); //读一个字符
int read_location = ;
ch = _expression.at(read_location++);
while (!s.empty())
{
if (islower(ch))
{
t += ch;
ch = _expression.at(read_location++);
}
else
{
ch1 = s.top();
if (inpriority(ch1)<outpriority(ch))
{
s.push(ch);
ch = _expression.at(read_location++);
}
else if (inpriority(ch1)>outpriority(ch))
{
op = s.top();
s.pop();
t += op;
}
else
{
op = s.top();
s.pop(); if (op == '(')
ch = _expression.at(read_location++);
}
}
}
t.erase(t.end() - );
_expression = t;
} void NFA::toNFA()
{
char item;
_type_ left, right;
stack_unit stack; for (int i = ; i < _expression.size(); ++i)
{
item = _expression[i];
switch (item)
{
case '|':
right = stack.top();
stack.pop();
left = stack.top();
stack.pop();
_data = _or(left, right);
stack.push(_data);
break;
case '*':
left = stack.top();
stack.pop();
_data = _star(left);
stack.push(_data);
break;
case '+':
right = stack.top();
stack.pop();
left = stack.top();
stack.pop();
_data = _mul(left, right);
stack.push(_data);
break;
default:
_data = _unit(item);
stack.push(_data);
}
} _data = stack.top();
stack.pop();
} NFA::_type_ NFA::_or(_type_ unitl, _type_ unitr)
{
_type_ unit;
exptype name;
edge e1, e2, e3, e4; state start { name += _staNum++ + 'A' };
name = "";
state end{ name += _staNum++ + 'A' }; e1._edgStart = start;
e1._edgEnd = unitl.f[]._edgStart;
e1._edgSymbol = '#'; e2._edgStart = start;
e2._edgEnd = unitr.f[]._edgStart;
e2._edgSymbol = '#'; e3._edgStart = unitl.f[unitl.f.size() - ]._edgEnd;
e3._edgEnd = end;
e3._edgSymbol = '#'; e4._edgStart = unitr.f[unitr.f.size() - ]._edgEnd;
e4._edgEnd = end;
e4._edgSymbol = '#'; unit = unitl;
unit += unitr;
unit.f.push_back(e1);
unit.f.push_back(e2);
unit.f.push_back(e3);
unit.f.push_back(e4); unit.S = start;
unit.Z = end; return unit;
} NFA::_type_ NFA::_mul(_type_ unitl, _type_ unitr)
{
for (auto &it : unitr.f)
{
if (it._edgStart._staName == unitr.S._staName)
{
it._edgStart = unitl.Z;
_staNum--;
}
else if (it._edgEnd._staName == unitr.S._staName)
{
it._edgEnd = unitl.Z;
_staNum--;
}
}
unitr.S = unitl.Z;
unitl += unitr;
unitl.Z = unitr.Z; return unitl;
} NFA::_type_ NFA::_star(_type_ u)
{
_type_ unit;
exptype name;
edge e1, e2, e3, e4; state start{ name += _staNum++ + 'A' };
name = "";
state end{ name += _staNum++ + 'A' }; e1._edgStart = start;
e1._edgEnd = end;
e1._edgSymbol = '#'; e2._edgStart = u.Z;
e2._edgEnd = u.S;
e2._edgSymbol = '#'; e3._edgStart = start;
e3._edgEnd = u.Z;
e3._edgSymbol = '#'; e4._edgStart = u.Z;
e4._edgEnd = start;
e4._edgSymbol = '#'; unit = u; unit.f.push_back(e1);
unit.f.push_back(e2);
unit.f.push_back(e3);
unit.f.push_back(e4); unit.S = start;
unit.Z = end; return unit;
} NFA::_type_ NFA::_unit(const char ch)
{
_type_ unit;
exptype name;
edge e; state start{ name += _staNum++ + 'A' };
name = "";
state end{ name += _staNum++ + 'A' }; e._edgStart = start;
e._edgEnd = end;
e._edgSymbol = ch; unit.f.push_back(e);
unit.S = start;
unit.Z = end; return unit;
} void NFA::show()const
{
_out << "NFA 的起始状态:" << _data.S._staName << enter;
_out << "NFA 的结束状态:" << _data.Z._staName << enter << enter; for (auto it : _data.f)
{
_out << "from state: " << it._edgStart._staName
<< "\t to state: " << it._edgEnd._staName
<< "\t\tby ";
if (it._edgSymbol == '#') _out << R"+(ε)+" << enter;
else _out << it._edgSymbol << enter;
}
_out << enter;
} void NFA::update()
{
for (auto it : _data.f)
{
_data.K.insert(it._edgStart);
_data.K.insert(it._edgEnd);
_data.Σ.insert(it._edgSymbol);
}
} void NFA::run()
{
input(); change(); postexp(); toNFA(); show(); update();
}
#include "GNFA.h"
using namespace GNFAs; int main()
{
NFA nfa;
nfa.run();
}

测试结果:

正规表达式 转 NFA C++-LMLPHP

感谢您的阅读,生活愉快~

05-11 19:21