正則表達式的基本運算
正則表達式有三種基本的運算:
- 連接(Concatenation), 例如 abc, 由a, b, c組成
- 聯合(Union), 例如 a|b|c, 表示a或者b或者c
- Kleene閉包(Kleene *), 例如 (ab)*, 表示ab串出現零次或多次(即不出現,或者出現一次或一次以上)
其它的運算如+, {}等都可以用以上三種基本運算或者運算的組合來表示。
正則表達式轉換成NFA
1)(a)R=AB (b)R=A|B (c)R=A*
r=s|t
r=st
r=s*
2)例題3.26:爲正則表達式r=(a|b)*abb構造一個NFA
①對於(a|b)中的a構造NFA
②對於(a|b)中的b構造NFA
③對於(a|b)構造NFA,將上面兩個圖合併
④(a|b)*構造NFA
⑤對於abb中的a構造NFA,對於abb中的兩個b構造NFA同④類似
⑥將④⑤合併之後
⑦最終的NFA是
正則表達式轉換成NFA代碼
1.正則表達式後綴化《逆波蘭表達式&後綴表達式》,並將其中操作符進行修改
/**
 * Operators recognised in the pre-processed regular expression, each paired with the
 * character that represents it and a numeric priority.
 *
 * The constants reuse arithmetic names for the regex operations:
 * DIVIDE is union ('|'), ADD is explicit concatenation ('&'), MULTIPLY is the Kleene star ('*').
 *
 * NOTE(review): '*' shares priority 2 with '&'. In a conventional precedence table the
 * Kleene star binds tighter than concatenation; whether an input such as "A&B*" is
 * converted correctly depends on how SuffixAlgorithm.rpn treats equal priorities — confirm.
 */
public static enum Operator {
// Union: lowest priority of the three regex operations.
DIVIDE('|', 1),
// Concatenation and Kleene star share the same priority value.
ADD('&', 2),MULTIPLY('*', 2),
// Grouping brackets carry the highest priority value.
LEFT_BRACKET('(', 3), RIGHT_BRACKET(')', 3);
// Character that represents this operator in the expression.
char value;
// Priority value of this operator; presumably consumed by the infix-to-suffix conversion — TODO confirm.
int priority;
// ... remaining members are elided in this excerpt; presumably the constructor and the
// isOperator/findOperator lookups called by NfaAlgorithm.transformNFA — TODO confirm.
}
2.構造NFA源碼
/**
 * Builds an NFA from a regular expression by combining small graphs, one per operand,
 * with the three basic operations: concatenation, union and Kleene star.
 *
 * <p>Pipeline used by {@link #main(String[])}: {@code prepareString} inserts an explicit
 * {@code '&'} concatenation operator, {@code SuffixAlgorithm.rpn} turns the result into a
 * suffix (reverse Polish) expression, and {@link #transformNFA(String)} evaluates that
 * expression with an operand stack.
 *
 * <p>Node ids come from a plain static counter in {@link Node}, so this class is not
 * thread-safe.
 */
public class NfaAlgorithm {

    /** Label placed on edges that consume no input symbol. */
    private static final String EPSILON = "epsilon";

    /**
     * Strips spaces from the regular expression and makes concatenation explicit by
     * inserting {@code '&'} between adjacent operands.
     *
     * <p>A {@code '&'} is inserted before a character when that character can start an
     * operand (it is not {@code '|'}, {@code '*'} or {@code ')'}) and the previous
     * character can end one (it is not {@code '('} or {@code '|'}).
     * For example {@code "(A*B|AC)D"} becomes {@code "(A*&B|A&C)&D"}.
     *
     * @param _regex the infix regular expression; must not be {@code null}
     * @return the expression without spaces and with explicit {@code '&'} operators
     */
    private static String prepareString(String _regex) {
        char[] chars = _regex.replace(" ", "").toCharArray();
        StringBuilder prepared = new StringBuilder(chars.length * 2);
        for (int i = 0; i < chars.length; i++) {
            char current = chars[i];
            if (i > 0 && canStartOperand(current) && canEndOperand(chars[i - 1])) {
                prepared.append('&');
            }
            prepared.append(current);
        }
        return prepared.toString();
    }

    /** Returns whether {@code c} may begin an operand (a symbol or an opening bracket). */
    private static boolean canStartOperand(char c) {
        return c != '|' && c != '*' && c != ')';
    }

    /** Returns whether an operand may end at {@code c} (a symbol, {@code '*'} or {@code ')'}). */
    private static boolean canEndOperand(char c) {
        return c != '(' && c != '|';
    }

    /**
     * Evaluates a suffix (reverse Polish) regular expression into an NFA.
     *
     * <p>Non-operator characters are pushed as single-character operands. Each operator
     * pops its operand(s), combines them into a {@link Graph} and pushes the result back.
     * The graph left on top of the stack after the last character is returned.
     *
     * @param suffixRegex the expression in suffix form, using {@code '&'}, {@code '|'}
     *                    and {@code '*'} as operators
     * @return the graph on top of the operand stack once the whole expression is consumed
     * @throws java.util.EmptyStackException if an operator finds too few operands, or the
     *         expression is empty
     * @throws IllegalArgumentException if the expression contains an operator other than
     *         {@code '&'}, {@code '|'} or {@code '*'} (for example a bracket)
     */
    public static Graph transformNFA(String suffixRegex) {
        Stack<Object> operandStack = new Stack<>();
        for (char symbol : suffixRegex.toCharArray()) {
            if (!Operator.isOperator(symbol)) {
                operandStack.push(String.valueOf(symbol));
                continue;
            }
            Operator operator = Operator.findOperator(symbol);
            Graph graph;
            switch (operator) {
                case ADD: { // '&' : concatenation, the right operand is on top of the stack
                    Object right = operandStack.pop();
                    Object left = operandStack.pop();
                    graph = Graph.parse(left);
                    graph.add(right);
                    break;
                }
                case MULTIPLY: { // '*' : Kleene star, unary
                    Object operand = operandStack.pop();
                    graph = Graph.parse(operand);
                    graph.multiply();
                    break;
                }
                case DIVIDE: { // '|' : union, the right operand is on top of the stack
                    Object right = operandStack.pop();
                    Object left = operandStack.pop();
                    graph = Graph.parse(left);
                    graph.divide(right);
                    break;
                }
                default:
                    // Brackets must not survive the suffix conversion; fail here instead of
                    // pushing a null graph that would blow up later with an unrelated NPE.
                    throw new IllegalArgumentException("not support operator[" + symbol + "]");
            }
            operandStack.push(graph);
        }
        return Graph.parse(operandStack.pop());
    }

    /**
     * Demo entry point: prints the prepared expression, its suffix form and the resulting NFA
     * for the sample expression {@code (A*B|AC)D}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String exp = prepareString("(A*B|AC)D");
        //String exp = prepareString("AB");
        System.out.println(exp);
        String regex = SuffixAlgorithm.rpn(exp);
        System.out.println(regex);
        Graph graph = transformNFA(regex);
        System.out.println(graph);
    }

    /**
     * An NFA fragment: a list of labelled edges plus one start node and one end node.
     *
     * <p>Every operation in this class either keeps the current start/end node or replaces
     * it with a freshly created one, so the start node never has incoming edges and the
     * end node never has outgoing edges. {@link #add(Graph)} relies on this.
     *
     * <p>Operations mutate the receiver in place and absorb the edges of the argument
     * graph; the argument should not be used afterwards.
     */
    static class Graph {
        private final List<Edge> edges;
        private Node start;
        private Node end;

        /** Creates the two-node fragment {@code start --label--> end}. */
        private Graph(String label) {
            this.start = new Node();
            this.end = new Node();
            this.edges = new ArrayList<>();
            this.edges.add(new Edge(start, end, label));
        }

        /**
         * Converts a stack operand into a graph.
         *
         * @param obj a {@link Graph} (returned as is) or a {@link String} label (wrapped in a
         *            new single-edge graph)
         * @return the graph for the operand
         * @throws IllegalArgumentException if {@code obj} is {@code null} or of another type
         */
        public static Graph parse(Object obj) {
            if (obj instanceof Graph) {
                return (Graph) obj;
            }
            if (obj instanceof String) {
                return new Graph((String) obj);
            }
            throw unsupported(obj);
        }

        /**
         * Concatenates the operand after this graph.
         *
         * @param obj a {@link String} label or a {@link Graph}
         * @throws IllegalArgumentException if {@code obj} is {@code null} or of another type
         */
        public void add(Object obj) {
            if (obj instanceof String) {
                add((String) obj);
            } else if (obj instanceof Graph) {
                add((Graph) obj);
            } else {
                throw unsupported(obj);
            }
        }

        /**
         * Turns this graph into the union of itself and the operand.
         *
         * @param obj a {@link String} label or a {@link Graph}
         * @throws IllegalArgumentException if {@code obj} is {@code null} or of another type
         */
        public void divide(Object obj) {
            if (obj instanceof String) {
                divide(parse(obj));
            } else if (obj instanceof Graph) {
                divide((Graph) obj);
            } else {
                throw unsupported(obj);
            }
        }

        /**
         * Applies the Kleene star: wraps this graph between a new start and a new end node
         * and adds four epsilon edges (loop back, enter, leave, and skip).
         */
        public void multiply() {
            Node innerStart = this.start;
            Node innerEnd = this.end;
            // Creation order (start first, then end) determines the node ids.
            this.start = new Node();
            this.end = new Node();
            this.edges.add(new Edge(innerEnd, innerStart, EPSILON));   // repeat
            this.edges.add(new Edge(this.start, innerStart, EPSILON)); // enter
            this.edges.add(new Edge(innerEnd, this.end, EPSILON));     // leave
            this.edges.add(new Edge(this.start, this.end, EPSILON));   // zero occurrences
        }

        /**
         * Union with another graph: a new start node branches by epsilon edges into both
         * graphs, and both old end nodes join by epsilon edges into a new end node.
         */
        private void divide(Graph graph) {
            Node ownStart = this.start;
            Node ownEnd = this.end;
            // Creation order (start first, then end) determines the node ids.
            this.start = new Node();
            this.end = new Node();
            this.edges.add(new Edge(this.start, graph.start, EPSILON));
            this.edges.add(new Edge(this.start, ownStart, EPSILON));
            this.edges.add(new Edge(ownEnd, this.end, EPSILON));
            this.edges.add(new Edge(graph.end, this.end, EPSILON));
            this.edges.addAll(graph.edges);
        }

        /** Appends one edge with the given label from the current end to a new end node. */
        private void add(String label) {
            Node previousEnd = this.end;
            this.end = new Node();
            this.edges.add(new Edge(previousEnd, this.end, label));
        }

        /**
         * Concatenates another graph by merging this graph's end node into the other
         * graph's start node. Only edges that end at the old end node are redirected,
         * because an end node never has outgoing edges.
         */
        private void add(Graph graph) {
            Node previousEnd = this.end;
            this.end = graph.end;
            for (Edge edge : this.edges) {
                if (edge.end == previousEnd) {
                    edge.end = graph.start;
                }
            }
            this.edges.addAll(graph.edges);
        }

        /** Builds the exception thrown for operands that are neither a label nor a graph. */
        private static IllegalArgumentException unsupported(Object obj) {
            // Guard the null case so the error message itself cannot raise an NPE.
            return new IllegalArgumentException(
                    "not support class[" + (obj == null ? null : obj.getClass()) + "]");
        }

        /** Returns a header line with the start and end node, followed by one line per edge. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder();
            text.append("Start=").append(this.start).append(" End=").append(this.end).append("\n");
            for (Edge edge : edges) {
                text.append(edge).append("\n");
            }
            return text.toString();
        }
    }

    /** A directed, labelled edge between two nodes. */
    static class Edge {
        private final Node begin;
        // Not final: Graph.add(Graph) redirects this when two fragments are concatenated.
        private Node end;
        private final String label;

        /**
         * @param begin the node the edge leaves
         * @param end   the node the edge enters
         * @param label the edge label: an input symbol or the epsilon label
         */
        public Edge(Node begin, Node end, String label) {
            this.begin = begin;
            this.end = end;
            this.label = label;
        }

        @Override
        public String toString() {
            return "Edge [begin=" + begin + ", end=" + end + ", label=" + label + "]";
        }
    }

    /** A node identified by a sequential id taken from a class-wide counter. */
    static class Node {
        /** Id handed to the next node created; shared by all graphs and not synchronized. */
        private static int nextId = 0;

        private final int id;

        /** Creates a node with the next free id. */
        public Node() {
            this.id = nextId++;
        }

        /** @return the id assigned at construction */
        public int getId() {
            return id;
        }

        /** Restarts id numbering at 0; nodes created earlier keep their ids. */
        public static void reset() {
            nextId = 0;
        }

        @Override
        public String toString() {
            return id + "";
        }
    }
}
參考文章: