forked from jasder/antlr
added context and started using NFAConfig for lexer DFA
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6775]
This commit is contained in:
parent
cb7445cf36
commit
5d57017098
|
@ -5,9 +5,7 @@ import org.antlr.v4.misc.IntervalSet;
|
|||
import org.antlr.v4.misc.OrderedHashSet;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
public class LexerNFAToDFAConverter {
|
||||
Grammar g;
|
||||
|
@ -19,6 +17,12 @@ public class LexerNFAToDFAConverter {
|
|||
List<LexerState> work = new LinkedList<LexerState>();
|
||||
List<LexerState> accepts = new LinkedList<LexerState>();
|
||||
|
||||
/** Used to prevent the closure operation from looping to itself and
|
||||
* hence looping forever. Sensitive to the NFA state, the alt, and
|
||||
* the stack context.
|
||||
*/
|
||||
Set<NFAConfig> closureBusy;
|
||||
|
||||
public static boolean debug = false;
|
||||
|
||||
public LexerNFAToDFAConverter(Grammar g) {
|
||||
|
@ -28,6 +32,7 @@ public class LexerNFAToDFAConverter {
|
|||
}
|
||||
|
||||
public DFA createDFA() {
|
||||
closureBusy = new HashSet<NFAConfig>();
|
||||
LexerState start = computeStartState();
|
||||
dfa.startState = start;
|
||||
dfa.addState(start); // make sure dfa knows about this state
|
||||
|
@ -42,7 +47,8 @@ public class LexerNFAToDFAConverter {
|
|||
|
||||
// walk accept states, informing DFA
|
||||
for (LexerState d : accepts) {
|
||||
for (NFAState s : d.nfaStates) {
|
||||
for (NFAConfig c : d.nfaConfigs) {
|
||||
NFAState s = c.state;
|
||||
if ( s instanceof RuleStopState && !s.rule.isFragment() ) {
|
||||
dfa.defineAcceptState(s.rule.index, d);
|
||||
d.matchesRules.add(s.rule);
|
||||
|
@ -50,13 +56,23 @@ public class LexerNFAToDFAConverter {
|
|||
}
|
||||
}
|
||||
|
||||
closureBusy = null; // wack all that memory used during closure
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
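/** Build the DFA start state: add one NFA config per alt start (empty context, empty semantic context), then compute its closure. */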
|
||||
public LexerState computeStartState() {
|
||||
LexerState d = dfa.newLexerState();
|
||||
d.nfaStates.add(dfa.decisionNFAStartState);
|
||||
// add config for each alt start, then add closure for those states
|
||||
for (int ruleIndex=1; ruleIndex<=dfa.nAlts; ruleIndex++) {
|
||||
Transition t = dfa.decisionNFAStartState.transition(ruleIndex-1);
|
||||
NFAState altStart = t.target;
|
||||
d.addNFAConfig(altStart, ruleIndex,
|
||||
NFAContext.EMPTY,
|
||||
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
|
||||
}
|
||||
|
||||
closure(d);
|
||||
return d;
|
||||
}
|
||||
|
@ -106,14 +122,16 @@ public class LexerNFAToDFAConverter {
|
|||
//System.out.println("reach "+label.toString(g)+" from "+d.stateNumber);
|
||||
LexerState labelTarget = dfa.newLexerState();
|
||||
|
||||
for (NFAState s : d.nfaStates) {
|
||||
for (NFAConfig c : d.nfaConfigs) {
|
||||
NFAState s = c.state;
|
||||
int n = s.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) { // for each transition
|
||||
Transition t = s.transition(i);
|
||||
// found a transition with label; does it collide with label?
|
||||
if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
|
||||
// add NFA target to (potentially) new DFA state
|
||||
labelTarget.nfaStates.add(t.target);
|
||||
labelTarget.addNFAConfig(t.target, c.alt, c.context,
|
||||
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -130,9 +148,13 @@ public class LexerNFAToDFAConverter {
|
|||
System.out.println("closure("+d+")");
|
||||
}
|
||||
|
||||
List<NFAState> states = new ArrayList<NFAState>();
|
||||
states.addAll(d.nfaStates.elements()); // dup initial list; avoid walk/update issue
|
||||
for (NFAState s : states) closure(d, s, NFAContext.EMPTY); // update d.nfaStates
|
||||
List<NFAConfig> configs = new ArrayList<NFAConfig>();
|
||||
configs.addAll(d.nfaConfigs.elements()); // dup initial list; avoid walk/update issue
|
||||
for (NFAConfig c : configs) {
|
||||
closure(d, c.state, c.alt, c.context); // update d.nfaStates
|
||||
}
|
||||
|
||||
closureBusy.clear();
|
||||
|
||||
if ( debug ) {
|
||||
System.out.println("after closure("+d+")");
|
||||
|
@ -140,16 +162,20 @@ public class LexerNFAToDFAConverter {
|
|||
//System.out.println("after closure d="+d);
|
||||
}
|
||||
|
||||
public void closure(LexerState d, NFAState s, NFAContext context) {
|
||||
public void closure(LexerState d, NFAState s, int ruleIndex, NFAContext context) {
|
||||
NFAConfig proposedNFAConfig =
|
||||
new NFAConfig(s, ruleIndex, context, SemanticContext.EMPTY_SEMANTIC_CONTEXT);
|
||||
|
||||
if ( closureBusy.contains(proposedNFAConfig) ) return;
|
||||
closureBusy.add(proposedNFAConfig);
|
||||
|
||||
// s itself is always in closure
|
||||
d.nfaStates.add(s);
|
||||
d.nfaConfigs.add(proposedNFAConfig);
|
||||
|
||||
if ( s instanceof RuleStopState ) {
|
||||
// TODO: chase FOLLOW links if recursive
|
||||
if ( context!=NFAContext.EMPTY ) {
|
||||
if ( !d.nfaStates.contains(context.returnState) ) {
|
||||
closure(d, context.returnState, context.parent);
|
||||
}
|
||||
closure(d, context.returnState, ruleIndex, context.parent);
|
||||
// do nothing if context not empty and already added to nfaStates
|
||||
}
|
||||
else {
|
||||
|
@ -163,10 +189,10 @@ public class LexerNFAToDFAConverter {
|
|||
if ( t instanceof RuleTransition ) {
|
||||
NFAContext newContext =
|
||||
new NFAContext(context, ((RuleTransition)t).followState);
|
||||
if ( !d.nfaStates.contains(t.target) ) closure(d, t.target, newContext);
|
||||
closure(d, t.target, ruleIndex, newContext);
|
||||
}
|
||||
else if ( t.isEpsilon() && !d.nfaStates.contains(t.target) ) {
|
||||
closure(d, t.target, context);
|
||||
else if ( t.isEpsilon() ) {
|
||||
closure(d, t.target, ruleIndex, context);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -94,6 +94,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
}
|
||||
|
||||
public DFA createDFA() {
|
||||
closureBusy = new HashSet<NFAConfig>();
|
||||
computeStartState();
|
||||
dfa.addState(dfa.startState); // make sure dfa knows about this state
|
||||
work.add(dfa.startState);
|
||||
|
@ -108,6 +109,8 @@ public class StackLimitedNFAToDFAConverter {
|
|||
|
||||
unreachableAlts = getUnreachableAlts();
|
||||
|
||||
closureBusy = null; // wack all that memory used during closure
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
|
@ -266,17 +269,15 @@ public class StackLimitedNFAToDFAConverter {
|
|||
// it forward
|
||||
boolean collectPredicates = (d == dfa.startState);
|
||||
|
||||
closureBusy = new HashSet<NFAConfig>();
|
||||
|
||||
// TODO: can we avoid this separate list by directly filling d.nfaConfigs?
|
||||
// OH: concurrent modification. dup initialconfigs?
|
||||
// OH: concurrent modification. dup initialconfigs? works for lexers, try here to save configs param
|
||||
List<NFAConfig> configs = new ArrayList<NFAConfig>();
|
||||
for (NFAConfig c : d.nfaConfigs) {
|
||||
closure(c.state, c.alt, c.context, c.semanticContext, collectPredicates, configs);
|
||||
}
|
||||
d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d
|
||||
|
||||
closureBusy = null; // wack all that memory used during closure
|
||||
closureBusy.clear();
|
||||
|
||||
if ( debug ) {
|
||||
System.out.println("after closure("+d+")");
|
||||
|
|
|
@ -58,7 +58,7 @@ public class DFAState {
|
|||
new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS);
|
||||
|
||||
/** The set of NFA configurations (state,alt,context) for this DFA state */
|
||||
public OrderedHashSet<NFAConfig> nfaConfigs;
|
||||
public OrderedHashSet<NFAConfig> nfaConfigs = new OrderedHashSet<NFAConfig>();
|
||||
|
||||
/** Rather than recheck every NFA configuration in a DFA state (after
|
||||
* resolving) in reach just check this boolean. Saves a linear walk
|
||||
|
@ -75,7 +75,6 @@ public class DFAState {
|
|||
|
||||
public DFAState(DFA dfa) {
|
||||
this.dfa = dfa;
|
||||
nfaConfigs = new OrderedHashSet<NFAConfig>();
|
||||
}
|
||||
|
||||
public void addNFAConfig(NFAConfig c) {
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
package org.antlr.v4.automata;
|
||||
|
||||
import org.antlr.v4.misc.OrderedHashSet;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
|
||||
import java.util.HashSet;
|
||||
|
@ -10,7 +9,7 @@ import java.util.Set;
|
|||
* DFA used for prediction.
|
||||
*/
|
||||
public class LexerState extends DFAState {
|
||||
public OrderedHashSet<NFAState> nfaStates;
|
||||
//public OrderedHashSet<NFAState> nfaStates;
|
||||
|
||||
/** For ambiguous lexer rules, the accept state matches a set of rules,
|
||||
* not just one. Means we can't use predictsAlt (an int).
|
||||
|
@ -18,44 +17,44 @@ public class LexerState extends DFAState {
|
|||
public Set<Rule> matchesRules = new HashSet<Rule>();
|
||||
|
||||
public LexerState(DFA dfa) {
|
||||
this.dfa = dfa;
|
||||
nfaStates = new OrderedHashSet<NFAState>();
|
||||
super(dfa);
|
||||
//nfaStates = new OrderedHashSet<NFAState>();
|
||||
}
|
||||
|
||||
public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
|
||||
|
||||
public Set<Integer> getAltSet() { return null; }
|
||||
|
||||
/** Two LexerStates are equal if their NFA state lists are the
|
||||
* same. Don't test the DFA state numbers here because
|
||||
* we use this to know if any other state exists that has this exact set
|
||||
* of states. The DFAState state number is irrelevant.
|
||||
*/
|
||||
public boolean equals(Object o) {
|
||||
// compare set of NFA configurations in this set with other
|
||||
if ( this==o ) return true;
|
||||
LexerState other = (LexerState)o;
|
||||
return this.nfaStates.equals(other.nfaStates);
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int h = 0;
|
||||
for (NFAState s : nfaStates) h += s.stateNumber;
|
||||
return h;
|
||||
}
|
||||
|
||||
/** Print all NFA states plus what alts they predict */
|
||||
public String toString() {
|
||||
StringBuffer buf = new StringBuffer();
|
||||
buf.append(stateNumber+":{");
|
||||
for (int i = 0; i < nfaStates.size(); i++) {
|
||||
NFAState s = nfaStates.get(i);
|
||||
if ( i>0 ) {
|
||||
buf.append(", ");
|
||||
}
|
||||
buf.append(s);
|
||||
}
|
||||
buf.append("}");
|
||||
return buf.toString();
|
||||
}
|
||||
// public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
|
||||
//
|
||||
// public Set<Integer> getAltSet() { return null; }
|
||||
//
|
||||
// /** Two LexerStates are equal if their NFA state lists are the
|
||||
// * same. Don't test the DFA state numbers here because
|
||||
// * we use this to know if any other state exists that has this exact set
|
||||
// * of states. The DFAState state number is irrelevant.
|
||||
// */
|
||||
// public boolean equals(Object o) {
|
||||
// // compare set of NFA configurations in this set with other
|
||||
// if ( this==o ) return true;
|
||||
// LexerState other = (LexerState)o;
|
||||
// return this.nfaStates.equals(other.nfaStates);
|
||||
// }
|
||||
//
|
||||
// public int hashCode() {
|
||||
// int h = 0;
|
||||
// for (NFAState s : nfaStates) h += s.stateNumber;
|
||||
// return h;
|
||||
// }
|
||||
//
|
||||
// /** Print all NFA states plus what alts they predict */
|
||||
// public String toString() {
|
||||
// StringBuffer buf = new StringBuffer();
|
||||
// buf.append(stateNumber+":{");
|
||||
// for (int i = 0; i < nfaStates.size(); i++) {
|
||||
// NFAState s = nfaStates.get(i);
|
||||
// if ( i>0 ) {
|
||||
// buf.append(", ");
|
||||
// }
|
||||
// buf.append(s);
|
||||
// }
|
||||
// buf.append("}");
|
||||
// return buf.toString();
|
||||
// }
|
||||
}
|
||||
|
|
|
@ -330,11 +330,7 @@ public class DOTGenerator {
|
|||
}
|
||||
if ( Tool.internalOption_ShowNFAConfigsInDFA ) {
|
||||
Set<Integer> alts = ((DFAState)s).getAltSet();
|
||||
if ( s instanceof LexerState ) {
|
||||
buf.append("\\n");
|
||||
buf.append( ((LexerState)s).nfaStates.toString() );
|
||||
}
|
||||
else if ( alts!=null ) {
|
||||
if ( alts!=null ) {
|
||||
buf.append("\\n");
|
||||
// separate alts
|
||||
List<Integer> altList = new ArrayList<Integer>();
|
||||
|
|
Loading…
Reference in New Issue