added context and started using NFAConfig for lexer DFA

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6775]
This commit is contained in:
parrt 2010-03-26 15:48:04 -08:00
parent cb7445cf36
commit 5d57017098
5 changed files with 90 additions and 69 deletions

View File

@ -5,9 +5,7 @@ import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.OrderedHashSet; import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.tool.Grammar; import org.antlr.v4.tool.Grammar;
import java.util.ArrayList; import java.util.*;
import java.util.LinkedList;
import java.util.List;
public class LexerNFAToDFAConverter { public class LexerNFAToDFAConverter {
Grammar g; Grammar g;
@ -19,6 +17,12 @@ public class LexerNFAToDFAConverter {
List<LexerState> work = new LinkedList<LexerState>(); List<LexerState> work = new LinkedList<LexerState>();
List<LexerState> accepts = new LinkedList<LexerState>(); List<LexerState> accepts = new LinkedList<LexerState>();
/** Used to prevent the closure operation from looping to itself and
* hence looping forever. Sensitive to the NFA state, the alt, and
* the stack context.
*/
Set<NFAConfig> closureBusy;
public static boolean debug = false; public static boolean debug = false;
public LexerNFAToDFAConverter(Grammar g) { public LexerNFAToDFAConverter(Grammar g) {
@ -28,6 +32,7 @@ public class LexerNFAToDFAConverter {
} }
public DFA createDFA() { public DFA createDFA() {
closureBusy = new HashSet<NFAConfig>();
LexerState start = computeStartState(); LexerState start = computeStartState();
dfa.startState = start; dfa.startState = start;
dfa.addState(start); // make sure dfa knows about this state dfa.addState(start); // make sure dfa knows about this state
@ -42,7 +47,8 @@ public class LexerNFAToDFAConverter {
// walk accept states, informing DFA // walk accept states, informing DFA
for (LexerState d : accepts) { for (LexerState d : accepts) {
for (NFAState s : d.nfaStates) { for (NFAConfig c : d.nfaConfigs) {
NFAState s = c.state;
if ( s instanceof RuleStopState && !s.rule.isFragment() ) { if ( s instanceof RuleStopState && !s.rule.isFragment() ) {
dfa.defineAcceptState(s.rule.index, d); dfa.defineAcceptState(s.rule.index, d);
d.matchesRules.add(s.rule); d.matchesRules.add(s.rule);
@ -50,13 +56,23 @@ public class LexerNFAToDFAConverter {
} }
} }
closureBusy = null; // wack all that memory used during closure
return dfa; return dfa;
} }
/** */ /** */
public LexerState computeStartState() { public LexerState computeStartState() {
LexerState d = dfa.newLexerState(); LexerState d = dfa.newLexerState();
d.nfaStates.add(dfa.decisionNFAStartState); // add config for each alt start, then add closure for those states
for (int ruleIndex=1; ruleIndex<=dfa.nAlts; ruleIndex++) {
Transition t = dfa.decisionNFAStartState.transition(ruleIndex-1);
NFAState altStart = t.target;
d.addNFAConfig(altStart, ruleIndex,
NFAContext.EMPTY,
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
}
closure(d); closure(d);
return d; return d;
} }
@ -106,14 +122,16 @@ public class LexerNFAToDFAConverter {
//System.out.println("reach "+label.toString(g)+" from "+d.stateNumber); //System.out.println("reach "+label.toString(g)+" from "+d.stateNumber);
LexerState labelTarget = dfa.newLexerState(); LexerState labelTarget = dfa.newLexerState();
for (NFAState s : d.nfaStates) { for (NFAConfig c : d.nfaConfigs) {
NFAState s = c.state;
int n = s.getNumberOfTransitions(); int n = s.getNumberOfTransitions();
for (int i=0; i<n; i++) { // for each transition for (int i=0; i<n; i++) { // for each transition
Transition t = s.transition(i); Transition t = s.transition(i);
// found a transition with label; does it collide with label? // found a transition with label; does it collide with label?
if ( !t.isEpsilon() && !t.label().and(label).isNil() ) { if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
// add NFA target to (potentially) new DFA state // add NFA target to (potentially) new DFA state
labelTarget.nfaStates.add(t.target); labelTarget.addNFAConfig(t.target, c.alt, c.context,
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
} }
} }
} }
@ -130,9 +148,13 @@ public class LexerNFAToDFAConverter {
System.out.println("closure("+d+")"); System.out.println("closure("+d+")");
} }
List<NFAState> states = new ArrayList<NFAState>(); List<NFAConfig> configs = new ArrayList<NFAConfig>();
states.addAll(d.nfaStates.elements()); // dup initial list; avoid walk/update issue configs.addAll(d.nfaConfigs.elements()); // dup initial list; avoid walk/update issue
for (NFAState s : states) closure(d, s, NFAContext.EMPTY); // update d.nfaStates for (NFAConfig c : configs) {
closure(d, c.state, c.alt, c.context); // update d.nfaConfigs
}
closureBusy.clear();
if ( debug ) { if ( debug ) {
System.out.println("after closure("+d+")"); System.out.println("after closure("+d+")");
@ -140,16 +162,20 @@ public class LexerNFAToDFAConverter {
//System.out.println("after closure d="+d); //System.out.println("after closure d="+d);
} }
public void closure(LexerState d, NFAState s, NFAContext context) { public void closure(LexerState d, NFAState s, int ruleIndex, NFAContext context) {
NFAConfig proposedNFAConfig =
new NFAConfig(s, ruleIndex, context, SemanticContext.EMPTY_SEMANTIC_CONTEXT);
if ( closureBusy.contains(proposedNFAConfig) ) return;
closureBusy.add(proposedNFAConfig);
// s itself is always in closure // s itself is always in closure
d.nfaStates.add(s); d.nfaConfigs.add(proposedNFAConfig);
if ( s instanceof RuleStopState ) { if ( s instanceof RuleStopState ) {
// TODO: chase FOLLOW links if recursive // TODO: chase FOLLOW links if recursive
if ( context!=NFAContext.EMPTY ) { if ( context!=NFAContext.EMPTY ) {
if ( !d.nfaStates.contains(context.returnState) ) { closure(d, context.returnState, ruleIndex, context.parent);
closure(d, context.returnState, context.parent);
}
// do nothing if context not empty and already added to nfaStates // do nothing if context not empty and already added to nfaStates
} }
else { else {
@ -163,10 +189,10 @@ public class LexerNFAToDFAConverter {
if ( t instanceof RuleTransition ) { if ( t instanceof RuleTransition ) {
NFAContext newContext = NFAContext newContext =
new NFAContext(context, ((RuleTransition)t).followState); new NFAContext(context, ((RuleTransition)t).followState);
if ( !d.nfaStates.contains(t.target) ) closure(d, t.target, newContext); closure(d, t.target, ruleIndex, newContext);
} }
else if ( t.isEpsilon() && !d.nfaStates.contains(t.target) ) { else if ( t.isEpsilon() ) {
closure(d, t.target, context); closure(d, t.target, ruleIndex, context);
} }
} }
} }

View File

@ -94,6 +94,7 @@ public class StackLimitedNFAToDFAConverter {
} }
public DFA createDFA() { public DFA createDFA() {
closureBusy = new HashSet<NFAConfig>();
computeStartState(); computeStartState();
dfa.addState(dfa.startState); // make sure dfa knows about this state dfa.addState(dfa.startState); // make sure dfa knows about this state
work.add(dfa.startState); work.add(dfa.startState);
@ -108,6 +109,8 @@ public class StackLimitedNFAToDFAConverter {
unreachableAlts = getUnreachableAlts(); unreachableAlts = getUnreachableAlts();
closureBusy = null; // wack all that memory used during closure
return dfa; return dfa;
} }
@ -266,17 +269,15 @@ public class StackLimitedNFAToDFAConverter {
// it forward // it forward
boolean collectPredicates = (d == dfa.startState); boolean collectPredicates = (d == dfa.startState);
closureBusy = new HashSet<NFAConfig>();
// TODO: can we avoid this separate list by directly filling d.nfaConfigs? // TODO: can we avoid this separate list by directly filling d.nfaConfigs?
// OH: concurrent modification. dup initialconfigs? // OH: concurrent modification. dup initialconfigs? works for lexers, try here to save configs param
List<NFAConfig> configs = new ArrayList<NFAConfig>(); List<NFAConfig> configs = new ArrayList<NFAConfig>();
for (NFAConfig c : d.nfaConfigs) { for (NFAConfig c : d.nfaConfigs) {
closure(c.state, c.alt, c.context, c.semanticContext, collectPredicates, configs); closure(c.state, c.alt, c.context, c.semanticContext, collectPredicates, configs);
} }
d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d
closureBusy = null; // wack all that memory used during closure closureBusy.clear();
if ( debug ) { if ( debug ) {
System.out.println("after closure("+d+")"); System.out.println("after closure("+d+")");

View File

@ -58,7 +58,7 @@ public class DFAState {
new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS); new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS);
/** The set of NFA configurations (state,alt,context) for this DFA state */ /** The set of NFA configurations (state,alt,context) for this DFA state */
public OrderedHashSet<NFAConfig> nfaConfigs; public OrderedHashSet<NFAConfig> nfaConfigs = new OrderedHashSet<NFAConfig>();
/** Rather than recheck every NFA configuration in a DFA state (after /** Rather than recheck every NFA configuration in a DFA state (after
* resolving) in reach just check this boolean. Saves a linear walk * resolving) in reach just check this boolean. Saves a linear walk
@ -75,7 +75,6 @@ public class DFAState {
public DFAState(DFA dfa) { public DFAState(DFA dfa) {
this.dfa = dfa; this.dfa = dfa;
nfaConfigs = new OrderedHashSet<NFAConfig>();
} }
public void addNFAConfig(NFAConfig c) { public void addNFAConfig(NFAConfig c) {

View File

@ -1,6 +1,5 @@
package org.antlr.v4.automata; package org.antlr.v4.automata;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.tool.Rule; import org.antlr.v4.tool.Rule;
import java.util.HashSet; import java.util.HashSet;
@ -10,7 +9,7 @@ import java.util.Set;
* DFA used for prediction. * DFA used for prediction.
*/ */
public class LexerState extends DFAState { public class LexerState extends DFAState {
public OrderedHashSet<NFAState> nfaStates; //public OrderedHashSet<NFAState> nfaStates;
/** For ambiguous lexer rules, the accept state matches a set of rules, /** For ambiguous lexer rules, the accept state matches a set of rules,
* not just one. Means we can't use predictsAlt (an int). * not just one. Means we can't use predictsAlt (an int).
@ -18,44 +17,44 @@ public class LexerState extends DFAState {
public Set<Rule> matchesRules = new HashSet<Rule>(); public Set<Rule> matchesRules = new HashSet<Rule>();
public LexerState(DFA dfa) { public LexerState(DFA dfa) {
this.dfa = dfa; super(dfa);
nfaStates = new OrderedHashSet<NFAState>(); //nfaStates = new OrderedHashSet<NFAState>();
} }
public Set<NFAState> getUniqueNFAStates() { return nfaStates; } // public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
//
public Set<Integer> getAltSet() { return null; } // public Set<Integer> getAltSet() { return null; }
//
/** Two LexerStates are equal if their NFA state lists are the // /** Two LexerStates are equal if their NFA state lists are the
* same. Don't test the DFA state numbers here because // * same. Don't test the DFA state numbers here because
* we use to know if any other state exists that has this exact set // * we use to know if any other state exists that has this exact set
* of states. The DFAState state number is irrelevant. // * of states. The DFAState state number is irrelevant.
*/ // */
public boolean equals(Object o) { // public boolean equals(Object o) {
// compare set of NFA configurations in this set with other // // compare set of NFA configurations in this set with other
if ( this==o ) return true; // if ( this==o ) return true;
LexerState other = (LexerState)o; // LexerState other = (LexerState)o;
return this.nfaStates.equals(other.nfaStates); // return this.nfaStates.equals(other.nfaStates);
} // }
//
public int hashCode() { // public int hashCode() {
int h = 0; // int h = 0;
for (NFAState s : nfaStates) h += s.stateNumber; // for (NFAState s : nfaStates) h += s.stateNumber;
return h; // return h;
} // }
//
/** Print all NFA states plus what alts they predict */ // /** Print all NFA states plus what alts they predict */
public String toString() { // public String toString() {
StringBuffer buf = new StringBuffer(); // StringBuffer buf = new StringBuffer();
buf.append(stateNumber+":{"); // buf.append(stateNumber+":{");
for (int i = 0; i < nfaStates.size(); i++) { // for (int i = 0; i < nfaStates.size(); i++) {
NFAState s = nfaStates.get(i); // NFAState s = nfaStates.get(i);
if ( i>0 ) { // if ( i>0 ) {
buf.append(", "); // buf.append(", ");
} // }
buf.append(s); // buf.append(s);
} // }
buf.append("}"); // buf.append("}");
return buf.toString(); // return buf.toString();
} // }
} }

View File

@ -330,11 +330,7 @@ public class DOTGenerator {
} }
if ( Tool.internalOption_ShowNFAConfigsInDFA ) { if ( Tool.internalOption_ShowNFAConfigsInDFA ) {
Set<Integer> alts = ((DFAState)s).getAltSet(); Set<Integer> alts = ((DFAState)s).getAltSet();
if ( s instanceof LexerState ) { if ( alts!=null ) {
buf.append("\\n");
buf.append( ((LexerState)s).nfaStates.toString() );
}
else if ( alts!=null ) {
buf.append("\\n"); buf.append("\\n");
// separate alts // separate alts
List<Integer> altList = new ArrayList<Integer>(); List<Integer> altList = new ArrayList<Integer>();