added context and started using NFAConfig for lexer DFA

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6775]
This commit is contained in:
parrt 2010-03-26 15:48:04 -08:00
parent cb7445cf36
commit 5d57017098
5 changed files with 90 additions and 69 deletions

View File

@ -5,9 +5,7 @@ import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.tool.Grammar;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.*;
public class LexerNFAToDFAConverter {
Grammar g;
@ -19,6 +17,12 @@ public class LexerNFAToDFAConverter {
List<LexerState> work = new LinkedList<LexerState>();
List<LexerState> accepts = new LinkedList<LexerState>();
/** Used to prevent the closure operation from looping to itself and
* hence looping forever. Sensitive to the NFA state, the alt, and
* the stack context.
*/
Set<NFAConfig> closureBusy;
public static boolean debug = false;
public LexerNFAToDFAConverter(Grammar g) {
@ -28,6 +32,7 @@ public class LexerNFAToDFAConverter {
}
public DFA createDFA() {
closureBusy = new HashSet<NFAConfig>();
LexerState start = computeStartState();
dfa.startState = start;
dfa.addState(start); // make sure dfa knows about this state
@ -42,7 +47,8 @@ public class LexerNFAToDFAConverter {
// walk accept states, informing DFA
for (LexerState d : accepts) {
for (NFAState s : d.nfaStates) {
for (NFAConfig c : d.nfaConfigs) {
NFAState s = c.state;
if ( s instanceof RuleStopState && !s.rule.isFragment() ) {
dfa.defineAcceptState(s.rule.index, d);
d.matchesRules.add(s.rule);
@ -50,13 +56,23 @@ public class LexerNFAToDFAConverter {
}
}
closureBusy = null; // whack all that memory used during closure
return dfa;
}
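/** Create the DFA start state: add one NFA config per alt of the
 * decision start state, then compute the closure of those configs.
 */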
public LexerState computeStartState() {
LexerState d = dfa.newLexerState();
d.nfaStates.add(dfa.decisionNFAStartState);
// add config for each alt start, then add closure for those states
for (int ruleIndex=1; ruleIndex<=dfa.nAlts; ruleIndex++) {
Transition t = dfa.decisionNFAStartState.transition(ruleIndex-1);
NFAState altStart = t.target;
d.addNFAConfig(altStart, ruleIndex,
NFAContext.EMPTY,
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
}
closure(d);
return d;
}
@ -106,14 +122,16 @@ public class LexerNFAToDFAConverter {
//System.out.println("reach "+label.toString(g)+" from "+d.stateNumber);
LexerState labelTarget = dfa.newLexerState();
for (NFAState s : d.nfaStates) {
for (NFAConfig c : d.nfaConfigs) {
NFAState s = c.state;
int n = s.getNumberOfTransitions();
for (int i=0; i<n; i++) { // for each transition
Transition t = s.transition(i);
// found a transition with label; does it collide with label?
if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
// add NFA target to (potentially) new DFA state
labelTarget.nfaStates.add(t.target);
labelTarget.addNFAConfig(t.target, c.alt, c.context,
SemanticContext.EMPTY_SEMANTIC_CONTEXT);
}
}
}
@ -130,9 +148,13 @@ public class LexerNFAToDFAConverter {
System.out.println("closure("+d+")");
}
List<NFAState> states = new ArrayList<NFAState>();
states.addAll(d.nfaStates.elements()); // dup initial list; avoid walk/update issue
for (NFAState s : states) closure(d, s, NFAContext.EMPTY); // update d.nfaStates
List<NFAConfig> configs = new ArrayList<NFAConfig>();
configs.addAll(d.nfaConfigs.elements()); // dup initial list; avoid walk/update issue
for (NFAConfig c : configs) {
closure(d, c.state, c.alt, c.context); // update d.nfaConfigs
}
closureBusy.clear();
if ( debug ) {
System.out.println("after closure("+d+")");
@ -140,16 +162,20 @@ public class LexerNFAToDFAConverter {
//System.out.println("after closure d="+d);
}
public void closure(LexerState d, NFAState s, NFAContext context) {
public void closure(LexerState d, NFAState s, int ruleIndex, NFAContext context) {
NFAConfig proposedNFAConfig =
new NFAConfig(s, ruleIndex, context, SemanticContext.EMPTY_SEMANTIC_CONTEXT);
if ( closureBusy.contains(proposedNFAConfig) ) return;
closureBusy.add(proposedNFAConfig);
// s itself is always in closure
d.nfaStates.add(s);
d.nfaConfigs.add(proposedNFAConfig);
if ( s instanceof RuleStopState ) {
// TODO: chase FOLLOW links if recursive
if ( context!=NFAContext.EMPTY ) {
if ( !d.nfaStates.contains(context.returnState) ) {
closure(d, context.returnState, context.parent);
}
closure(d, context.returnState, ruleIndex, context.parent);
// no membership check needed here: closure() returns early for any config already in closureBusy
}
else {
@ -163,10 +189,10 @@ public class LexerNFAToDFAConverter {
if ( t instanceof RuleTransition ) {
NFAContext newContext =
new NFAContext(context, ((RuleTransition)t).followState);
if ( !d.nfaStates.contains(t.target) ) closure(d, t.target, newContext);
closure(d, t.target, ruleIndex, newContext);
}
else if ( t.isEpsilon() && !d.nfaStates.contains(t.target) ) {
closure(d, t.target, context);
else if ( t.isEpsilon() ) {
closure(d, t.target, ruleIndex, context);
}
}
}

View File

@ -94,6 +94,7 @@ public class StackLimitedNFAToDFAConverter {
}
public DFA createDFA() {
closureBusy = new HashSet<NFAConfig>();
computeStartState();
dfa.addState(dfa.startState); // make sure dfa knows about this state
work.add(dfa.startState);
@ -108,6 +109,8 @@ public class StackLimitedNFAToDFAConverter {
unreachableAlts = getUnreachableAlts();
closureBusy = null; // whack all that memory used during closure
return dfa;
}
@ -266,17 +269,15 @@ public class StackLimitedNFAToDFAConverter {
// it forward
boolean collectPredicates = (d == dfa.startState);
closureBusy = new HashSet<NFAConfig>();
// TODO: can we avoid this separate list by directly filling d.nfaConfigs?
// OH: concurrent modification. dup initialconfigs?
// OH: concurrent modification. dup initialconfigs? works for lexers, try here to save configs param
List<NFAConfig> configs = new ArrayList<NFAConfig>();
for (NFAConfig c : d.nfaConfigs) {
closure(c.state, c.alt, c.context, c.semanticContext, collectPredicates, configs);
}
d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d
closureBusy = null; // whack all that memory used during closure
closureBusy.clear();
if ( debug ) {
System.out.println("after closure("+d+")");

View File

@ -58,7 +58,7 @@ public class DFAState {
new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS);
/** The set of NFA configurations (state,alt,context) for this DFA state */
public OrderedHashSet<NFAConfig> nfaConfigs;
public OrderedHashSet<NFAConfig> nfaConfigs = new OrderedHashSet<NFAConfig>();
/** Rather than recheck every NFA configuration in a DFA state (after
* resolving) in reach just check this boolean. Saves a linear walk
@ -75,7 +75,6 @@ public class DFAState {
public DFAState(DFA dfa) {
this.dfa = dfa;
nfaConfigs = new OrderedHashSet<NFAConfig>();
}
public void addNFAConfig(NFAConfig c) {

View File

@ -1,6 +1,5 @@
package org.antlr.v4.automata;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.tool.Rule;
import java.util.HashSet;
@ -10,7 +9,7 @@ import java.util.Set;
* DFA used for prediction.
*/
public class LexerState extends DFAState {
public OrderedHashSet<NFAState> nfaStates;
//public OrderedHashSet<NFAState> nfaStates;
/** For ambiguous lexer rules, the accept state matches a set of rules,
* not just one. Means we can't use predictsAlt (an int).
@ -18,44 +17,44 @@ public class LexerState extends DFAState {
public Set<Rule> matchesRules = new HashSet<Rule>();
public LexerState(DFA dfa) {
this.dfa = dfa;
nfaStates = new OrderedHashSet<NFAState>();
super(dfa);
//nfaStates = new OrderedHashSet<NFAState>();
}
public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
public Set<Integer> getAltSet() { return null; }
/** Two LexerStates are equal if their NFA state lists are the
* same. Don't test the DFA state numbers here because
* we use to know if any other state exists that has this exact set
* of states. The DFAState state number is irrelevant.
*/
public boolean equals(Object o) {
// compare set of NFA configurations in this set with other
if ( this==o ) return true;
LexerState other = (LexerState)o;
return this.nfaStates.equals(other.nfaStates);
}
public int hashCode() {
int h = 0;
for (NFAState s : nfaStates) h += s.stateNumber;
return h;
}
/** Print all NFA states plus what alts they predict */
public String toString() {
StringBuffer buf = new StringBuffer();
buf.append(stateNumber+":{");
for (int i = 0; i < nfaStates.size(); i++) {
NFAState s = nfaStates.get(i);
if ( i>0 ) {
buf.append(", ");
}
buf.append(s);
}
buf.append("}");
return buf.toString();
}
// public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
//
// public Set<Integer> getAltSet() { return null; }
//
// /** Two LexerStates are equal if their NFA state lists are the
// * same. Don't test the DFA state numbers here because
// * we use to know if any other state exists that has this exact set
// * of states. The DFAState state number is irrelevant.
// */
// public boolean equals(Object o) {
// // compare set of NFA configurations in this set with other
// if ( this==o ) return true;
// LexerState other = (LexerState)o;
// return this.nfaStates.equals(other.nfaStates);
// }
//
// public int hashCode() {
// int h = 0;
// for (NFAState s : nfaStates) h += s.stateNumber;
// return h;
// }
//
// /** Print all NFA states plus what alts they predict */
// public String toString() {
// StringBuffer buf = new StringBuffer();
// buf.append(stateNumber+":{");
// for (int i = 0; i < nfaStates.size(); i++) {
// NFAState s = nfaStates.get(i);
// if ( i>0 ) {
// buf.append(", ");
// }
// buf.append(s);
// }
// buf.append("}");
// return buf.toString();
// }
}

View File

@ -330,11 +330,7 @@ public class DOTGenerator {
}
if ( Tool.internalOption_ShowNFAConfigsInDFA ) {
Set<Integer> alts = ((DFAState)s).getAltSet();
if ( s instanceof LexerState ) {
buf.append("\\n");
buf.append( ((LexerState)s).nfaStates.toString() );
}
else if ( alts!=null ) {
if ( alts!=null ) {
buf.append("\\n");
// separate alts
List<Integer> altList = new ArrayList<Integer>();