added context and started using NFAConfig for lexer DFA

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6775]
2010-03-26 15:48:04 -08:00 · 2010-03-26 15:48:04 -08:00 · 5d57017098
parent cb7445cf36
commit 5d57017098
5 changed files with 90 additions and 69 deletions
--- a/tool/src/org/antlr/v4/analysis/LexerNFAToDFAConverter.java
+++ b/tool/src/org/antlr/v4/analysis/LexerNFAToDFAConverter.java
@ -5,9 +5,7 @@ import org.antlr.v4.misc.IntervalSet;
 import org.antlr.v4.misc.OrderedHashSet;
 import org.antlr.v4.tool.Grammar;

-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;

 public class LexerNFAToDFAConverter {
 	Grammar g;
@ -19,6 +17,12 @@ public class LexerNFAToDFAConverter {
 	List<LexerState> work = new LinkedList<LexerState>();
 	List<LexerState> accepts = new LinkedList<LexerState>();

+	/** Tracks the configurations already visited by the closure operation so
+     *  that it cannot revisit one and recurse forever.  Sensitive to the NFA
+     *  state, the alt, and the stack context.
+     */
+	Set<NFAConfig> closureBusy;	
+
 	public static boolean debug = false;	

 	public LexerNFAToDFAConverter(Grammar g) {
@ -28,6 +32,7 @@ public class LexerNFAToDFAConverter {
 	}

 	public DFA createDFA() {
+		closureBusy = new HashSet<NFAConfig>();
 		LexerState start = computeStartState();
 		dfa.startState = start;
 		dfa.addState(start); // make sure dfa knows about this state
@ -42,7 +47,8 @@ public class LexerNFAToDFAConverter {

 		// walk accept states, informing DFA
 		for (LexerState d : accepts) {
-			for (NFAState s : d.nfaStates) {
+			for (NFAConfig c : d.nfaConfigs) {
+				NFAState s = c.state;
 				if ( s instanceof RuleStopState && !s.rule.isFragment() ) {
 					dfa.defineAcceptState(s.rule.index, d);
 					d.matchesRules.add(s.rule);
@ -50,13 +56,23 @@ public class LexerNFAToDFAConverter {
 			}
 		}

+		closureBusy = null; // release all the memory used during closure
+
 		return dfa;
 	}

 	/** */
 	public LexerState computeStartState() {
 		LexerState d = dfa.newLexerState();
-		d.nfaStates.add(dfa.decisionNFAStartState);		
+		// add config for each alt start, then add closure for those states
+		for (int ruleIndex=1; ruleIndex<=dfa.nAlts; ruleIndex++) {
+			Transition t = dfa.decisionNFAStartState.transition(ruleIndex-1);
+			NFAState altStart = t.target;
+			d.addNFAConfig(altStart, ruleIndex,
+						   NFAContext.EMPTY,
+						   SemanticContext.EMPTY_SEMANTIC_CONTEXT);
+		}
+
 		closure(d);
 		return d;
 	}
@ -106,14 +122,16 @@ public class LexerNFAToDFAConverter {
 		//System.out.println("reach "+label.toString(g)+" from "+d.stateNumber);
 		LexerState labelTarget = dfa.newLexerState();

-		for (NFAState s : d.nfaStates) {
+		for (NFAConfig c : d.nfaConfigs) {
+			NFAState s = c.state;
 			int n = s.getNumberOfTransitions();
 			for (int i=0; i<n; i++) {               // for each transition
 				Transition t = s.transition(i);
 				// found a transition with label; does it collide with label?
 				if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
 					// add NFA target to (potentially) new DFA state
-					labelTarget.nfaStates.add(t.target);
+					labelTarget.addNFAConfig(t.target, c.alt, c.context,
+											 SemanticContext.EMPTY_SEMANTIC_CONTEXT);
 				}
 			}
 		}
@ -130,9 +148,13 @@ public class LexerNFAToDFAConverter {
 			System.out.println("closure("+d+")");
 		}

-		List<NFAState> states = new ArrayList<NFAState>();
-		states.addAll(d.nfaStates.elements()); // dup initial list; avoid walk/update issue
-		for (NFAState s : states) closure(d, s, NFAContext.EMPTY); // update d.nfaStates
+		List<NFAConfig> configs = new ArrayList<NFAConfig>();
+		configs.addAll(d.nfaConfigs.elements()); // dup initial list; avoid walk/update issue
+		for (NFAConfig c : configs) {
+			closure(d, c.state, c.alt, c.context); // update d.nfaConfigs
+		}
+
+		closureBusy.clear();

 		if ( debug ) {
 			System.out.println("after closure("+d+")");
@ -140,16 +162,20 @@ public class LexerNFAToDFAConverter {
 		//System.out.println("after closure d="+d);
 	}

-	public void closure(LexerState d, NFAState s, NFAContext context) {
+	public void closure(LexerState d, NFAState s, int ruleIndex, NFAContext context) {
+		NFAConfig proposedNFAConfig =
+			new NFAConfig(s, ruleIndex, context, SemanticContext.EMPTY_SEMANTIC_CONTEXT);
+
+		if ( closureBusy.contains(proposedNFAConfig) ) return;
+		closureBusy.add(proposedNFAConfig);
+
 		// s itself is always in closure
-		d.nfaStates.add(s);
+		d.nfaConfigs.add(proposedNFAConfig);

 		if ( s instanceof RuleStopState ) {
 			// TODO: chase FOLLOW links if recursive
 			if ( context!=NFAContext.EMPTY ) {
-				if ( !d.nfaStates.contains(context.returnState) ) {
-					closure(d, context.returnState, context.parent);
-				}
+				closure(d, context.returnState, ruleIndex, context.parent);
 				// do nothing if context not empty and already added to nfaStates
 			}
 			else {
@ -163,10 +189,10 @@ public class LexerNFAToDFAConverter {
 				if ( t instanceof RuleTransition ) {
 					NFAContext newContext =
 						new NFAContext(context, ((RuleTransition)t).followState);
-					if ( !d.nfaStates.contains(t.target) ) closure(d, t.target, newContext);
+					closure(d, t.target, ruleIndex, newContext);
 				}
-				else if ( t.isEpsilon() && !d.nfaStates.contains(t.target) ) {
-					closure(d, t.target, context);
+				else if ( t.isEpsilon() ) {
+					closure(d, t.target, ruleIndex, context);
 				}
 			}
 		}
--- a/tool/src/org/antlr/v4/analysis/StackLimitedNFAToDFAConverter.java
+++ b/tool/src/org/antlr/v4/analysis/StackLimitedNFAToDFAConverter.java
@ -94,6 +94,7 @@ public class StackLimitedNFAToDFAConverter {
 	}

 	public DFA createDFA() {
+		closureBusy = new HashSet<NFAConfig>();
 		computeStartState();
 		dfa.addState(dfa.startState); // make sure dfa knows about this state
 		work.add(dfa.startState);
@ -108,6 +109,8 @@ public class StackLimitedNFAToDFAConverter {

 		unreachableAlts = getUnreachableAlts();

+		closureBusy = null; // release all the memory used during closure
+
 		return dfa;
 	}

@ -266,17 +269,15 @@ public class StackLimitedNFAToDFAConverter {
 		// it forward
 		boolean collectPredicates = (d == dfa.startState);

-		closureBusy = new HashSet<NFAConfig>();
-
 		// TODO: can we avoid this separate list by directly filling d.nfaConfigs?
-		// OH: concurrent modification. dup initialconfigs?
+		// OH: concurrent modification. Dup the initial configs? That works for lexers; try it here to drop the configs param.
 		List<NFAConfig> configs = new ArrayList<NFAConfig>();
 		for (NFAConfig c : d.nfaConfigs) {
 			closure(c.state, c.alt, c.context, c.semanticContext, collectPredicates, configs);
 		}
 		d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d

-		closureBusy = null; // wack all that memory used during closure
+		closureBusy.clear();

 		if ( debug ) {
 			System.out.println("after closure("+d+")");
--- a/tool/src/org/antlr/v4/automata/DFAState.java
+++ b/tool/src/org/antlr/v4/automata/DFAState.java
@ -58,7 +58,7 @@ public class DFAState {
 		new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS);

 	/** The set of NFA configurations (state,alt,context) for this DFA state */
-	public OrderedHashSet<NFAConfig> nfaConfigs;
+	public OrderedHashSet<NFAConfig> nfaConfigs = new OrderedHashSet<NFAConfig>();

 	/** Rather than recheck every NFA configuration in a DFA state (after
 	 *  resolving) in reach just check this boolean.  Saves a linear walk
@ -75,7 +75,6 @@ public class DFAState {

 	public DFAState(DFA dfa) {
 		this.dfa = dfa;
-		nfaConfigs = new OrderedHashSet<NFAConfig>();
 	}

 	public void addNFAConfig(NFAConfig c) {
--- a/tool/src/org/antlr/v4/automata/LexerState.java
+++ b/tool/src/org/antlr/v4/automata/LexerState.java
@ -1,6 +1,5 @@
 package org.antlr.v4.automata;

-import org.antlr.v4.misc.OrderedHashSet;
 import org.antlr.v4.tool.Rule;

 import java.util.HashSet;
@ -10,7 +9,7 @@ import java.util.Set;
 *  DFA used for prediction.
 */
 public class LexerState extends DFAState {
-	public OrderedHashSet<NFAState> nfaStates;
+	//public OrderedHashSet<NFAState> nfaStates;

 	/** For ambiguous lexer rules, the accept state matches a set of rules,
 	 *  not just one. Means we can't use predictsAlt (an int).
@ -18,44 +17,44 @@ public class LexerState extends DFAState {
 	public Set<Rule> matchesRules = new HashSet<Rule>();

 	public LexerState(DFA dfa) {
-		this.dfa = dfa;
-		nfaStates = new OrderedHashSet<NFAState>();
+		super(dfa);
+		//nfaStates = new OrderedHashSet<NFAState>();
 	}

-	public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
-
-	public Set<Integer> getAltSet() { return null; }
-	
-	/** Two LexerStates are equal if their NFA state lists are the
-	 *  same. Don't test the DFA state numbers here because
-	 *  we use to know if any other state exists that has this exact set
-	 *  of states. The DFAState state number is irrelevant.
-	 */
-	public boolean equals(Object o) {
-		// compare set of NFA configurations in this set with other
-		if ( this==o ) return true;
-		LexerState other = (LexerState)o;
-		return this.nfaStates.equals(other.nfaStates);
-	}
-
-	public int hashCode() {
-		int h = 0;
-		for (NFAState s : nfaStates) h += s.stateNumber;
-		return h;
-	}
-
-	/** Print all NFA states plus what alts they predict */
-	public String toString() {
-		StringBuffer buf = new StringBuffer();
-		buf.append(stateNumber+":{");
-		for (int i = 0; i < nfaStates.size(); i++) {
-			NFAState s = nfaStates.get(i);
-			if ( i>0 ) {
-				buf.append(", ");
-			}
-			buf.append(s);
-		}
-		buf.append("}");
-		return buf.toString();
-	}
+//	public Set<NFAState> getUniqueNFAStates() { return nfaStates; }
+//
+//	public Set<Integer> getAltSet() { return null; }
+//
+//	/** Two LexerStates are equal if their NFA state lists are the
+//	 *  same. Don't test the DFA state numbers here because
+//	 *  we use to know if any other state exists that has this exact set
+//	 *  of states. The DFAState state number is irrelevant.
+//	 */
+//	public boolean equals(Object o) {
+//		// compare set of NFA configurations in this set with other
+//		if ( this==o ) return true;
+//		LexerState other = (LexerState)o;
+//		return this.nfaStates.equals(other.nfaStates);
+//	}
+//
+//	public int hashCode() {
+//		int h = 0;
+//		for (NFAState s : nfaStates) h += s.stateNumber;
+//		return h;
+//	}
+//
+//	/** Print all NFA states plus what alts they predict */
+//	public String toString() {
+//		StringBuffer buf = new StringBuffer();
+//		buf.append(stateNumber+":{");
+//		for (int i = 0; i < nfaStates.size(); i++) {
+//			NFAState s = nfaStates.get(i);
+//			if ( i>0 ) {
+//				buf.append(", ");
+//			}
+//			buf.append(s);
+//		}
+//		buf.append("}");
+//		return buf.toString();
+//	}
 }
--- a/tool/src/org/antlr/v4/tool/DOTGenerator.java
+++ b/tool/src/org/antlr/v4/tool/DOTGenerator.java
@ -330,11 +330,7 @@ public class DOTGenerator {
        }
 		if ( Tool.internalOption_ShowNFAConfigsInDFA ) {
 			Set<Integer> alts = ((DFAState)s).getAltSet();
-			if ( s instanceof LexerState ) {
-				buf.append("\\n");
-				buf.append( ((LexerState)s).nfaStates.toString() );
-			}
-			else if ( alts!=null ) {
+			if ( alts!=null ) {
 				buf.append("\\n");
 				// separate alts
 				List<Integer> altList = new ArrayList<Integer>();