moved lots of crap from v3 for DFA display

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6741]
2010-03-07 14:18:03 -08:00 · 2010-03-07 14:18:03 -08:00 · 6ad8ff2a46
parent 61fbb6571d
commit 6ad8ff2a46
22 changed files with 838 additions and 106 deletions
--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@ -359,8 +359,8 @@ public class Tool {
        //g.ast.inspect();

 		// MAKE SURE GRAMMAR IS SEMANTICALLY CORRECT (FILL IN GRAMMAR OBJECT)
-        SemanticPipeline sem = new SemanticPipeline();
-        sem.process(g);
+        SemanticPipeline sem = new SemanticPipeline(g);
+        sem.process();
 		if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any)
 			for (Grammar imp : g.getImportedGrammars()) {
 				process(imp);
@ -373,8 +373,8 @@ public class Tool {
 		g.nfa = factory.createNFA();

 		// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
-		AnalysisPipeline anal = new AnalysisPipeline();
-		anal.process(g);
+		AnalysisPipeline anal = new AnalysisPipeline(g);
+		anal.process();
 		
 		// GENERATE CODE
    }
--- a/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java
+++ b/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java
@ -1,10 +1,18 @@
 package org.antlr.v4.analysis;

+import org.antlr.v4.automata.DFA;
 import org.antlr.v4.automata.DecisionState;
+import org.antlr.v4.automata.NFAToDFAConverter;
 import org.antlr.v4.tool.Grammar;

 public class AnalysisPipeline {
-	public void process(Grammar g) {
+	public Grammar g;
+
+	public AnalysisPipeline(Grammar g) {
+		this.g = g;
+	}
+
+	public void process() {
 		// LEFT-RECURSION CHECK
 		LeftRecursionDetector lr = new LeftRecursionDetector(g.nfa);
 		lr.check();
@ -18,7 +26,10 @@ public class AnalysisPipeline {

 	public void createDFA(DecisionState s) {
 		// TRY APPROXIMATE LL(*) ANALYSIS
-
+		NFAToDFAConverter conv = new NFAToDFAConverter(g, s); // a converter is built for this one decision state
+		DFA dfa = conv.createDFA();
+		System.out.println("DFA="+dfa); // debug dump to stdout; the DFA is not stored or returned here
+		
 		// REAL LL(*) ANALYSIS IF THAT FAILS
 	}
 }
--- a/tool/src/org/antlr/v4/automata/DFA.java
+++ b/tool/src/org/antlr/v4/automata/DFA.java
@ -1,6 +1,11 @@
 package org.antlr.v4.automata;

+import org.antlr.v4.misc.Utils;
+import org.antlr.v4.tool.Grammar;
+
 import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Map;

 /** A DFA (converted from a grammar's NFA).
@ -8,6 +13,8 @@ import java.util.Map;
 *  of recognizers (lexers, parsers, tree walkers).
 */
 public class DFA {
+	Grammar g;
+
 	/** What's the start state for this DFA? */
    public DFAState startState;

@ -15,7 +22,7 @@ public class DFA {
 //    public NFA nfa;

 	/** From what NFAState did we create the DFA? */
-	public NFAState decisionNFAStartState;
+	public DecisionState decisionNFAStartState;

 	/** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
     *  to the actual DFAState object.  We use this to detect
@ -38,6 +45,73 @@ public class DFA {
 	 */
 	//protected List<DFAState> states = new ArrayList<DFAState>();

+	/** Each alt in an NFA derived from a grammar must have a DFA state that
+     *  predicts it lest the parser not know what to do.  Nondeterminisms can
+     *  lead to this situation (assuming no semantic predicates can resolve
+     *  the problem) and when for some reason, I cannot compute the lookahead
+     *  (which might arise from an error in the algorithm or from
+     *  left-recursion etc...).  This list starts out with all alts contained
+     *  and then in method doesStateReachAcceptState() I remove the alts I
+     *  know to be uniquely predicted.
+     */
+    public List<Integer> unreachableAlts;
+
+	public int nAlts = 0;
+
+	/** We only want one accept state per predicted alt; track here */
+	public DFAState[] altToAcceptState;	
+	
 	/** Unique state numbers per DFA */
-	int stateCounter = 0;	
+	int stateCounter = 0;
+
+	/** Create an empty DFA for the decision rooted at startState.  The
+	 *  number of alternatives is the number of transitions leaving the
+	 *  decision state.  Every alt 1..nAlts starts out listed in
+	 *  unreachableAlts, and altToAcceptState gets nAlts+1 slots so it can
+	 *  be indexed directly by (1-based) alt number.
+	 */
+	public DFA(Grammar g, DecisionState startState) {
+		this.g = g;
+		this.decisionNFAStartState = startState;
+		nAlts = startState.getNumberOfTransitions();
+		// parameterized list; a raw LinkedList here defeats the List<Integer> field type
+		unreachableAlts = new LinkedList<Integer>();
+		for (int i = 1; i <= nAlts; i++) {
+			unreachableAlts.add(Utils.integer(i));
+		}
+		altToAcceptState = new DFAState[nAlts+1];
+	}
+
+	/** Register DFA state d with this DFA unless an equal state is already
+	 *  known.  If uniqueStates already holds a state equal to d, that
+	 *  existing state is returned and d is left untouched.  Otherwise d is
+	 *  recorded in uniqueStates, given the next number from stateCounter,
+	 *  and returned.
+	 */
+	protected DFAState addState(DFAState d) {
+		DFAState known = (DFAState)uniqueStates.get(d);
+		if ( known == null ) {
+			// first sighting: remember it and hand out a fresh state number
+			uniqueStates.put(d,d);
+			d.stateNumber = stateCounter++;
+			return d;
+		}
+		// an equal state was added earlier; reuse it
+		return known;
+	}
+
+	/** Create a fresh DFAState that belongs to this DFA.  The state is
+	 *  neither numbered nor registered here.
+	 */
+	public DFAState newState() {
+		return new DFAState(this);
+	}
+
+	/** Serialize this DFA from its start state; an empty string is
+	 *  returned when no start state has been set.
+	 */
+	public String toString() {
+		return startState==null ? "" : new DFASerializer(g, startState).toString();
+	}
+
 }
--- a/tool/src/org/antlr/v4/automata/DFASerializer.java
+++ b/tool/src/org/antlr/v4/automata/DFASerializer.java
@ -0,0 +1,52 @@
+package org.antlr.v4.automata;
+
+import org.antlr.v4.tool.Grammar;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/** A DFA walker that knows how to dump them to serialized strings.
+ *  Starting at the start state, every reachable state is visited once and
+ *  each of its edges is written as one line: source-label->target.
+ */
+public class DFASerializer {
+	List<DFAState> work;
+	Set<DFAState> marked;
+	Grammar g;
+	DFAState start;
+
+	public DFASerializer(Grammar g, DFAState start) {
+		this.g = g;
+		this.start = start;
+	}
+
+	/** Serialize all edges reachable from the start state.  Returns null
+	 *  when there is no start state.
+	 */
+	public String toString() {
+		if ( start==null ) return null;
+		marked = new HashSet<DFAState>();
+
+		work = new ArrayList<DFAState>();
+		work.add(start);
+
+		StringBuilder buf = new StringBuilder();
+		DFAState s = null;
+
+		while ( work.size()>0 ) {
+			s = work.remove(0);
+			if ( marked.contains(s) ) continue; 
+			int n = s.getNumberOfTransitions();
+			//System.out.println("visit "+getDFAStateString(s)+"; edges="+n);
+			marked.add(s);
+			for (int i=0; i<n; i++) {
+				Edge t = s.transition(i);
+				// queue the target; otherwise only the start state's edges are ever dumped
+				work.add(t.target);
+				// name the source state so each line reads source-label->target
+				buf.append(getStateString(s));
+				// Edge only defines toString(Grammar); plain toString() is Object's default
+				buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
+			}
+		}
+		return buf.toString();
+	}
+
+	/** Display name of a DFA state: ":s" + state number + "=>" + the alt
+	 *  it uniquely predicts (as reported by getUniquelyPredictedAlt()).
+	 */
+	String getStateString(DFAState s) {
+		int n = s.stateNumber;
+		return ":s"+n+"=>"+s.getUniquelyPredictedAlt();
+	}
+}
--- a/tool/src/org/antlr/v4/automata/DFAState.java
+++ b/tool/src/org/antlr/v4/automata/DFAState.java
@ -1,7 +1,9 @@
 package org.antlr.v4.automata;

-import java.util.ArrayList;
-import java.util.List;
+import org.antlr.v4.misc.Utils;
+import org.stringtemplate.v4.misc.MultiMap;
+
+import java.util.*;

 /** A DFA state represents a set of possible NFA configurations.
 *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
@ -30,20 +32,27 @@ import java.util.List;
 *  but with different NFAContexts (with same or different alts)
 *  meaning that state was reached via a different set of rule invocations.
 */
-public class DFAState extends State {
+public class DFAState {
 	public static final int INITIAL_NUM_TRANSITIONS = 4;
+	public static final int INVALID_STATE_NUMBER = -1;
+
+	public int stateNumber = INVALID_STATE_NUMBER;
+
+	public boolean isAcceptState = false;

 	/** State in which DFA? */
 	public DFA dfa;

 	/** Track the transitions emanating from this DFA state. */
-	protected List<Transition> transitions =
-		new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
+	protected List<Edge> edges =
+		new ArrayList<Edge>(INITIAL_NUM_TRANSITIONS);

 	/** The set of NFA configurations (state,alt,context) for this DFA state */
 	public OrderedHashSet<NFAConfig> nfaConfigs =
 		new OrderedHashSet<NFAConfig>();

+	int cachedUniquelyPredicatedAlt = NFA.INVALID_ALT_NUMBER;
+
 	public DFAState(DFA dfa) { this.dfa = dfa; }

 	public void addNFAConfig(NFAState s, NFAConfig c) {
@ -53,29 +62,176 @@ public class DFAState extends State {

 	public NFAConfig addNFAConfig(NFAState state,
 								  int alt,
-								  NFAState invokingState)
+								  NFAState context)
 	{
-		NFAConfig c = new NFAConfig(state.stateNumber,
-									alt,
-									invokingState);
+		NFAConfig c = new NFAConfig(state, alt,	context);
 		addNFAConfig(state, c);
 		return c;
 	}

-	@Override
-	public int getNumberOfTransitions() { return transitions.size(); }
+	/** Walk each NFA configuration in this DFA state looking for a conflict
+	 *  where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with
+	 *  context conflicting ctx predicts alts i and j.  Return an Integer set
+	 *  of the alternative numbers that conflict.  Two contexts conflict if
+	 *  they are equal or one is a stack suffix of the other or one is
+	 *  the empty context.
+	 *
+	 *  Use a hash table to record the lists of configs for each state
+	 *  as they are encountered.  We need only consider states for which
+	 *  there is more than one configuration.  The configurations' predicted
+	 *  alt must be different or must have different contexts to avoid a
+	 *  conflict.
+	 *
+	 *  Returns null (not an empty set) when no conflict is found.  A
+	 *  context is a single NFAState here, so "conflicting contexts" means
+	 *  the same NFAState or a null (empty) context on either side.
+	 */
+	protected Set<Integer> getConflictingAlts() {
+		// TODO this is called multiple times: cache result?
+		//System.out.println("getNondetAlts for DFA state "+stateNumber);
+		 Set<Integer> nondeterministicAlts = new HashSet<Integer>();

-	@Override
-	public void addTransition(Transition e) { transitions.add(e); }
+		// If only 1 NFA conf then no way it can be nondeterministic;
+		// save the overhead.  There are many o-a->o NFA transitions
+		// and so we save a hash map and iterator creation for each
+		// state.
+		int numConfigs = nfaConfigs.size();
+		if ( numConfigs <=1 ) {
+			return null;
+		}

-	@Override
-	public Transition transition(int i) { return transitions.get(i); }
+		// First get a list of configurations for each state.
+		// Most of the time, each state will have one associated configuration.
+		MultiMap<Integer, NFAConfig> stateToConfigListMap =
+			new MultiMap<Integer, NFAConfig>();
+		for (int i = 0; i < numConfigs; i++) {
+			NFAConfig configuration = (NFAConfig) nfaConfigs.get(i);
+			Integer stateI = Utils.integer(configuration.state.stateNumber);
+			stateToConfigListMap.map(stateI, configuration);
+		}
+		// potential conflicts are states with > 1 configuration and diff alts
+		Set states = stateToConfigListMap.keySet();
+		int numPotentialConflicts = 0;
+		for (Iterator it = states.iterator(); it.hasNext();) {
+			Integer stateI = (Integer) it.next();
+			boolean thisStateHasPotentialProblem = false;
+			List configsForState = (List)stateToConfigListMap.get(stateI);
+			int alt=0;
+			int numConfigsForState = configsForState.size();
+			for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) {
+				NFAConfig c = (NFAConfig) configsForState.get(i);
+				if ( alt==0 ) {
+					alt = c.alt;
+				}
+				else if ( c.alt!=alt ) {
+					/*
+					System.out.println("potential conflict in state "+stateI+
+									   " configs: "+configsForState);
+					*/
+					numPotentialConflicts++;
+					thisStateHasPotentialProblem = true;
+				}
+			}
+			if ( !thisStateHasPotentialProblem ) {
+				// remove NFA state's configurations from
+				// further checking; no issues with it
+				// (can't remove as it's concurrent modification; set to null)
+				stateToConfigListMap.put(stateI, null);
+			}
+		}
+
+		// a fast check for potential issues; most states have none
+		if ( numPotentialConflicts==0 ) {
+			return null;
+		}
+
+		// we have a potential problem, so now go through config lists again
+		// looking for different alts (only states with potential issues
+		// are left in the states set).  Now we will check context.
+		// For example, the list of configs for NFA state 3 in some DFA
+		// state might be:
+		//   [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2]
+		// I want to create a map from context to alts looking for overlap:
+		//   [28 18 $] -> 2
+		//   [28 $] -> 1
+		//   [$] -> 1,2
+		// Indeed a conflict exists as same state 3, same context [$], predicts
+		// alts 1 and 2.
+		// walk each state with potential conflicting configurations
+		for (Iterator it = states.iterator(); it.hasNext();) {
+			Integer stateI = (Integer) it.next();
+			List configsForState = (List)stateToConfigListMap.get(stateI);
+			// compare each configuration pair s, t to ensure:
+			// s.ctx different than t.ctx if s.alt != t.alt
+			int numConfigsForState = 0;
+			if ( configsForState!=null ) {
+				numConfigsForState = configsForState.size();
+			}
+			for (int i = 0; i < numConfigsForState; i++) {
+				NFAConfig s = (NFAConfig) configsForState.get(i);
+				for (int j = i+1; j < numConfigsForState; j++) {
+					NFAConfig t = (NFAConfig)configsForState.get(j);
+					// conflicts means s.ctx==t.ctx or s.ctx is a stack
+					// suffix of t.ctx or vice versa (if alts differ).
+					// Also a conflict if s.ctx or t.ctx is empty.
+					// Differing, non-empty contexts are what keep two alts
+					// apart, so they must NOT be reported as a conflict.
+					boolean contextsConflict =
+						s.context == t.context ||
+						s.context == null || t.context == null;
+					if ( s.alt != t.alt && contextsConflict ) {
+						nondeterministicAlts.add(Utils.integer(s.alt));
+						nondeterministicAlts.add(Utils.integer(t.alt));
+					}
+				}
+			}
+		}
+
+		if ( nondeterministicAlts.size()==0 ) {
+			return null;
+		}
+		return nondeterministicAlts;
+	}
+
+	/** Walk each configuration and if they are all the same alt, return
+	 *  that alt else return NFA.INVALID_ALT_NUMBER.  A uniquely predicted
+	 *  alt is remembered in cachedUniquelyPredicatedAlt so later calls
+	 *  return it without rescanning; a disagreement between configurations
+	 *  is never cached.
+	 *  NOTE(review): no configuration is skipped here; any special handling
+	 *  of resolved / resolveWithPredicate configs is not part of this method.
+	 */
+	public int getUniquelyPredictedAlt() {
+		if ( cachedUniquelyPredicatedAlt!=NFA.INVALID_ALT_NUMBER ) {
+			return cachedUniquelyPredicatedAlt;
+		}
+		// use this package's NFA constant, not the v3 org.antlr.analysis one
+		int alt = NFA.INVALID_ALT_NUMBER;
+		for (NFAConfig c : nfaConfigs) {
+			if ( alt== NFA.INVALID_ALT_NUMBER ) {
+				alt = c.alt; // first alt seen
+			}
+			else if ( c.alt!=alt ) {
+				return NFA.INVALID_ALT_NUMBER;
+			}
+		}
+		this.cachedUniquelyPredicatedAlt = alt;
+		return alt;
+	}
+
+	/** Collect the alt number of every NFA configuration in this DFA
+	 *  state.  Returns null rather than an empty set when there are no
+	 *  configurations.
+	 */
+	public Set<Integer> getAltSet() {
+		Set<Integer> seen = new HashSet<Integer>();
+		for (NFAConfig config : nfaConfigs) {
+			seen.add(Utils.integer(config.alt));
+		}
+		return seen.isEmpty() ? null : seen;
+	}
+
+	
+	public int getNumberOfTransitions() { return edges.size(); }
+
+	public void addTransition(Edge e) { edges.add(e); }
+
+	public Edge transition(int i) { return edges.get(i); }

 	/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
 	public int hashCode() {
 		int h = 0;
 		for (NFAConfig c : nfaConfigs) {
-			h += c.state + c.alt;
+			h += c.state.stateNumber + c.alt;
 		}
 		return h;
 	}
@ -96,5 +252,20 @@ public class DFAState extends State {
 		DFAState other = (DFAState)o;
 		return this.nfaConfigs.equals(other.nfaConfigs);
 	}
+
+	/** Render this state as stateNumber:{config, config, ...}, listing the
+	 *  NFA configurations in index order.
+	 */
+	public String toString() {
+		StringBuilder out = new StringBuilder();
+		out.append(stateNumber).append(":{");
+		int total = nfaConfigs.size();
+		for (int k = 0; k < total; k++) {
+			if ( k!=0 ) {
+				out.append(", ");
+			}
+			out.append((NFAConfig)nfaConfigs.get(k));
+		}
+		return out.append("}").toString();
+	}	
 	
 }
--- a/tool/src/org/antlr/v4/automata/DecisionState.java
+++ b/tool/src/org/antlr/v4/automata/DecisionState.java
@ -1,5 +1,6 @@
 package org.antlr.v4.automata;

 public class DecisionState extends BasicState {
+	public int decision; // NOTE(review): presumably this decision's number; it is never assigned in this class -- confirm where it is set
 	public DecisionState(NFA nfa) { super(nfa); }
 }
--- a/tool/src/org/antlr/v4/automata/Edge.java
+++ b/tool/src/org/antlr/v4/automata/Edge.java
@ -1,7 +1,19 @@
 package org.antlr.v4.automata;

+import org.antlr.v4.misc.IntervalSet;
+import org.antlr.v4.tool.Grammar;
+
 /** A DFA edge (NFA edges are called transitions) */
 public class Edge {
+	public int atom = Label.INVALID; // single-symbol label; only consulted by toString(Grammar) when set is null
+	public IntervalSet set; // when non-null, takes precedence over atom in toString(Grammar)
+	
 	public DFAState target;

+	public Edge(DFAState target) { this.target = target; } // atom and set keep their defaults
+
+	public String toString(Grammar g) { // there is no no-arg toString() override in this class
+		if ( set==null ) return g.getTokenDisplayName(atom);
+		else return set.toString(g);
+	}
 }
--- a/tool/src/org/antlr/v4/automata/NFA.java
+++ b/tool/src/org/antlr/v4/automata/NFA.java
@ -10,6 +10,8 @@ import java.util.Map;

 /** */
 public class NFA {
+	public static final int INVALID_ALT_NUMBER = -1;
+	
 	public Grammar g;
 	public List<NFAState> states = new ArrayList<NFAState>();

--- a/tool/src/org/antlr/v4/automata/NFAConfig.java
+++ b/tool/src/org/antlr/v4/automata/NFAConfig.java
@ -8,13 +8,13 @@ package org.antlr.v4.automata;
 */
 public class NFAConfig {
 	/** The NFA state associated with this configuration */
-	public int state;
+	public NFAState state;

 	/** What alt is predicted by this configuration */
 	public int alt;

 	/** Record the NFA state that invoked another rule's start state */
-	public NFAState invokingState;
+	public NFAState context;

 	/** The set of semantic predicates associated with this NFA
 	 *  configuration.  The predicates were found on the way to
@ -45,13 +45,13 @@ public class NFAConfig {
 	 */
 	//protected boolean resolveWithPredicate;

-	public NFAConfig(int state,
+	public NFAConfig(NFAState state,
 					 int alt,
-					 NFAState invokingState)
+					 NFAState context)
 	{
 		this.state = state;
 		this.alt = alt;
-		this.invokingState = invokingState;
+		this.context = context;
 		//this.semanticContext = semanticContext;
 	}

@ -67,13 +67,13 @@ public class NFAConfig {
        NFAConfig other = (NFAConfig)o;
        return this.state==other.state &&
               this.alt==other.alt &&
-			   this.invokingState==other.invokingState; 
+			   this.context ==other.context;
 //               this.context.equals (other.context)&&
 //               this.semanticContext.equals(other.semanticContext)
    }

    public int hashCode() {
-        int h = state + alt;// + context.hashCode();
+        int h = state.stateNumber + alt;// + context.hashCode();
        return h;
    }

@ -88,9 +88,9 @@ public class NFAConfig {
 			buf.append("|");
 			buf.append(alt);
 		}
-		if ( invokingState!=null ) {
+		if ( context !=null ) {
            buf.append("|");
-            buf.append(invokingState);
+            buf.append(context);
        }
 //        if ( resolved ) {
 //            buf.append("|resolved");
--- a/tool/src/org/antlr/v4/automata/NFASerializer.java
+++ b/tool/src/org/antlr/v4/automata/NFASerializer.java
@ -7,33 +7,33 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;

-/** A FA (finite automata) walker that knows how to dump them to serialized
- *  strings.
- */
-public class FASerializer {
-	List<State> work;
-	Set<State> marked;
+/** An NFA walker that knows how to dump them to serialized strings. */
+public class NFASerializer {
+	List<NFAState> work;
+	Set<NFAState> marked;
 	Grammar g;
-	State start;
+	NFAState start;

-	public FASerializer(Grammar g, State start) {
+	public NFASerializer(Grammar g, NFAState start) {
 		this.g = g;
 		this.start = start;
 	}

 	public String toString() {
 		if ( start==null ) return null;
-		work = new ArrayList<State>();
-		marked = new HashSet<State>();
+		marked = new HashSet<NFAState>();
+
+		work = new ArrayList<NFAState>();
 		work.add(start);

 		StringBuilder buf = new StringBuilder();
-		State s = null;
+		NFAState s = null;
+
 		while ( work.size()>0 ) {
 			s = work.remove(0);
 			if ( marked.contains(s) ) continue; 
 			int n = s.getNumberOfTransitions();
-			//System.out.println("visit "+getStateString(s)+"; edges="+n);
+			//System.out.println("visit "+getNFAStateString(s)+"; edges="+n);
 			marked.add(s);
 			for (int i=0; i<n; i++) {
 				Transition t = s.transition(i);
@ -43,34 +43,30 @@ public class FASerializer {
 				}
 				buf.append(getStateString(s));
 				if ( t instanceof EpsilonTransition ) {
-					buf.append("->"+getStateString(t.target)+'\n');
+					buf.append("->"+ getStateString(t.target)+'\n');
 				}
 				else if ( t instanceof RuleTransition ) {
-					buf.append("->"+getStateString(t.target)+'\n');
+					buf.append("->"+ getStateString(t.target)+'\n');
 				}
 				else if ( t instanceof ActionTransition ) {
 					ActionTransition a = (ActionTransition)t;
-					buf.append("-"+a.actionAST.getText()+"->"+getStateString(t.target)+'\n');
+					buf.append("-"+a.actionAST.getText()+"->"+ getStateString(t.target)+'\n');
 				}
 				else if ( t instanceof AtomTransition ) {
 					AtomTransition a = (AtomTransition)t;
-					buf.append("-"+a.toString(g)+"->"+getStateString(t.target)+'\n');
+					buf.append("-"+a.toString(g)+"->"+ getStateString(t.target)+'\n');
 				}
 				else {
-					buf.append("-"+t.toString()+"->"+getStateString(t.target)+'\n');					
+					buf.append("-"+t.toString()+"->"+ getStateString(t.target)+'\n');
 				}
 			}
 		}
 		return buf.toString();
 	}

-	String getStateString(State s) {
+	String getStateString(NFAState s) {
 		int n = s.stateNumber;
 		String stateStr = "s"+n;
-//		if ( s instanceof DFAState ) {
-//			stateStr = ":s"+n+"=>"+((DFAState)s).getUniquelyPredictedAlt();
-//		}
-//		else
 		if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
 		else if ( s instanceof PlusBlockStartState ) stateStr = "PlusBlockStart_"+n;
 		else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n;
--- a/tool/src/org/antlr/v4/automata/NFAState.java
+++ b/tool/src/org/antlr/v4/automata/NFAState.java
@ -2,7 +2,28 @@ package org.antlr.v4.automata;

 import org.antlr.v4.tool.GrammarAST;

-public class NFAState extends State {
+public class NFAState {
+	public static final int INVALID_STATE_NUMBER = -1;
+
+	public int stateNumber = INVALID_STATE_NUMBER;
+
+	@Override
+	public int hashCode() {
+		return super.hashCode();
+	}
+
+	@Override
+	public boolean equals(Object o) {
+		// are these states same object?
+		if ( o instanceof NFAState ) return this == (NFAState)o;
+		return false;
+	}
+
+	@Override
+	public String toString() {
+		return String.valueOf(stateNumber);
+	}
+
 	/** Which NFA are we in? */
 	public NFA nfa = null;

@ -11,16 +32,13 @@ public class NFAState extends State {

 	public NFAState(NFA nfa) { this.nfa = nfa; }
 	
-	@Override
 	public int getNumberOfTransitions() {
 		return 0;
 	}

-	@Override
 	public void addTransition(Transition e) {
 	}

-	@Override
 	public Transition transition(int i) {
 		return null;
 	}
--- a/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java
+++ b/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java
@ -0,0 +1,106 @@
+package org.antlr.v4.automata;
+
+import org.antlr.v4.tool.Grammar;
+
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+
+/** Code that embodies the NFA conversion to DFA. A new object is needed
+ *  per DFA (also required for thread safety if multiple conversions
+ *  launched).
+ */
+public class NFAToDFAConverter {
+	Grammar g;
+
+	DecisionState nfaStartState;
+
+	/** DFA we are creating */
+	DFA dfa;
+
+	/** A list of DFA states we still need to process during NFA conversion */
+	List<DFAState> work = new LinkedList<DFAState>();
+
+	public static boolean debug = false;
+	
+	public NFAToDFAConverter(Grammar g, DecisionState nfaStartState) {
+		this.g = g;
+		this.nfaStartState = nfaStartState;
+		dfa = new DFA(g, nfaStartState);
+	}
+
+	/** Compute the DFA start state, register it with the DFA and drain
+	 *  the work list.  Returns the DFA being built.
+	 */
+	public DFA createDFA() {
+		dfa.startState = computeStartState();
+		dfa.addState(dfa.startState); // make sure dfa knows about this state
+		work.add(dfa.startState);
+
+		// while more DFA states to check, process them
+		while ( work.size()>0 ) {
+			// Take the state off the list so the loop terminates.  Nothing
+			// further is computed for it: this class has no code yet that
+			// derives target states from a DFA state.
+			work.remove(0);
+		}
+		
+		return dfa;
+	}
+
+	/** From this first NFA state of a decision, create a DFA.
+	 *  Walk each alt in decision and compute closure from the start of that
+	 *  rule, making sure that the closure does not include other alts within
+	 *  that same decision.  The idea is to associate a specific alt number
+	 *  with the starting closure so we can trace the alt number for all states
+	 *  derived from this.  At a stop state in the DFA, we can return this alt
+	 *  number, indicating which alt is predicted.
+	 */
+	public DFAState computeStartState() {
+		DFAState d = dfa.newState();
+
+		// add config for each alt start, then add closure for those states
+		for (int altNum=1; altNum<=dfa.nAlts; altNum++) {
+			Transition t = nfaStartState.transition(altNum-1);
+			NFAState altStart = t.target;
+			// altNum is already 1-based (transition i-1 is alt i), matching
+			// DFA.unreachableAlts and the nAlts+1 sized altToAcceptState
+			d.addNFAConfig(altStart, altNum, null);
+
+		}
+
+		closure(d);
+
+		return d;
+	}
+
+	/** For all NFA states (configurations) merged in d,
+	 *  compute the epsilon closure; that is, find all NFA states reachable
+	 *  from the NFA states in d via purely epsilon transitions.
+	 */
+	public void closure(DFAState d) {
+		if ( debug ) {
+			System.out.println("closure("+d+")");
+		}
+
+		List<NFAConfig> configs = new ArrayList<NFAConfig>();
+		for (NFAConfig c : d.nfaConfigs) {
+			closure(c.state, c.alt, c.context, configs);
+		}
+		d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d
+
+		System.out.println("after closure d="+d);
+	}
+
+	/** Where can we get from NFA state s traversing only epsilon transitions?
+	 *  Every configuration found is appended to configs once; a
+	 *  configuration that is already in configs is not expanded again.
+	 */
+	public void closure(NFAState s, int altNum, NFAState context,
+						List<NFAConfig> configs)
+	{
+		NFAConfig proposedNFAConfig =
+			new NFAConfig(s, altNum, context);
+
+		// Already expanded: stop here.  Without this check a cycle of
+		// epsilon transitions would recurse without bound.
+		if ( configs.contains(proposedNFAConfig) ) return;
+
+		// p itself is always in closure
+		configs.add(proposedNFAConfig);
+
+		int n = s.getNumberOfTransitions();
+		for (int i=0; i<n; i++) {
+			Transition t = s.transition(i);
+			if ( t.isEpsilon() ) {
+				closure(t.target, altNum, context, configs);
+			}
+		}
+	}
+}
--- a/tool/src/org/antlr/v4/automata/State.java
+++ b/tool/src/org/antlr/v4/automata/State.java
@ -1,36 +0,0 @@
-package org.antlr.v4.automata;
-
-/** A generic state machine state. */
-public abstract class State {
-    public static final int INVALID_STATE_NUMBER = -1;
-
-    public int stateNumber = INVALID_STATE_NUMBER;
-
-    /** An accept state is an end of rule state for lexers and
-     *  parser grammar rules.
-	 */
-	public boolean acceptState = false;
-
-    public abstract int getNumberOfTransitions();
-
-    public abstract void addTransition(Transition e);
-
-    public abstract Transition transition(int i);
-
-	@Override
-	public int hashCode() {
-		return super.hashCode();
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		// are these states same object?
-		if ( o instanceof State ) return this == (State)o;
-		return false;
-	}
-
-	@Override
-	public String toString() {
-		return String.valueOf(stateNumber);
-	}
-}
--- a/tool/src/org/antlr/v4/automata/Transition.java
+++ b/tool/src/org/antlr/v4/automata/Transition.java
@ -1,5 +1,7 @@
 package org.antlr.v4.automata;

+import org.antlr.v4.tool.Grammar;
+
 /** An NFA transition between any two NFA states.  Subclasses define
 *  atom, set, epsilon, action, predicate, rule transitions.
 *
@ -22,4 +24,6 @@ public abstract class Transition implements Comparable {

 	/** Are we epsilon, action, sempred? */
 	public boolean isEpsilon() { return false; }
+
+	public String toString(Grammar g) { return toString(); }	
 }
--- a/tool/src/org/antlr/v4/misc/BitSet.java
+++ b/tool/src/org/antlr/v4/misc/BitSet.java
@ -27,8 +27,8 @@
 */
 package org.antlr.v4.misc;

-import org.antlr.analysis.Label;
-import org.antlr.tool.Grammar;
+import org.antlr.v4.automata.Label;
+import org.antlr.v4.tool.Grammar;

 import java.util.Collection;
 import java.util.Iterator;
--- a/tool/src/org/antlr/v4/misc/IntSet.java
+++ b/tool/src/org/antlr/v4/misc/IntSet.java
@ -27,7 +27,7 @@
 */
 package org.antlr.v4.misc;

-import org.antlr.tool.Grammar;
+import org.antlr.v4.tool.Grammar;

 import java.util.List;

--- a/tool/src/org/antlr/v4/misc/IntervalSet.java
+++ b/tool/src/org/antlr/v4/misc/IntervalSet.java
@ -27,8 +27,8 @@
 */
 package org.antlr.v4.misc;

-import org.antlr.analysis.Label;
-import org.antlr.tool.Grammar;
+import org.antlr.v4.automata.Label;
+import org.antlr.v4.tool.Grammar;

 import java.util.ArrayList;
 import java.util.Iterator;
@ -50,7 +50,7 @@ import java.util.ListIterator;
 *  The ranges are ordered and disjoint so that 2..6 appears before 101..103.
 */
 public class IntervalSet implements IntSet {
-	public static final IntervalSet COMPLETE_SET = IntervalSet.of(0,Label.MAX_CHAR_VALUE);
+	public static final IntervalSet COMPLETE_SET = IntervalSet.of(0, Label.MAX_CHAR_VALUE);

 	/** The list of sorted, disjoint intervals. */
    protected List<Interval> intervals;
@ -557,7 +557,7 @@ public class IntervalSet implements IntSet {
    }

    public String toString() {
-        return toString(null);
+        return toString((Grammar)null);
    }

    public String toString(Grammar g) {
--- a/tool/src/org/antlr/v4/misc/Utils.java
+++ b/tool/src/org/antlr/v4/misc/Utils.java
@ -40,4 +40,29 @@ public class Utils {
        }
        return buf.toString();
    }
+
+	/** Given a source string, src,
+		a string to replace, replacee,
+		and a string to replace with, replacer,
+		return a new string w/ the replacing done.
+		You can use replacer==null to remove replacee from the string.
+		An empty replacee replaces nothing: src is returned unchanged.
+
+		This should be faster than Java's String.replaceAll as that one
+		uses regex (I only want to play with strings anyway).
+	*/
+	public static String replace(String src, String replacee, String replacer) {
+		// An empty replacee matches at every index without ever advancing,
+		// so the scan below would never finish; there is nothing to replace.
+		if ( replacee.length()==0 ) {
+			return src;
+		}
+		StringBuilder result = new StringBuilder(src.length() + 50);
+		int startIndex = 0;
+		int endIndex = src.indexOf(replacee);
+		while(endIndex != -1) {
+			// copy the text between the previous match and this one
+			result.append(src, startIndex, endIndex);
+			if ( replacer!=null ) {
+				result.append(replacer);
+			}
+			startIndex = endIndex + replacee.length();
+			endIndex = src.indexOf(replacee,startIndex);
+		}
+		// tail after the last match (the whole string if there was none)
+		result.append(src, startIndex, src.length());
+		return result.toString();
+	}	
 }
--- a/tool/src/org/antlr/v4/semantics/SemanticPipeline.java
+++ b/tool/src/org/antlr/v4/semantics/SemanticPipeline.java
@ -26,7 +26,13 @@ import java.util.Map;
 *  as separate objects, however).
 */
 public class SemanticPipeline {
-	public void process(Grammar g) {
+	public Grammar g;
+	
+	public SemanticPipeline(Grammar g) {
+		this.g = g;
+	}
+	
+	public void process() {
 		if ( g.ast==null ) return;
 		
 		// VALIDATE AST STRUCTURE
--- a/tool/src/org/antlr/v4/tool/DOTGenerator.java
+++ b/tool/src/org/antlr/v4/tool/DOTGenerator.java
@ -0,0 +1,290 @@
+package org.antlr.v4.tool;
+
+import org.antlr.v4.Tool;
+import org.antlr.v4.automata.*;
+import org.antlr.v4.misc.Utils;
+import org.stringtemplate.v4.ST;
+import org.stringtemplate.v4.STGroup;
+import org.stringtemplate.v4.STGroupDir;
+
+import java.util.*;
+
+/** The DOT (part of graphviz) generation aspect. */
+public class DOTGenerator {
+	public static final boolean STRIP_NONREDUCED_STATES = false;
+
+	protected String arrowhead="normal";
+	protected String rankdir="LR";
+
+	/** Library of output templates; use <attrname> format */
+    public static STGroup stlib = new STGroupDir("org/antlr/v4/tool/templates/dot");
+
+    /** To prevent infinite recursion when walking state machines, record
+     *  which states we've visited.  Make a new set every time you start
+     *  walking in case you reuse this object.
+     */
+    protected Set<Integer> markedStates = null;
+
+    protected Grammar grammar;
+
+    /** This aspect is associated with a grammar.  The grammar is passed to
+     *  toString(grammar) when edge labels are rendered and is compared with
+     *  a referenced rule's grammar to decide whether a rule edge label gets
+     *  a grammar-name prefix.
+     */
+	public DOTGenerator(Grammar grammar) {
+		this.grammar = grammar;
+	}
+
+    /** Render the NFA starting at startState as DOT text using the "nfa"
+     *  template.  A fresh visited set is created for every call, the graph
+     *  is filled in by walkRuleNFACreatingDOT, and the configured rankdir is
+     *  applied last.  Returns null when startState is null.
+     */
+	public String getDOT(NFAState startState) {
+		if ( startState==null ) return null;
+
+		markedStates = new HashSet<Integer>(); // nothing visited yet
+		ST nfaST = stlib.getInstanceOf("nfa");
+		nfaST.add("startState", Utils.integer(startState.stateNumber));
+		walkRuleNFACreatingDOT(nfaST, startState);
+		nfaST.add("rankdir", rankdir);
+		return nfaST.toString();
+	}
+
+	/** Render the DFA starting at startState as DOT text using the "dfa"
+	 *  template.  The template's useBox attribute mirrors
+	 *  Tool.internalOption_ShowNFAConfigsInDFA.  A fresh visited set is
+	 *  created for every call.  Returns null when startState is null.
+	 */
+	public String getDOT(DFAState startState) {
+		if ( startState==null ) return null;
+
+		markedStates = new HashSet<Integer>(); // nothing visited yet
+		ST dfaST = stlib.getInstanceOf("dfa");
+		dfaST.add("startState", Utils.integer(startState.stateNumber));
+		dfaST.add("useBox", Boolean.valueOf(Tool.internalOption_ShowNFAConfigsInDFA));
+		walkCreatingDFADOT(dfaST, startState);
+		dfaST.add("rankdir", rankdir);
+		return dfaST.toString();
+	}
+
+	/** Return a String containing a DOT description that, when displayed,
+	 *  will show the incoming state machine visually.  All nodes reachable
+     *  from startState will be included.
+    public String getRuleNFADOT(State startState) {
+        // The output DOT graph for visualization
+        ST dot = stlib.getInstanceOf("org/antlr/tool/templates/dot/nfa");
+
+        markedStates = new HashSet();
+        dot.add("startState",
+                Utils.integer(startState.stateNumber));
+        walkRuleNFACreatingDOT(dot, startState);
+        return dot.toString();
+    }
+	 */
+
+    /** Walk the DFA depth-first from s, adding one entry to the template's
+     *  "states" attribute per state reached and one entry to "edges" per
+     *  transition.  States already recorded in markedStates are skipped,
+     *  which is what stops the walk on cyclic graphs.
+     */
+    protected void walkCreatingDFADOT(ST dot,
+									  DFAState s)
+    {
+		Integer stateKey = Utils.integer(s.stateNumber);
+		if ( !markedStates.add(stateKey) ) {
+			return; // add() reports false when the state was seen before
+		}
+
+		// emit the node itself; accept states get their own template
+		String srcLabel = getStateLabel(s);
+		ST nodeST = stlib.getInstanceOf(s.isAcceptState ? "stopstate" : "state");
+		nodeST.add("name", srcLabel);
+		dot.add("states", nodeST);
+
+		// emit every outgoing edge, then descend into its target
+		for (int t = 0; t < s.getNumberOfTransitions(); t++) {
+			Edge out = s.transition(t);
+			ST edgeST = stlib.getInstanceOf("edge");
+			edgeST.add("label", getEdgeLabel(out.toString(grammar)));
+			edgeST.add("src", srcLabel);
+			edgeST.add("target", getStateLabel(out.target));
+			edgeST.add("arrowhead", arrowhead);
+			dot.add("edges", edgeST);
+			walkCreatingDFADOT(dot, out.target);
+		}
+    }
+
+    /** Do a depth-first walk of a rule's NFA and fill a DOT description
+     *  template, adding to its states, edges and decisionRanks attributes.
+     *  We know this is an NFA for a rule so don't traverse edges to other
+     *  rules (a rule reference is drawn as a single edge to its follow
+     *  state) and don't go past the rule stop state.
+     *
+     *  Visited states are tracked in markedStates by state number, which
+     *  is what terminates the walk on cyclic NFAs.
+     */
+    protected void walkRuleNFACreatingDOT(ST dot,
+                                          NFAState s)
+    {
+        // markedStates holds state numbers, not state objects; testing the
+        // NFAState itself can never match, so cycles would recurse forever.
+        if ( !markedStates.add(s.stateNumber) ) {
+            return; // already visited this node
+        }
+
+        // first add this node
+        ST stateST;
+        if ( s instanceof RuleStopState ) {
+            stateST = stlib.getInstanceOf("stopstate");
+        }
+        else {
+            stateST = stlib.getInstanceOf("state");
+        }
+        stateST.add("name", getStateLabel(s));
+        dot.add("states", stateST);
+
+        if ( s instanceof RuleStopState )  {
+            return; // don't go past end of rule node to the follow states
+        }
+
+        // special case: if decision point, then line up the alt start states
+        // unless it's an end of block
+        // NOTE(review): the test below *requires* a BlockEndState, which reads
+        // as the opposite of "unless it's an end of block" -- confirm intent.
+		if ( s instanceof DecisionState ) {
+			GrammarAST n = s.ast;
+			if ( n!=null && s instanceof BlockEndState ) {
+				ST rankST = stlib.getInstanceOf("decision-rank");
+				NFAState alt = s;
+				// follow the chain of second transitions, ranking each state on it
+				while ( alt!=null ) {
+					rankST.add("states", getStateLabel(alt));
+					if ( alt.transition(1) !=null ) {
+						alt = (NFAState)alt.transition(1).target;
+					}
+					else {
+						alt=null;
+					}
+				}
+				dot.add("decisionRanks", rankST);
+			}
+		}
+
+        // make a DOT edge for each transition
+		for (int i = 0; i < s.getNumberOfTransitions(); i++) {
+            Transition edge = (Transition) s.transition(i);
+			ST edgeST;
+            if ( edge instanceof RuleTransition ) {
+                RuleTransition rr = ((RuleTransition)edge);
+                // don't jump to other rules, but display edge to follow node
+                edgeST = stlib.getInstanceOf("edge");
+				if ( rr.rule.g != grammar ) {
+					// rule lives in another grammar; qualify its name
+					edgeST.add("label", "<"+rr.rule.g.name+"."+rr.rule.name+">");
+				}
+				else {
+					edgeST.add("label", "<"+rr.rule.name+">");
+				}
+				edgeST.add("src", getStateLabel(s));
+				edgeST.add("target", getStateLabel(rr.followState));
+				edgeST.add("arrowhead", arrowhead);
+                dot.add("edges", edgeST);
+				walkRuleNFACreatingDOT(dot, rr.followState);
+                continue;
+            }
+			if ( edge instanceof ActionTransition ) {
+				edgeST = stlib.getInstanceOf("action-edge");
+			}
+			else if ( edge.isEpsilon() ) {
+				edgeST = stlib.getInstanceOf("epsilon-edge");
+			}
+			else {
+				edgeST = stlib.getInstanceOf("edge");
+			}
+			edgeST.add("label", getEdgeLabel(edge.toString(grammar)));
+            edgeST.add("src", getStateLabel(s));
+			edgeST.add("target", getStateLabel(edge.target));
+			edgeST.add("arrowhead", arrowhead);
+            dot.add("edges", edgeST);
+            walkRuleNFACreatingDOT(dot, edge.target); // keep walkin'
+        }
+    }
+
+    /** Escape an edge label so it prints properly inside a double-quoted
+	 *  DOT string: backslashes and double quotes are backslash-escaped,
+	 *  each newline becomes two backslashes followed by 'n', and carriage
+	 *  returns are dropped.
+	 */
+    protected String getEdgeLabel(String label) {
+		// {search, replacement} pairs, applied in order; backslashes go first
+		// so the escapes introduced by later pairs are not doubled again
+		String[][] rewrites = {
+			{"\\", "\\\\"},
+			{"\"", "\\\""},
+			{"\n", "\\\\n"},
+			{"\r", ""},
+		};
+		for (String[] rw : rewrites) {
+			label = Utils.replace(label, rw[0], rw[1]);
+		}
+        return label;
+    }
+
+	/** Build the double-quoted DOT node name for an NFA state: the state
+	 *  number, followed by ",d=" and the decision number when the state is
+	 *  a DecisionState.  A null state yields the unquoted text null.
+	 */
+	protected String getStateLabel(NFAState s) {
+		if ( s==null ) return "null";
+		StringBuilder label = new StringBuilder();
+		label.append('"').append(s.stateNumber);
+		if ( s instanceof DecisionState ) {
+			label.append(",d=").append(((DecisionState)s).decision);
+		}
+		return label.append('"').toString();
+	}
+
+	/** Build the double-quoted DOT node name for a DFA state: 's' plus the
+	 *  state number; when Tool.internalOption_ShowNFAConfigsInDFA is set,
+	 *  also the state's NFA configurations grouped by alt in ascending alt
+	 *  order; and "=>" plus the predicted alt for accept states.  A null
+	 *  state yields the unquoted text null.
+	 */
+	protected String getStateLabel(DFAState s) {
+		if ( s==null ) return "null";
+		StringBuilder label = new StringBuilder(250);
+		label.append('s').append(s.stateNumber);
+		if ( Tool.internalOption_ShowNFAConfigsInDFA ) {
+			Set<Integer> alts = s.getAltSet();
+			if ( alts!=null ) {
+				label.append("\\n");
+				List<Integer> sortedAlts = new ArrayList<Integer>(alts);
+				Collections.sort(sortedAlts);
+				Set<?> configurations = s.nfaConfigs;
+				boolean firstAlt = true;
+				for (Integer altI : sortedAlts) {
+					int alt = altI.intValue();
+					if ( !firstAlt ) {
+						label.append("\\n"); // each alt after the first starts a new line
+					}
+					firstAlt = false;
+					label.append("alt").append(alt).append(':');
+					// collect the configs belonging to just this alt
+					List<NFAConfig> configsInAlt = new ArrayList<NFAConfig>();
+					for (Object o : configurations) {
+						NFAConfig c = (NFAConfig)o;
+						if ( c.alt==alt ) configsInAlt.add(c);
+					}
+					int total = configsInAlt.size();
+					for (int i = 0; i < total; i++) {
+						label.append(configsInAlt.get(i).toString(false));
+						if ( i+1 < total ) {
+							label.append(", ");
+						}
+						// wrap after every fifth config unless only a few remain
+						if ( (i+1)%5==0 && (total-i)>3 ) {
+							label.append("\\n");
+						}
+					}
+				}
+			}
+		}
+		if ( s.isAcceptState ) {
+			label.append("=>").append(s.getUniquelyPredictedAlt());
+		}
+		return '"'+label.toString()+'"';
+	}
+}
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@ -130,8 +130,8 @@ public class Grammar implements AttributeResolver {
 		if ( this.ast==null || this.ast.hasErrors ) return;

 		Tool antlr = new Tool();
-		SemanticPipeline sem = new SemanticPipeline();
-		sem.process(this);
+		SemanticPipeline sem = new SemanticPipeline(this);
+		sem.process();
 		if ( getImportedGrammars()!=null ) { // process imported grammars (if any)
 			for (Grammar imp : getImportedGrammars()) {
 				antlr.process(imp);
--- a/tool/test/org/antlr/v4/test/TestNFAConstruction.java
+++ b/tool/test/org/antlr/v4/test/TestNFAConstruction.java
@ -916,7 +916,7 @@ public class TestNFAConstruction extends BaseTest {
 		NFA nfa = f.createNFA();
 		Rule r = g.getRule(ruleName);
 		NFAState startState = nfa.ruleToStartState.get(r);
-		FASerializer serializer = new FASerializer(g, startState);
+		NFASerializer serializer = new NFASerializer(g, startState);
 		String result = serializer.toString();

 		//System.out.print(result);