have pieces of DFA conversion working

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6742]
2010-03-07 16:16:10 -08:00 · 2010-03-07 16:16:10 -08:00 · 665b64fee0
parent 6ad8ff2a46
commit 665b64fee0
19 changed files with 468 additions and 39 deletions
--- a/tool/resources/org/antlr/v4/tool/templates/dot/action-edge.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/action-edge.st
@ -1 +1,3 @@
+action-edge() ::= <<
 <src> -> <target> [fontsize=11, fontname="Courier", arrowsize=.7, label = "<label>"<if(arrowhead)>, arrowhead = <arrowhead><endif>];
+>>
--- a/tool/resources/org/antlr/v4/tool/templates/dot/decision-rank.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/decision-rank.st
@ -1 +1,4 @@
+decision-rank() ::= <<
 {rank=same; rankdir=TB; <states; separator="; ">}
+>>
+
--- a/tool/resources/org/antlr/v4/tool/templates/dot/dfa.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/dfa.st
@ -1,7 +1,8 @@
+dfa() ::= <<
 digraph NFA {
 <if(rankdir)>rankdir=<rankdir>;<endif>
 <decisionRanks; separator="\n">
 <states; separator="\n">
 <edges; separator="\n">
 }
-
+>>
--- a/tool/resources/org/antlr/v4/tool/templates/dot/edge.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/edge.st
@ -1 +1,4 @@
+edge(src,target,label,arrowhead) ::= <<
 <src> -> <target> [fontsize=11, fontname="Courier", arrowsize=.7, label = "<label>"<if(arrowhead)>, arrowhead = <arrowhead><endif>];
+>>
+
--- a/tool/resources/org/antlr/v4/tool/templates/dot/epsilon-edge.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/epsilon-edge.st
@ -1 +1,3 @@
+epsilon-edge(src,label,target,arrowhead) ::= <<
 <src> -> <target> [fontname="Times-Italic", label = "e"];
+>>
--- a/tool/resources/org/antlr/v4/tool/templates/dot/nfa.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/nfa.st
@ -1,6 +1,8 @@
+nfa(startState, states, edges, rankdir, decisionRanks) ::= <<
 digraph NFA {
 rankdir=LR;
 <decisionRanks; separator="\n">
 <states; separator="\n">
 <edges; separator="\n">
 }
+>>
--- a/tool/resources/org/antlr/v4/tool/templates/dot/state.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/state.st
@ -1 +1,3 @@
+state(state, useBox, name) ::= <<
 node [fontsize=11, shape = <if(useBox)>box<else>circle, fixedsize=true, width=.4<endif>]; <name>
+>>
--- a/tool/resources/org/antlr/v4/tool/templates/dot/stopstate.st
+++ b/tool/resources/org/antlr/v4/tool/templates/dot/stopstate.st
@ -1 +1,3 @@
+stopstate(name,useBox) ::= <<
 node [fontsize=11, shape = <if(useBox)>polygon,sides=4,peripheries=2<else>doublecircle, fixedsize=true, width=.6<endif>]; <name>
+>>
--- a/tool/src/org/antlr/v4/Tool.java
+++ b/tool/src/org/antlr/v4/Tool.java
@ -1,5 +1,6 @@
 package org.antlr.v4;

+import org.antlr.codegen.CodeGenerator;
 import org.antlr.runtime.*;
 import org.antlr.runtime.tree.TreeWizard;
 import org.antlr.v4.analysis.AnalysisPipeline;
@ -12,8 +13,7 @@ import org.antlr.v4.parse.GrammarASTAdaptor;
 import org.antlr.v4.semantics.SemanticPipeline;
 import org.antlr.v4.tool.*;

-import java.io.File;
-import java.io.IOException;
+import java.io.*;
 import java.util.*;

 public class Tool {
@ -371,6 +371,9 @@ public class Tool {
 		NFAFactory factory = new ParserNFAFactory(g);
 		if ( g.getType()==ANTLRParser.LEXER ) factory = new LexerNFAFactory(g);
 		g.nfa = factory.createNFA();
+		
+		if ( generate_NFA_dot ) generateNFAs(g);
+		

 		// PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs
 		AnalysisPipeline anal = new AnalysisPipeline(g);
@ -480,6 +483,151 @@ public class Tool {
        return lexerAST;
    }

+	protected void generateNFAs(Grammar g) {
+		DOTGenerator dotGenerator = new DOTGenerator(g);
+		List<Grammar> grammars = new ArrayList<Grammar>();
+		grammars.add(g);
+		List<Grammar> imported = g.getAllImportedGrammars();
+		if ( imported!=null ) grammars.addAll(imported);
+		for (Grammar ig : grammars) {
+			for (Rule r : ig.rules.values()) {
+				try {
+					String dot = dotGenerator.getDOT(g.nfa.ruleToStartState.get(r));
+					if (dot != null) {
+						writeDOTFile(g, r, dot);
+					}
+				} catch (IOException ioe) {
+					ErrorManager.toolError(ErrorType.CANNOT_WRITE_FILE, ioe);
+				}
+			}
+		}
+	}
+
+	protected void writeDOTFile(Grammar g, Rule r, String dot) throws IOException {
+		writeDOTFile(g, r.g.name + "." + r.name, dot);
+	}
+
+	protected void writeDOTFile(Grammar g, String name, String dot) throws IOException {
+		Writer fw = getOutputFile(g, name + ".dot");
+		fw.write(dot);
+		fw.close();
+	}
+
+	/** This method is used by all code generators to create new output
+	 *  files. If the outputDir set by -o is not present it will be created.
+	 *  The final filename is sensitive to the output directory and
+	 *  the directory where the grammar file was found.  If -o is /tmp
+	 *  and the original grammar file was foo/t.g then output files
+	 *  go in /tmp/foo.
+	 *
+	 *  The output dir -o spec takes precedence if it's absolute.
+	 *  E.g., if the grammar file dir is absolute the output dir is given
+	 *  precendence. "-o /tmp /usr/lib/t.g" results in "/tmp/T.java" as
+	 *  output (assuming t.g holds T.java).
+	 *
+	 *  If no -o is specified, then just write to the directory where the
+	 *  grammar file was found.
+	 *
+	 *  If outputDirectory==null then write a String.
+	 */
+	public Writer getOutputFile(Grammar g, String fileName) throws IOException {
+		if (outputDirectory == null) {
+			return new StringWriter();
+		}
+		// output directory is a function of where the grammar file lives
+		// for subdir/T.g, you get subdir here.  Well, depends on -o etc...
+		// But, if this is a .tokens file, then we force the output to
+		// be the base output directory (or current directory if there is not a -o)
+		//
+		File outputDir;
+		if (fileName.endsWith(CodeGenerator.VOCAB_FILE_EXTENSION)) {
+			if (haveOutputDir) {
+				outputDir = new File(outputDirectory);
+			}
+			else {
+				outputDir = new File(".");
+			}
+		}
+		else {
+			outputDir = getOutputDirectory(g.fileName);
+		}
+		File outputFile = new File(outputDir, fileName);
+
+		if (!outputDir.exists()) {
+			outputDir.mkdirs();
+		}
+		FileWriter fw = new FileWriter(outputFile);
+		return new BufferedWriter(fw);
+	}
+
+	/**
+	 * Return the location where ANTLR will generate output files for a given file. This is a
+	 * base directory and output files will be relative to here in some cases
+	 * such as when -o option is used and input files are given relative
+	 * to the input directory.
+	 *
+	 * @param fileNameWithPath path to input source
+	 * @return
+	 */
+	public File getOutputDirectory(String fileNameWithPath) {
+
+		File outputDir = new File(outputDirectory);
+		String fileDirectory;
+
+		// Some files are given to us without a PATH but should should
+		// still be written to the output directory in the relative path of
+		// the output directory. The file directory is either the set of sub directories
+		// or just or the relative path recorded for the parent grammar. This means
+		// that when we write the tokens files, or the .java files for imported grammars
+		// taht we will write them in the correct place.
+		//
+		if (fileNameWithPath.lastIndexOf(File.separatorChar) == -1) {
+
+			// No path is included in the file name, so make the file
+			// directory the same as the parent grammar (which might sitll be just ""
+			// but when it is not, we will write the file in the correct place.
+			//
+			fileDirectory = grammarOutputDirectory;
+
+		}
+		else {
+			fileDirectory = fileNameWithPath.substring(0, fileNameWithPath.lastIndexOf(File.separatorChar));
+		}
+		if ( fileDirectory == null ) {
+			fileDirectory = ".";
+		}
+		if (haveOutputDir) {
+			// -o /tmp /var/lib/t.g => /tmp/T.java
+			// -o subdir/output /usr/lib/t.g => subdir/output/T.java
+			// -o . /usr/lib/t.g => ./T.java
+			if ((fileDirectory != null && !forceRelativeOutput) &&
+				(new File(fileDirectory).isAbsolute() ||
+				 fileDirectory.startsWith("~")) || // isAbsolute doesn't count this :(
+				forceAllFilesToOutputDir) {
+				// somebody set the dir, it takes precendence; write new file there
+				outputDir = new File(outputDirectory);
+			}
+			else {
+				// -o /tmp subdir/t.g => /tmp/subdir/t.g
+				if (fileDirectory != null) {
+					outputDir = new File(outputDirectory, fileDirectory);
+				}
+				else {
+					outputDir = new File(outputDirectory);
+				}
+			}
+		}
+		else {
+			// they didn't specify a -o dir so just write to location
+			// where grammar is, absolute or relative, this will only happen
+			// with command line invocation as build tools will always
+			// supply an output directory.
+			//
+			outputDir = new File(fileDirectory);
+		}
+		return outputDir;
+	}	
+	
    private static void version() {
        ErrorManager.info("ANTLR Parser Generator  Version " + new Tool().VERSION);
    }
--- a/tool/src/org/antlr/v4/automata/AtomTransition.java
+++ b/tool/src/org/antlr/v4/automata/AtomTransition.java
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
 import org.antlr.v4.misc.IntervalSet;
 import org.antlr.v4.tool.Grammar;

-/** */
+/** TODO: make all transitions sets? */
 public class AtomTransition extends Transition {
 	/** The token type or character value; or, signifies special label. */
 	protected int label;
@ -13,6 +13,8 @@ public class AtomTransition extends Transition {
 		this.target = target;
 	}

+	public IntervalSet label() { return IntervalSet.of(label); }
+
 	public int hashCode() { return label; }

 	public boolean equals(Object o) {
--- a/tool/src/org/antlr/v4/automata/DFASerializer.java
+++ b/tool/src/org/antlr/v4/automata/DFASerializer.java
@ -37,7 +37,7 @@ public class DFASerializer {
 			marked.add(s);
 			for (int i=0; i<n; i++) {
 				Edge t = s.transition(i);
-				buf.append("-"+t.toString()+"->"+ getStateString(t.target)+'\n');
+				buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
 			}
 		}
 		return buf.toString();
--- a/tool/src/org/antlr/v4/automata/DFAState.java
+++ b/tool/src/org/antlr/v4/automata/DFAState.java
@ -1,5 +1,6 @@
 package org.antlr.v4.automata;

+import org.antlr.analysis.NFA;
 import org.antlr.v4.misc.Utils;
 import org.stringtemplate.v4.misc.MultiMap;

@ -195,7 +196,7 @@ public class DFAState {
 		if ( cachedUniquelyPredicatedAlt!=NFA.INVALID_ALT_NUMBER ) {
 			return cachedUniquelyPredicatedAlt;
 		}
-		int alt = org.antlr.analysis.NFA.INVALID_ALT_NUMBER;
+		int alt = NFA.INVALID_ALT_NUMBER;
 		for (NFAConfig c : nfaConfigs) {
 			if ( alt== NFA.INVALID_ALT_NUMBER ) {
 				alt = c.alt; // found first nonresolved alt
@ -208,6 +209,23 @@ public class DFAState {
 		return alt;
 	}

+	/** Return the uniquely mentioned alt from the NFA configurations;
+	 *  Ignore the resolved bit etc...  Return INVALID_ALT_NUMBER
+	 *  if there is more than one alt mentioned.
+	 */
+	public int getUniqueAlt() {
+		int alt = NFA.INVALID_ALT_NUMBER;
+		for (NFAConfig c : nfaConfigs) {
+			if ( alt== NFA.INVALID_ALT_NUMBER ) {
+				alt = c.alt; // found first alt
+			}
+			else if ( c.alt!=alt ) {
+				return NFA.INVALID_ALT_NUMBER;
+			}
+		}
+		return alt;
+	}
+
 	/** Get the set of all alts mentioned by all NFA configurations in this
 	 *  DFA state.
 	 */
@ -220,7 +238,13 @@ public class DFAState {
 		return alts;
 	}

-	
+	public Set<NFAState> getUniqueNFAStates() {
+		Set<NFAState> alts = new HashSet<NFAState>();
+		for (NFAConfig c : nfaConfigs) alts.add(c.state);
+		if ( alts.size()==0 ) return null;
+		return alts;
+	}
+
 	public int getNumberOfTransitions() { return edges.size(); }

 	public void addTransition(Edge e) { edges.add(e); }
--- a/tool/src/org/antlr/v4/automata/Edge.java
+++ b/tool/src/org/antlr/v4/automata/Edge.java
@ -5,15 +5,17 @@ import org.antlr.v4.tool.Grammar;

 /** A DFA edge (NFA edges are called transitions) */
 public class Edge {
-	public int atom = Label.INVALID;
-	public IntervalSet set;
-	
+	public IntervalSet label;
 	public DFAState target;

-	public Edge(DFAState target) { this.target = target; }
+	public Edge(DFAState target, IntervalSet label) {
+		this.target = target;
+		this.label = label;
+	}
+
+	public String toString() { return label.toString(); }	

 	public String toString(Grammar g) {
-		if ( set==null ) return g.getTokenDisplayName(atom);
-		else return set.toString(g);
+		return label.toString(g);
 	}
 }
--- a/tool/src/org/antlr/v4/automata/Label.java
+++ b/tool/src/org/antlr/v4/automata/Label.java
@ -1,6 +1,7 @@
 package org.antlr.v4.automata;

 import org.antlr.runtime.Token;
+import org.antlr.v4.misc.IntervalSet;

 /** A state machine transition label.  A label can be either a simple
 *  label such as a token or character.  A label can be a set of char or
@ -73,6 +74,10 @@ public abstract class Label implements /*Comparable, */ Cloneable {
 	 */
 	public static final int EOR_TOKEN_TYPE = Token.EOR_TOKEN_TYPE;

+	public int atom = Label.INVALID;
+	public IntervalSet set;
+
+
 	public int compareTo(Object o) {
 		return 0; // TODO: impl
 	}
--- a/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java
+++ b/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java
@ -1,10 +1,9 @@
 package org.antlr.v4.automata;

+import org.antlr.v4.misc.IntervalSet;
 import org.antlr.v4.tool.Grammar;

-import java.util.ArrayList;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.*;

 /** Code that embodies the NFA conversion to DFA. A new object is needed
 *  per DFA (also required for thread safety if multiple conversions
@ -21,6 +20,12 @@ public class NFAToDFAConverter {
 	/** A list of DFA states we still need to process during NFA conversion */
 	List<DFAState> work = new LinkedList<DFAState>();

+	/** Used to prevent the closure operation from looping to itself and
+     *  hence looping forever.  Sensitive to the NFA state, the alt, and
+     *  the stack context.
+     */
+	Set<NFAConfig> closureBusy;
+
 	public static boolean debug = false;
 	
 	public NFAToDFAConverter(Grammar g, DecisionState nfaStartState) {
@ -36,12 +41,94 @@ public class NFAToDFAConverter {

 		// while more DFA states to check, process them
 		while ( work.size()>0 ) {
-
+			DFAState d = work.get(0);
+			reach(d);
+			work.remove(0); // we're done with this DFA state
 		}
 		
 		return dfa;
 	}

+	/** From this node, add a d--a-->t transition for all
+	 *  labels 'a' where t is a DFA node created
+	 *  from the set of NFA states reachable from any NFA
+	 *  state in DFA state d.
+	 */
+	void reach(DFAState d) {
+		OrderedHashSet<IntervalSet> labels = getReachableLabels(d);
+
+		for (IntervalSet label : labels) {
+			DFAState newd = reach(d, label);
+			if ( debug ) {
+				System.out.println("DFA state after reach "+label+" "+d+"-" +
+								   label.toString(g)+"->"+newd);
+			}
+			if ( newd==null ) {
+				// nothing was reached by label due to conflict resolution
+				// EOT also seems to be in here occasionally probably due
+				// to an end-of-rule state seeing it even though we'll pop
+				// an invoking state off the state; don't bother to conflict
+				// as this labels set is a covering approximation only.
+				continue;
+			}
+			if ( newd.getUniqueAlt()== NFA.INVALID_ALT_NUMBER ) {
+				// Only compute closure if a unique alt number is not known.
+				// If a unique alternative is mentioned among all NFA
+				// configurations then there is no possibility of needing to look
+				// beyond this state; also no possibility of a nondeterminism.
+				// This optimization May 22, 2006 just dropped -Xint time
+				// for analysis of Java grammar from 11.5s to 2s!  Wow.
+				closure(newd);  // add any NFA states reachable via epsilon
+			}
+			
+			// add if not in DFA yet and then make d-label->t
+			DFAState targetState = newd;
+			DFAState existingState = dfa.addState(newd);
+			if ( newd != existingState ) {
+				// already there...use/return the existing DFA state.
+				targetState = existingState;
+			}
+
+			d.addTransition(new Edge(targetState, label));
+
+//
+//			numberOfEdgesEmanating +=
+//				addTransition(d, label, targetState, targetToLabelMap);
+		}
+	}
+
+	/** Given the set of NFA states in DFA state d, find all NFA states
+	 *  reachable traversing label arcs.  By definition, there can be
+	 *  only one DFA state reachable by a single label from DFA state d so we must
+	 *  find and merge all NFA states reachable via label.  Return a new
+	 *  DFAState that has all of those NFA states with their context.
+	 *
+	 *  Because we cannot jump to another rule nor fall off the end of a rule
+	 *  via a non-epsilon transition, NFA states reachable from d have the
+	 *  same configuration as the NFA state in d.  So if NFA state 7 in d's
+	 *  configurations can reach NFA state 13 then 13 will be added to the
+	 *  new DFAState (labelDFATarget) with the same configuration as state
+	 *  7 had.
+	 */
+	public DFAState reach(DFAState d, IntervalSet label) {
+		//System.out.println("reach "+label.toString(dfa.nfa.grammar)+" from "+d.stateNumber);
+		DFAState labelTarget = dfa.newState();
+
+		for (NFAConfig c : d.nfaConfigs) {
+			int n = c.state.getNumberOfTransitions();
+			for (int i=0; i<n; i++) {               // for each transition
+				Transition t = c.state.transition(i);
+				// found a transition with label; does it collide with label?
+				if ( !t.isEpsilon() && !t.label().and(label).isNil() ) {
+					// add NFA target to (potentially) new DFA state
+					labelTarget.addNFAConfig(t.target, c.alt, c.context);
+				}
+			}
+		}
+		
+		return labelTarget;
+	}
+
 	/** From this first NFA state of a decision, create a DFA.
 	 *  Walk each alt in decision and compute closure from the start of that
 	 *  rule, making sure that the closure does not include other alts within
@ -57,10 +144,12 @@ public class NFAToDFAConverter {
 		for (int altNum=1; altNum<=dfa.nAlts; altNum++) {
 			Transition t = nfaStartState.transition(altNum-1);
 			NFAState altStart = t.target;
-			d.addNFAConfig(altStart, altNum+1, null);
+			d.addNFAConfig(altStart, altNum, null);

 		}

+		System.out.println("before closure start d="+d);
+
 		closure(d);

 		return d;
@ -75,12 +164,16 @@ public class NFAToDFAConverter {
 			System.out.println("closure("+d+")");
 		}

+		closureBusy = new HashSet<NFAConfig>();
+		
 		List<NFAConfig> configs = new ArrayList<NFAConfig>();
 		for (NFAConfig c : d.nfaConfigs) {
 			closure(c.state, c.alt, c.context, configs);
 		}
 		d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d

+		closureBusy = null; // wack all that memory used during closure
+
 		System.out.println("after closure d="+d);
 	}

@ -92,15 +185,159 @@ public class NFAToDFAConverter {
 		NFAConfig proposedNFAConfig =
 			new NFAConfig(s, altNum, context);

+		if ( closureBusy.contains(proposedNFAConfig) ) {
+			return;
+		}
+
+		closureBusy.add(proposedNFAConfig);
+
 		// p itself is always in closure
 		configs.add(proposedNFAConfig);

+		if ( s instanceof RuleStopState ) {
+			// do follow
+			int n = s.getNumberOfTransitions();
+			for (int i=0; i<n; i++) {
+				Transition t = s.transition(i);
+			}
+			return;
+		}
+
 		int n = s.getNumberOfTransitions();
 		for (int i=0; i<n; i++) {
 			Transition t = s.transition(i);
-			if ( t.isEpsilon() ) {
+			if ( t instanceof RuleTransition ) {
+				NFAState newContext = context;
+				if ( context==null ) newContext = s; // push new context
+				closure(t.target, altNum, newContext, configs);
+			}
+			else if ( t.isEpsilon() ) {
 				closure(t.target, altNum, context, configs);
 			}
 		}
 	}
+
+	public OrderedHashSet<IntervalSet> getReachableLabels(DFAState d) {
+		OrderedHashSet<IntervalSet> reachableLabels = new OrderedHashSet<IntervalSet>();
+		for (NFAState s : d.getUniqueNFAStates()) { // for each state
+			int n = s.getNumberOfTransitions();
+			for (int i=0; i<n; i++) {               // for each transition
+				Transition t = s.transition(i);
+				IntervalSet label = null;
+				if ( t instanceof AtomTransition ) {
+					label = IntervalSet.of(((AtomTransition)t).label);
+				}
+				else if ( t instanceof SetTransition ) {
+					label = ((SetTransition)t).label;
+				}
+				if ( label!=null ) {
+					addReachableLabel(reachableLabels, label);
+				}
+			}
+		}
+		System.out.println("reachable labels for "+d+"="+reachableLabels);
+		return reachableLabels;
+	}
+
+	/** Add label uniquely and disjointly; intersection with
+     *  another set or int/char forces breaking up the set(s).
+     *
+     *  Example, if reachable list of labels is [a..z, {k,9}, 0..9],
+     *  the disjoint list will be [{a..j,l..z}, k, 9, 0..8].
+     *
+     *  As we add NFA configurations to a DFA state, we might as well track
+     *  the set of all possible transition labels to make the DFA conversion
+     *  more efficient.  W/o the reachable labels, we'd need to check the
+     *  whole vocabulary space (could be 0..\uFFFE)!  The problem is that
+     *  labels can be sets, which may overlap with int labels or other sets.
+     *  As we need a deterministic set of transitions from any
+     *  state in the DFA, we must make the reachable labels set disjoint.
+     *  This operation amounts to finding the character classes for this
+     *  DFA state whereas with tools like flex, that need to generate a
+     *  homogeneous DFA, must compute char classes across all states.
+     *  We are going to generate DFAs with heterogeneous states so we
+     *  only care that the set of transitions out of a single state is
+     *  unique. :)
+     *
+     *  The idea for adding a new set, t, is to look for overlap with the
+     *  elements of existing list s.  Upon overlap, replace
+     *  existing set s[i] with two new disjoint sets, s[i]-t and s[i]&t.
+     *  (if s[i]-t is nil, don't add).  The remainder is t-s[i], which is
+     *  what you want to add to the set minus what was already there.  The
+     *  remainder must then be compared against the i+1..n elements in s
+     *  looking for another collision.  Each collision results in a smaller
+     *  and smaller remainder.  Stop when you run out of s elements or
+     *  remainder goes to nil.  If remainder is non nil when you run out of
+     *  s elements, then add remainder to the end.
+     */
+    protected void addReachableLabel(OrderedHashSet<IntervalSet> reachableLabels,
+									 IntervalSet label)
+	{
+		/*
+		System.out.println("addReachableLabel to state "+dfa.decisionNumber+"."+stateNumber+": "+label.getSet().toString(dfa.nfa.grammar));
+		System.out.println("start of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
+				"reachableLabels="+reachableLabels.toString());
+				*/
+		if ( reachableLabels.contains(label) ) { // exact label present
+            return;
+        }
+        IntervalSet remainder = label; // remainder starts out as whole set to add
+        int n = reachableLabels.size(); // only look at initial elements
+        // walk the existing list looking for the collision
+        for (int i=0; i<n; i++) {
+			IntervalSet rl = reachableLabels.get(i);
+            /*
+			System.out.println("comparing ["+i+"]: "+label.toString(dfa.nfa.grammar)+" & "+
+                    rl.toString(dfa.nfa.grammar)+"="+
+                    intersection.toString(dfa.nfa.grammar));
+            */
+			IntervalSet intersection = (IntervalSet)label.and(rl);
+			if ( intersection.isNil() ) {
+                continue;
+            }
+			//System.out.println(label+" collides with "+rl);
+
+			// For any (s_i, t) with s_i&t!=nil replace with (s_i-t, s_i&t)
+            // (ignoring s_i-t if nil; don't put in list)
+
+            // Replace existing s_i with intersection since we
+            // know that will always be a non nil character class
+			IntervalSet s_i = rl;
+            reachableLabels.set(i, intersection);
+
+            // Compute s_i-t to see what is in current set and not in incoming
+            IntervalSet existingMinusNewElements = (IntervalSet)s_i.subtract(label);
+			//System.out.println(s_i+"-"+t+"="+existingMinusNewElements);
+            if ( !existingMinusNewElements.isNil() ) {
+                // found a new character class, add to the end (doesn't affect
+                // outer loop duration due to n computation a priori.
+                reachableLabels.add(existingMinusNewElements);
+            }
+
+			/*
+            System.out.println("after collision, " +
+                    "reachableLabels="+reachableLabels.toString());
+					*/
+
+            // anything left to add to the reachableLabels?
+            remainder = (IntervalSet)label.subtract(s_i);
+            if ( remainder.isNil() ) {
+                break; // nothing left to add to set.  done!
+            }
+
+            label = remainder;
+        }
+        if ( !remainder.isNil() ) {
+			/*
+			System.out.println("before add remainder to state "+dfa.decisionNumber+"."+stateNumber+": " +
+					"reachableLabels="+reachableLabels.toString());
+			System.out.println("remainder state "+dfa.decisionNumber+"."+stateNumber+": "+remainder.toString(dfa.nfa.grammar));
+            */
+            reachableLabels.add(remainder);
+        }
+		/*
+		System.out.println("#END of add to state "+dfa.decisionNumber+"."+stateNumber+": " +
+				"reachableLabels="+reachableLabels.toString());
+				*/
+    }
 }
--- a/tool/src/org/antlr/v4/automata/RangeTransition.java
+++ b/tool/src/org/antlr/v4/automata/RangeTransition.java
@ -1,6 +1,7 @@
 package org.antlr.v4.automata;

 import org.antlr.v4.codegen.Target;
+import org.antlr.v4.misc.IntervalSet;

 public class RangeTransition extends Transition {
 	public int from;
@ -10,10 +11,14 @@ public class RangeTransition extends Transition {
 		this.from = from;
 		this.to = to;
 	}
+
 	public int compareTo(Object o) {
 		return 0;
 	}

+	@Override
+	public IntervalSet label() { return IntervalSet.of(from,to); }
+
 	@Override
 	public String toString() {
 		return Target.getANTLRCharLiteralForChar(from)+".."+
--- a/tool/src/org/antlr/v4/automata/SetTransition.java
+++ b/tool/src/org/antlr/v4/automata/SetTransition.java
@ -1,14 +1,13 @@
 package org.antlr.v4.automata;

-import org.antlr.v4.misc.IntSet;
 import org.antlr.v4.misc.IntervalSet;

 /** A label containing a set of values */
 public class SetTransition extends Transition {
 	/** A set of token types or character codes if label==SET */
-	protected IntSet label;
+	protected IntervalSet label;

-	public SetTransition(IntSet label) {
+	public SetTransition(IntervalSet label) {
 		if ( label==null ) {
 			this.label = IntervalSet.of(Label.INVALID);
 			return;
@ -16,6 +15,8 @@ public class SetTransition extends Transition {
 		this.label = label;
 	}

+	public IntervalSet label() { return label; }
+
 	public int compareTo(Object o) {
 		return 0;
 	}
--- a/tool/src/org/antlr/v4/automata/Transition.java
+++ b/tool/src/org/antlr/v4/automata/Transition.java
@ -1,5 +1,6 @@
 package org.antlr.v4.automata;

+import org.antlr.v4.misc.IntervalSet;
 import org.antlr.v4.tool.Grammar;

 /** An NFA transition between any two NFA states.  Subclasses define
@ -25,5 +26,7 @@ public abstract class Transition implements Comparable {
 	/** Are we epsilon, action, sempred? */
 	public boolean isEpsilon() { return false; }

+	public IntervalSet label() { return null; }
+
 	public String toString(Grammar g) { return toString(); }	
 }
--- a/tool/src/org/antlr/v4/tool/DOTGenerator.java
+++ b/tool/src/org/antlr/v4/tool/DOTGenerator.java
@ -48,7 +48,7 @@ public class DOTGenerator {
 				Utils.integer(startState.stateNumber));
 		walkRuleNFACreatingDOT(dot, startState);
 		dot.add("rankdir", rankdir);
-		return dot.toString();
+		return dot.render();
 	}

 	public String getDOT(DFAState startState) {
@ -65,24 +65,9 @@ public class DOTGenerator {
 				Boolean.valueOf(Tool.internalOption_ShowNFAConfigsInDFA));
 		walkCreatingDFADOT(dot, (DFAState)startState);
 		dot.add("rankdir", rankdir);
-		return dot.toString();
+		return dot.render();
 	}

-	/** Return a String containing a DOT description that, when displayed,
-	 *  will show the incoming state machine visually.  All nodes reachable
-     *  from startState will be included.
-    public String getRuleNFADOT(State startState) {
-        // The output DOT graph for visualization
-        ST dot = stlib.getInstanceOf("org/antlr/tool/templates/dot/nfa");
-
-        markedStates = new HashSet();
-        dot.add("startState",
-                Utils.integer(startState.stateNumber));
-        walkRuleNFACreatingDOT(dot, startState);
-        return dot.toString();
-    }
-	 */
-
    /** Do a depth-first walk of the state machine graph and
     *  fill a DOT description template.  Keep filling the
     *  states and edges attributes.