reorg, de-emphasis label fields

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6725]
2010-02-24 11:51:32 -08:00 · 2010-02-24 11:51:32 -08:00 · cbb8a6917a
parent 80ea7cf0db
commit cbb8a6917a
13 changed files with 169 additions and 30 deletions
--- a/tool/src/org/antlr/v4/automata/ActionTransition.java
+++ b/tool/src/org/antlr/v4/automata/ActionTransition.java
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
 import org.antlr.v4.tool.Grammar;
 import org.antlr.v4.tool.GrammarAST;

-public class ActionTransition extends Transition {
+public class ActionTransition extends NFATransition {
 	public GrammarAST actionAST;

 	public ActionTransition(GrammarAST actionAST) {
--- a/tool/src/org/antlr/v4/automata/AtomTransition.java
+++ b/tool/src/org/antlr/v4/automata/AtomTransition.java
@ -3,7 +3,7 @@ package org.antlr.v4.automata;
 import org.antlr.v4.misc.IntervalSet;

 /** */
-public class AtomTransition extends Transition {
+public class AtomTransition extends NFATransition {
 	/** The token type or character value; or, signifies special label. */
 	protected int label;

--- a/tool/src/org/antlr/v4/automata/DFAState.java
+++ b/tool/src/org/antlr/v4/automata/DFAState.java
@ -0,0 +1,45 @@
+package org.antlr.v4.automata;
+
+
+/** A DFA state represents a set of possible NFA configurations.
+ *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
+ *  to keep track of all possible states the NFA can be in after
+ *  reading each input symbol.  That is to say, after reading
+ *  input a1a2..an, the DFA is in a state that represents the
+ *  subset T of the states of the NFA that are reachable from the
+ *  NFA's start state along some path labeled a1a2..an."
+ *  In conventional NFA->DFA conversion, therefore, the subset T
+ *  would be a bitset representing the set of states the
+ *  NFA could be in.  We need to track the alt predicted by each
+ *  state as well, however.  More importantly, we need to maintain
+ *  a stack of states, tracking the closure operations as they
+ *  jump from rule to rule, emulating rule invocations (method calls).
+ *  Recall that NFAs do not normally have a stack like a pushdown-machine
+ *  so I have to add one to simulate the proper lookahead sequences for
+ *  the underlying LL grammar from which the NFA was derived.
+ *
+ *  I use a list of NFAConfiguration objects.  An NFAConfiguration
+ *  is both a state (as in normal conversion) and an NFAContext describing
+ *  the chain of rules (if any) followed to arrive at that state.  There
+ *  is also the semantic context, which is the "set" of predicates found
+ *  on the path to this configuration.
+ *
+ *  A DFA state may have multiple references to a particular NFA state,
+ *  but with different NFAContexts (with same or different alts),
+ *  meaning that state was reached via a different set of rule invocations.
+ */
+public class DFAState extends State { // NOTE(review): all three overrides below are empty stubs in this revision
+	@Override
+	public int getNumberOfTransitions() {
+		return 0; // always 0: this class declares no storage for transitions
+	}
+
+	@Override
+	public void addTransition(NFATransition e) { // no-op: the argument is discarded
+	}
+
+	@Override
+	public NFATransition transition(int i) {
+		return null; // always null, whatever index is passed
+	}
+}
--- a/tool/src/org/antlr/v4/automata/Edge.java
+++ b/tool/src/org/antlr/v4/automata/Edge.java
@ -0,0 +1,7 @@
+package org.antlr.v4.automata;
+
+/** A DFA edge (NFA edges are called transitions) */
+public class Edge { // only a target is recorded so far; no label field is declared
+	public DFAState target; // the DFA state this edge points to
+
+}
--- a/tool/src/org/antlr/v4/automata/EpsilonTransition.java
+++ b/tool/src/org/antlr/v4/automata/EpsilonTransition.java
@ -0,0 +1,9 @@
+package org.antlr.v4.automata;
+
+public class EpsilonTransition extends NFATransition { // declares no fields of its own; only the inherited target
+	public EpsilonTransition(NFAState target) { super(target); } // target is stored by the NFATransition constructor
+
+	public int compareTo(Object o) {
+		return 0; // NOTE(review): reports "equal" for every argument; looks like a placeholder -- confirm
+	}
+}
--- a/tool/src/org/antlr/v4/automata/Label.java
+++ b/tool/src/org/antlr/v4/automata/Label.java
@ -11,17 +11,17 @@ import org.antlr.runtime.Token;
 public abstract class Label implements /*Comparable, */ Cloneable {
    public static final int INVALID = -7;

-	public static final int ACTION = -6;
+//	public static final int ACTION = -6;

-	public static final int EPSILON = -5;
+	//public static final int EPSILON = -5;

-    public static final String EPSILON_STR = "<EPSILON>";
+    //public static final String EPSILON_STR = "<EPSILON>";

    /** label is a semantic predicate; implies label is epsilon also */
-    public static final int SEMPRED = -4;
+//    public static final int SEMPRED = -4;

    /** label is a set of tokens or char */
-    public static final int SET = -3;
+//    public static final int SET = -3;

    /** End of Token is like EOF for lexer rules.  It implies that no more
     *  characters are available and that NFA conversion should terminate
--- a/tool/src/org/antlr/v4/automata/NFAState.java
+++ b/tool/src/org/antlr/v4/automata/NFAState.java
@ -0,0 +1,28 @@
+package org.antlr.v4.automata;
+
+/**
+ *  I need to distinguish between NFA decision states for (...)* and (...)+
+ *  during NFA interpretation.
+ *	public static final int LOOPBACK = 1;
+ *	public static final int BLOCK_START = 2;
+ *	public static final int OPTIONAL_BLOCK_START = 3;
+ *	public static final int BYPASS = 4;
+ *	public static final int RIGHT_EDGE_OF_BLOCK = 5;
+ *
+ *  TODO: make subclasses for all of these.
+ */
+public class NFAState extends State { // NOTE(review): all three overrides below are empty stubs in this revision
+	@Override
+	public int getNumberOfTransitions() {
+		return 0; // always 0: this class declares no storage for transitions
+	}
+
+	@Override
+	public void addTransition(NFATransition e) { // no-op: the argument is discarded
+	}
+
+	@Override
+	public NFATransition transition(int i) {
+		return null; // always null, whatever index is passed
+	}
+}
--- a/tool/src/org/antlr/v4/automata/NFATransition.java
+++ b/tool/src/org/antlr/v4/automata/NFATransition.java
@ -0,0 +1,22 @@
+package org.antlr.v4.automata;
+
+/** An NFA transition between any two NFA states.  Subclasses define
+ *  atom, set, epsilon, action, predicate, rule transitions.
+ *
+ *  This is a one way link.  It emanates from a state (usually via a list of
+ *  transitions) and has a target state.
+ *
+ *  Since we never have to change the NFA transitions once we construct them,
+ *  we can fix these transitions as specific classes. The DFA transitions,
+ *  on the other hand, need to update their labels as transitions are added
+ *  to the states. We'll use the term Edge for the DFA to distinguish them
+ *  from NFA transitions.
+ */
+public abstract class NFATransition implements Comparable { // raw Comparable: concrete subclasses supply compareTo(Object)
+	/** The target of this transition */
+	public NFAState target;
+
+	public NFATransition() { } // leaves target null until something assigns it
+
+	public NFATransition(NFAState target) { this.target = target; }
+}
--- a/tool/src/org/antlr/v4/automata/RuleTransition.java
+++ b/tool/src/org/antlr/v4/automata/RuleTransition.java
@ -0,0 +1,25 @@
+package org.antlr.v4.automata;
+
+import org.antlr.v4.tool.Rule;
+
+/** An NFA transition for a reference to a rule; see the field comments below. */
+public class RuleTransition extends NFATransition {
+	/** Ptr to the rule definition object for this rule ref */
+	public Rule rule;
+
+	/** What node to begin computations following ref to rule */
+    public NFAState followState;
+
+    public RuleTransition(Rule rule,
+						  NFAState ruleStart,
+						  NFAState followState)
+	{
+		super(ruleStart); // ruleStart becomes the inherited target of this transition
+		this.rule = rule;
+		this.followState = followState;
+	}
+
+	public int compareTo(Object o) {
+		return 0; // NOTE(review): reports "equal" for every argument; looks like a placeholder -- confirm
+	}
+}
--- a/tool/src/org/antlr/v4/automata/SetTransition.java
+++ b/tool/src/org/antlr/v4/automata/SetTransition.java
@ -6,7 +6,6 @@ import org.antlr.v4.misc.IntervalSet;
 /** A label containing a set of values */
 public class SetTransition extends Label {
 	/** A set of token types or character codes if label==SET */
-	// TODO: try IntervalSet for everything
 	protected IntSet label;

 	public SetTransition(IntSet label) {
--- a/tool/src/org/antlr/v4/automata/State.java
+++ b/tool/src/org/antlr/v4/automata/State.java
@ -0,0 +1,19 @@
+package org.antlr.v4.automata;
+
+/** A generic state machine state. */
+public abstract class State {
+    public static final int INVALID_STATE_NUMBER = -1; // sentinel used as the initial stateNumber
+
+    public int stateNumber = INVALID_STATE_NUMBER; // stays at the sentinel until something assigns a number
+
+    /** An accept state is an end of rule state for lexers and
+     *  parser grammar rules.
+	 */
+	public boolean acceptState = false;
+
+    public abstract int getNumberOfTransitions(); // implemented by NFAState and DFAState
+
+    public abstract void addTransition(NFATransition e); // NOTE(review): typed on NFATransition even for DFAState -- confirm intent
+
+    public abstract NFATransition transition(int i); // presumably i is an index into the state's transitions -- confirm
+}
--- a/tool/src/org/antlr/v4/automata/Transition.java
+++ b/tool/src/org/antlr/v4/automata/Transition.java
@ -1,15 +0,0 @@
-package org.antlr.v4.automata;
-
-import org.antlr.analysis.State;
-
-/** An NFA transition between any two NFA states.  Subclasses define
- *  atom, set, epsilon, action, predicate, rule transitions.
- *
- *  This is a one way link.  It emanates from a state (usually via a list of
- *  transitions) and has a target state.
- */
-public abstract class Transition implements Comparable {
-	/** The target of this transition */
-	public State target;
-	
-}
--- a/tool/src/org/antlr/v4/tool/Grammar.java
+++ b/tool/src/org/antlr/v4/tool/Grammar.java
@ -117,18 +117,18 @@ public class Grammar implements AttributeResolver {
 		typeToTokenList.setSize(Label.NUM_FAUX_LABELS+Token.MIN_TOKEN_TYPE-1);
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.INVALID, "<INVALID>");
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOT, "<EOT>");
-		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
-		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
-		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
+//		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SEMPRED, "<SEMPRED>");
+//		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.SET, "<SET>");
+//		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EPSILON, Label.EPSILON_STR);
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOF, "EOF");
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Label.EOR_TOKEN_TYPE-1, "<EOR>");
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.DOWN-1, "DOWN");
 		typeToTokenList.set(Label.NUM_FAUX_LABELS+Token.UP-1, "UP");
 		tokenNameToTypeMap.put("<INVALID>", Label.INVALID);
-		tokenNameToTypeMap.put("<ACTION>", Label.ACTION);
-		tokenNameToTypeMap.put("<EPSILON>", Label.EPSILON);
-		tokenNameToTypeMap.put("<SEMPRED>", Label.SEMPRED);
-		tokenNameToTypeMap.put("<SET>", Label.SET);
+//		tokenNameToTypeMap.put("<ACTION>", Label.ACTION);
+//		tokenNameToTypeMap.put("<EPSILON>", Label.EPSILON);
+//		tokenNameToTypeMap.put("<SEMPRED>", Label.SEMPRED);
+//		tokenNameToTypeMap.put("<SET>", Label.SET);
 		tokenNameToTypeMap.put("<EOT>", Label.EOT);
 		tokenNameToTypeMap.put("EOF", Label.EOF);
 		tokenNameToTypeMap.put("<EOR>", Label.EOR_TOKEN_TYPE);