fixed left-recur check, adding dfa stuff

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6740]
2010-03-07 12:12:08 -08:00 · 2010-03-07 12:12:08 -08:00 · 61fbb6571d
parent 309cb6d623
commit 61fbb6571d
6 changed files with 288 additions and 21 deletions
--- a/tool/src/org/antlr/v4/analysis/LeftRecursionDetector.java
+++ b/tool/src/org/antlr/v4/analysis/LeftRecursionDetector.java
@ -19,9 +19,6 @@ public class LeftRecursionDetector {
 	 */
 	Set<RuleStartState> rulesVisitedPerRuleCheck = new HashSet<RuleStartState>();

-	/** prevents epsilon-loop-induced infinite recursion. */
-	Set<NFAState> visitedStates = new HashSet<NFAState>();
-
 	public LeftRecursionDetector(NFA nfa) {	this.nfa = nfa; }

 	public void check() {
@ -29,10 +26,10 @@ public class LeftRecursionDetector {
 			//System.out.print("check "+start.rule.name);
 			rulesVisitedPerRuleCheck.clear();
 			rulesVisitedPerRuleCheck.add(start);
-			FASerializer ser = new FASerializer(nfa.g, start);
+			//FASerializer ser = new FASerializer(nfa.g, start);
 			//System.out.print(":\n"+ser+"\n");

-			check(start.rule, start);
+			check(start.rule, start, new HashSet<NFAState>());
 		}
 		//System.out.println("cycles="+listOfRecursiveCycles);
 	}
@ -48,11 +45,12 @@ public class LeftRecursionDetector {
 	 *  filling the cycles in listOfRecursiveCycles and also, as a
 	 *  side-effect, set leftRecursiveRules.
 	 */
-	public boolean check(Rule enclosingRule, NFAState s) {
+	public boolean check(Rule enclosingRule, NFAState s, Set<NFAState> visitedStates) {
 		if ( s instanceof RuleStopState ) return true;
 		if ( visitedStates.contains(s) ) return false;
 		visitedStates.add(s);

+		//System.out.println("visit "+s);
 		int n = s.getNumberOfTransitions();
 		boolean stateReachesStopState = false;
 		for (int i=0; i<n; i++) {
@ -64,17 +62,21 @@ public class LeftRecursionDetector {
 					addRulesToCycle(enclosingRule, r);
 				}
 				else {
+					// must visit if not already visited; mark target, pop when done
 					rulesVisitedPerRuleCheck.add((RuleStartState)t.target);
-					boolean nullable = check(r, t.target);
+					// send new visitedStates set per rule invocation
+					boolean nullable = check(r, t.target, new HashSet<NFAState>());
+					// we're back from visiting that rule
+					rulesVisitedPerRuleCheck.remove((RuleStartState)t.target);
 					if ( nullable ) {
-						stateReachesStopState |= check(enclosingRule, rt.followState);
+						stateReachesStopState |= check(enclosingRule, rt.followState, visitedStates);
 					}
 				}
 			}
 			else if ( t.isEpsilon() ) {
-				stateReachesStopState |= check(enclosingRule, t.target);
+				stateReachesStopState |= check(enclosingRule, t.target, visitedStates);
 			}
-			// ignore non-epsilon transitions
+			// else ignore non-epsilon transitions
 		}
 		return stateReachesStopState;
 	}
--- a/tool/src/org/antlr/v4/automata/AtomTransition.java
+++ b/tool/src/org/antlr/v4/automata/AtomTransition.java
@ -38,4 +38,8 @@ public class AtomTransition extends Transition {
 	public String toString(Grammar g) {
 		return g.getTokenDisplayName(label);
 	}
+
+	public String toString() {
+		return String.valueOf(label);
+	}
 }
--- a/tool/src/org/antlr/v4/automata/DFA.java
+++ b/tool/src/org/antlr/v4/automata/DFA.java
@ -1,4 +1,43 @@
 package org.antlr.v4.automata;

+import java.util.HashMap;
+import java.util.Map;
+
+/** A DFA (converted from a grammar's NFA).
+ *  DFAs are used as prediction machines for alternative blocks in all kinds
+ *  of recognizers (lexers, parsers, tree walkers).
+ */
 public class DFA {
+	/** What's the start state for this DFA? */
+    public DFAState startState;
+
+	/** Which NFA are we converting (well, which piece of the NFA)? */
+//    public NFA nfa;
+
+	/** From what NFAState did we create the DFA? */
+	public NFAState decisionNFAStartState;
+
+	/** A set of all uniquely-numbered DFA states.  Maps hash of DFAState
+     *  to the actual DFAState object.  We use this to detect
+     *  existing DFA states.  Map<DFAState,DFAState>.  Use Map so
+	 *  we can get old state back (Set only allows you to see if it's there).
+	 *  Not used during fixed k lookahead as it's a waste to fill it with
+	 *  a dup of states array.
+     */
+    public Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
+
+	/** Maps the state number to the actual DFAState.  This contains all
+	 *  states, but the states are not unique.  s3 might be same as s1 so
+	 *  s3 -> s1 in this table.  This is how cycles occur.  If fixed k,
+	 *  then these states will all be unique as states[i] always points
+	 *  at state i when no cycles exist.
+	 *
+	 *  This is managed in parallel with uniqueStates and simply provides
+	 *  a way to go from state number to DFAState rather than via a
+	 *  hash lookup.
+	 */
+	//protected List<DFAState> states = new ArrayList<DFAState>();
+
+	/** Unique state numbers per DFA */
+	int stateCounter = 0;	
 }
--- a/tool/src/org/antlr/v4/automata/DFAState.java
+++ b/tool/src/org/antlr/v4/automata/DFAState.java
@ -3,7 +3,6 @@ package org.antlr.v4.automata;
 import java.util.ArrayList;
 import java.util.List;

-
 /** A DFA state represents a set of possible NFA configurations.
 *  As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
 *  to keep track of all possible states the NFA can be in after
@ -21,7 +20,7 @@ import java.util.List;
 *  so I have to add one to simulate the proper lookahead sequences for
 *  the underlying LL grammar from which the NFA was derived.
 *
- *  I use a list of NFAConfiguration objects.  An NFAConfiguration
+ *  I use a list of NFAConfig objects.  An NFAConfig
 *  is both a state (ala normal conversion) and an NFAContext describing
 *  the chain of rules (if any) followed to arrive at that state.  There
 *  is also the semantic context, which is the "set" of predicates found
@ -33,21 +32,69 @@ import java.util.List;
 */
 public class DFAState extends State {
 	public static final int INITIAL_NUM_TRANSITIONS = 4;
+
+	/** State in which DFA? */
+	public DFA dfa;
+
 	/** Track the transitions emanating from this DFA state. */
-	protected List<org.antlr.analysis.Transition> transitions =
-		new ArrayList<org.antlr.analysis.Transition>(INITIAL_NUM_TRANSITIONS);
+	protected List<Transition> transitions =
+		new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);

-	@Override
-	public int getNumberOfTransitions() {
-		return 0;
+	/** The set of NFA configurations (state,alt,context) for this DFA state */
+	public OrderedHashSet<NFAConfig> nfaConfigs =
+		new OrderedHashSet<NFAConfig>();
+
+	public DFAState(DFA dfa) { this.dfa = dfa; }
+
+	public void addNFAConfig(NFAState s, NFAConfig c) {
+		if ( nfaConfigs.contains(c) ) return;
+		nfaConfigs.add(c);
+	}
+
+	public NFAConfig addNFAConfig(NFAState state,
+								  int alt,
+								  NFAState invokingState)
+	{
+		NFAConfig c = new NFAConfig(state.stateNumber,
+									alt,
+									invokingState);
+		addNFAConfig(state, c);
+		return c;
 	}

 	@Override
-	public void addTransition(Transition e) {
-	}
+	public int getNumberOfTransitions() { return transitions.size(); }

 	@Override
-	public Transition transition(int i) {
-		return null;
+	public void addTransition(Transition e) { transitions.add(e); }
+
+	@Override
+	public Transition transition(int i) { return transitions.get(i); }
+
+	/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
+	public int hashCode() {
+		int h = 0;
+		for (NFAConfig c : nfaConfigs) {
+			h += c.state + c.alt;
 		}
+		return h;
+	}
+
+	/** Two DFAStates are equal if their NFA configuration sets are the
+	 *  same. This method is used to see if a DFA state already exists.
+	 *
+	 *  Because the number of alternatives and number of NFA configurations are
+	 *  finite, there is a finite number of DFA states that can be processed.
+	 *  This is necessary to show that the algorithm terminates.
+	 *
+	 *  Cannot test the DFA state numbers here because in DFA.addState we need
+	 *  to know if any other state exists that has this exact set of NFA
+	 *  configurations.  The DFAState state number is irrelevant.
+	 */
+	public boolean equals(Object o) {
+		// compare set of NFA configurations in this set with other
+		DFAState other = (DFAState)o;
+		return this.nfaConfigs.equals(other.nfaConfigs);
+	}
+	
 }
--- a/tool/src/org/antlr/v4/automata/NFAConfig.java
+++ b/tool/src/org/antlr/v4/automata/NFAConfig.java
@ -0,0 +1,103 @@
+package org.antlr.v4.automata;
+
+/** An NFA state, predicted alt, and syntactic/semantic context.
+ *  The syntactic context is a pointer into the rule invocation
+ *  chain used to arrive at the state.  The semantic context is
+ *  the unordered set of semantic predicates encountered before reaching
+ *  an NFA state.
+ */
+public class NFAConfig {
+	/** The NFA state associated with this configuration */
+	public int state;
+
+	/** What alt is predicted by this configuration */
+	public int alt;
+
+	/** Record the NFA state that invoked another rule's start state */
+	public NFAState invokingState;
+
+	/** The set of semantic predicates associated with this NFA
+	 *  configuration.  The predicates were found on the way to
+	 *  the associated NFA state in this syntactic context.
+	 *  Set<AST>: track nodes in grammar containing the predicate
+	 *  for error messages and such (nice to know where the predicate
+	 *  came from in case of duplicates etc...).  By using a set,
+	 *  the equals() method will correctly show {pred1,pred2} as equals()
+	 *  to {pred2,pred1}.
+	 */
+	//public SemanticContext semanticContext = SemanticContext.EMPTY_SEMANTIC_CONTEXT;
+
+	/** Indicate that this configuration has been resolved and no further
+	 *  DFA processing should occur with it.  Essentially, this is used
+	 *  as an "ignore" bit so that upon a set of nondeterministic configurations
+	 *  such as (s|2) and (s|3), I can set (s|3) to resolved=true (and any
+	 *  other configuration associated with alt 3).
+	 */
+	//protected boolean resolved;
+
+	/** This bit is used to indicate a semantic predicate will be
+	 *  used to resolve the conflict.  Method
+	 *  DFA.findNewDFAStatesAndAddDFATransitions will add edges for
+	 *  the predicates after it performs the reach operation.  The
+	 *  nondeterminism resolver sets this when it finds a set of
+	 *  nondeterministic configurations (as it does for "resolved" field)
+	 *  that have enough predicates to resolve the conflict.
+	 */
+	//protected boolean resolveWithPredicate;
+
+	public NFAConfig(int state,
+					 int alt,
+					 NFAState invokingState)
+	{
+		this.state = state;
+		this.alt = alt;
+		this.invokingState = invokingState;
+		//this.semanticContext = semanticContext;
+	}
+
+	/** An NFA configuration is equal to another if both have
+     *  the same state, they predict the same alternative, and
+     *  syntactic/semantic contexts are the same.  I don't think
+     *  the state|alt|ctx could be the same and have two different
+     *  semantic contexts, but might as well define equals to be
+     *  everything.
+     */
+    public boolean equals(Object o) {
+		if ( o==null ) return false;
+        NFAConfig other = (NFAConfig)o;
+        return this.state==other.state &&
+               this.alt==other.alt &&
+			   this.invokingState==other.invokingState; 
+//               this.context.equals (other.context)&&
+//               this.semanticContext.equals(other.semanticContext)
+    }
+
+    public int hashCode() {
+        int h = state + alt;// + context.hashCode();
+        return h;
+    }
+
+	public String toString() {
+		return toString(true);
+	}
+
+	public String toString(boolean showAlt) {
+		StringBuffer buf = new StringBuffer();
+		buf.append(state);
+		if ( showAlt ) {
+			buf.append("|");
+			buf.append(alt);
+		}
+		if ( invokingState!=null ) {
+            buf.append("|");
+            buf.append(invokingState);
+        }
+//        if ( resolved ) {
+//            buf.append("|resolved");
+//        }
+//		if ( resolveWithPredicate ) {
+//			buf.append("|resolveWithPredicate");
+//		}
+		return buf.toString();
+    }
+}
--- a/tool/src/org/antlr/v4/automata/OrderedHashSet.java
+++ b/tool/src/org/antlr/v4/automata/OrderedHashSet.java
@ -0,0 +1,72 @@
+package org.antlr.v4.automata;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+
+/** A HashSet that remembers the order in which the elements were added.
+ *  You can alter the ith element with set(i,value) too :)  Unique list.
+ *  I need the replace/set-element-i functionality so I'm subclassing
+ *  HashSet.
+ */
+public class OrderedHashSet<T> extends HashSet<T> {
+    /** Track the elements as they are added to the set */
+    protected List<T> elements = new ArrayList<T>();
+
+    public T get(int i) {
+        return elements.get(i);
+    }
+
+    /** Replace an existing value with a new value; updates the element
+     *  list and the hash table, but not the key as that has not changed.
+     */
+    public T set(int i, T value) {
+        T oldElement = elements.get(i);
+        elements.set(i,value); // update list
+        super.remove(oldElement); // now update the set: remove/add
+        super.add(value);
+        return oldElement;
+    }
+
+    /** Add a value to list; keep in hashtable for consistency also;
+     *  Key is object itself.  Good for say asking if a certain string is in
+     *  a list of strings.
+     */
+    public boolean add(T value) {
+        boolean result = super.add(value);
+		if ( result ) {  // only track if new element not in set
+			elements.add(value);
+		}
+		return result;
+    }
+
+    public boolean remove(Object o) {
+		throw new UnsupportedOperationException();
+    }
+
+    public void clear() {
+        elements.clear();
+        super.clear();
+    }
+
+    /** Return the List holding list of table elements.  Note that you are
+     *  NOT getting a copy so don't write to the list.
+     */
+    public List<T> elements() {
+        return elements;
+    }
+
+    public int size() {
+		/*
+		if ( elements.size()!=super.size() ) {
+			ErrorManager.internalError("OrderedHashSet: elements and set size differs; "+
+									   elements.size()+"!="+super.size());
+        }
+        */
+        return elements.size();
+    }
+
+    public String toString() {
+        return elements.toString();
+    }
+}