fixed left-recur check, adding dfa stuff

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6740]
This commit is contained in:
parrt 2010-03-07 12:12:08 -08:00
parent 309cb6d623
commit 61fbb6571d
6 changed files with 288 additions and 21 deletions

View File

@ -19,9 +19,6 @@ public class LeftRecursionDetector {
*/
Set<RuleStartState> rulesVisitedPerRuleCheck = new HashSet<RuleStartState>();
/** prevents epsilon-loop-induced infinite recursion. */
Set<NFAState> visitedStates = new HashSet<NFAState>();
public LeftRecursionDetector(NFA nfa) { this.nfa = nfa; }
public void check() {
@ -29,10 +26,10 @@ public class LeftRecursionDetector {
//System.out.print("check "+start.rule.name);
rulesVisitedPerRuleCheck.clear();
rulesVisitedPerRuleCheck.add(start);
FASerializer ser = new FASerializer(nfa.g, start);
//FASerializer ser = new FASerializer(nfa.g, start);
//System.out.print(":\n"+ser+"\n");
check(start.rule, start);
check(start.rule, start, new HashSet<NFAState>());
}
//System.out.println("cycles="+listOfRecursiveCycles);
}
@ -48,11 +45,12 @@ public class LeftRecursionDetector {
* filling the cycles in listOfRecursiveCycles and also, as a
* side-effect, set leftRecursiveRules.
*/
public boolean check(Rule enclosingRule, NFAState s) {
public boolean check(Rule enclosingRule, NFAState s, Set<NFAState> visitedStates) {
if ( s instanceof RuleStopState ) return true;
if ( visitedStates.contains(s) ) return false;
visitedStates.add(s);
//System.out.println("visit "+s);
int n = s.getNumberOfTransitions();
boolean stateReachesStopState = false;
for (int i=0; i<n; i++) {
@ -64,17 +62,21 @@ public class LeftRecursionDetector {
addRulesToCycle(enclosingRule, r);
}
else {
// must visit if not already visited; mark target, pop when done
rulesVisitedPerRuleCheck.add((RuleStartState)t.target);
boolean nullable = check(r, t.target);
// send new visitedStates set per rule invocation
boolean nullable = check(r, t.target, new HashSet<NFAState>());
// we're back from visiting that rule
rulesVisitedPerRuleCheck.remove((RuleStartState)t.target);
if ( nullable ) {
stateReachesStopState |= check(enclosingRule, rt.followState);
stateReachesStopState |= check(enclosingRule, rt.followState, visitedStates);
}
}
}
else if ( t.isEpsilon() ) {
stateReachesStopState |= check(enclosingRule, t.target);
stateReachesStopState |= check(enclosingRule, t.target, visitedStates);
}
// ignore non-epsilon transitions
// else ignore non-epsilon transitions
}
return stateReachesStopState;
}

View File

@ -38,4 +38,8 @@ public class AtomTransition extends Transition {
/** Render this transition's label for display by delegating to
 * {@code g.getTokenDisplayName(label)}.
 *
 * @param g the grammar asked for the display name; must not be null
 *          because it is dereferenced unconditionally
 * @return whatever {@code g.getTokenDisplayName(label)} returns
 */
public String toString(Grammar g) {
return g.getTokenDisplayName(label);
}
/** Grammar-independent rendering of this transition: the raw label
 * converted with {@link String#valueOf}. Use {@code toString(Grammar)}
 * when a grammar is available to resolve the label to a display name.
 *
 * @return {@code String.valueOf(label)}
 */
@Override
public String toString() {
    return String.valueOf(label);
}
}

View File

@ -1,4 +1,43 @@
package org.antlr.v4.automata;
import java.util.HashMap;
import java.util.Map;
/** A DFA (converted from a grammar's NFA).
 * DFAs are used as prediction machines for alternative blocks in all kinds
 * of recognizers (lexers, parsers, tree walkers).
 *
 * At this point the class is only a data holder: it declares fields and
 * no methods.
 */
public class DFA {
/** What's the start state for this DFA? */
public DFAState startState;
/* Disabled for now: which NFA are we converting (well, which piece of
 * the NFA)?
 */
// public NFA nfa;
/** From what NFAState did we create the DFA? */
public NFAState decisionNFAStartState;
/** A set of all unique DFA states. The map is keyed by the DFAState
 * itself (so lookup goes through DFAState.hashCode()/equals()) and the
 * value is the DFAState object already recorded for that key. We use
 * this to detect existing DFA states. A Map is used rather than a Set so
 * we can get the old state back (a Set only allows you to see if it's
 * there).
 * Original author's note: not used during fixed k lookahead as it's a
 * waste to fill it with a dup of the states array.
 */
public Map<DFAState, DFAState> uniqueStates = new HashMap<DFAState, DFAState>();
/* Disabled for now: maps the state number to the actual DFAState. This
 * would contain all states, but the states are not unique. s3 might be
 * the same as s1 so s3 -> s1 in this table. This is how cycles occur.
 * If fixed k, then these states will all be unique as states[i] always
 * points at state i when no cycles exist.
 *
 * It is meant to be managed in parallel with uniqueStates and simply
 * provides a way to go from state number to DFAState rather than via a
 * hash lookup.
 */
//protected List<DFAState> states = new ArrayList<DFAState>();
/** Unique state numbers per DFA; starts at 0. Package-private.
 * NOTE(review): presumably bumped each time a state is created -- the
 * code that does so is not part of this class yet.
 */
int stateCounter = 0;
}

View File

@ -3,7 +3,6 @@ package org.antlr.v4.automata;
import java.util.ArrayList;
import java.util.List;
/** A DFA state represents a set of possible NFA configurations.
* As Aho, Sethi, Ullman p. 117 says "The DFA uses its state
* to keep track of all possible states the NFA can be in after
@ -21,7 +20,7 @@ import java.util.List;
* so I have to add one to simulate the proper lookahead sequences for
* the underlying LL grammar from which the NFA was derived.
*
* I use a list of NFAConfiguration objects. An NFAConfiguration
* I use a list of NFAConfig objects. An NFAConfig
* is both a state (ala normal conversion) and an NFAContext describing
* the chain of rules (if any) followed to arrive at that state. There
* is also the semantic context, which is the "set" of predicates found
@ -33,21 +32,69 @@ import java.util.List;
*/
public class DFAState extends State {
public static final int INITIAL_NUM_TRANSITIONS = 4;
/** State in which DFA? */
public DFA dfa;
/** Track the transitions emanating from this DFA state. */
protected List<org.antlr.analysis.Transition> transitions =
new ArrayList<org.antlr.analysis.Transition>(INITIAL_NUM_TRANSITIONS);
protected List<Transition> transitions =
new ArrayList<Transition>(INITIAL_NUM_TRANSITIONS);
@Override
public int getNumberOfTransitions() {
return 0;
/** The set of NFA configurations (state,alt,context) for this DFA state */
public OrderedHashSet<NFAConfig> nfaConfigs =
new OrderedHashSet<NFAConfig>();
/** Create a state that records {@code dfa} as the DFA it belongs to.
 * The argument is stored as-is; it is not validated here.
 */
public DFAState(DFA dfa) { this.dfa = dfa; }
/** Record configuration {@code c} in this DFA state unless an equal
 * configuration is already present.
 *
 * @param s not consulted by this method
 * @param c the configuration to add to {@code nfaConfigs}
 */
public void addNFAConfig(NFAState s, NFAConfig c) {
    boolean alreadyPresent = nfaConfigs.contains(c);
    if ( !alreadyPresent ) {
        nfaConfigs.add(c);
    }
}
/** Build a configuration from {@code state.stateNumber}, {@code alt} and
 * {@code invokingState}, then hand it to
 * {@code addNFAConfig(NFAState, NFAConfig)}.
 *
 * @return the configuration created by this call; this is the new object
 *         even when an equal configuration was already in the set
 */
public NFAConfig addNFAConfig(NFAState state,
                              int alt,
                              NFAState invokingState)
{
    final NFAConfig config =
        new NFAConfig(state.stateNumber, alt, invokingState);
    this.addNFAConfig(state, config);
    return config;
}
@Override
public void addTransition(Transition e) {
}
public int getNumberOfTransitions() { return transitions.size(); }
@Override
public Transition transition(int i) {
return null;
public void addTransition(Transition e) { transitions.add(e); }
@Override
public Transition transition(int i) { return transitions.get(i); }
/** Hash a DFA state as the sum, over every configuration it holds, of
 * that configuration's NFA state number plus its alt. Being a plain sum,
 * the result does not depend on the order the configurations are visited.
 */
public int hashCode() {
    int sum = 0;
    for (NFAConfig config : nfaConfigs) {
        sum = sum + config.state + config.alt;
    }
    return sum;
}
/** Two DFAStates are equal if their NFA configuration sets are the
 * same. This method is used to see if a DFA state already exists.
 *
 * Because the number of alternatives and number of NFA configurations are
 * finite, there is a finite number of DFA states that can be processed.
 * This is necessary to show that the algorithm terminates.
 *
 * Cannot test the DFA state numbers here because in DFA.addState we need
 * to know if any other state exists that has this exact set of NFA
 * configurations. The DFAState state number is irrelevant.
 *
 * @param o the object to compare with; {@code null} or any object that
 *          is not a DFAState compares unequal instead of throwing
 * @return true when {@code o} is a DFAState whose {@code nfaConfigs}
 *         equals this state's {@code nfaConfigs}
 */
@Override
public boolean equals(Object o) {
    if ( this==o ) return true;
    // Honor the Object.equals contract: never throw on null/foreign types.
    if ( !(o instanceof DFAState) ) return false;
    // compare set of NFA configurations in this set with other
    DFAState other = (DFAState)o;
    return this.nfaConfigs.equals(other.nfaConfigs);
}
}

View File

@ -0,0 +1,103 @@
package org.antlr.v4.automata;
/** An NFA state, predicted alt, and syntactic/semantic context.
 *  The syntactic context is a pointer into the rule invocation
 *  chain used to arrive at the state. The semantic context is
 *  the unordered set of semantic predicates encountered before reaching
 *  an NFA state. (Semantic context is not tracked yet; the field is
 *  commented out below.)
 */
public class NFAConfig {
    /** The NFA state associated with this configuration */
    public int state;

    /** What alt is predicted by this configuration */
    public int alt;

    /** Record the NFA state that invoked another rule's start state */
    public NFAState invokingState;

    /* Disabled for now.
     * The set of semantic predicates associated with this NFA
     * configuration. The predicates were found on the way to
     * the associated NFA state in this syntactic context.
     * Set<AST>: track nodes in grammar containing the predicate
     * for error messages and such (nice to know where the predicate
     * came from in case of duplicates etc...). By using a set,
     * the equals() method will correctly show {pred1,pred2} as equals()
     * to {pred2,pred1}.
     */
    //public SemanticContext semanticContext = SemanticContext.EMPTY_SEMANTIC_CONTEXT;

    /* Disabled for now.
     * Indicate that this configuration has been resolved and no further
     * DFA processing should occur with it. Essentially, this is used
     * as an "ignore" bit so that upon a set of nondeterministic configurations
     * such as (s|2) and (s|3), I can set (s|3) to resolved=true (and any
     * other configuration associated with alt 3).
     */
    //protected boolean resolved;

    /* Disabled for now.
     * This bit is used to indicate a semantic predicate will be
     * used to resolve the conflict. Method
     * DFA.findNewDFAStatesAndAddDFATransitions will add edges for
     * the predicates after it performs the reach operation. The
     * nondeterminism resolver sets this when it finds a set of
     * nondeterministic configurations (as it does for "resolved" field)
     * that have enough predicates to resolve the conflict.
     */
    //protected boolean resolveWithPredicate;

    /** Create a configuration from its three components; the values are
     *  stored as given, with no validation.
     *
     *  @param state         number of the NFA state
     *  @param alt           alternative predicted by this configuration
     *  @param invokingState NFA state that invoked another rule's start
     *                       state; may be null (toString() then omits it)
     */
    public NFAConfig(int state,
                     int alt,
                     NFAState invokingState)
    {
        this.state = state;
        this.alt = alt;
        this.invokingState = invokingState;
        //this.semanticContext = semanticContext;
    }

    /** An NFA configuration is equal to another if both have
     *  the same state, they predict the same alternative, and
     *  they refer to the very same invokingState object (identity
     *  comparison). Semantic context is not compared because it is
     *  not tracked yet.
     *
     *  @param o object to compare with; null or an object that is not
     *           an NFAConfig compares unequal instead of throwing
     */
    @Override
    public boolean equals(Object o) {
        if ( this==o ) return true;
        // instanceof is false for null, so this also covers o==null
        if ( !(o instanceof NFAConfig) ) return false;
        NFAConfig other = (NFAConfig)o;
        return this.state==other.state &&
               this.alt==other.alt &&
               this.invokingState==other.invokingState;
        // this.context.equals (other.context)&&
        // this.semanticContext.equals(other.semanticContext)
    }

    /** Hash on state and alt only. invokingState is left out, which is
     *  still consistent with equals(): equal configurations share state
     *  and alt and therefore share this hash.
     */
    @Override
    public int hashCode() {
        int h = state + alt;// + context.hashCode();
        return h;
    }

    /** Same as {@code toString(true)}. */
    @Override
    public String toString() {
        return toString(true);
    }

    /** Render as {@code state}, then {@code |alt} when requested, then
     *  {@code |invokingState} when one is set.
     *
     *  @param showAlt whether to include the predicted alt
     */
    public String toString(boolean showAlt) {
        // purely local buffer, so the unsynchronized builder is enough
        StringBuilder buf = new StringBuilder();
        buf.append(state);
        if ( showAlt ) {
            buf.append("|");
            buf.append(alt);
        }
        if ( invokingState!=null ) {
            buf.append("|");
            buf.append(invokingState);
        }
//      if ( resolved ) {
//          buf.append("|resolved");
//      }
//      if ( resolveWithPredicate ) {
//          buf.append("|resolveWithPredicate");
//      }
        return buf.toString();
    }
}

View File

@ -0,0 +1,72 @@
package org.antlr.v4.automata;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
/** A HashSet that remembers the order in which the elements were added.
 *  You can alter the ith element with set(i,value) too :) Unique list.
 *  I need the replace/set-element-i functionality so I'm subclassing
 *  HashSet and mirroring its contents in an insertion-ordered list.
 *
 *  Iteration and toArray() follow insertion order. Removing elements is
 *  not supported, neither through remove(Object) nor through the
 *  iterator, because that would let the list and the hash table drift
 *  apart.
 */
public class OrderedHashSet<T> extends HashSet<T> {
    /** Track the elements as they are added to the set */
    protected List<T> elements = new ArrayList<T>();

    /** Return the ith element in insertion order.
     *  Throws IndexOutOfBoundsException (from the backing list) when i is
     *  out of range.
     */
    public T get(int i) {
        return elements.get(i);
    }

    /** Replace an existing value with a new value; updates the element
     *  list and the hash table.
     *
     *  The caller must not pass a value that is already stored at a
     *  different index: the list would then hold it twice while the hash
     *  table holds it once.
     *
     *  @return the element previously at index i
     */
    public T set(int i, T value) {
        T oldElement = elements.get(i);
        elements.set(i,value); // update list
        super.remove(oldElement); // now update the set: remove/add
        super.add(value);
        return oldElement;
    }

    /** Add a value to list; keep in hashtable for consistency also;
     *  Key is object itself. Good for say asking if a certain string is in
     *  a list of strings.
     *
     *  @return true if the value was not yet present and has been added
     */
    @Override
    public boolean add(T value) {
        boolean result = super.add(value);
        if ( result ) { // only track if new element not in set
            elements.add(value);
        }
        return result;
    }

    /** Removal of individual elements is not supported. */
    @Override
    public boolean remove(Object o) {
        throw new UnsupportedOperationException();
    }

    @Override
    public void clear() {
        elements.clear();
        super.clear();
    }

    /** Iterate in insertion order. The iterator is read-only: its
     *  remove() throws UnsupportedOperationException, in line with
     *  remove(Object), so the list and the hash table cannot be
     *  desynchronized through it (this also covers the inherited
     *  retainAll/removeAll, which remove through the iterator).
     */
    @Override
    public Iterator<T> iterator() {
        return Collections.unmodifiableList(elements).iterator();
    }

    /** Elements as an array, in insertion order. */
    @Override
    public Object[] toArray() {
        return elements.toArray();
    }

    /** Elements as a typed array, in insertion order. */
    @Override
    public <E> E[] toArray(E[] a) {
        return elements.toArray(a);
    }

    /** Return the List holding list of table elements. Note that you are
     *  NOT getting a copy so don't write to the list.
     */
    public List<T> elements() {
        return elements;
    }

    /** Number of elements, taken from the ordered list. */
    @Override
    public int size() {
        /*
        if ( elements.size()!=super.size() ) {
            ErrorManager.internalError("OrderedHashSet: elements and set size differs; "+
                                       elements.size()+"!="+super.size());
        }
        */
        return elements.size();
    }

    /** The elements in insertion order, formatted like a List. */
    @Override
    public String toString() {
        return elements.toString();
    }
}