diff --git a/tool/src/org/antlr/v4/Tool.java b/tool/src/org/antlr/v4/Tool.java index 1d8db6739..234695022 100644 --- a/tool/src/org/antlr/v4/Tool.java +++ b/tool/src/org/antlr/v4/Tool.java @@ -359,8 +359,8 @@ public class Tool { //g.ast.inspect(); // MAKE SURE GRAMMAR IS SEMANTICALLY CORRECT (FILL IN GRAMMAR OBJECT) - SemanticPipeline sem = new SemanticPipeline(); - sem.process(g); + SemanticPipeline sem = new SemanticPipeline(g); + sem.process(); if ( g.getImportedGrammars()!=null ) { // process imported grammars (if any) for (Grammar imp : g.getImportedGrammars()) { process(imp); @@ -373,8 +373,8 @@ public class Tool { g.nfa = factory.createNFA(); // PERFORM GRAMMAR ANALYSIS ON NFA: BUILD DECISION DFAs - AnalysisPipeline anal = new AnalysisPipeline(); - anal.process(g); + AnalysisPipeline anal = new AnalysisPipeline(g); + anal.process(); // GENERATE CODE } diff --git a/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java b/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java index 3013a742b..b11ba19d3 100644 --- a/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java +++ b/tool/src/org/antlr/v4/analysis/AnalysisPipeline.java @@ -1,10 +1,18 @@ package org.antlr.v4.analysis; +import org.antlr.v4.automata.DFA; import org.antlr.v4.automata.DecisionState; +import org.antlr.v4.automata.NFAToDFAConverter; import org.antlr.v4.tool.Grammar; public class AnalysisPipeline { - public void process(Grammar g) { + public Grammar g; + + public AnalysisPipeline(Grammar g) { + this.g = g; + } + + public void process() { // LEFT-RECURSION CHECK LeftRecursionDetector lr = new LeftRecursionDetector(g.nfa); lr.check(); @@ -18,7 +26,10 @@ public class AnalysisPipeline { public void createDFA(DecisionState s) { // TRY APPROXIMATE LL(*) ANALYSIS - + NFAToDFAConverter conv = new NFAToDFAConverter(g, s); + DFA dfa = conv.createDFA(); + System.out.println("DFA="+dfa); + // REAL LL(*) ANALYSIS IF THAT FAILS } } diff --git a/tool/src/org/antlr/v4/automata/DFA.java 
b/tool/src/org/antlr/v4/automata/DFA.java index 9aad7c0a3..e3f4c4c4b 100644 --- a/tool/src/org/antlr/v4/automata/DFA.java +++ b/tool/src/org/antlr/v4/automata/DFA.java @@ -1,6 +1,11 @@ package org.antlr.v4.automata; +import org.antlr.v4.misc.Utils; +import org.antlr.v4.tool.Grammar; + import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; import java.util.Map; /** A DFA (converted from a grammar's NFA). @@ -8,6 +13,8 @@ import java.util.Map; * of recognizers (lexers, parsers, tree walkers). */ public class DFA { + Grammar g; + /** What's the start state for this DFA? */ public DFAState startState; @@ -15,7 +22,7 @@ public class DFA { // public NFA nfa; /** From what NFAState did we create the DFA? */ - public NFAState decisionNFAStartState; + public DecisionState decisionNFAStartState; /** A set of all uniquely-numbered DFA states. Maps hash of DFAState * to the actual DFAState object. We use this to detect @@ -38,6 +45,73 @@ public class DFA { */ //protected List states = new ArrayList(); + /** Each alt in an NFA derived from a grammar must have a DFA state that + * predicts it lest the parser not know what to do. Nondeterminisms can + * lead to this situation (assuming no semantic predicates can resolve + * the problem) and when for some reason, I cannot compute the lookahead + * (which might arise from an error in the algorithm or from + * left-recursion etc...). This list starts out with all alts contained + * and then in method doesStateReachAcceptState() I remove the alts I + * know to be uniquely predicted. 
+ */ + public List unreachableAlts; + + public int nAlts = 0; + + /** We only want one accept state per predicted alt; track here */ + public DFAState[] altToAcceptState; + /** Unique state numbers per DFA */ - int stateCounter = 0; + int stateCounter = 0; + + public DFA(Grammar g, DecisionState startState) { + this.g = g; + this.decisionNFAStartState = startState; + nAlts = startState.getNumberOfTransitions(); + unreachableAlts = new LinkedList(); + for (int i = 1; i <= nAlts; i++) { + unreachableAlts.add(Utils.integer(i)); + } + altToAcceptState = new DFAState[nAlts+1]; + } + + /** Add a new DFA state to this DFA if not already present. + * To force an acyclic, fixed maximum depth DFA, just always + * return the incoming state. By not reusing old states, + * no cycles can be created. If we're doing fixed k lookahead + * don't updated uniqueStates, just return incoming state, which + * indicates it's a new state. + */ + protected DFAState addState(DFAState d) { + // does a DFA state exist already with everything the same + // except its state number? + DFAState existing = (DFAState)uniqueStates.get(d); + if ( existing != null ) { + /* + System.out.println("state "+d.stateNumber+" exists as state "+ + existing.stateNumber); + */ + // already there...get the existing DFA state + return existing; + } + + // if not there, then add new state. 
+ uniqueStates.put(d,d); + d.stateNumber = stateCounter++; + return d; + } + + public DFAState newState() { + DFAState n = new DFAState(this); +// states.setSize(n.stateNumber+1); +// states.set(n.stateNumber, n); // track state num to state + return n; + } + + public String toString() { + if ( startState==null ) return ""; + DFASerializer serializer = new DFASerializer(g, startState); + return serializer.toString(); + } + } diff --git a/tool/src/org/antlr/v4/automata/DFASerializer.java b/tool/src/org/antlr/v4/automata/DFASerializer.java new file mode 100644 index 000000000..a8fe0bb12 --- /dev/null +++ b/tool/src/org/antlr/v4/automata/DFASerializer.java @@ -0,0 +1,52 @@ +package org.antlr.v4.automata; + +import org.antlr.v4.tool.Grammar; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** A DFA walker that knows how to dump them to serialized strings. */ +public class DFASerializer { + List work; + Set marked; + Grammar g; + DFAState start; + + public DFASerializer(Grammar g, DFAState start) { + this.g = g; + this.start = start; + } + + public String toString() { + if ( start==null ) return null; + marked = new HashSet(); + + work = new ArrayList(); + work.add(start); + + StringBuilder buf = new StringBuilder(); + DFAState s = null; + + while ( work.size()>0 ) { + s = work.remove(0); + if ( marked.contains(s) ) continue; + int n = s.getNumberOfTransitions(); + //System.out.println("visit "+getDFAStateString(s)+"; edges="+n); + marked.add(s); + for (int i=0; i"+ getStateString(t.target)+'\n'); + } + } + return buf.toString(); + } + + String getStateString(DFAState s) { + int n = s.stateNumber; + String stateStr = "s"+n; + stateStr = ":s"+n+"=>"+s.getUniquelyPredictedAlt(); + return stateStr; + } +} diff --git a/tool/src/org/antlr/v4/automata/DFAState.java b/tool/src/org/antlr/v4/automata/DFAState.java index 05b5c8908..ff2b078ea 100644 --- a/tool/src/org/antlr/v4/automata/DFAState.java +++ 
b/tool/src/org/antlr/v4/automata/DFAState.java @@ -1,7 +1,9 @@ package org.antlr.v4.automata; -import java.util.ArrayList; -import java.util.List; +import org.antlr.v4.misc.Utils; +import org.stringtemplate.v4.misc.MultiMap; + +import java.util.*; /** A DFA state represents a set of possible NFA configurations. * As Aho, Sethi, Ullman p. 117 says "The DFA uses its state @@ -30,20 +32,27 @@ import java.util.List; * but with different NFAContexts (with same or different alts) * meaning that state was reached via a different set of rule invocations. */ -public class DFAState extends State { +public class DFAState { public static final int INITIAL_NUM_TRANSITIONS = 4; + public static final int INVALID_STATE_NUMBER = -1; + + public int stateNumber = INVALID_STATE_NUMBER; + + public boolean isAcceptState = false; /** State in which DFA? */ public DFA dfa; /** Track the transitions emanating from this DFA state. */ - protected List transitions = - new ArrayList(INITIAL_NUM_TRANSITIONS); + protected List edges = + new ArrayList(INITIAL_NUM_TRANSITIONS); /** The set of NFA configurations (state,alt,context) for this DFA state */ public OrderedHashSet nfaConfigs = new OrderedHashSet(); + int cachedUniquelyPredicatedAlt = NFA.INVALID_ALT_NUMBER; + public DFAState(DFA dfa) { this.dfa = dfa; } public void addNFAConfig(NFAState s, NFAConfig c) { @@ -53,29 +62,176 @@ public class DFAState extends State { public NFAConfig addNFAConfig(NFAState state, int alt, - NFAState invokingState) + NFAState context) { - NFAConfig c = new NFAConfig(state.stateNumber, - alt, - invokingState); + NFAConfig c = new NFAConfig(state, alt, context); addNFAConfig(state, c); return c; } - @Override - public int getNumberOfTransitions() { return transitions.size(); } + /** Walk each NFA configuration in this DFA state looking for a conflict + * where (s|i|ctx) and (s|j|ctx) exist, indicating that state s with + * context conflicting ctx predicts alts i and j. 
Return an Integer set + * of the alternative numbers that conflict. Two contexts conflict if + * they are equal or one is a stack suffix of the other or one is + * the empty context. + * + * Use a hash table to record the lists of configs for each state + * as they are encountered. We need only consider states for which + * there is more than one configuration. The configurations' predicted + * alt must be different or must have different contexts to avoid a + * conflict. + */ + protected Set getConflictingAlts() { + // TODO this is called multiple times: cache result? + //System.out.println("getNondetAlts for DFA state "+stateNumber); + Set nondeterministicAlts = new HashSet(); - @Override - public void addTransition(Transition e) { transitions.add(e); } + // If only 1 NFA conf then no way it can be nondeterministic; + // save the overhead. There are many o-a->o NFA transitions + // and so we save a hash map and iterator creation for each + // state. + int numConfigs = nfaConfigs.size(); + if ( numConfigs <=1 ) { + return null; + } - @Override - public Transition transition(int i) { return transitions.get(i); } + // First get a list of configurations for each state. + // Most of the time, each state will have one associated configuration. 
+ MultiMap stateToConfigListMap = + new MultiMap(); + for (int i = 0; i < numConfigs; i++) { + NFAConfig configuration = (NFAConfig) nfaConfigs.get(i); + Integer stateI = Utils.integer(configuration.state.stateNumber); + stateToConfigListMap.map(stateI, configuration); + } + // potential conflicts are states with > 1 configuration and diff alts + Set states = stateToConfigListMap.keySet(); + int numPotentialConflicts = 0; + for (Iterator it = states.iterator(); it.hasNext();) { + Integer stateI = (Integer) it.next(); + boolean thisStateHasPotentialProblem = false; + List configsForState = (List)stateToConfigListMap.get(stateI); + int alt=0; + int numConfigsForState = configsForState.size(); + for (int i = 0; i < numConfigsForState && numConfigsForState>1 ; i++) { + NFAConfig c = (NFAConfig) configsForState.get(i); + if ( alt==0 ) { + alt = c.alt; + } + else if ( c.alt!=alt ) { + /* + System.out.println("potential conflict in state "+stateI+ + " configs: "+configsForState); + */ + numPotentialConflicts++; + thisStateHasPotentialProblem = true; + } + } + if ( !thisStateHasPotentialProblem ) { + // remove NFA state's configurations from + // further checking; no issues with it + // (can't remove as it's concurrent modification; set to null) + stateToConfigListMap.put(stateI, null); + } + } + + // a fast check for potential issues; most states have none + if ( numPotentialConflicts==0 ) { + return null; + } + + // we have a potential problem, so now go through config lists again + // looking for different alts (only states with potential issues + // are left in the states set). Now we will check context. + // For example, the list of configs for NFA state 3 in some DFA + // state might be: + // [3|2|[28 18 $], 3|1|[28 $], 3|1, 3|2] + // I want to create a map from context to alts looking for overlap: + // [28 18 $] -> 2 + // [28 $] -> 1 + // [$] -> 1,2 + // Indeed a conflict exists as same state 3, same context [$], predicts + // alts 1 and 2. 
+ // walk each state with potential conflicting configurations + for (Iterator it = states.iterator(); it.hasNext();) { + Integer stateI = (Integer) it.next(); + List configsForState = (List)stateToConfigListMap.get(stateI); + // compare each configuration pair s, t to ensure: + // s.ctx different than t.ctx if s.alt != t.alt + int numConfigsForState = 0; + if ( configsForState!=null ) { + numConfigsForState = configsForState.size(); + } + for (int i = 0; i < numConfigsForState; i++) { + NFAConfig s = (NFAConfig) configsForState.get(i); + for (int j = i+1; j < numConfigsForState; j++) { + NFAConfig t = (NFAConfig)configsForState.get(j); + // conflicts means s.ctx==t.ctx or s.ctx is a stack + // suffix of t.ctx or vice versa (if alts differ). + // Also a conflict if s.ctx or t.ctx is empty (for now only identical contexts, including two null contexts, are detected) + if ( s.alt != t.alt && s.context == t.context ) { + nondeterministicAlts.add(Utils.integer(s.alt)); + nondeterministicAlts.add(Utils.integer(t.alt)); + } + } + } + } + + if ( nondeterministicAlts.size()==0 ) { + return null; + } + return nondeterministicAlts; + } + + /** Walk each configuration and if they are all the same alt, return + * that alt else return NFA.INVALID_ALT_NUMBER. Ignore resolved + * configurations, but don't ignore resolveWithPredicate configs + * because this state should not be an accept state. We need to add + * this to the work list and then have semantic predicate edges + * emanating from it. + */ + public int getUniquelyPredictedAlt() { + if ( cachedUniquelyPredicatedAlt!=NFA.INVALID_ALT_NUMBER ) { + return cachedUniquelyPredicatedAlt; + } + int alt = NFA.INVALID_ALT_NUMBER; + for (NFAConfig c : nfaConfigs) { + if ( alt== NFA.INVALID_ALT_NUMBER ) { + alt = c.alt; // found first nonresolved alt + } + else if ( c.alt!=alt ) { + return NFA.INVALID_ALT_NUMBER; + } + } + this.cachedUniquelyPredicatedAlt = alt; + return alt; + } + + /** Get the set of all alts mentioned by all NFA configurations in this + * DFA state. 
+ */ + public Set getAltSet() { + Set alts = new HashSet(); + for (NFAConfig c : nfaConfigs) { + alts.add(Utils.integer(c.alt)); + } + if ( alts.size()==0 ) return null; + return alts; + } + + + public int getNumberOfTransitions() { return edges.size(); } + + public void addTransition(Edge e) { edges.add(e); } + + public Edge transition(int i) { return edges.get(i); } /** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */ public int hashCode() { int h = 0; for (NFAConfig c : nfaConfigs) { - h += c.state + c.alt; + h += c.state.stateNumber + c.alt; } return h; } @@ -96,5 +252,20 @@ public class DFAState extends State { DFAState other = (DFAState)o; return this.nfaConfigs.equals(other.nfaConfigs); } + + /** Print all NFA states plus what alts they predict */ + public String toString() { + StringBuffer buf = new StringBuffer(); + buf.append(stateNumber+":{"); + for (int i = 0; i < nfaConfigs.size(); i++) { + NFAConfig c = (NFAConfig)nfaConfigs.get(i); + if ( i>0 ) { + buf.append(", "); + } + buf.append(c); + } + buf.append("}"); + return buf.toString(); + } } diff --git a/tool/src/org/antlr/v4/automata/DecisionState.java b/tool/src/org/antlr/v4/automata/DecisionState.java index 4389960ef..2b2150c47 100644 --- a/tool/src/org/antlr/v4/automata/DecisionState.java +++ b/tool/src/org/antlr/v4/automata/DecisionState.java @@ -1,5 +1,6 @@ package org.antlr.v4.automata; public class DecisionState extends BasicState { + public int decision; public DecisionState(NFA nfa) { super(nfa); } } diff --git a/tool/src/org/antlr/v4/automata/Edge.java b/tool/src/org/antlr/v4/automata/Edge.java index 3d8812192..30cc9bb0f 100644 --- a/tool/src/org/antlr/v4/automata/Edge.java +++ b/tool/src/org/antlr/v4/automata/Edge.java @@ -1,7 +1,19 @@ package org.antlr.v4.automata; +import org.antlr.v4.misc.IntervalSet; +import org.antlr.v4.tool.Grammar; + /** A DFA edge (NFA edges are called transitions) */ public class Edge { + public int atom = Label.INVALID; + public IntervalSet 
set; + public DFAState target; + public Edge(DFAState target) { this.target = target; } + + public String toString(Grammar g) { + if ( set==null ) return g.getTokenDisplayName(atom); + else return set.toString(g); + } } diff --git a/tool/src/org/antlr/v4/automata/NFA.java b/tool/src/org/antlr/v4/automata/NFA.java index b100a5139..e706a0c00 100644 --- a/tool/src/org/antlr/v4/automata/NFA.java +++ b/tool/src/org/antlr/v4/automata/NFA.java @@ -10,6 +10,8 @@ import java.util.Map; /** */ public class NFA { + public static final int INVALID_ALT_NUMBER = -1; + public Grammar g; public List states = new ArrayList(); diff --git a/tool/src/org/antlr/v4/automata/NFAConfig.java b/tool/src/org/antlr/v4/automata/NFAConfig.java index 29453a75c..2cfa6b991 100644 --- a/tool/src/org/antlr/v4/automata/NFAConfig.java +++ b/tool/src/org/antlr/v4/automata/NFAConfig.java @@ -8,13 +8,13 @@ package org.antlr.v4.automata; */ public class NFAConfig { /** The NFA state associated with this configuration */ - public int state; + public NFAState state; /** What alt is predicted by this configuration */ public int alt; /** Record the NFA state that invoked another rule's start state */ - public NFAState invokingState; + public NFAState context; /** The set of semantic predicates associated with this NFA * configuration. 
The predicates were found on the way to @@ -45,13 +45,13 @@ public class NFAConfig { */ //protected boolean resolveWithPredicate; - public NFAConfig(int state, + public NFAConfig(NFAState state, int alt, - NFAState invokingState) + NFAState context) { this.state = state; this.alt = alt; - this.invokingState = invokingState; + this.context = context; //this.semanticContext = semanticContext; } @@ -67,13 +67,13 @@ public class NFAConfig { NFAConfig other = (NFAConfig)o; return this.state==other.state && this.alt==other.alt && - this.invokingState==other.invokingState; + this.context ==other.context; // this.context.equals (other.context)&& // this.semanticContext.equals(other.semanticContext) } public int hashCode() { - int h = state + alt;// + context.hashCode(); + int h = state.stateNumber + alt;// + context.hashCode(); return h; } @@ -88,9 +88,9 @@ public class NFAConfig { buf.append("|"); buf.append(alt); } - if ( invokingState!=null ) { + if ( context !=null ) { buf.append("|"); - buf.append(invokingState); + buf.append(context); } // if ( resolved ) { // buf.append("|resolved"); diff --git a/tool/src/org/antlr/v4/automata/FASerializer.java b/tool/src/org/antlr/v4/automata/NFASerializer.java similarity index 68% rename from tool/src/org/antlr/v4/automata/FASerializer.java rename to tool/src/org/antlr/v4/automata/NFASerializer.java index 02bfee4b2..e9497f696 100644 --- a/tool/src/org/antlr/v4/automata/FASerializer.java +++ b/tool/src/org/antlr/v4/automata/NFASerializer.java @@ -7,33 +7,33 @@ import java.util.HashSet; import java.util.List; import java.util.Set; -/** A FA (finite automata) walker that knows how to dump them to serialized - * strings. - */ -public class FASerializer { - List work; - Set marked; +/** An NFA walker that knows how to dump them to serialized strings. 
*/ +public class NFASerializer { + List work; + Set marked; Grammar g; - State start; + NFAState start; - public FASerializer(Grammar g, State start) { + public NFASerializer(Grammar g, NFAState start) { this.g = g; this.start = start; } public String toString() { if ( start==null ) return null; - work = new ArrayList(); - marked = new HashSet(); + marked = new HashSet(); + + work = new ArrayList(); work.add(start); StringBuilder buf = new StringBuilder(); - State s = null; + NFAState s = null; + while ( work.size()>0 ) { s = work.remove(0); if ( marked.contains(s) ) continue; int n = s.getNumberOfTransitions(); - //System.out.println("visit "+getStateString(s)+"; edges="+n); + //System.out.println("visit "+getNFAStateString(s)+"; edges="+n); marked.add(s); for (int i=0; i"+getStateString(t.target)+'\n'); + buf.append("->"+ getStateString(t.target)+'\n'); } else if ( t instanceof RuleTransition ) { - buf.append("->"+getStateString(t.target)+'\n'); + buf.append("->"+ getStateString(t.target)+'\n'); } else if ( t instanceof ActionTransition ) { ActionTransition a = (ActionTransition)t; - buf.append("-"+a.actionAST.getText()+"->"+getStateString(t.target)+'\n'); + buf.append("-"+a.actionAST.getText()+"->"+ getStateString(t.target)+'\n'); } else if ( t instanceof AtomTransition ) { AtomTransition a = (AtomTransition)t; - buf.append("-"+a.toString(g)+"->"+getStateString(t.target)+'\n'); + buf.append("-"+a.toString(g)+"->"+ getStateString(t.target)+'\n'); } else { - buf.append("-"+t.toString()+"->"+getStateString(t.target)+'\n'); + buf.append("-"+t.toString()+"->"+ getStateString(t.target)+'\n'); } } } return buf.toString(); } - String getStateString(State s) { + String getStateString(NFAState s) { int n = s.stateNumber; String stateStr = "s"+n; -// if ( s instanceof DFAState ) { -// stateStr = ":s"+n+"=>"+((DFAState)s).getUniquelyPredictedAlt(); -// } -// else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n; else if ( s instanceof 
PlusBlockStartState ) stateStr = "PlusBlockStart_"+n; else if ( s instanceof StarBlockStartState ) stateStr = "StarBlockStart_"+n; diff --git a/tool/src/org/antlr/v4/automata/NFAState.java b/tool/src/org/antlr/v4/automata/NFAState.java index 9bc5287f3..69fe38169 100644 --- a/tool/src/org/antlr/v4/automata/NFAState.java +++ b/tool/src/org/antlr/v4/automata/NFAState.java @@ -2,7 +2,28 @@ package org.antlr.v4.automata; import org.antlr.v4.tool.GrammarAST; -public class NFAState extends State { +public class NFAState { + public static final int INVALID_STATE_NUMBER = -1; + + public int stateNumber = INVALID_STATE_NUMBER; + + @Override + public int hashCode() { + return super.hashCode(); + } + + @Override + public boolean equals(Object o) { + // are these states same object? + if ( o instanceof NFAState ) return this == (NFAState)o; + return false; + } + + @Override + public String toString() { + return String.valueOf(stateNumber); + } + /** Which NFA are we in? */ public NFA nfa = null; @@ -11,16 +32,13 @@ public class NFAState extends State { public NFAState(NFA nfa) { this.nfa = nfa; } - @Override public int getNumberOfTransitions() { return 0; } - @Override public void addTransition(Transition e) { } - @Override public Transition transition(int i) { return null; } diff --git a/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java b/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java new file mode 100644 index 000000000..5fba50136 --- /dev/null +++ b/tool/src/org/antlr/v4/automata/NFAToDFAConverter.java @@ -0,0 +1,106 @@ +package org.antlr.v4.automata; + +import org.antlr.v4.tool.Grammar; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** Code that embodies the NFA conversion to DFA. A new object is needed + * per DFA (also required for thread safety if multiple conversions + * launched). 
+ */ +public class NFAToDFAConverter { + Grammar g; + + DecisionState nfaStartState; + + /** DFA we are creating */ + DFA dfa; + + /** A list of DFA states we still need to process during NFA conversion */ + List work = new LinkedList(); + + public static boolean debug = false; + + public NFAToDFAConverter(Grammar g, DecisionState nfaStartState) { + this.g = g; + this.nfaStartState = nfaStartState; + dfa = new DFA(g, nfaStartState); + } + + public DFA createDFA() { + dfa.startState = computeStartState(); + dfa.addState(dfa.startState); // make sure dfa knows about this state + work.add(dfa.startState); + + // while more DFA states to check, process them + while ( work.size()>0 ) { + work.remove(0); // TODO: compute this state's outgoing edges; dequeue it so the loop terminates + } + + return dfa; + } + + /** From this first NFA state of a decision, create a DFA. + * Walk each alt in decision and compute closure from the start of that + * rule, making sure that the closure does not include other alts within + * that same decision. The idea is to associate a specific alt number + * with the starting closure so we can trace the alt number for all states + * derived from this. At a stop state in the DFA, we can return this alt + * number, indicating which alt is predicted. + */ + public DFAState computeStartState() { + DFAState d = dfa.newState(); + + // add config for each alt start (alts are numbered 1..nAlts), then add closure for those states + for (int altNum=1; altNum<=dfa.nAlts; altNum++) { + Transition t = nfaStartState.transition(altNum-1); + NFAState altStart = t.target; + d.addNFAConfig(altStart, altNum, null); + + } + + closure(d); + + return d; + } + + /** For all NFA states (configurations) merged in d, + * compute the epsilon closure; that is, find all NFA states reachable + * from the NFA states in d via purely epsilon transitions. 
+ */ + public void closure(DFAState d) { + if ( debug ) { + System.out.println("closure("+d+")"); + } + + List configs = new ArrayList(); + for (NFAConfig c : d.nfaConfigs) { + closure(c.state, c.alt, c.context, configs); + } + d.nfaConfigs.addAll(configs); // Add new NFA configs to DFA state d + + System.out.println("after closure d="+d); + } + + /** Where can we get from NFA state s traversing only epsilon transitions? + */ + public void closure(NFAState s, int altNum, NFAState context, + List configs) + { + NFAConfig proposedNFAConfig = + new NFAConfig(s, altNum, context); + + // p itself is always in closure + configs.add(proposedNFAConfig); + + int n = s.getNumberOfTransitions(); + for (int i=0; i intervals; @@ -557,7 +557,7 @@ public class IntervalSet implements IntSet { } public String toString() { - return toString(null); + return toString((Grammar)null); } public String toString(Grammar g) { diff --git a/tool/src/org/antlr/v4/misc/Utils.java b/tool/src/org/antlr/v4/misc/Utils.java index e0589972b..020b2f45e 100644 --- a/tool/src/org/antlr/v4/misc/Utils.java +++ b/tool/src/org/antlr/v4/misc/Utils.java @@ -40,4 +40,29 @@ public class Utils { } return buf.toString(); } + + /** Given a source string, src, + a string to replace, replacee, + and a string to replace with, replacer, + return a new string w/ the replacing done. + You can use replacer==null to remove replacee from the string. + + This should be faster than Java's String.replaceAll as that one + uses regex (I only want to play with strings anyway). 
+ */ + public static String replace(String src, String replacee, String replacer) { + StringBuffer result = new StringBuffer(src.length() + 50); + int startIndex = 0; + int endIndex = replacee.length()==0 ? -1 : src.indexOf(replacee); // an empty replacee matches at every index and would never advance; return src unchanged + while(endIndex != -1) { + result.append(src.substring(startIndex,endIndex)); + if ( replacer!=null ) { + result.append(replacer); + } + startIndex = endIndex + replacee.length(); + endIndex = src.indexOf(replacee,startIndex); + } + result.append(src.substring(startIndex,src.length())); + return result.toString(); + } } diff --git a/tool/src/org/antlr/v4/semantics/SemanticPipeline.java b/tool/src/org/antlr/v4/semantics/SemanticPipeline.java index 0603bf08f..562e8569c 100644 --- a/tool/src/org/antlr/v4/semantics/SemanticPipeline.java +++ b/tool/src/org/antlr/v4/semantics/SemanticPipeline.java @@ -26,7 +26,13 @@ import java.util.Map; * as separate objects, however). */ public class SemanticPipeline { - public void process(Grammar g) { + public Grammar g; + + public SemanticPipeline(Grammar g) { + this.g = g; + } + + public void process() { if ( g.ast==null ) return; // VALIDATE AST STRUCTURE diff --git a/tool/src/org/antlr/v4/tool/DOTGenerator.java b/tool/src/org/antlr/v4/tool/DOTGenerator.java new file mode 100644 index 000000000..2c37b7acf --- /dev/null +++ b/tool/src/org/antlr/v4/tool/DOTGenerator.java @@ -0,0 +1,290 @@ +package org.antlr.v4.tool; + +import org.antlr.v4.Tool; +import org.antlr.v4.automata.*; +import org.antlr.v4.misc.Utils; +import org.stringtemplate.v4.ST; +import org.stringtemplate.v4.STGroup; +import org.stringtemplate.v4.STGroupDir; + +import java.util.*; + +/** The DOT (part of graphviz) generation aspect. 
*/ +public class DOTGenerator { + public static final boolean STRIP_NONREDUCED_STATES = false; + + protected String arrowhead="normal"; + protected String rankdir="LR"; + + /** Library of output templates; use format */ + public static STGroup stlib = new STGroupDir("org/antlr/v4/tool/templates/dot"); + + /** To prevent infinite recursion when walking state machines, record + * which states we've visited. Make a new set every time you start + * walking in case you reuse this object. + */ + protected Set markedStates = null; + + protected Grammar grammar; + + /** This aspect is associated with a grammar */ + public DOTGenerator(Grammar grammar) { + this.grammar = grammar; + } + + /** Return a String containing a DOT description that, when displayed, + * will show the incoming state machine visually. All nodes reachable + * from startState will be included. + */ + public String getDOT(NFAState startState) { + if ( startState==null ) { + return null; + } + // The output DOT graph for visualization + ST dot = null; + markedStates = new HashSet(); + dot = stlib.getInstanceOf("nfa"); + dot.add("startState", + Utils.integer(startState.stateNumber)); + walkRuleNFACreatingDOT(dot, startState); + dot.add("rankdir", rankdir); + return dot.toString(); + } + + public String getDOT(DFAState startState) { + if ( startState==null ) { + return null; + } + // The output DOT graph for visualization + ST dot = null; + markedStates = new HashSet(); + dot = stlib.getInstanceOf("dfa"); + dot.add("startState", + Utils.integer(startState.stateNumber)); + dot.add("useBox", + Boolean.valueOf(Tool.internalOption_ShowNFAConfigsInDFA)); + walkCreatingDFADOT(dot, (DFAState)startState); + dot.add("rankdir", rankdir); + return dot.toString(); + } + + /** Return a String containing a DOT description that, when displayed, + * will show the incoming state machine visually. All nodes reachable + * from startState will be included. 
+ public String getRuleNFADOT(State startState) { + // The output DOT graph for visualization + ST dot = stlib.getInstanceOf("org/antlr/tool/templates/dot/nfa"); + + markedStates = new HashSet(); + dot.add("startState", + Utils.integer(startState.stateNumber)); + walkRuleNFACreatingDOT(dot, startState); + return dot.toString(); + } + */ + + /** Do a depth-first walk of the state machine graph and + * fill a DOT description template. Keep filling the + * states and edges attributes. + */ + protected void walkCreatingDFADOT(ST dot, + DFAState s) + { + if ( markedStates.contains(Utils.integer(s.stateNumber)) ) { + return; // already visited this node + } + + markedStates.add(Utils.integer(s.stateNumber)); // mark this node as completed. + + // first add this node + ST st; + if ( s.isAcceptState ) { + st = stlib.getInstanceOf("stopstate"); + } + else { + st = stlib.getInstanceOf("state"); + } + st.add("name", getStateLabel(s)); + dot.add("states", st); + + // make a DOT edge for each transition + for (int i = 0; i < s.getNumberOfTransitions(); i++) { + Edge edge = s.transition(i); + /* + System.out.println("dfa "+s.dfa.decisionNumber+ + " edge from s"+s.stateNumber+" ["+i+"] of "+s.getNumberOfTransitions()); + */ + st = stlib.getInstanceOf("edge"); + st.add("label", getEdgeLabel(edge.toString(grammar))); + st.add("src", getStateLabel(s)); + st.add("target", getStateLabel(edge.target)); + st.add("arrowhead", arrowhead); + dot.add("edges", st); + walkCreatingDFADOT(dot, edge.target); // keep walkin' + } + } + + /** Do a depth-first walk of the state machine graph and + * fill a DOT description template. Keep filling the + * states and edges attributes. We know this is an NFA + * for a rule so don't traverse edges to other rules and + * don't go past rule end state. 
+ */ + protected void walkRuleNFACreatingDOT(ST dot, + NFAState s) + { + if ( markedStates.contains(Utils.integer(s.stateNumber)) ) { + return; // already visited this node + } + + markedStates.add(Utils.integer(s.stateNumber)); // mark this node as completed (same key type as the contains() check above) + + // first add this node + ST stateST; + if ( s instanceof RuleStopState ) { + stateST = stlib.getInstanceOf("stopstate"); + } + else { + stateST = stlib.getInstanceOf("state"); + } + stateST.add("name", getStateLabel(s)); + dot.add("states", stateST); + + if ( s instanceof RuleStopState ) { + return; // don't go past end of rule node to the follow states + } + + // special case: if decision point, then line up the alt start states + // unless it's an end of block (NOTE(review): the test below requires a BlockEndState, which reads as the opposite -- confirm) + if ( s instanceof DecisionState ) { + GrammarAST n = ((NFAState)s).ast; + if ( n!=null && s instanceof BlockEndState ) { + ST rankST = stlib.getInstanceOf("decision-rank"); + NFAState alt = (NFAState)s; + while ( alt!=null ) { + rankST.add("states", getStateLabel(alt)); + if ( alt.transition(1) !=null ) { + alt = (NFAState)alt.transition(1).target; + } + else { + alt=null; + } + } + dot.add("decisionRanks", rankST); + } + } + + // make a DOT edge for each transition + ST edgeST = null; + for (int i = 0; i < s.getNumberOfTransitions(); i++) { + Transition edge = (Transition) s.transition(i); + if ( edge instanceof RuleTransition ) { + RuleTransition rr = ((RuleTransition)edge); + // don't jump to other rules, but display edge to follow node + edgeST = stlib.getInstanceOf("edge"); + if ( rr.rule.g != grammar ) { + edgeST.add("label", "<"+rr.rule.g.name+"."+rr.rule.name+">"); + } + else { + edgeST.add("label", "<"+rr.rule.name+">"); + } + edgeST.add("src", getStateLabel(s)); + edgeST.add("target", getStateLabel(rr.followState)); + edgeST.add("arrowhead", arrowhead); + dot.add("edges", edgeST); + walkRuleNFACreatingDOT(dot, rr.followState); + continue; + } + if ( edge instanceof ActionTransition ) { + edgeST = stlib.getInstanceOf("action-edge"); + } + else if ( edge.isEpsilon() ) { + edgeST 
= stlib.getInstanceOf("epsilon-edge"); + } + else { + edgeST = stlib.getInstanceOf("edge"); + } + edgeST.add("label", getEdgeLabel(edge.toString(grammar))); + edgeST.add("src", getStateLabel(s)); + edgeST.add("target", getStateLabel(edge.target)); + edgeST.add("arrowhead", arrowhead); + dot.add("edges", edgeST); + walkRuleNFACreatingDOT(dot, edge.target); // keep walkin' + } + } + + /** Fix edge strings so they print out in DOT properly; + * generate any gated predicates on edge too. + */ + protected String getEdgeLabel(String label) { + label = Utils.replace(label,"\\", "\\\\"); + label = Utils.replace(label,"\"", "\\\""); + label = Utils.replace(label,"\n", "\\\\n"); + label = Utils.replace(label,"\r", ""); + return label; + } + + protected String getStateLabel(NFAState s) { + if ( s==null ) return "null"; + String stateLabel = String.valueOf(s.stateNumber); + if ( s instanceof DecisionState ) { + stateLabel = stateLabel+",d="+((DecisionState)s).decision; + } + return '"'+stateLabel+'"'; + } + + protected String getStateLabel(DFAState s) { + if ( s==null ) return "null"; + String stateLabel = String.valueOf(s.stateNumber); + StringBuffer buf = new StringBuffer(250); + buf.append('s'); + buf.append(s.stateNumber); + if ( Tool.internalOption_ShowNFAConfigsInDFA ) { + Set alts = ((DFAState)s).getAltSet(); + if ( alts!=null ) { + buf.append("\\n"); + // separate alts + List altList = new ArrayList(); + altList.addAll(alts); + Collections.sort(altList); + Set configurations = ((DFAState) s).nfaConfigs; + for (int altIndex = 0; altIndex < altList.size(); altIndex++) { + Integer altI = (Integer) altList.get(altIndex); + int alt = altI.intValue(); + if ( altIndex>0 ) { + buf.append("\\n"); + } + buf.append("alt"); + buf.append(alt); + buf.append(':'); + // get a list of configs for just this alt + // it will help us print better later + List configsInAlt = new ArrayList(); + for (Iterator it = configurations.iterator(); it.hasNext();) { + NFAConfig c = (NFAConfig) 
it.next(); + if ( c.alt!=alt ) continue; + configsInAlt.add(c); + } + int n = 0; + for (int cIndex = 0; cIndex < configsInAlt.size(); cIndex++) { + NFAConfig c = + (NFAConfig)configsInAlt.get(cIndex); + n++; + buf.append(c.toString(false)); + if ( (cIndex+1)3 ) { + buf.append("\\n"); + } + } + } + } + } + stateLabel = buf.toString(); + if ( s.isAcceptState ) { + stateLabel = stateLabel+"=>"+s.getUniquelyPredictedAlt(); + } + return '"'+stateLabel+'"'; + } +} diff --git a/tool/src/org/antlr/v4/tool/Grammar.java b/tool/src/org/antlr/v4/tool/Grammar.java index 4370e0ea0..c116c2773 100644 --- a/tool/src/org/antlr/v4/tool/Grammar.java +++ b/tool/src/org/antlr/v4/tool/Grammar.java @@ -130,8 +130,8 @@ public class Grammar implements AttributeResolver { if ( this.ast==null || this.ast.hasErrors ) return; Tool antlr = new Tool(); - SemanticPipeline sem = new SemanticPipeline(); - sem.process(this); + SemanticPipeline sem = new SemanticPipeline(this); + sem.process(); if ( getImportedGrammars()!=null ) { // process imported grammars (if any) for (Grammar imp : getImportedGrammars()) { antlr.process(imp); diff --git a/tool/test/org/antlr/v4/test/TestNFAConstruction.java b/tool/test/org/antlr/v4/test/TestNFAConstruction.java index a91730368..1615603eb 100644 --- a/tool/test/org/antlr/v4/test/TestNFAConstruction.java +++ b/tool/test/org/antlr/v4/test/TestNFAConstruction.java @@ -916,7 +916,7 @@ public class TestNFAConstruction extends BaseTest { NFA nfa = f.createNFA(); Rule r = g.getRule(ruleName); NFAState startState = nfa.ruleToStartState.get(r); - FASerializer serializer = new FASerializer(g, startState); + NFASerializer serializer = new NFASerializer(g, startState); String result = serializer.toString(); //System.out.print(result);