got sample input sequence and grammar mapping working for ambig paths

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6753]
This commit is contained in:
parrt 2010-03-15 11:55:56 -08:00
parent 773832bd4b
commit e70287954a
9 changed files with 239 additions and 41 deletions

View File

@ -19,6 +19,7 @@ public class AnalysisPipeline {
// BUILD DFA FOR EACH DECISION
for (DecisionState s : g.nfa.decisionToNFAState) {
System.out.println("\nDECISION "+s.decision);
DFA dfa = createDFA(s);
g.setLookaheadDFA(s.decision, dfa);
}
@ -26,27 +27,38 @@ public class AnalysisPipeline {
public DFA createDFA(DecisionState s) {
// TRY APPROXIMATE LL(*) ANALYSIS
StackLimitedNFAToDFAConverter approxConv = new StackLimitedNFAToDFAConverter(g, s);
DFA dfa = approxConv.createDFA();
System.out.println("DFA="+dfa);
if ( dfa.isDeterministic() ) {
System.out.println("deterministic :)");
StackLimitedNFAToDFAConverter conv = new StackLimitedNFAToDFAConverter(g, s);
DFA dfa = conv.createDFA();
System.out.print("DFA="+dfa);
if ( dfa.isAmbiguous() ) System.out.println("ambiguous");
else System.out.println("NOT ambiguous");
if ( dfa.valid() ) System.out.println("stack limited valid");
if ( dfa.valid() ) {
// ambig / unreachable errors
conv.issueAmbiguityWarnings();
return dfa;
}
else System.out.println("nondeterministic!!!!!!!");
// TODO: is it ok to have unreachable alts in approx? maybe we don't need to do full LL(*)
// Only do recursion limited version if we get dangling states in stack
// limited version. Ambiguities are ok because if the approx version
// gets an ambiguity it's defin
// REAL LL(*) ANALYSIS IF THAT FAILS
RecursionLimitedNFAToDFAConverter conv = new RecursionLimitedNFAToDFAConverter(g, s);
conv = new RecursionLimitedNFAToDFAConverter(g, s);
dfa = conv.createDFA();
System.out.println("DFA="+dfa);
if ( dfa.isDeterministic() ) {
System.out.println("recursion limited deterministic :)");
return dfa;
System.out.print("DFA="+dfa);
// ambig / unreachable errors
conv.issueAmbiguityWarnings();
if ( !dfa.valid() ) {
System.out.println("non-LL(*)");
System.out.println("recursion limited NOT valid :)");
}
else System.out.println("recursion limited nondeterministic!!!!!!!");
else System.out.println("recursion limited valid");
return dfa;
}
}

View File

@ -131,8 +131,8 @@ public class DFAVerifier {
// Visit every transition, track if at least one edge reaches stop state
// Cannot terminate when we know this state reaches stop state since
// all transitions must be traversed to set status of each DFA state.
for (int i=0; i<d.getNumberOfTransitions(); i++) {
Edge t = d.transition(i);
for (int i=0; i<d.getNumberOfEdges(); i++) {
Edge t = d.edge(i);
DFAState edgeTarget = (DFAState)t.target;
ReachableStatus targetStatus = status.get(edgeTarget);
if ( targetStatus==ReachableStatus.BUSY ) { // avoid cycles; they say nothing

View File

@ -0,0 +1,144 @@
package org.antlr.v4.analysis;
import org.antlr.runtime.Token;
import org.antlr.v4.automata.*;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.Utils;
import org.antlr.v4.tool.Grammar;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** Probes a lookahead DFA to recover sample information for diagnostics:
 *  a path of DFA states leading to a given state, the edge labels along
 *  that path, and the grammar tokens whose NFA transitions match those
 *  labels.
 */
public class MachineProbe {
    /** The DFA whose states and edges are walked. */
    DFA dfa;

    public MachineProbe(DFA dfa) { this.dfa = dfa; }

    /** Return any path of DFA states from dfa.startState to targetState,
     *  both endpoints included, or null if targetState cannot be reached.
     */
    List<DFAState> getAnyDFAPathToTarget(DFAState targetState) {
        Set<DFAState> visited = new HashSet<DFAState>();
        return getAnyDFAPathToTarget(dfa.startState, targetState, visited);
    }

    /** Depth-first search for any path from startState to targetState.
     *
     *  @param startState  state to begin searching from
     *  @param targetState state to reach (compared with equals())
     *  @param visited     states already explored; updated in place so that
     *                     cycles are not followed twice
     *  @return the states on the path, startState first and targetState
     *          last, or null when no path exists
     */
    public List<DFAState> getAnyDFAPathToTarget(DFAState startState,
                                                DFAState targetState,
                                                Set<DFAState> visited)
    {
        List<DFAState> dfaStates = new ArrayList<DFAState>();
        visited.add(startState);
        if ( startState.equals(targetState) ) {
            dfaStates.add(targetState);
            return dfaStates;
        }
        for (Edge e : startState.edges) { // walk edges looking for valid path
            if ( !visited.contains(e.target) ) {
                List<DFAState> path =
                    getAnyDFAPathToTarget(e.target, targetState, visited);
                if ( path!=null ) { // found path, we're done
                    dfaStates.add(startState);
                    dfaStates.addAll(path);
                    return dfaStates;
                }
            }
        }
        return null;
    }

    /** Return a list of edge labels from start state to targetState.
     *  Exactly one label is returned per step of the path found by
     *  getAnyDFAPathToTarget(), so labels[i] is the edge taken from path
     *  state i to path state i+1. Returns an empty list when targetState
     *  is unreachable.
     */
    public List<IntSet> getEdgeLabels(DFAState targetState) {
        List<IntSet> labels = new ArrayList<IntSet>();
        List<DFAState> dfaStates = getAnyDFAPathToTarget(targetState);
        if ( dfaStates==null ) return labels; // no path => no labels
        for (int i=0; i<dfaStates.size()-1; i++) {
            DFAState d = dfaStates.get(i);
            DFAState nextState = dfaStates.get(i + 1);
            // walk looking for edge whose target is next dfa state
            for (Edge e : d.edges) {
                if ( e.target.stateNumber == nextState.stateNumber ) {
                    labels.add(e.label);
                    // Several edges may lead to the same target; take only
                    // the first so labels stay aligned with the path steps.
                    break;
                }
            }
        }
        return labels;
    }

    /** Given List<IntSet>, return a String with a useful representation
     *  of the associated input string. One could show something different
     *  for lexers and parsers, for example.
     */
    public String getInputSequenceDisplay(Grammar g, List<IntSet> labels) {
        List<String> tokens = new ArrayList<String>();
        for (IntSet label : labels) tokens.add(label.toString(g));
        return Utils.join(tokens.iterator(), " ");
    }

    /** Given an alternative associated with a DFA state, return the list
     *  of tokens (from grammar) associated with path through NFA following
     *  the labels sequence. The nfaStates gives the set of NFA states
     *  associated with alt that take us from start to stop. One of the
     *  NFA states in nfaStates[i] will have an edge intersecting with
     *  labels[i].
     *
     *  Steps for which either NFA state set is null are skipped, and no
     *  more steps are examined than there are labels. A null argument
     *  yields an empty list.
     */
    public List<Token> getGrammarLocationsForInputSequence(List<Set<NFAState>> nfaStates,
                                                           List<IntSet> labels)
    {
        List<Token> tokens = new ArrayList<Token>();
        if ( nfaStates==null || labels==null ) return tokens;
        // never index past the end of labels
        int steps = Math.min(nfaStates.size()-1, labels.size());
        for (int i=0; i<steps; i++) {
            Set<NFAState> cur = nfaStates.get(i);
            Set<NFAState> next = nfaStates.get(i + 1);
            if ( cur==null || next==null ) continue; // nothing to match for this step
            IntSet label = labels.get(i);
            // find NFA state with edge whose label matches labels[i]
            nfaConfigLoop:
            for (NFAState p : cur) {
                // walk p's transitions, looking for label
                for (int j=0; j<p.getNumberOfTransitions(); j++) {
                    Transition t = p.transition(j);
                    if ( !t.isEpsilon() &&
                         !t.label().and(label).isNil() &&
                         next.contains(t.target) )
                    {
                        tokens.add(p.ast.token);
                        break nfaConfigLoop; // found path, move to next NFAState set
                    }
                }
            }
        }
        return tokens;
    }

//  /** Used to find paths through syntactically ambiguous DFA. If we've
//   *  seen statement number before, what did we learn?
//   */
//  protected Map<Integer, Integer> stateReachable;
//
//  public Map<DFAState, Set<DFAState>> getReachSets(Collection<DFAState> targets) {
//      Map<DFAState, Set<DFAState>> reaches = new HashMap<DFAState, Set<DFAState>>();
//      // targets can reach themselves
//      for (final DFAState d : targets) {
//          reaches.put(d,new HashSet<DFAState>() {{add(d);}});
//      }
//
//      boolean changed = true;
//      while ( changed ) {
//          changed = false;
//          for (DFAState d : dfa.states.values()) {
//              if ( d.getNumberOfEdges()==0 ) continue;
//              Set<DFAState> r = reaches.get(d);
//              if ( r==null ) {
//                  r = new HashSet<DFAState>();
//                  reaches.put(d, r);
//              }
//              int before = r.size();
//              // add all reaches from all edge targets
//              for (Edge e : d.edges) {
//                  //if ( targets.contains(e.target) ) r.add(e.target);
//                  r.addAll( reaches.get(e.target) );
//              }
//              int after = r.size();
//              if ( after>before) changed = true;
//          }
//      }
//      return reaches;
//  }
}

View File

@ -58,7 +58,7 @@ public class Resolver {
boolean thisStateHasPotentialProblem = false;
for (List<NFAConfig> configsForState : stateToConfigListMap.values()) {
if ( configsForState.size()>1 ) {
int predictedAlt = Resolver.getUniqueAlt(configsForState, false);
int predictedAlt = Resolver.getUniqueAlt(configsForState);
if ( predictedAlt > 0 ) {
// remove NFA state's configurations from
// further checking; no issues with it
@ -158,7 +158,7 @@ public class Resolver {
}
public void resolveDeadState(DFAState d) {
if ( d.resolvedWithPredicates || d.getNumberOfTransitions()>0 ) return;
if ( d.resolvedWithPredicates || d.getNumberOfEdges()>0 ) return;
System.err.println("dangling DFA state "+d+" after reach / closures");
converter.danglingStates.add(d);
@ -183,7 +183,7 @@ public class Resolver {
* Return the min alt found.
*/
int resolveByPickingMinAlt(DFAState d, Set<Integer> alts) {
int min = Integer.MAX_VALUE;
int min = 0;
if ( alts !=null ) {
min = getMinAlt(alts);
}
@ -222,6 +222,10 @@ public class Resolver {
return min;
}
/** Convenience overload: calls getUniqueAlt(nfaConfigs, true), i.e. with
 *  ignoreResolved set to true, and returns that result.
 */
public static int getUniqueAlt(Collection<NFAConfig> nfaConfigs) {
return getUniqueAlt(nfaConfigs, true);
}
public static int getUniqueAlt(Collection<NFAConfig> nfaConfigs,
boolean ignoreResolved)
{

View File

@ -1,6 +1,8 @@
package org.antlr.v4.analysis;
import org.antlr.runtime.Token;
import org.antlr.v4.automata.*;
import org.antlr.v4.misc.IntSet;
import org.antlr.v4.misc.IntervalSet;
import org.antlr.v4.misc.OrderedHashSet;
import org.antlr.v4.tool.Grammar;
@ -151,7 +153,7 @@ public class StackLimitedNFAToDFAConverter {
void addTransition(DFAState d, IntervalSet label, DFAState t) {
DFAState existing = dfa.states.get(t);
if ( existing != null ) { // seen before; point at old one
d.addTransition(new Edge(existing, label));
d.addEdge(new Edge(existing, label));
return;
}
@ -173,7 +175,7 @@ public class StackLimitedNFAToDFAConverter {
dfa.addState(t); // add state we've never seen before
}
d.addTransition(new Edge(t, label));
d.addEdge(new Edge(t, label));
}
/** Given the set of NFA states in DFA state d, find all NFA states
@ -557,7 +559,7 @@ public class StackLimitedNFAToDFAConverter {
c.semanticContext);
dfa.defineAcceptState(c.alt, predDFATarget);
// add a transition to pred target from d
d.addTransition(new PredicateEdge(c.semanticContext, predDFATarget));
d.addEdge(new PredicateEdge(c.semanticContext, predDFATarget));
}
}
@ -567,5 +569,39 @@ public class StackLimitedNFAToDFAConverter {
if ( alt>0 && dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
}
return unreachable;
}
}
/** Print diagnostics to System.out for every state in ambiguousStates:
 *  the sorted ambiguous alts, one DFA path from the start state to the
 *  ambiguous state, the edge labels along that path and, for each alt,
 *  the NFA states per DFA state plus the grammar tokens matched along
 *  the path. Finally prints unreachableAlts when it is not empty.
 */
void issueAmbiguityWarnings() {
    MachineProbe probe = new MachineProbe(dfa);
    for (DFAState d : ambiguousStates) {
        Set<Integer> alts = resolver.getAmbiguousAlts(d);
        List<Integer> sorted = new ArrayList<Integer>(alts);
        Collections.sort(sorted);
        System.out.println("ambig alts="+sorted);

        List<DFAState> dfaStates = probe.getAnyDFAPathToTarget(d);
        if ( dfaStates==null ) {
            // The probe returns null when the state cannot be reached from
            // the DFA start state; there is no sample path to report.
            System.out.println("no DFA path to state "+d.stateNumber);
            continue;
        }
        System.out.print("path =");
        for (DFAState d2 : dfaStates) {
            System.out.print(" "+d2.stateNumber);
        }
        System.out.println("");

        List<IntSet> labels = probe.getEdgeLabels(d);
        System.out.println("labels="+probe.getInputSequenceDisplay(g, labels));

        for (int alt : sorted) {
            // collect, per DFA state on the path, the NFA states for this alt
            List<Set<NFAState>> nfaStates = new ArrayList<Set<NFAState>>();
            for (DFAState d2 : dfaStates) {
                nfaStates.add( d2.getUniqueNFAStates(alt) );
            }
            System.out.println("NFAConfigs per state: "+nfaStates);
            List<Token> path =
                probe.getGrammarLocationsForInputSequence(nfaStates, labels);
            System.out.println("path = "+path);
        }
    }
    if ( unreachableAlts.size()>0 ) System.out.println("unreachable="+unreachableAlts);
}
}

View File

@ -14,7 +14,7 @@ import java.util.Map;
* of recognizers (lexers, parsers, tree walkers).
*/
public class DFA {
Grammar g;
public Grammar g;
/** What's the start state for this DFA? */
public DFAState startState;
@ -85,21 +85,17 @@ public class DFA {
return n;
}
public boolean isDeterministic() {
// could imply converter.unreachableAlts.size()>0 too
public boolean isAmbiguous() {
boolean resolvedWithPredicates = true;
// flip resolvedWithPredicates if we find an ambig state not resolved with pred
for (DFAState d : converter.ambiguousStates) {
if ( !d.resolvedWithPredicates ) resolvedWithPredicates = false;
}
if ( converter.danglingStates.size()==0 &&
resolvedWithPredicates &&
converter.unreachableAlts.size()==0 )
{
return true;
}
// ...
return false;
return converter.ambiguousStates.size()>0 && !resolvedWithPredicates;
}
/** Return true when the converter recorded no dangling DFA states. */
public boolean valid() { return converter.danglingStates.size()==0; }
public String toString() {
if ( startState==null ) return "";

View File

@ -33,11 +33,11 @@ public class DFASerializer {
s = work.remove(0);
if ( marked.contains(s) ) continue;
marked.add(s);
int n = s.getNumberOfTransitions();
int n = s.getNumberOfEdges();
//System.out.println("visit "+getStateString(s)+"; edges="+n);
for (int i=0; i<n; i++) {
buf.append(getStateString(s));
Edge t = s.transition(i);
Edge t = s.edge(i);
work.add( t.target );
buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
}

View File

@ -118,17 +118,23 @@ public class DFAState {
}
/** Convenience overload: delegates to getUniqueNFAStates(int) passing
 *  NFA.INVALID_ALT_NUMBER as the alt.
 */
public Set<NFAState> getUniqueNFAStates() {
return getUniqueNFAStates(NFA.INVALID_ALT_NUMBER);
}
public Set<NFAState> getUniqueNFAStates(int alt) {
Set<NFAState> alts = new HashSet<NFAState>();
for (NFAConfig c : nfaConfigs) alts.add(c.state);
for (NFAConfig c : nfaConfigs) {
if ( alt==NFA.INVALID_ALT_NUMBER || c.alt==alt ) alts.add(c.state);
}
if ( alts.size()==0 ) return null;
return alts;
}
public int getNumberOfTransitions() { return edges.size(); }
public int getNumberOfEdges() { return edges.size(); }
public void addTransition(Edge e) { edges.add(e); }
public void addEdge(Edge e) { edges.add(e); }
public Edge transition(int i) { return edges.get(i); }
public Edge edge(int i) { return edges.get(i); }
/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
public int hashCode() {

View File

@ -172,8 +172,8 @@ public class DOTGenerator {
markedStates.add(d);
// make a DOT edge for each transition
for (int i = 0; i < d.getNumberOfTransitions(); i++) {
Edge edge = d.transition(i);
for (int i = 0; i < d.getNumberOfEdges(); i++) {
Edge edge = d.edge(i);
/*
System.out.println("dfa "+s.dfa.decisionNumber+
" edge from s"+s.stateNumber+" ["+i+"] of "+s.getNumberOfTransitions());