forked from jasder/antlr
got sample input sequence and grammar mapping working for ambig paths
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 6753]
This commit is contained in:
parent
773832bd4b
commit
e70287954a
|
@ -19,6 +19,7 @@ public class AnalysisPipeline {
|
|||
|
||||
// BUILD DFA FOR EACH DECISION
|
||||
for (DecisionState s : g.nfa.decisionToNFAState) {
|
||||
System.out.println("\nDECISION "+s.decision);
|
||||
DFA dfa = createDFA(s);
|
||||
g.setLookaheadDFA(s.decision, dfa);
|
||||
}
|
||||
|
@ -26,27 +27,38 @@ public class AnalysisPipeline {
|
|||
|
||||
public DFA createDFA(DecisionState s) {
|
||||
// TRY APPROXIMATE LL(*) ANALYSIS
|
||||
StackLimitedNFAToDFAConverter approxConv = new StackLimitedNFAToDFAConverter(g, s);
|
||||
DFA dfa = approxConv.createDFA();
|
||||
System.out.println("DFA="+dfa);
|
||||
if ( dfa.isDeterministic() ) {
|
||||
System.out.println("deterministic :)");
|
||||
StackLimitedNFAToDFAConverter conv = new StackLimitedNFAToDFAConverter(g, s);
|
||||
DFA dfa = conv.createDFA();
|
||||
System.out.print("DFA="+dfa);
|
||||
if ( dfa.isAmbiguous() ) System.out.println("ambiguous");
|
||||
else System.out.println("NOT ambiguous");
|
||||
|
||||
if ( dfa.valid() ) System.out.println("stack limited valid");
|
||||
|
||||
if ( dfa.valid() ) {
|
||||
// ambig / unreachable errors
|
||||
conv.issueAmbiguityWarnings();
|
||||
return dfa;
|
||||
}
|
||||
else System.out.println("nondeterministic!!!!!!!");
|
||||
|
||||
// TODO: is it ok to have unreachable alts in approx? maybe we don't need to do full LL(*)
|
||||
|
||||
// Only do recursion limited version if we get dangling states in stack
|
||||
// limited version. Ambiguities are ok because if the approx version
|
||||
// gets an ambiguity it's definitely ambiguous.
|
||||
|
||||
// REAL LL(*) ANALYSIS IF THAT FAILS
|
||||
RecursionLimitedNFAToDFAConverter conv = new RecursionLimitedNFAToDFAConverter(g, s);
|
||||
conv = new RecursionLimitedNFAToDFAConverter(g, s);
|
||||
dfa = conv.createDFA();
|
||||
System.out.println("DFA="+dfa);
|
||||
if ( dfa.isDeterministic() ) {
|
||||
System.out.println("recursion limited deterministic :)");
|
||||
return dfa;
|
||||
System.out.print("DFA="+dfa);
|
||||
|
||||
// ambig / unreachable errors
|
||||
conv.issueAmbiguityWarnings();
|
||||
if ( !dfa.valid() ) {
|
||||
System.out.println("non-LL(*)");
|
||||
System.out.println("recursion limited NOT valid :)");
|
||||
}
|
||||
else System.out.println("recursion limited nondeterministic!!!!!!!");
|
||||
else System.out.println("recursion limited valid");
|
||||
|
||||
return dfa;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -131,8 +131,8 @@ public class DFAVerifier {
|
|||
// Visit every transition, track if at least one edge reaches stop state
|
||||
// Cannot terminate when we know this state reaches stop state since
|
||||
// all transitions must be traversed to set status of each DFA state.
|
||||
for (int i=0; i<d.getNumberOfTransitions(); i++) {
|
||||
Edge t = d.transition(i);
|
||||
for (int i=0; i<d.getNumberOfEdges(); i++) {
|
||||
Edge t = d.edge(i);
|
||||
DFAState edgeTarget = (DFAState)t.target;
|
||||
ReachableStatus targetStatus = status.get(edgeTarget);
|
||||
if ( targetStatus==ReachableStatus.BUSY ) { // avoid cycles; they say nothing
|
||||
|
|
|
@ -0,0 +1,144 @@
|
|||
package org.antlr.v4.analysis;
|
||||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.automata.*;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.Utils;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
public class MachineProbe {
|
||||
DFA dfa;
|
||||
|
||||
public MachineProbe(DFA dfa) { this.dfa = dfa; }
|
||||
|
||||
List<DFAState> getAnyDFAPathToTarget(DFAState targetState) {
|
||||
Set<DFAState> visited = new HashSet<DFAState>();
|
||||
return getAnyDFAPathToTarget(dfa.startState, targetState, visited);
|
||||
}
|
||||
|
||||
public List<DFAState> getAnyDFAPathToTarget(DFAState startState,
|
||||
DFAState targetState,
|
||||
Set<DFAState> visited)
|
||||
{
|
||||
List<DFAState> dfaStates = new ArrayList<DFAState>();
|
||||
visited.add(startState);
|
||||
if ( startState.equals(targetState) ) {
|
||||
dfaStates.add(targetState);
|
||||
return dfaStates;
|
||||
}
|
||||
for (Edge e : startState.edges) { // walk edges looking for valid path
|
||||
if ( !visited.contains(e.target) ) {
|
||||
List<DFAState> path =
|
||||
getAnyDFAPathToTarget(e.target, targetState, visited);
|
||||
if ( path!=null ) { // found path, we're done
|
||||
dfaStates.add(startState);
|
||||
dfaStates.addAll(path);
|
||||
return dfaStates;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/** Return a list of edge labels from start state to targetState. */
|
||||
public List<IntSet> getEdgeLabels(DFAState targetState) {
|
||||
List<DFAState> dfaStates = getAnyDFAPathToTarget(targetState);
|
||||
List<IntSet> labels = new ArrayList<IntSet>();
|
||||
for (int i=0; i<dfaStates.size()-1; i++) {
|
||||
DFAState d = dfaStates.get(i);
|
||||
DFAState nextState = dfaStates.get(i + 1);
|
||||
// walk looking for edge whose target is next dfa state
|
||||
for (Edge e : d.edges) {
|
||||
if ( e.target.stateNumber == nextState.stateNumber ) {
|
||||
labels.add(e.label);
|
||||
}
|
||||
}
|
||||
}
|
||||
return labels;
|
||||
}
|
||||
|
||||
/** Given List<IntSet>, return a String with a useful representation
|
||||
* of the associated input string. One could show something different
|
||||
* for lexers and parsers, for example.
|
||||
*/
|
||||
public String getInputSequenceDisplay(Grammar g, List<IntSet> labels) {
|
||||
List<String> tokens = new ArrayList<String>();
|
||||
for (IntSet label : labels) tokens.add(label.toString(g));
|
||||
return Utils.join(tokens.iterator(), " ");
|
||||
}
|
||||
|
||||
/** Given an alternative associated with a DFA state, return the list
|
||||
* of tokens (from grammar) associated with path through NFA following
|
||||
* the labels sequence. The nfaStates gives the set of NFA states
|
||||
* associated with alt that take us from start to stop. One of the
|
||||
* NFA states in nfaStates[i] will have an edge intersecting with
|
||||
* labels[i].
|
||||
*/
|
||||
public List<Token> getGrammarLocationsForInputSequence(List<Set<NFAState>> nfaStates,
|
||||
List<IntSet> labels)
|
||||
{
|
||||
List<Token> tokens = new ArrayList<Token>();
|
||||
for (int i=0; i<nfaStates.size()-1; i++) {
|
||||
Set<NFAState> cur = nfaStates.get(i);
|
||||
Set<NFAState> next = nfaStates.get(i + 1);
|
||||
IntSet label = labels.get(i);
|
||||
// find NFA state with edge whose label matches labels[i]
|
||||
nfaConfigLoop:
|
||||
for (NFAState p : cur) {
|
||||
// walk p's transitions, looking for label
|
||||
for (int j=0; j<p.getNumberOfTransitions(); j++) {
|
||||
Transition t = p.transition(j);
|
||||
if ( !t.isEpsilon() &&
|
||||
!t.label().and(label).isNil() &&
|
||||
next.contains(t.target) )
|
||||
{
|
||||
tokens.add(p.ast.token);
|
||||
break nfaConfigLoop; // found path, move to next NFAState set
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// /** Used to find paths through syntactically ambiguous DFA. If we've
|
||||
// * seen statement number before, what did we learn?
|
||||
// */
|
||||
// protected Map<Integer, Integer> stateReachable;
|
||||
//
|
||||
// public Map<DFAState, Set<DFAState>> getReachSets(Collection<DFAState> targets) {
|
||||
// Map<DFAState, Set<DFAState>> reaches = new HashMap<DFAState, Set<DFAState>>();
|
||||
// // targets can reach themselves
|
||||
// for (final DFAState d : targets) {
|
||||
// reaches.put(d,new HashSet<DFAState>() {{add(d);}});
|
||||
// }
|
||||
//
|
||||
// boolean changed = true;
|
||||
// while ( changed ) {
|
||||
// changed = false;
|
||||
// for (DFAState d : dfa.states.values()) {
|
||||
// if ( d.getNumberOfEdges()==0 ) continue;
|
||||
// Set<DFAState> r = reaches.get(d);
|
||||
// if ( r==null ) {
|
||||
// r = new HashSet<DFAState>();
|
||||
// reaches.put(d, r);
|
||||
// }
|
||||
// int before = r.size();
|
||||
// // add all reaches from all edge targets
|
||||
// for (Edge e : d.edges) {
|
||||
// //if ( targets.contains(e.target) ) r.add(e.target);
|
||||
// r.addAll( reaches.get(e.target) );
|
||||
// }
|
||||
// int after = r.size();
|
||||
// if ( after>before) changed = true;
|
||||
// }
|
||||
// }
|
||||
// return reaches;
|
||||
// }
|
||||
|
||||
}
|
|
@ -58,7 +58,7 @@ public class Resolver {
|
|||
boolean thisStateHasPotentialProblem = false;
|
||||
for (List<NFAConfig> configsForState : stateToConfigListMap.values()) {
|
||||
if ( configsForState.size()>1 ) {
|
||||
int predictedAlt = Resolver.getUniqueAlt(configsForState, false);
|
||||
int predictedAlt = Resolver.getUniqueAlt(configsForState);
|
||||
if ( predictedAlt > 0 ) {
|
||||
// remove NFA state's configurations from
|
||||
// further checking; no issues with it
|
||||
|
@ -158,7 +158,7 @@ public class Resolver {
|
|||
}
|
||||
|
||||
public void resolveDeadState(DFAState d) {
|
||||
if ( d.resolvedWithPredicates || d.getNumberOfTransitions()>0 ) return;
|
||||
if ( d.resolvedWithPredicates || d.getNumberOfEdges()>0 ) return;
|
||||
|
||||
System.err.println("dangling DFA state "+d+" after reach / closures");
|
||||
converter.danglingStates.add(d);
|
||||
|
@ -183,7 +183,7 @@ public class Resolver {
|
|||
* Return the min alt found.
|
||||
*/
|
||||
int resolveByPickingMinAlt(DFAState d, Set<Integer> alts) {
|
||||
int min = Integer.MAX_VALUE;
|
||||
int min = 0;
|
||||
if ( alts !=null ) {
|
||||
min = getMinAlt(alts);
|
||||
}
|
||||
|
@ -222,6 +222,10 @@ public class Resolver {
|
|||
return min;
|
||||
}
|
||||
|
||||
public static int getUniqueAlt(Collection<NFAConfig> nfaConfigs) {
|
||||
return getUniqueAlt(nfaConfigs, true);
|
||||
}
|
||||
|
||||
public static int getUniqueAlt(Collection<NFAConfig> nfaConfigs,
|
||||
boolean ignoreResolved)
|
||||
{
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
package org.antlr.v4.analysis;
|
||||
|
||||
import org.antlr.runtime.Token;
|
||||
import org.antlr.v4.automata.*;
|
||||
import org.antlr.v4.misc.IntSet;
|
||||
import org.antlr.v4.misc.IntervalSet;
|
||||
import org.antlr.v4.misc.OrderedHashSet;
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
|
@ -151,7 +153,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
void addTransition(DFAState d, IntervalSet label, DFAState t) {
|
||||
DFAState existing = dfa.states.get(t);
|
||||
if ( existing != null ) { // seen before; point at old one
|
||||
d.addTransition(new Edge(existing, label));
|
||||
d.addEdge(new Edge(existing, label));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -173,7 +175,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
dfa.addState(t); // add state we've never seen before
|
||||
}
|
||||
|
||||
d.addTransition(new Edge(t, label));
|
||||
d.addEdge(new Edge(t, label));
|
||||
}
|
||||
|
||||
/** Given the set of NFA states in DFA state d, find all NFA states
|
||||
|
@ -557,7 +559,7 @@ public class StackLimitedNFAToDFAConverter {
|
|||
c.semanticContext);
|
||||
dfa.defineAcceptState(c.alt, predDFATarget);
|
||||
// add a transition to pred target from d
|
||||
d.addTransition(new PredicateEdge(c.semanticContext, predDFATarget));
|
||||
d.addEdge(new PredicateEdge(c.semanticContext, predDFATarget));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -567,5 +569,39 @@ public class StackLimitedNFAToDFAConverter {
|
|||
if ( alt>0 && dfa.altToAcceptState[alt]==null ) unreachable.add(alt);
|
||||
}
|
||||
return unreachable;
|
||||
}
|
||||
}
|
||||
|
||||
void issueAmbiguityWarnings() {
|
||||
MachineProbe probe = new MachineProbe(dfa);
|
||||
|
||||
for (DFAState d : ambiguousStates) {
|
||||
Set<Integer> alts = resolver.getAmbiguousAlts(d);
|
||||
List<Integer> sorted = new ArrayList<Integer>(alts);
|
||||
Collections.sort(sorted);
|
||||
System.out.println("ambig alts="+sorted);
|
||||
List<DFAState> dfaStates = probe.getAnyDFAPathToTarget(d);
|
||||
System.out.print("path =");
|
||||
for (DFAState d2 : dfaStates) {
|
||||
System.out.print(" "+d2.stateNumber);
|
||||
}
|
||||
System.out.println("");
|
||||
|
||||
List<IntSet> labels = probe.getEdgeLabels(d);
|
||||
|
||||
System.out.println("labels="+probe.getInputSequenceDisplay(g, labels));
|
||||
|
||||
for (int alt : sorted) {
|
||||
List<Set<NFAState>> nfaStates = new ArrayList<Set<NFAState>>();
|
||||
for (DFAState d2 : dfaStates) {
|
||||
nfaStates.add( d2.getUniqueNFAStates(alt) );
|
||||
}
|
||||
System.out.println("NFAConfigs per state: "+nfaStates);
|
||||
List<Token> path =
|
||||
probe.getGrammarLocationsForInputSequence(nfaStates, labels);
|
||||
System.out.println("path = "+path);
|
||||
}
|
||||
}
|
||||
if ( unreachableAlts.size()>0 ) System.out.println("unreachable="+unreachableAlts);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ import java.util.Map;
|
|||
* of recognizers (lexers, parsers, tree walkers).
|
||||
*/
|
||||
public class DFA {
|
||||
Grammar g;
|
||||
public Grammar g;
|
||||
|
||||
/** What's the start state for this DFA? */
|
||||
public DFAState startState;
|
||||
|
@ -85,21 +85,17 @@ public class DFA {
|
|||
return n;
|
||||
}
|
||||
|
||||
public boolean isDeterministic() {
|
||||
// could imply converter.unreachableAlts.size()>0 too
|
||||
public boolean isAmbiguous() {
|
||||
boolean resolvedWithPredicates = true;
|
||||
// flip resolvedWithPredicates if we find an ambig state not resolved with a predicate
|
||||
for (DFAState d : converter.ambiguousStates) {
|
||||
if ( !d.resolvedWithPredicates ) resolvedWithPredicates = false;
|
||||
}
|
||||
if ( converter.danglingStates.size()==0 &&
|
||||
resolvedWithPredicates &&
|
||||
converter.unreachableAlts.size()==0 )
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// ...
|
||||
return false;
|
||||
return converter.ambiguousStates.size()>0 && !resolvedWithPredicates;
|
||||
}
|
||||
|
||||
/** Returns true when the converter recorded no dangling states. */
public boolean valid() {
    final int danglingCount = converter.danglingStates.size();
    return danglingCount == 0;
}
|
||||
|
||||
public String toString() {
|
||||
if ( startState==null ) return "";
|
||||
|
|
|
@ -33,11 +33,11 @@ public class DFASerializer {
|
|||
s = work.remove(0);
|
||||
if ( marked.contains(s) ) continue;
|
||||
marked.add(s);
|
||||
int n = s.getNumberOfTransitions();
|
||||
int n = s.getNumberOfEdges();
|
||||
//System.out.println("visit "+getStateString(s)+"; edges="+n);
|
||||
for (int i=0; i<n; i++) {
|
||||
buf.append(getStateString(s));
|
||||
Edge t = s.transition(i);
|
||||
Edge t = s.edge(i);
|
||||
work.add( t.target );
|
||||
buf.append("-"+t.toString(g)+"->"+ getStateString(t.target)+'\n');
|
||||
}
|
||||
|
|
|
@ -118,17 +118,23 @@ public class DFAState {
|
|||
}
|
||||
|
||||
public Set<NFAState> getUniqueNFAStates() {
|
||||
return getUniqueNFAStates(NFA.INVALID_ALT_NUMBER);
|
||||
}
|
||||
|
||||
public Set<NFAState> getUniqueNFAStates(int alt) {
|
||||
Set<NFAState> alts = new HashSet<NFAState>();
|
||||
for (NFAConfig c : nfaConfigs) alts.add(c.state);
|
||||
for (NFAConfig c : nfaConfigs) {
|
||||
if ( alt==NFA.INVALID_ALT_NUMBER || c.alt==alt ) alts.add(c.state);
|
||||
}
|
||||
if ( alts.size()==0 ) return null;
|
||||
return alts;
|
||||
}
|
||||
|
||||
/** Returns the size of this state's edges list. */
public int getNumberOfTransitions() {
    final int count = edges.size();
    return count;
}
|
||||
/** Returns the size of this state's edges list. */
public int getNumberOfEdges() {
    final int count = edges.size();
    return count;
}
|
||||
|
||||
/** Appends e to this state's edges list. */
public void addTransition(Edge e) {
    edges.add(e);
}
|
||||
/** Appends e to this state's edges list. */
public void addEdge(Edge e) {
    edges.add(e);
}
|
||||
|
||||
/** Returns the element at index i of this state's edges list. */
public Edge transition(int i) {
    final Edge found = edges.get(i);
    return found;
}
|
||||
/** Returns the element at index i of this state's edges list. */
public Edge edge(int i) {
    final Edge found = edges.get(i);
    return found;
}
|
||||
|
||||
/** A decent hash for a DFA state is the sum of the NFA state/alt pairs. */
|
||||
public int hashCode() {
|
||||
|
|
|
@ -172,8 +172,8 @@ public class DOTGenerator {
|
|||
markedStates.add(d);
|
||||
|
||||
// make a DOT edge for each transition
|
||||
for (int i = 0; i < d.getNumberOfTransitions(); i++) {
|
||||
Edge edge = d.transition(i);
|
||||
for (int i = 0; i < d.getNumberOfEdges(); i++) {
|
||||
Edge edge = d.edge(i);
|
||||
/*
|
||||
System.out.println("dfa "+s.dfa.decisionNumber+
|
||||
" edge from s"+s.stateNumber+" ["+i+"] of "+s.getNumberOfTransitions());
|
||||
|
|
Loading…
Reference in New Issue