Major update to v4. I backed out a change I made on Christmas that mistakenly prevented any lexer DFA creation. Per http://www.antlr.org/wiki/display/~admin/2011/12/29/Flaw+in+ANTLR+v3+LL%28*%29+analysis+algorithm I fixed a major flaw in ANTLR's notion of context. To do that, I needed to create a new LoopEndState, with all of its fanout to the serialization and parser ATN construction. Got a very good start on ParserATNPathFinder, which uses basic recursion to find all possible paths and return a tree with the possibilities. I left it in a condition where it would sometimes loop forever; it needs to track sets of configurations in the busy set, but it is using states at the moment. Added a new signal from the interpreter: reportAttemptingFullContext. I fixed a bug where configuration sets derived from a configuration that had reachesIntoOuterContext>0 were not being considered as dipping into the outer context. The ambiguity checker needed to switch so that it checks for exact matches, not suffixes, when doing full context. It's faster at the very least for full context. Added some more support routines to DFA. Added TraceTree in support of the new ParserATNPathFinder.

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9764]
This commit is contained in:
parrt 2011-12-29 17:04:40 -08:00
parent 3a3ed27e60
commit a923ad8765
19 changed files with 5545 additions and 97 deletions

View File

@ -55,7 +55,7 @@ public class ATNConfig {
* execution of the ATN simulator.
*/
@Nullable
public final RuleContext context;
public RuleContext context;
/**
* Indicates that we have reached this ATN configuration after

View File

@ -30,8 +30,13 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.OrderedHashSet;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
/** Specialized OrderedHashSet that can track info about the set.
* Might be able to optimize later w/o affecting code that uses this set.
*/
@ -53,6 +58,14 @@ public class ATNConfigSet extends OrderedHashSet<ATNConfig> {
this.dipsIntoOuterContext = old.dipsIntoOuterContext;
}
/** Collect the distinct ATN states referenced by the configurations
 *  currently held in this set.
 *
 *  @return a newly allocated set containing the {@code state} of every
 *          configuration in {@code elements}; empty if there are none
 */
public Set<ATNState> getStates() {
    Set<ATNState> distinctStates = new HashSet<ATNState>();
    for (ATNConfig config : elements) {
        ATNState state = config.state;
        distinctStates.add(state);
    }
    return distinctStates;
}
@Override
public String toString() {
StringBuilder buf = new StringBuilder();

View File

@ -70,6 +70,9 @@ public abstract class ATNSimulator {
}
ATNState s = stateFactory(stype, i);
s.ruleIndex = toInt(data[p++]);
if ( stype == ATNState.LOOP_END ) { // special case
((LoopEndState)s).loopBackStateNumber = toInt(data[p++]);
}
atn.addState(s);
}
int nrules = toInt(data[p++]);
@ -179,6 +182,7 @@ public abstract class ATNSimulator {
case ATNState.STAR_LOOP_BACK : s = new StarLoopbackState(); break;
case ATNState.STAR_LOOP_ENTRY : s = new StarLoopEntryState(); break;
case ATNState.PLUS_LOOP_BACK : s = new PlusLoopbackState(); break;
case ATNState.LOOP_END : s = new LoopEndState(); break;
default :
System.err.println("invalid state type in ATN deserialization: "+type+" for state "+stateNumber);
break;

View File

@ -49,6 +49,7 @@ public class ATNState {
public static final int STAR_LOOP_BACK = 9;
public static final int STAR_LOOP_ENTRY = 10;
public static final int PLUS_LOOP_BACK = 11;
public static final int LOOP_END = 12;
public static final List<String> serializationNames =
Collections.unmodifiableList(Arrays.asList(
@ -63,7 +64,8 @@ public class ATNState {
"BLOCK_END",
"STAR_LOOP_BACK",
"STAR_LOOP_ENTRY",
"PLUS_LOOP_BACK"
"PLUS_LOOP_BACK",
"LOOP_END"
));
public static final Map<Class<? extends ATNState>, Integer> serializationTypes =
@ -78,7 +80,8 @@ public class ATNState {
put(BlockEndState.class, BLOCK_END);
put(PlusLoopbackState.class, PLUS_LOOP_BACK);
put(StarLoopbackState.class, STAR_LOOP_BACK);
put(StarLoopEntryState.class, STAR_LOOP_ENTRY);
put(StarLoopEntryState.class, STAR_LOOP_ENTRY);
put(LoopEndState.class, LOOP_END);
}});
public static final int INVALID_STATE_NUMBER = -1;

View File

@ -525,7 +525,6 @@ public class LexerATNSimulator extends ATNSimulator {
PredicateTransition pt = (PredicateTransition)t;
if ( recog == null || recog.sempred(null, pt.ruleIndex, pt.predIndex) ) {
c = new ATNConfig(config, t.target, pt.getPredicate());
// c.traversedPredicate = true;
}
}
// ignore actions; just exec one per rule upon accept
@ -640,7 +639,7 @@ public class LexerATNSimulator extends ATNSimulator {
{
firstConfigWithRuleStopState = c;
}
if ( c.semanticContext!=null ) traversedPredicate = true;
if ( c.semanticContext!=null && c.semanticContext!=SemanticContext.NONE ) traversedPredicate = true;
// if ( c.traversedPredicate ) traversedPredicate = true;
}

View File

@ -0,0 +1,35 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.atn;
/** Mark the end of a * or + loop.
 *
 *  Besides the fields every {@link ATNState} has, a loop end state carries
 *  the number of a loop-back state; that extra value is serialized with the
 *  state and read back as a special case during ATN deserialization.
 */
public class LoopEndState extends ATNState {
/** State number of the loop-back state paired with this loop end.
 *  The parser simulator's closure operation compares it against
 *  {@code RuleContext.invokingState} to pop the context entries that were
 *  pushed when passing through that loop-back state.
 */
public int loopBackStateNumber;
}

View File

@ -0,0 +1,187 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.BaseRecognizer;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.runtime.tree.TraceTree;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/** Debugging aid that walks the ATN recursively over a fixed slice of the
 *  token stream and records, as a {@link TraceTree}, every path found that
 *  consumes the whole slice.
 *
 *  All methods print progress to {@code System.out}.
 */
public class ParserATNPathFinder extends v2ParserATNSimulator {
    public ParserATNPathFinder(@Nullable BaseRecognizer parser, @NotNull ATN atn) {
        super(parser, atn);
    }

    /** Given an input sequence, as a subset of the input stream, trace the paths through the
     *  ATN starting at s. The tree returned is rooted at s and its leaves are the states
     *  reached after the last input symbol has been matched. This is used to figure out how an
     *  input sequence is matched in more than one way between the alternatives of a decision.
     *  It's only that decision we are concerned with, and so if there are ambiguous decisions
     *  further along, we will ignore them for the purposes of computing the path to the final
     *  state. To figure out multiple paths for a decision, use this method on the left edge of
     *  the alternatives of the decision in question.
     *
     *  TODO: I haven't figured out what to do with nongreedy decisions yet
     *  TODO: preds. unless i create rule specific ctxs, i can't eval preds. also must eval args!
     *
     *  @param s     state at which to start tracing
     *  @param ctx   invocation context to use when the trace falls off the end of a rule;
     *               may be null
     *  @param input token stream holding the symbols to match
     *  @param start index in {@code input} of the first symbol to match
     *  @param stop  index in {@code input} of the last symbol to match (inclusive)
     *  @return the tree of paths with its {@code leaves} list filled in, or null if
     *          no path matches the whole slice
     */
    public TraceTree trace(@NotNull ATNState s, @Nullable RuleContext ctx,
                           TokenStream input, int start, int stop)
    {
        System.out.println("REACHES "+s.stateNumber+" start state");
        List<TraceTree> leaves = new ArrayList<TraceTree>();
        // One busy set per input position in start..stop. Clamp the length so that an
        // empty range (stop < start) cannot raise NegativeArraySizeException; in that
        // case _trace returns immediately without touching the array.
        @SuppressWarnings("unchecked")
        HashSet<ATNState>[] busy = new HashSet[Math.max(0, stop-start+1)];
        for (int i = 0; i < busy.length; i++) {
            busy[i] = new HashSet<ATNState>();
        }
        TraceTree path = _trace(s, ctx, ctx, input, start, start, stop, leaves, busy);
        if ( path!=null ) path.leaves = leaves;
        return path;
    }

    /** Recursive worker for {@link #trace}.
     *
     *  @param s              state being visited
     *  @param initialContext context that was passed to {@link #trace}
     *  @param ctx            current invocation context
     *  @param input          token stream holding the symbols to match
     *  @param start          index of the first symbol of the slice
     *  @param i              index of the symbol to match next
     *  @param stop           index of the last symbol of the slice (inclusive)
     *  @param leaves         receives a node for every state reached once {@code i > stop}
     *  @param busy           per input position (relative to {@code start}), the states
     *                        already visited; only states are tracked, not the context
     *                        they were reached with
     *  @return the subtree of successful paths starting at {@code s}, or null if no path
     *          from here consumes the rest of the slice or {@code s} was already visited
     *          at this input position
     */
    public TraceTree _trace(@NotNull ATNState s, RuleContext initialContext, RuleContext ctx,
                            TokenStream input, int start, int i, int stop,
                            List<TraceTree> leaves, @NotNull Set<ATNState>[] busy)
    {
        TraceTree root = new TraceTree(s);
        if ( i>stop ) {
            leaves.add(root); // track final states
            System.out.println("leaves=" + leaves);
            return root;
        }

        // Set.add() both tests and records the visit; no second add is needed.
        if ( !busy[i-start].add(s) ) {
            System.out.println("already visited "+s.stateNumber+" at input "+i+"="+input.get(i).getText());
            return null;
        }

        System.out.println("TRACE "+s.stateNumber+" at input "+input.get(i).getText());

        if ( s instanceof RuleStopState) {
            // We hit rule end. If we have context info, use it
            if ( ctx!=null && !ctx.isEmpty() ) {
                System.out.println("stop state "+s.stateNumber+", ctx="+ctx);
                ATNState invokingState = atn.states.get(ctx.invokingState);
                RuleTransition rt = (RuleTransition)invokingState.transition(0);
                ATNState retState = rt.followState;
                // The subtree of the follow state is returned directly; no node for
                // the stop state itself is kept in the tree.
                return _trace(retState, initialContext, ctx.parent, input, start, i, stop, leaves, busy);
            }
            else {
                // else if we have no context info, just chase follow links (if greedy)
                System.out.println("FALLING off rule "+getRuleName(s.ruleIndex));
            }
        }

        int n = s.getNumberOfTransitions();
        boolean aGoodPath = false;
        for (int j=0; j<n; j++) {
            Transition t = s.transition(j);
            TraceTree found = null;
            if ( t.getClass() == RuleTransition.class ) {
                // rule invocation: push a context entry so the stop state can return here
                RuleContext newContext = new RuleContext(ctx, s.stateNumber);
                found = _trace(t.target, initialContext, newContext, input, start, i, stop, leaves, busy);
            }
            else if ( t instanceof PredicateTransition ) {
                found = predTransition(initialContext, ctx, input, start, i, stop, leaves, busy, root, t);
            }
            else if ( t.isEpsilon() ) {
                found = _trace(t.target, initialContext, ctx, input, start, i, stop, leaves, busy);
            }
            else if ( matchesInput(t, input, i) ) {
                // symbol matched: move to the target and to the next input position
                System.out.println("REACHES " + t.target.stateNumber + " matching input " + input.get(i).getText());
                found = _trace(t.target, initialContext, ctx, input, start, i+1, stop, leaves, busy);
            }
            if ( found!=null ) {
                aGoodPath = true;
                root.addChild(found);
            }
        }
        if ( aGoodPath ) return root; // found at least one transition leading to success
        return null;
    }

    /** Does non-epsilon transition t match the token at index i of input? */
    private static boolean matchesInput(Transition t, TokenStream input, int i) {
        if ( t.getClass() == WildcardTransition.class ) return true;
        IntervalSet set = t.label();
        if ( set==null ) return false;
        boolean inSet = set.contains(input.get(i).getType());
        // a not-set transition matches exactly the symbols outside its label
        return (t instanceof NotSetTransition) ? !inSet : inSet;
    }

    /** Follow predicate transition {@code t} if its predicate passes.
     *
     *  A context-dependent predicate is only evaluated while {@code ctx} is still the
     *  initial context and is a {@link ParserRuleContext}; otherwise it is assumed to
     *  pass. The predicate is evaluated at most once per call, and the value printed
     *  is the value acted upon.
     *
     *  @param root unused; kept for interface compatibility
     *  @return the subtree found beyond {@code t.target}, or null if the predicate
     *          fails or no path continues from the target
     */
    public TraceTree predTransition(RuleContext initialContext, RuleContext ctx, TokenStream input, int start,
                                    int i, int stop, List<TraceTree> leaves, Set<ATNState>[] busy,
                                    TraceTree root, Transition t)
    {
        SemanticContext.Predicate pred = ((PredicateTransition) t).getPredicate();
        boolean pass;
        if ( pred.isCtxDependent ) {
            if ( ctx instanceof ParserRuleContext && ctx==initialContext ) {
                pass = pred.eval(parser, ctx);
                System.out.println("eval pred "+pred+"="+pass);
            }
            else {
                pass = true; // see thru ctx dependent when out of context
            }
        }
        else {
            pass = pred.eval(parser, ctx);
            System.out.println("eval pred "+pred+"="+pass);
        }
        if ( pass ) {
            return _trace(t.target, initialContext, ctx, input, start, i, stop, leaves, busy);
        }
        return null;
    }
}

View File

@ -221,6 +221,12 @@ import java.util.*;
optimization later if necessary.
* if non-greedy, no report and resolve to the exit alternative
*
* By default we do full context-sensitive LL(*) parsing not
* Strong LL(*) parsing. If we fail with Strong LL(*) we
* try full LL(*). That means we rewind and use context information
* when closure operations fall off the end of the rule that
* holds the decision we're evaluating
*/
public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
public static boolean debug = false;
@ -235,33 +241,18 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
@Nullable
protected final BaseRecognizer<Symbol> parser;
@NotNull
public final Map<RuleContext, DFA[]> ctxToDFAs;
public Map<RuleContext, DFA>[] decisionToDFAPerCtx; // TODO: USE THIS ONE
@NotNull
public final DFA[] decisionToDFA;
/** By default we do full context-sensitive LL(*) parsing not
* Strong LL(*) parsing. If we fail with Strong LL(*) we
* try full LL(*). That means we rewind and use context information
* when closure operations fall off the end of the rule that
* holds the decision were evaluating.
*/
protected boolean userWantsCtxSensitive = true;
/** Testing only! */
public v2ParserATNSimulator(@NotNull ATN atn) {
this(null, atn);
}
@Override
public void reset() {
}
public v2ParserATNSimulator(@Nullable BaseRecognizer<Symbol> parser, @NotNull ATN atn) {
super(atn);
this.parser = parser;
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
// ctxToDFAs = new HashMap<RuleContext, DFA[]>();
// TODO (sam): why distinguish on parser != null?
decisionToDFA = new DFA[atn.getNumberOfDecisions() + (parser != null ? 1 : 0)];
// DOTGenerator dot = new DOTGenerator(null);
@ -269,13 +260,17 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
// System.out.println(dot.getDOT(atn.rules.get(1), parser.getRuleNames()));
}
@Override
public void reset() {
}
public int adaptivePredict(@NotNull SymbolStream<Symbol> input, int decision,
@Nullable ParserRuleContext outerContext)
{
predict_calls++;
DFA dfa = decisionToDFA[decision];
if ( dfa==null || dfa.s0==null ) {
ATNState startState = atn.decisionToState.get(decision);
DecisionState startState = atn.decisionToState.get(decision);
decisionToDFA[decision] = dfa = new DFA(startState);
dfa.decision = decision;
return predictATN(dfa, input, outerContext);
@ -305,7 +300,8 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
", outerContext="+outerContext.toString(parser));
DecisionState decState = atn.getDecisionState(dfa.decision);
boolean greedy = decState.isGreedy;
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, greedy);
boolean loopsSimulateTailRecursion = false;
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, ParserRuleContext.EMPTY, greedy, loopsSimulateTailRecursion);
dfa.s0 = addDFAState(dfa, s0_closure);
int alt = 0;
@ -347,9 +343,10 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
if ( dfa_debug ) System.out.println("DFA state "+s.stateNumber+" LA(1)=="+getLookaheadName(input));
if ( s.isCtxSensitive ) {
if ( dfa_debug ) System.out.println("ctx sensitive state "+outerContext+" in "+s);
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
boolean loopsSimulateTailRecursion = true;
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy, loopsSimulateTailRecursion);
ATNConfigSet fullCtxSet =
execATNWithFullContext(dfa, s0_closure,
execATNWithFullContext(dfa, s, s0_closure,
input, startIndex,
outerContext,
decState.getNumberOfTransitions(),
@ -497,7 +494,8 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
boolean greedy = decState.isGreedy;
while (true) { // while more work
ATNConfigSet reach = computeReachSet(previous, t, greedy);
boolean loopsSimulateTailRecursion = false;
ATNConfigSet reach = computeReachSet(previous, t, greedy, loopsSimulateTailRecursion);
if ( reach==null ) throw noViableAlt(input, outerContext, previous, startIndex);
D = addDFAEdge(dfa, previous, t, reach); // always adding edge even if to a conflict state
int predictedAlt = getUniqueAlt(reach);
@ -507,7 +505,8 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
D.prediction = predictedAlt;
}
else {
D.configset.conflictingAlts = getConflictingAlts(reach);
boolean fullCtx = false;
D.configset.conflictingAlts = getConflictingAlts(reach, fullCtx);
if ( D.configset.conflictingAlts!=null ) {
if ( greedy ) {
int k = input.index() - startIndex + 1; // how much input we used
@ -517,15 +516,16 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
k == 1 ) // SLL(1) == LL(1)
{
if ( !D.configset.hasSemanticContext ) {
reportAmbiguity(dfa, startIndex, input.index(), D.configset.conflictingAlts, D.configset);
reportAmbiguity(dfa, D, startIndex, input.index(), D.configset.conflictingAlts, D.configset);
}
D.isAcceptState = true;
predictedAlt = resolveToMinAlt(D, D.configset.conflictingAlts);
}
else {
if ( debug ) System.out.println("RETRY with outerContext="+outerContext);
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
fullCtxSet = execATNWithFullContext(dfa, s0_closure,
loopsSimulateTailRecursion = true;
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy, loopsSimulateTailRecursion);
fullCtxSet = execATNWithFullContext(dfa, D, s0_closure,
input, startIndex,
outerContext,
decState.getNumberOfTransitions(),
@ -595,6 +595,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
// comes back with reach.uniqueAlt set to a valid alt
public ATNConfigSet execATNWithFullContext(DFA dfa,
DFAState D, // how far we got before failing over
@NotNull ATNConfigSet s0,
@NotNull SymbolStream<Symbol> input, int startIndex,
ParserRuleContext outerContext,
@ -610,13 +611,14 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
input.seek(startIndex);
int t = input.LA(1);
while (true) { // while more work
reach = computeReachSet(previous, t, greedy);
reach = computeReachSet(previous, t, greedy, true);
if ( reach==null ) {
throw noViableAlt(input, outerContext, reach, startIndex);
throw noViableAlt(input, outerContext, previous, startIndex);
}
reach.uniqueAlt = getUniqueAlt(reach);
if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) break;
reach.conflictingAlts = getConflictingAlts(reach);
boolean fullCtx = true;
reach.conflictingAlts = getConflictingAlts(reach, fullCtx);
if ( reach.conflictingAlts!=null ) break;
previous = reach;
input.consume();
@ -654,13 +656,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
// must have conflict and no semantic preds
reportAmbiguity(dfa, startIndex, input.index(), reach.conflictingAlts, reach);
reportAmbiguity(dfa, D, startIndex, input.index(), reach.conflictingAlts, reach);
reach.uniqueAlt = reach.conflictingAlts.getMinElement();
return reach;
}
protected ATNConfigSet computeReachSet(ATNConfigSet closure, int t, boolean greedy) {
protected ATNConfigSet computeReachSet(ATNConfigSet closure, int t, boolean greedy, boolean loopsSimulateTailRecursion) {
if ( debug ) System.out.println("in computeReachSet, starting closure: " + closure);
ATNConfigSet reach = new ATNConfigSet();
for (ATNConfig c : closure) {
@ -671,7 +673,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNState target = getReachableTarget(trans, t);
if ( target!=null ) {
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
closure(new ATNConfig(c, target), reach, closureBusy, false, greedy);
closure(new ATNConfig(c, target), reach, closureBusy, false, greedy, loopsSimulateTailRecursion);
}
}
}
@ -682,7 +684,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
@NotNull
public ATNConfigSet computeStartState(@NotNull ATNState p,
@Nullable RuleContext ctx,
boolean greedy)
boolean greedy, boolean loopsSimulateTailRecursion)
{
RuleContext initialContext = ctx; // always at least the implicit call to start rule
ATNConfigSet configs = new ATNConfigSet();
@ -691,7 +693,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNState target = p.transition(i).target;
ATNConfig c = new ATNConfig(target, i+1, initialContext);
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
closure(c, configs, closureBusy, true, greedy);
closure(c, configs, closureBusy, true, greedy, loopsSimulateTailRecursion);
}
return configs;
@ -840,11 +842,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
waste to pursue the closure. Might have to advance when we do
ambig detection thought :(
*/
// TODO: loopsSimulateTailRecursion might not be necessary. seems slow without it. see what that is 12/29/11
protected void closure(@NotNull ATNConfig config,
@NotNull ATNConfigSet configs,
@NotNull Set<ATNConfig> closureBusy,
boolean collectPredicates,
boolean greedy)
boolean collectPredicates,
boolean greedy, boolean loopsSimulateTailRecursion)
{
if ( debug ) System.out.println("closure("+config.toString(parser,true)+")");
@ -869,7 +873,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
// gotten that context AFTER having falling off a rule.
// Make sure we track that we are now out of context.
c.reachesIntoOuterContext = config.reachesIntoOuterContext;
closure(c, configs, closureBusy, collectPredicates, greedy);
closure(c, configs, closureBusy, collectPredicates, greedy, loopsSimulateTailRecursion);
return;
}
else {
@ -878,6 +882,23 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
getRuleName(config.state.ruleIndex));
}
}
else if ( loopsSimulateTailRecursion ) {
if ( config.state.getClass()==StarLoopbackState.class ||
config.state.getClass()==PlusLoopbackState.class )
{
config.context = new RuleContext(config.context, config.state.stateNumber);
// alter config; it's ok, since all calls to closure pass in a fresh config for us to chase
if ( debug ) System.out.println("Loop back; push "+config.state.stateNumber+", stack="+config.context);
}
else if ( config.state.getClass()==LoopEndState.class ) {
if ( debug ) System.out.println("Loop end; pop, stack="+config.context);
RuleContext p = config.context;
LoopEndState end = (LoopEndState) config.state;
while ( !p.isEmpty() && p.invokingState == end.loopBackStateNumber ) {
p = config.context = config.context.parent; // "pop"
}
}
}
ATNState p = config.state;
// optimization
@ -886,6 +907,9 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
if ( config.semanticContext!=null && config.semanticContext!=SemanticContext.NONE ) {
configs.hasSemanticContext = true;
}
if ( config.reachesIntoOuterContext>0 ) {
configs.dipsIntoOuterContext = true;
}
if ( debug ) System.out.println("added config "+configs);
}
@ -896,20 +920,22 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNConfig c = getEpsilonTarget(config, t, continueCollecting);
if ( c!=null ) {
if ( config.state instanceof RuleStopState ) {
// fell off end of rule
// target fell off end of rule; mark resulting c as having dipped into outer context
// We can't get here if incoming config was rule stop and we had context
// track how far we dip into outer context. Might
// come in handy and we avoid evaluating context dependent
// preds if this is > 0.
c.reachesIntoOuterContext++;
configs.dipsIntoOuterContext = true;
configs.dipsIntoOuterContext = true; // TODO: can remove? only care when we add to set per middle of this method
if ( debug ) System.out.println("dips into outer ctx: "+c);
}
closure(c, configs, closureBusy, continueCollecting, greedy);
closure(c, configs, closureBusy, continueCollecting, greedy, loopsSimulateTailRecursion);
}
}
}
@NotNull
private String getRuleName(int index) {
public String getRuleName(int index) {
if ( parser!=null && index>=0 ) return parser.getRuleNames()[index];
return "<rule "+index+">";
}
@ -1054,7 +1080,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
functions
*/
@Nullable
public IntervalSet getConflictingAlts(@NotNull ATNConfigSet configs) {
public IntervalSet getConflictingAlts(@NotNull ATNConfigSet configs, boolean fullCtx) {
if ( debug ) System.out.println("### check ambiguous "+configs);
// First get a list of configurations for each state.
// Most of the time, each state will have one associated configuration.
@ -1118,16 +1144,21 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNConfig c = configsPerState.get(i);
for (int j = i+1; j < size; j++) {
ATNConfig d = configsPerState.get(j);
if ( c.alt != d.alt && c.context.conflictsWith(d.context) ) {
if ( debug ) {
System.out.println("we reach state "+c.state.stateNumber+
" in rule "+
(parser !=null ? getRuleName(c.state.ruleIndex) :"n/a")+
" alts "+c.alt+","+d.alt+" from ctx "+c.context.toString(parser)
+" and "+ d.context.toString(parser));
if ( c.alt != d.alt ) {
boolean conflicting =
(fullCtx && c.context.equals(d.context)) ||
(!fullCtx && c.context.conflictsWith(d.context));
if ( conflicting ) {
if ( debug ) {
System.out.println("we reach state "+c.state.stateNumber+
" in rule "+
(parser !=null ? getRuleName(c.state.ruleIndex) :"n/a")+
" alts "+c.alt+","+d.alt+" from ctx "+c.context.toString(parser)
+" and "+ d.context.toString(parser));
}
ambigAlts.add(c.alt);
ambigAlts.add(d.alt);
}
ambigAlts.add(c.alt);
ambigAlts.add(d.alt);
}
}
}
@ -1192,10 +1223,6 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
return getTokenName(input.LA(1));
}
public void setContextSensitive(boolean ctxSensitive) {
this.userWantsCtxSensitive = ctxSensitive;
}
public void dumpDeadEndConfigs(@NotNull NoViableAltException nvae) {
System.err.println("dead end configs: ");
for (ATNConfig c : nvae.deadEndConfigs) {
@ -1320,7 +1347,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
System.out.println("reportAttemptingFullContext decision="+dfa.decision+":"+configs+
", input="+parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) parser.getErrorHandler().reportContextSensitivity(parser, dfa, startIndex, stopIndex, configs);
if ( parser!=null ) parser.getErrorHandler().reportAttemptingFullContext(parser, dfa, startIndex, stopIndex, configs);
}
public void reportContextSensitivity(DFA dfa, ATNConfigSet configs, int startIndex, int stopIndex) {
@ -1332,13 +1359,29 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
/** If context sensitive parsing, we know it's ambiguity not conflict */
public void reportAmbiguity(@NotNull DFA dfa, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull ATNConfigSet configs)
{
if ( debug || retry_debug ) {
System.out.println("reportAmbiguity "+
ambigAlts+":"+configs+
public void reportAmbiguity(@NotNull DFA dfa, DFAState D, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull ATNConfigSet configs)
{
if ( debug || retry_debug ) {
// ParserATNPathFinder finder = new ParserATNPathFinder(parser, atn);
// int i = 1;
// for (Transition t : dfa.atnStartState.transitions) {
// System.out.println("ALT "+i+"=");
// System.out.println(startIndex+".."+stopIndex+", len(input)="+parser.getInputStream().size());
// TraceTree path = finder.trace(t.target, parser.getContext(), (TokenStream)parser.getInputStream(),
// startIndex, stopIndex);
// if ( path!=null ) {
// System.out.println("path = "+path.toStringTree());
// for (TraceTree leaf : path.leaves) {
// List<ATNState> states = path.getPathToNode(leaf);
// System.out.println("states="+states);
// }
// }
// i++;
// }
System.out.println("reportAmbiguity "+
ambigAlts+":"+configs+
", input="+parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, dfa, startIndex, stopIndex,

View File

@ -28,12 +28,14 @@
*/
package org.antlr.v4.runtime.dfa;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.*;
public class DFA {
/** A set of all DFA states. Use Map so we can get old state back
@ -47,14 +49,62 @@ public class DFA {
/** From which ATN state did we create this DFA? */
@NotNull
public final ATNState atnStartState;
public final DecisionState atnStartState;
/** Set of configs for a DFA state with at least one conflict? Mainly used as "return value"
* from predictATN() for retry.
*/
// public OrderedHashSet<ATNConfig> conflictSet;
public DFA(@NotNull ATNState atnStartState) { this.atnStartState = atnStartState; }
public DFA(@NotNull DecisionState atnStartState) { this.atnStartState = atnStartState; }
/** Find the path in DFA from s0 to s, returning list of states encountered (inclusively) */
// public List<DFAState> getPathToState(DFAState finalState, TokenStream input, int start, int stop) {
// if ( s0==null ) return null;
// List<DFAState> states = new ArrayList<DFAState>();
// states.add(s0);
// DFAState p = s0;
// int i = start;
// Token t = input.get(i);
// while ( p != finalState && i<stop ) {
// int la = t.getType();
// if ( p.edges == null || la >= p.edges.length || la < -1 || p.edges[la+1] == null ) {
// return states;
// }
// DFAState target = p.edges[la+1];
// if ( target == ATNSimulator.ERROR ) {
// return states;
// }
// states.add(target);
// p = target;
// i++;
// t = input.get(i);
// }
// return states;
// }
/** For each DFA state in {@code dfaStates}, paired in order with the input
 *  symbols beginning at index {@code start}, compute the ATN states of that
 *  DFA state's configuration set that have a transition matching the paired
 *  symbol.
 *
 *  Every outgoing transition of a state is examined, so a state without
 *  transitions is simply skipped and a match on any transition counts.
 *  The result for each symbol is also printed to {@code System.out}.
 *
 *  @param atn       simulator used to test whether a transition matches a token type
 *  @param dfaStates DFA states along the path, one per input symbol
 *  @param input     token stream the path was computed over
 *  @param start     index in {@code input} of the symbol paired with the first DFA state
 *  @param stop      currently unused
 *  @return one set of ATN states per element of {@code dfaStates}, in the same order
 */
public List<Set<ATNState>> getATNStatesAlongPath(v2ParserATNSimulator atn,
                                                 List<DFAState> dfaStates,
                                                 TokenStream input, int start, int stop)
{
    List<Set<ATNState>> atnStates = new ArrayList<Set<ATNState>>();
    int i = start;
    for (DFAState D : dfaStates) {
        // the token type is the same for every state of D; fetch it once
        int ttype = input.get(i).getType();
        Set<ATNState> statesInvolved = new HashSet<ATNState>();
        for (ATNState astate : D.configset.getStates()) {
            int n = astate.getNumberOfTransitions();
            for (int j = 0; j < n; j++) {
                if ( atn.getReachableTarget(astate.transition(j), ttype)!=null ) {
                    statesInvolved.add(astate);
                    break; // one matching transition is enough
                }
            }
        }
        System.out.println("statesInvolved upon "+input.get(i).getText()+"="+statesInvolved);
        i++;
        atnStates.add(statesInvolved);
    }
    return atnStates;
}
@Override
public String toString() { return toString(null); }

View File

@ -119,6 +119,7 @@ public class TestRig {
Parser parser = parserCtor.newInstance(tokens);
parser.setErrorHandler(new DiagnosticErrorStrategy<Token>());
parser.getInterpreter().setTrace(true);
if ( printTree || gui || psFile!=null ) {
parser.setBuildParseTree(true);

View File

@ -0,0 +1,104 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.runtime.tree;
import org.antlr.v4.runtime.atn.ATNState;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
/**
 * A tree node whose payload is an {@link ATNState}. Each node keeps a link
 * to its parent, so the sequence of states from the root down to any node
 * can be recovered with {@link #getPathToNode(TraceTree)}.
 */
public class TraceTree implements Tree {
    /** Node this one was added to via {@link #addChild(TraceTree)}; null until then. */
    public TraceTree parent;

    /** Child nodes in insertion order; stays null until the first child is added. */
    public List<TraceTree> children;

    /** If this node is root, it has list of all leaves of tree after ParserATNPathFinder.trace() */
    public List<TraceTree> leaves;

    public ATNState state; // payload

    public TraceTree(ATNState s) { state = s; }

    /** Append {@code t} to this node's children and make this node its parent. */
    public void addChild(TraceTree t) {
        if ( children==null ) {
            children = new ArrayList<TraceTree>();
        }
        children.add(t);
        t.parent = this;
    }

    /** Wrap {@code s} in a new node and append it as a child of this node. */
    public void addChild(ATNState s) { addChild(new TraceTree(s)); }

    /**
     * Return the i-th child.
     *
     * @throws IndexOutOfBoundsException if {@code i} is negative or not less
     *         than {@link #getChildCount()}; this includes every index when
     *         the node has no children
     */
    @Override
    public Tree getChild(int i) {
        // One check covers both "no children yet" and a bad index, so the
        // message is accurate in every case.
        if ( children==null || i<0 || i>=children.size() ) {
            throw new IndexOutOfBoundsException(
                "child index "+i+" out of range; child count is "+getChildCount());
        }
        return children.get(i);
    }

    @Override
    public Tree getParent() {
        return parent;
    }

    @Override
    public Object getPayload() {
        return state;
    }

    @Override
    public int getChildCount() {
        if ( children==null ) {
            return 0;
        }
        return children.size();
    }

    /**
     * Collect the payload states found by following parent links from
     * {@code s} upward, ordered from the topmost ancestor down to {@code s}.
     *
     * @param s node to end the path at; may be null
     * @return the states along the path, or null if {@code s} is null
     */
    public List<ATNState> getPathToNode(TraceTree s) {
        LinkedList<ATNState> states = new LinkedList<ATNState>();
        for (TraceTree p = s; p!=null; p = p.parent) {
            // Walking upward, so prepend to end up in root-first order.
            states.addFirst(p.state);
        }
        if ( states.isEmpty() ) {
            return null;
        }
        return states;
    }

    @Override
    public String toString() {
        if ( state==null ) {
            return "null";
        }
        return state.toString();
    }

    @Override
    public String toStringTree() {
        return Trees.toStringTree(this, null);
    }
}

View File

@ -51,10 +51,20 @@ grammar R;
This seems reusable as opposed to the input index. It might be complicated
to track this. In the general case, we would need a mapping from rule
invocation of rule r to a count, and within a specific rule context. That
might add a HashMap for every RuleContext. ick. Also, one about the context
might add a HashMap for every RuleContext. ick. Also, what about the context
that I create during ATN simulation? I would have to track that as well
as the generated code in the parser. Rule invocation states would act
like triggers that would bump a count for that target rule in the current ctx.
Actually, maybe only my ATN sim would have to do it. prog then expr_or_assign
would be real elements on stack then I would create expr, expr_primary, pop
them both (for 2nd alt of expr_or_assign) and pop back into prog. Then, I'd
push expr_or_assign again but could notice I was calling 2nd time from prog.
Maybe make one big map: count[ctx][invocation-state] -> value to keep out
of RuleContext. Used only during sim anyway.
Make sure that this doesn't cause r* for optional r to miss an ambiguity
since 2nd invocation would have diff stack.
*/
prog : expr_or_assign* ;
@ -67,9 +77,11 @@ prog : expr_or_assign* ;
//prog : expr_or_assign prog | ;
expr_or_assign
: expr '=' expr_or_assign
: expr '++'
| expr // match ID a, fall out, reenter, match "(i)<-x" via alt 1
;
// it thinks it's same context from prog, but it's not; it's
// 2nd time through expr_or_assign* loop.
;
expr : expr_primary ('<-' ID)? ;
expr_primary

4973
tool/playground/R.g4.graffle Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,19 @@
grammar T;
s : A+ ;
A : {true}? 'a' ;
/* This is ambig too.
s_ : s EOF ;
s : a s
|
;
*/
s : (a)* EOF ; // ambig; can match A B in alt 3 or alt 2 then alt 1
a : e '!'
| e
;
e : B
| A // both alts 2,3 can reach end of s upon abEOF
| A B
;
A : 'a' ;
B : 'b' ;
WS : (' '|'\n')+ {skip();} ;

View File

@ -56,7 +56,7 @@ public class ATNSerializer {
* grammar-type, (ANTLRParser.LEXER, ...)
* max token type,
* num states,
* state-0-type ruleIndex, state-1-type ruleIndex, ...
* state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
* num rules,
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
* (args are token type,actionIndex in lexer else 0,0)
@ -88,6 +88,7 @@ public class ATNSerializer {
}
data.add(s.getStateType());
data.add(s.ruleIndex);
if ( s.getStateType() == ATNState.LOOP_END ) data.add(((LoopEndState)s).loopBackStateNumber);
nedges += s.getNumberOfTransitions();
for (int i=0; i<s.getNumberOfTransitions(); i++) {
Transition t = s.transition(i);
@ -200,9 +201,14 @@ public class ATNSerializer {
int stype = ATNSimulator.toInt(data[p++]);
if ( stype==ATNState.INVALID_TYPE ) continue; // ignore bad type of states
int ruleIndex = ATNSimulator.toInt(data[p++]);
String arg = "";
if ( stype == ATNState.LOOP_END ) {
int loopBackStateNumber = ATNSimulator.toInt(data[p++]);
arg = " "+loopBackStateNumber;
}
buf.append((i - 1) + ":" +
ATNState.serializationNames.get(stype) + " "+
ruleIndex + "\n");
ruleIndex + arg + "\n");
}
int nrules = ATNSimulator.toInt(data[p++]);
for (int i=0; i<nrules; i++) {

View File

@ -424,10 +424,11 @@ public class ParserATNFactory implements ATNFactory {
PlusBlockStartState blkStart = (PlusBlockStartState)blk.left;
BlockEndState blkEnd = (BlockEndState)blk.right;
PlusLoopbackState loop = (PlusLoopbackState)newState(PlusLoopbackState.class, plusAST);
PlusLoopbackState loop = newState(PlusLoopbackState.class, plusAST);
atn.defineDecisionState(loop);
ATNState end = newState(ATNState.class, plusAST);
LoopEndState end = newState(LoopEndState.class, plusAST);
blkStart.loopBackState = loop;
end.loopBackStateNumber = loop.stateNumber;
plusAST.atnState = blkStart;
epsilon(blkEnd, loop); // blk can see loop back
@ -452,9 +453,9 @@ public class ParserATNFactory implements ATNFactory {
*
* |-------------|
* v |
* o--[o-blk-o]->o->o
* o--[o-blk-o]->o o
* | ^
* o----------------|
* -----------------|
*
* Note that the optional bypass must jump outside the loop as (A|B)* is
* not the same thing as (A|B|)+.
@ -464,11 +465,12 @@ public class ParserATNFactory implements ATNFactory {
StarBlockStartState blkStart = (StarBlockStartState)elem.left;
BlockEndState blkEnd = (BlockEndState)elem.right;
StarLoopEntryState entry = (StarLoopEntryState)newState(StarLoopEntryState.class, starAST);
StarLoopEntryState entry = newState(StarLoopEntryState.class, starAST);
atn.defineDecisionState(entry);
ATNState end = newState(ATNState.class, starAST);
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
LoopEndState end = newState(LoopEndState.class, starAST);
StarLoopbackState loop = newState(StarLoopbackState.class, starAST);
entry.loopBackState = loop;
end.loopBackStateNumber = loop.stateNumber;
BlockAST blkAST = (BlockAST)starAST.getChild(0);
entry.isGreedy = isGreedy(blkAST);

View File

@ -31,13 +31,16 @@ package org.antlr.v4.tool.interp;
import org.antlr.v4.Tool;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.v2ParserATNSimulator;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import org.antlr.v4.tool.Grammar;
import java.util.HashSet;
import java.util.Set;
public class ParserInterpreter {
class DummyParser extends Parser {
public Grammar g;
@ -94,4 +97,5 @@ public class ParserInterpreter {
public v2ParserATNSimulator<Token> getATNSimulator() {
return atnSimulator;
}
}

View File

@ -34,10 +34,7 @@ import org.antlr.v4.automata.ParserATNFactory;
import org.antlr.v4.runtime.NoViableAltException;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.v2ParserATNSimulator;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.tool.DOTGenerator;
import org.antlr.v4.tool.Grammar;
@ -506,7 +503,7 @@ public class TestATNParserPrediction extends BaseTest {
// ParserATNSimulator<Token> interp = new ParserATNSimulator<Token>(atn);
TokenStream input = new IntTokenStream(types);
ParserInterpreter interp = new ParserInterpreter(g, input);
ATNState startState = atn.decisionToState.get(decision);
DecisionState startState = atn.decisionToState.get(decision);
DFA dfa = new DFA(startState);
dfa.decision = decision;
int alt = interp.predictATN(dfa, input, ParserRuleContext.EMPTY, false);
@ -549,7 +546,7 @@ public class TestATNParserPrediction extends BaseTest {
System.out.println(types);
TokenStream input = new IntTokenStream(types);
try {
ATNState startState = atn.decisionToState.get(0);
DecisionState startState = atn.decisionToState.get(0);
DFA dfa = new DFA(startState);
// Rule r = g.getRule(ruleName);
//ATNState startState = atn.ruleToStartState.get(r);

View File

@ -229,7 +229,7 @@ public class TestATNSerialization extends BaseTest {
"4:PLUS_BLOCK_START 0\n" +
"5:BLOCK_END 0\n" +
"6:PLUS_LOOP_BACK 0\n" +
"7:BASIC 0\n" +
"7:LOOP_END 0\n" +
"8:BASIC 0\n" +
"9:BASIC 0\n" +
"10:BASIC 0\n" +
@ -406,7 +406,7 @@ public class TestATNSerialization extends BaseTest {
"5:PLUS_BLOCK_START 0\n" +
"6:BLOCK_END 0\n" +
"7:PLUS_LOOP_BACK 0\n" +
"8:BASIC 0\n" +
"8:LOOP_END 0\n" +
"rule 0:1 3,-1\n" +
"mode 0:0\n" +
"0->1 EPSILON 0,0,0\n" +
@ -560,7 +560,7 @@ public class TestATNSerialization extends BaseTest {
"10:PLUS_BLOCK_START 0\n" +
"11:BLOCK_END 0\n" +
"12:PLUS_LOOP_BACK 0\n" +
"13:BASIC 0\n" +
"13:LOOP_END 0\n" +
"14:BASIC 1\n" +
"15:BASIC 1\n" +
"16:BASIC 1\n" +