Merge branch 'new-conflicting-alts'

This commit is contained in:
Terence Parr 2012-10-14 18:51:12 -07:00
commit cb340b1e04
18 changed files with 806 additions and 691 deletions

View File

@ -31,32 +31,34 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.Nullable;
import java.util.BitSet;
/** How to emit recognition errors */
public interface ANTLRErrorListener {
/** Upon syntax error, notify any interested parties. This is not how to
* recover from errors or compute error messages. The parser
* ANTLRErrorStrategy specifies how to recover from syntax errors
* and how to compute error messages. This listener's job is simply to
* emit a computed message, though it has enough information to
* create its own message in many cases.
/** Upon syntax error, notify any interested parties. This is not
* how to recover from errors or compute error messages. The
* parser ANTLRErrorStrategy specifies how to recover from syntax
* errors and how to compute error messages. This listener's job
* is simply to emit a computed message, though it has enough
* information to create its own message in many cases.
*
* The RecognitionException is non-null for all syntax errors except
* when we discover mismatched token errors that we can recover from
* in-line, without returning from the surrounding rule (via the
* single token insertion and deletion mechanism).
* The RecognitionException is non-null for all syntax errors
* except when we discover mismatched token errors that we can
* recover from in-line, without returning from the surrounding
* rule (via the single token insertion and deletion mechanism).
*
* @param recognizer
* What parser got the error. From this object, you
* can access the context as well as the input stream.
* What parser got the error. From this
* object, you can access the context as well
* as the input stream.
* @param offendingSymbol
* The offending token in the input token stream, unless recognizer
* is a lexer (then it's null)
* If no viable alternative error, e has token
* at which we started production for the decision.
* The offending token in the input token
* stream, unless recognizer is a lexer (then it's null) If
* no viable alternative error, e has token at which we
* started production for the decision.
* @param line
* At what line in input did the error occur? This always refers to
* stopTokenIndex
@ -77,15 +79,18 @@ public interface ANTLRErrorListener {
String msg,
@Nullable RecognitionException e);
/** Called when the parser detects a true ambiguity: an input sequence can be matched
* literally by two or more pass through the grammar. ANTLR resolves the ambiguity in
* favor of the alternative appearing first in the grammar. The start and stop index are
* zero-based absolute indices into the token stream. ambigAlts is a set of alternative numbers
* that can match the input sequence. This method is only called when we are parsing with
* full context.
/** Called when the parser detects a true ambiguity: an input
* sequence can be matched literally by two or more paths through
* the grammar. ANTLR resolves the ambiguity in favor of the
* alternative appearing first in the grammar. The start and stop
* index are zero-based absolute indices into the token
* stream. ambigAlts is a set of alternative numbers that can
* match the input sequence. This method is only called when we
* are parsing with full context.
*/
void reportAmbiguity(@NotNull Parser recognizer,
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
DFA dfa, int startIndex, int stopIndex,
@NotNull BitSet ambigAlts,
@NotNull ATNConfigSet configs);
void reportAttemptingFullContext(@NotNull Parser recognizer,
@ -93,10 +98,11 @@ public interface ANTLRErrorListener {
int startIndex, int stopIndex,
@NotNull ATNConfigSet configs);
/** Called by the parser when it find a conflict that is resolved by retrying the parse
* with full context. This is not a warning; it simply notifies you that your grammar
* is more complicated than Strong LL can handle. The parser moved up to full context
* parsing for that input sequence.
/** Called by the parser when it finds a conflict that is resolved
* by retrying the parse with full context. This is not a
* warning; it simply notifies you that your grammar is more
* complicated than Strong LL can handle. The parser moved up to
* full context parsing for that input sequence.
*/
void reportContextSensitivity(@NotNull Parser recognizer,
@NotNull DFA dfa,

View File

@ -30,10 +30,10 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.BitSet;
/**
*
* @author Sam Harwell
*/
public class BaseErrorListener implements ANTLRErrorListener {
@ -52,7 +52,7 @@ public class BaseErrorListener implements ANTLRErrorListener {
DFA dfa,
int startIndex,
int stopIndex,
IntervalSet ambigAlts,
BitSet ambigAlts,
ATNConfigSet configs)
{
}

View File

@ -32,18 +32,20 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.BitSet;
public class DiagnosticErrorListener extends BaseErrorListener {
@Override
public void reportAmbiguity(@NotNull Parser recognizer,
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
DFA dfa, int startIndex, int stopIndex,
@NotNull BitSet ambigAlts,
@NotNull ATNConfigSet configs)
{
recognizer.notifyErrorListeners("reportAmbiguity d=" + dfa.decision +
": ambigAlts=" + ambigAlts + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
": ambigAlts=" + ambigAlts + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
}
@Override
@ -53,16 +55,18 @@ public class DiagnosticErrorListener extends BaseErrorListener {
@NotNull ATNConfigSet configs)
{
recognizer.notifyErrorListeners("reportAttemptingFullContext d=" +
dfa.decision + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
dfa.decision + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
}
@Override
public void reportContextSensitivity(@NotNull Parser recognizer, @NotNull DFA dfa,
int startIndex, int stopIndex, @NotNull ATNConfigSet configs)
public void reportContextSensitivity(@NotNull Parser recognizer,
@NotNull DFA dfa,
int startIndex, int stopIndex,
@NotNull ATNConfigSet configs)
{
recognizer.notifyErrorListeners("reportContextSensitivity d=" +
dfa.decision + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
dfa.decision + ", input='" +
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
}
}

View File

@ -30,12 +30,11 @@ package org.antlr.v4.runtime;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.BitSet;
import java.util.Collection;
/**
*
* @author Sam Harwell
*/
public class ProxyErrorListener implements ANTLRErrorListener {
@ -63,7 +62,7 @@ public class ProxyErrorListener implements ANTLRErrorListener {
DFA dfa,
int startIndex,
int stopIndex,
IntervalSet ambigAlts,
BitSet ambigAlts,
ATNConfigSet configs)
{
for (ANTLRErrorListener listener : delegates) {

View File

@ -71,6 +71,14 @@ public class ATNConfig {
@NotNull
public final SemanticContext semanticContext;
/** Duplicate an existing configuration: the new instance takes its
 *  state, alt, context, semantic context and reachesIntoOuterContext
 *  value directly from {@code old}.
 */
public ATNConfig(ATNConfig old) {
	this.reachesIntoOuterContext = old.reachesIntoOuterContext;
	this.semanticContext = old.semanticContext;
	this.context = old.context;
	this.alt = old.alt;
	this.state = old.state;
}
public ATNConfig(@NotNull ATNState state,
int alt,
@Nullable PredictionContext context)
@ -93,15 +101,26 @@ public class ATNConfig {
this(c, state, c.context, c.semanticContext);
}
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @NotNull SemanticContext semanticContext) {
this(c, state, c.context, semanticContext);
}
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
@NotNull SemanticContext semanticContext)
{
this(c, state, c.context, semanticContext);
}
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @Nullable PredictionContext context) {
public ATNConfig(@NotNull ATNConfig c,
@NotNull SemanticContext semanticContext)
{
this(c, c.state, c.context, semanticContext);
}
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
@Nullable PredictionContext context)
{
this(c, state, context, c.semanticContext);
}
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @Nullable PredictionContext context,
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
@Nullable PredictionContext context,
@NotNull SemanticContext semanticContext)
{
this.state = state;

View File

@ -31,9 +31,9 @@ package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.Array2DHashSet;
import org.antlr.v4.runtime.misc.DoubleKeyMap;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
@ -280,7 +280,8 @@ public class ATNConfigSet implements Set<ATNConfig> {
// TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
// TODO: can we track conflicts as they are added to save scanning configs later?
public int uniqueAlt;
protected IntervalSet conflictingAlts;
protected BitSet conflictingAlts;
// Used in parser and lexer. In lexer, it indicates we hit a pred
// while computing a closure operation. Don't make a DFA state from this.
public boolean hasSemanticContext;

View File

@ -0,0 +1,408 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.misc.FlexibleHashMap;
import org.antlr.v4.runtime.misc.NotNull;
import java.util.BitSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
public enum PredictionMode {
/** Do only local context prediction (SLL style), using a
* heuristic that almost always works and is much faster
* than computing the precise answer.
*/
SLL,
/** Full LL(*) that always gets right answer. For speed
* reasons, we terminate the prediction process when we know for
* sure which alt to predict. We don't always know what
* the ambiguity is in this mode.
*/
LL,
/** Tell the full LL prediction algorithm to pursue lookahead until
* it has uniquely predicted an alternative without conflict or it's
* certain that it's found an ambiguous input sequence. In this
* mode, the prediction process will
* continue looking for the exact ambiguous sequence even if
* it has already figured out which alternative to predict.
*/
LL_EXACT_AMBIG_DETECTION;
/** A Map that uses just the state and the stack context as the key.
 *  The alternative number and semantic context of an ATNConfig key do
 *  not take part in hashing or equality, so configs (s, i, ctx, _) and
 *  (s, j, ctx, _) address the same entry.
 */
static class AltAndContextMap extends FlexibleHashMap<ATNConfig,BitSet> {
	/** Code is function of (s, _, ctx, _) */
	@Override
	public int hashCode(ATNConfig o) {
		int hashCode = 7;
		hashCode = 31 * hashCode + o.state.stateNumber;
		hashCode = 31 * hashCode + o.context.hashCode();
		return hashCode;
	}

	/** Two keys are equal when their state numbers match and their
	 *  contexts are equal; alt and semantic context are ignored.
	 */
	@Override
	public boolean equals(ATNConfig a, ATNConfig b) {
		if ( a==b ) return true;
		if ( a==null || b==null ) return false;
		if ( hashCode(a) != hashCode(b) ) return false;
		// Compare a's context with b's. Comparing b.context with itself
		// is always true and would let configs whose contexts merely
		// collide on hash code be treated as the same key.
		return a.state.stateNumber==b.state.stateNumber
			&& a.context.equals(b.context);
	}
}
/**
SLL prediction termination.
There are two cases: the usual combined SLL+LL parsing and
pure SLL parsing that has no fail over to full LL.
COMBINED SLL+LL PARSING
SLL can decide to give up at any point, even immediately,
failing over to full LL. To be as efficient as possible,
though, SLL should fail over only when it's positive it can't get
anywhere on more lookahead without seeing a conflict.
Assuming combined SLL+LL parsing, an SLL config set with only
conflicting subsets should failover to full LL, even if the
config sets don't resolve to the same alternative like {1,2}
and {3,4}. If there is at least one nonconflicting set of
configs, SLL could continue with the hopes that more lookahead
will resolve via one of those nonconflicting configs.
Here's the prediction termination rule then: SLL (for SLL+LL
parsing) stops when it sees only conflicting config subsets.
In contrast, full LL keeps going when there is uncertainty.
HEURISTIC
As a heuristic, we stop prediction when we see any conflicting subset
unless we see a state that only has one alternative associated with
it. The single-alt-state thing lets prediction continue upon rules
like (otherwise, it would admit defeat too soon):
// [12|1|[], 6|2|[], 12|2|[]].
s : (ID | ID ID?) ';' ;
When the ATN simulation reaches the state before ';', it has a DFA
state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 12|1|[]
and 12|2|[] conflict, but we cannot stop processing this node because
alternative two has another way to continue, via [6|2|[]].
It also lets us continue for this rule:
// [1|1|[], 1|2|[], 8|3|[]]
a : A | A | A B ;
After matching input A, we reach the stop state for rule A, state 1.
State 8 is the state right before B. Clearly alternatives 1 and 2
conflict and no amount of further lookahead will separate the two.
However, alternative 3 will be able to continue and so we do not stop
working on this state. In the previous example, we're concerned with
states associated with the conflicting alternatives. Here alt 3 is not
associated with the conflicting configs, but since we can continue
looking for input reasonably, don't declare the state done.
PURE SLL PARSING
To handle pure SLL parsing, all we have to do is make sure that we
combine stack contexts for configurations that differ only by semantic
predicate. From there, we can do the usual SLL termination heuristic.
PREDICATES IN SLL+LL PARSING
SLL decisions don't evaluate predicates until after they reach DFA
stop states because they need to create the DFA cache that
works in all (semantic) situations. (In contrast, full LL
evaluates predicates collected during start state computation
so it can ignore predicates thereafter.) This means that SLL
termination detection can totally ignore semantic predicates.
Of course, implementation-wise, ATNConfigSets combine stack
contexts but not semantic predicate contexts so we might see
two configs like this:
(s, 1, x, {}), (s, 1, x', {p})
Before testing these configurations against others, we have
to merge x and x' (w/o modifying the existing configs). For
example, we test (x+x')==x'' when looking for conflicts in
the following configs.
(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})
If the configuration set has predicates, which we can test
quickly, this algorithm makes a copy of the configs and
strip out all of the predicates so that a standard
ATNConfigSet will merge everything ignoring
predicates.
*/
public static boolean hasSLLConflictTerminatingPrediction(PredictionMode mode, @NotNull ATNConfigSet configs) {
	// Only pure SLL parsing bothers to combine configs from different
	// semantic contexts. When we can fail over to full LL the extra
	// work costs more time, since we'll often fail over anyway.
	boolean stripPredicates =
		mode == PredictionMode.SLL && configs.hasSemanticContext;
	if ( stripPredicates ) {
		// dup configs, tossing out semantic predicates
		ATNConfigSet predicateFree = new ATNConfigSet();
		for (ATNConfig original : configs) {
			predicateFree.add(new ATNConfig(original, SemanticContext.NONE));
		}
		// from here on we look at the predicate-free copy only
		configs = predicateFree;
	}

	// pure SLL or combined SLL+LL mode parsing
	Collection<BitSet> altsets = getConflictingAltSubsets(configs);
	if ( !hasConflictingAltSet(altsets) ) {
		return false; // no conflicting subset at all
	}
	// a state with just one alt means prediction can still make progress
	return !hasStateAssociatedWithOneAlt(configs);
}
/**
Full LL prediction termination.
Can we stop looking ahead during ATN simulation or is there some
uncertainty as to which alternative we will ultimately pick, after
consuming more input? Even if there are partial conflicts, we might
know that everything is going to resolve to the same minimum
alt. That means we can stop since no more lookahead will change that
fact. On the other hand, there might be multiple conflicts that
resolve to different minimums. That means we need more look ahead to
decide which of those alternatives we should predict.
The basic idea is to split the set of configurations, C, into
conflicting (s, _, ctx, _) subsets and singleton subsets with
non-conflicting configurations. Two config's conflict if they have
identical state and rule stack contexts but different alternative
numbers: (s, i, ctx, _), (s, j, ctx, _) for i!=j.
Reduce these config subsets to the set of possible alternatives. You
can compute the alternative subsets in one go as follows:
A_s,ctx = {i | (s, i, ctx, _) in C, holding s, ctx fixed}
Or in pseudo-code:
for c in C:
map[c] U= c.alt # map hash/equals uses s and x, not alt and not pred
Then map.values is the set of A_s,ctx sets.
If |A_s,ctx|=1 then there is no conflict associated with s and ctx.
Reduce the subsets to singletons by choosing a minimum of each subset.
If the union of these alternatives sets is a singleton, then no amount
of more lookahead will help us. We will always pick that
alternative. If, however, there is more than one alternative, then we
are uncertain which alt to predict and must continue looking for
resolution. We may or may not discover an ambiguity in the future,
even if there are no conflicting subsets this round.
The biggest sin is to terminate early because it means we've made a
decision but were uncertain as to the eventual outcome. We haven't
used enough lookahead. On the other hand, announcing a conflict too
late is no big deal; you will still have the conflict. It's just
inefficient. It might even look until the end of file.
Semantic predicates for full LL aren't involved in this decision
because the predicates are evaluated during start state computation.
This set of configurations was derived from the initial subset with
configurations holding false predicate stripped out.
CONFLICTING CONFIGS
Two configurations, (s, i, x) and (s, j, x'), conflict when i!=j but
x = x'. Because we merge all (s, i, _) configurations together, that
means that there are at most n configurations associated with state s
for n possible alternatives in the decision. The merged stacks
complicate the comparison of config contexts, x and x'. Sam checks to
see if one is a subset of the other by calling merge and checking to
see if the merged result is either x or x'. If the x associated with
lowest alternative i is the superset, then i is the only possible
prediction since the others resolve to min i as well. If, however, x
is associated with j>i then at least one stack configuration for j is
not in conflict with alt i. The algorithm should keep going, looking
for more lookahead due to the uncertainty.
For simplicity, I'm doing an equality check between x and x' that lets
the algorithm continue to consume lookahead longer than necessary.
The reason I like the equality is of course the simplicity but also
because that is the test you need to detect the alternatives that are
actually in conflict.
CONTINUE/STOP RULE
Continue if union of resolved alt sets from nonconflicting and
conflicting alt subsets has more than one alt. We are uncertain about
which alternative to predict.
The complete set of alternatives, [i for (_,i,_)], tells us
which alternatives are still in the running for the amount of input
we've consumed at this point. The conflicting sets let us strip
away configurations that won't lead to more states (because we
resolve conflicts to the configuration with a minimum alternative for
given conflicting set.)
CASES:
* no conflicts & > 1 alt in set => continue
* (s, 1, x), (s, 2, x), (s, 3, z)
(s', 1, y), (s', 2, y)
yields nonconflicting set {3} U conflicting sets min({1,2}) U min({1,2}) = {1,3}
=> continue
* (s, 1, x), (s, 2, x),
(s', 1, y), (s', 2, y)
(s'', 1, z)
yields nonconflicting set {1} U conflicting sets min({1,2}) U min({1,2}) = {1}
=> stop and predict 1
* (s, 1, x), (s, 2, x),
(s', 1, y), (s', 2, y)
yields conflicting, reduced sets {1} U {1} = {1}
=> stop and predict 1, can announce ambiguity {1,2}
* (s, 1, x), (s, 2, x)
(s', 2, y), (s', 3, y)
yields conflicting, reduced sets {1} U {2} = {1,2}
=> continue
* (s, 1, x), (s, 2, x)
(s', 3, y), (s', 4, y)
yields conflicting, reduced sets {1} U {3} = {1,3}
=> continue
EXACT AMBIGUITY DETECTION
If all states report the same conflicting alt set, then we know we
have the real ambiguity set:
|A_i|>1 and A_i = A_j for all i, j.
In other words, we continue examining lookahead until all A_i have
more than one alt and all A_i are the same. If A={{1,2}, {1,3}}, then
regular LL prediction would terminate because the resolved set is
{1}. To determine what the real ambiguity is, we have to know whether
the ambiguity is between one and two or one and three so we keep
going. We can only stop prediction when we need exact ambiguity
detection when the sets look like A={{1,2}} or {{1,2},{1,2}} etc...
*/
public static int resolvesToJustOneViableAlt(Collection<BitSet> altsets) {
	// Thin alias: the answer is exactly what getSingleViableAlt computes.
	int viableAlt = getSingleViableAlt(altsets);
	return viableAlt;
}
/** Return true when no alt subset in altsets holds exactly one
 *  alternative, i.e. the negation of hasNonConflictingAltSet.
 */
public static boolean allSubsetsConflict(Collection<BitSet> altsets) {
	boolean someSubsetIsSingleton = hasNonConflictingAltSet(altsets);
	return !someSubsetIsSingleton;
}
/** Return true if at least one alt subset A_i in altsets A holds
 *  exactly one alternative: there exists len(A_i)==1.
 */
public static boolean hasNonConflictingAltSet(Collection<BitSet> altsets) {
	Iterator<BitSet> subsets = altsets.iterator();
	while ( subsets.hasNext() ) {
		boolean singleton = subsets.next().cardinality()==1;
		if ( singleton ) return true;
	}
	return false;
}
/** Return true if at least one alt subset A_i in altsets A holds more
 *  than one alternative: there exists len(A_i)>1.
 */
public static boolean hasConflictingAltSet(Collection<BitSet> altsets) {
	for (BitSet candidate : altsets) {
		if ( candidate.cardinality()<=1 ) continue; // empty or singleton: no conflict here
		return true;
	}
	return false;
}
/** Return true if every alt subset in altsets equals the first one,
 *  i.e. A_i = A_j for all i, j. An empty collection has nothing that
 *  could differ, so it yields true.
 */
public static boolean allSubsetsEqual(Collection<BitSet> altsets) {
	Iterator<BitSet> it = altsets.iterator();
	// Guard the empty case: calling next() unconditionally would throw
	// NoSuchElementException when there are no subsets at all.
	if ( !it.hasNext() ) return true;
	BitSet first = it.next();
	while ( it.hasNext() ) {
		BitSet next = it.next();
		if ( !next.equals(first) ) return false;
	}
	return true;
}
/** Return the one alternative mentioned anywhere in altsets, or
 *  ATN.INVALID_ALT_NUMBER when the union of all subsets does not hold
 *  exactly one alternative.
 */
public static int getUniqueAlt(Collection<BitSet> altsets) {
	BitSet union = getAlts(altsets);
	return union.cardinality()==1
		? union.nextSetBit(0)
		: ATN.INVALID_ALT_NUMBER;
}
/** Return a new BitSet holding the union of all alt subsets in
 *  altsets; the subsets themselves are not modified.
 */
public static BitSet getAlts(Collection<BitSet> altsets) {
	BitSet union = new BitSet();
	Iterator<BitSet> subsets = altsets.iterator();
	while ( subsets.hasNext() ) {
		union.or(subsets.next());
	}
	return union;
}
/**
 * Collect, for a configuration set, the alternatives seen under each
 * AltAndContextMap key and return those alt subsets.
 * In pseudo-code:
 *   for c in configs:
 *     map[c] U= c.alt # map hash/equals uses s and x, not alt and not pred
 */
public static Collection<BitSet> getConflictingAltSubsets(ATNConfigSet configs) {
	AltAndContextMap altsByStateAndContext = new AltAndContextMap();
	for (ATNConfig config : configs) {
		BitSet known = altsByStateAndContext.get(config);
		if ( known!=null ) {
			known.set(config.alt);
			continue;
		}
		// first config seen for this key: start a new subset
		BitSet fresh = new BitSet();
		altsByStateAndContext.put(config, fresh);
		fresh.set(config.alt);
	}
	return altsByStateAndContext.values();
}
/** Build a map from each state in a configuration set to the set of
 *  alternatives that appear with that state.
 *  In pseudo-code:
 *    for c in configs:
 *      map[c.state] U= c.alt
 */
public static Map<ATNState, BitSet> getStateToAltMap(ATNConfigSet configs) {
	Map<ATNState, BitSet> altsByState = new HashMap<ATNState, BitSet>();
	for (ATNConfig config : configs) {
		ATNState state = config.state;
		BitSet known = altsByState.get(state);
		if ( known!=null ) {
			known.set(config.alt);
			continue;
		}
		// first config seen for this state: start a new alt set
		BitSet fresh = new BitSet();
		altsByState.put(state, fresh);
		fresh.set(config.alt);
	}
	return altsByState;
}
/** Return true if some state in configs appears with exactly one
 *  alternative, judged from the map built by getStateToAltMap.
 */
public static boolean hasStateAssociatedWithOneAlt(ATNConfigSet configs) {
	Iterator<BitSet> altsPerState = getStateToAltMap(configs).values().iterator();
	while ( altsPerState.hasNext() ) {
		boolean onlyOneAlt = altsPerState.next().cardinality()==1;
		if ( onlyOneAlt ) return true;
	}
	return false;
}
/** Reduce every alt subset to its minimum alternative and return that
 *  alternative if all subsets agree on it; return
 *  ATN.INVALID_ALT_NUMBER as soon as two different minimums are seen.
 */
public static int getSingleViableAlt(Collection<BitSet> altsets) {
	BitSet minimums = new BitSet();
	Iterator<BitSet> subsets = altsets.iterator();
	while ( subsets.hasNext() ) {
		// lowest set bit is the minimum alt of this subset
		minimums.set(subsets.next().nextSetBit(0));
		boolean moreThanOneViableAlt = minimums.cardinality()>1;
		if ( moreThanOneViableAlt ) return ATN.INVALID_ALT_NUMBER;
	}
	return minimums.nextSetBit(0);
}
}

View File

@ -38,6 +38,7 @@ import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.atn.PredictionMode;
import javax.print.PrintException;
import java.io.FileInputStream;
@ -224,7 +225,7 @@ public class TestRig {
}
if ( SLL ) {
parser.getInterpreter().setSLL(true);
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
}
parser.setTokenStream(tokens);

1
tool/playground/T-input Normal file
View File

@ -0,0 +1 @@
abc

View File

@ -1,12 +1,5 @@
grammar T;
s : expr[0] ;
expr[int _p]
: ID
( {5 >= $_p}? '*' expr[6]
| {4 >= $_p}? '+' expr[5]
)*
;
ID : [a-zA-Z]+ ; // match identifiers
WS : [ \t\r\n]+ -> skip ; // toss out whitespace
s@after {dumpDFA();}
: ID | ID {;} ;
ID : 'a'..'z'+ ;
WS : (' '|'\t'|'\n')+ {skip();} ;

View File

@ -6,6 +6,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.PredictionMode;
import java.io.File;
@ -127,7 +128,7 @@ class TestJava {
parser.setTokenStream(tokens);
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
if ( SLL ) parser.getInterpreter().setSLL(true);
if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
// start parsing at the compilationUnit rule
ParserRuleContext<Token> tree = parser.compilationUnit();
if ( showTree ) tree.inspect(parser);

View File

@ -36,6 +36,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.PredictionMode;
import java.io.File;
import java.util.ArrayList;
@ -197,7 +198,7 @@ class TestJavaLR {
System.out.println(ParserATNSimulator.predict_calls +" parser predict calls");
System.out.println(ParserATNSimulator.retry_with_context +" retry_with_context after SLL conflict");
System.out.println(ParserATNSimulator.retry_with_context_indicates_no_conflict +" retry sees no conflict");
System.out.println(ParserATNSimulator.retry_with_context_predicts_same_as_alt +" retry predicts same alt as resolving conflict");
System.out.println(ParserATNSimulator.retry_with_context_predicts_same_alt +" retry predicts same alt as resolving conflict");
System.out.println(ParserATNSimulator.retry_with_context_from_dfa +" retry from DFA");
}
@ -260,7 +261,7 @@ class TestJavaLR {
JavaLRParser parser = new JavaLRParser(tokens);
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
if ( bail ) parser.setErrorHandler(new BailErrorStrategy());
if ( SLL ) parser.getInterpreter().setSLL(true);
if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
// start parsing at the compilationUnit rule
ParserRuleContext<Token> t = parser.compilationUnit();

View File

@ -411,7 +411,7 @@ public class TestATNParserPrediction extends BaseTest {
checkDFAConstruction(lg, g, decision, inputs, dfa);
}
@Test public void testAmbigDef() throws Exception {
@Test public void testContinuePrediction() throws Exception {
// Sam found prev def of ambiguity was too restrictive.
// E.g., (13, 1, []), (13, 2, []), (12, 2, []) should not
// be declared ambig since (12, 2, []) can take us to
@ -429,45 +429,23 @@ public class TestATNParserPrediction extends BaseTest {
int decision = 1;
checkPredictedAlt(lg, g, decision, "a;", 1);
checkPredictedAlt(lg, g, decision, "ab;", 2);
}
// After matching these inputs for decision, what is DFA after each prediction?
// String[] inputs = {
// "34a",
// "34ab",
// "((34))a",
// "((34))ab",
// };
// String[] dfa = {
// "s0-INT->s1\n" +
// "s1-'a'->s2\n" +
// "s2-EOF->:s3=>1\n",
//
// "s0-INT->s1\n" +
// "s1-'a'->s2\n" +
// "s2-EOF->:s3=>1\n" +
// "s2-'b'->:s4=>2\n",
//
// "s0-'('->s5\n" +
// "s0-INT->s1\n" +
// "s1-'a'->s2\n" +
// "s2-EOF->:s3=>1\n" +
// "s2-'b'->:s4=>2\n" +
// "s5-'('->s6\n" +
// "s6-INT->s7\n" +
// "s7-')'->s8\n" +
// "s8-')'->s1\n",
//
// "s0-'('->s5\n" +
// "s0-INT->s1\n" +
// "s1-'a'->s2\n" +
// "s2-EOF->:s3=>1\n" +
// "s2-'b'->:s4=>2\n" +
// "s5-'('->s6\n" +
// "s6-INT->s7\n" +
// "s7-')'->s8\n" +
// "s8-')'->s1\n",
// };
// checkDFAConstruction(lg, g, decision, inputs, dfa);
@Test public void testContinuePrediction2() throws Exception {
// ID is ambig for first two alts, but ID SEMI lets us move forward with alt 3
LexerGrammar lg = new LexerGrammar(
"lexer grammar L;\n" +
"ID : 'a'..'z' ;\n" + // one char
"SEMI : ';' ;\n"+
"INT : '0'..'9'+ ;\n"
);
Grammar g = new Grammar(
"parser grammar T;\n"+
"tokens {ID,SEMI,INT}\n" +
"a : ID | ID | ID SEMI ;\n");
int decision = 0;
checkPredictedAlt(lg, g, decision, "a", 1);
checkPredictedAlt(lg, g, decision, "a;", 3);
}
/** first check that the ATN predicts right alt.

View File

@ -41,7 +41,7 @@ import org.junit.Test;
*/
public class TestFullContextParsing extends BaseTest {
@Test public void testAmbigYieldsNonCtxSensitiveDFA() {
@Test public void testAmbigYieldsCtxSensitiveDFA() {
String grammar =
"grammar T;\n"+
"s" +
@ -53,9 +53,9 @@ public class TestFullContextParsing extends BaseTest {
"abc", true);
String expecting =
"Decision 0:\n" +
"s0-ID->:s1=>1\n"; // not ctx sensitive
"s0-ID->s1^\n"; // ctx sensitive
assertEquals(expecting, result);
assertEquals("line 1:0 reportAmbiguity d=0: ambigAlts={1..2}, input='abc'\n",
assertEquals("line 1:0 reportAttemptingFullContext d=0, input='abc'\n",
this.stderrDuringParse);
}
@ -122,10 +122,11 @@ public class TestFullContextParsing extends BaseTest {
String grammar =
"grammar T;\n"+
"s" +
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
"@after {dumpDFA();}\n" +
" : '{' stat* '}'" +
" ;\n" +
"stat: 'if' ID 'then' stat ('else' 'foo')?\n" +
"stat: 'if' ID 'then' stat ('else' ID)?\n" +
" | 'return'\n" +
" ;" +
"ID : 'a'..'z'+ ;\n"+
@ -139,19 +140,6 @@ public class TestFullContextParsing extends BaseTest {
assertEquals(expecting, result);
assertEquals(null, this.stderrDuringParse);
input =
"{ if x then if y then return else foo }";
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
input, true);
expecting =
"Decision 1:\n" +
"s0-'else'->s1^\n" +
"s0-'}'->:s2=>2\n";
assertEquals(expecting, result);
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:38 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
this.stderrDuringParse);
input = "{ if x then return else foo }";
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
input, true);
@ -169,15 +157,35 @@ public class TestFullContextParsing extends BaseTest {
"line 1:19 reportContextSensitivity d=1, input='else'\n",
this.stderrDuringParse);
input = "{ if x then return else foo }";
input =
"{ if x then if y then return else foo }";
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
input, true);
expecting =
"Decision 1:\n" +
"s0-'else'->s1^\n" +
"s0-'}'->:s2=>2\n";
assertEquals(expecting, result);
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:38 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
this.stderrDuringParse);
// should not be ambiguous because the second 'else bar' clearly
// indicates that the first else should match to the innermost if.
// LL_EXACT_AMBIG_DETECTION makes us keep going to resolve
input =
"{ if x then if y then return else foo else bar }";
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
input, true);
expecting =
"Decision 1:\n" +
"s0-'else'->s1^\n";
assertEquals(expecting, result);
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:19 reportContextSensitivity d=1, input='else'\n",
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:38 reportContextSensitivity d=1, input='elsefooelse'\n" +
"line 1:38 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:38 reportContextSensitivity d=1, input='else'\n",
this.stderrDuringParse);
input =
@ -193,7 +201,7 @@ public class TestFullContextParsing extends BaseTest {
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:19 reportContextSensitivity d=1, input='else'\n" +
"line 2:27 reportAttemptingFullContext d=1, input='else'\n" +
"line 2:36 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
"line 2:36 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
this.stderrDuringParse);
input =
@ -209,7 +217,7 @@ public class TestFullContextParsing extends BaseTest {
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
"line 1:19 reportContextSensitivity d=1, input='else'\n" +
"line 2:27 reportAttemptingFullContext d=1, input='else'\n" +
"line 2:36 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
"line 2:36 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
this.stderrDuringParse);
}
@ -221,7 +229,9 @@ public class TestFullContextParsing extends BaseTest {
public void testLoopsSimulateTailRecursion() throws Exception {
String grammar =
"grammar T;\n" +
"prog: expr_or_assign*;\n" +
"prog\n" +
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
" : expr_or_assign*;\n" +
"expr_or_assign\n" +
" : expr '++' {System.out.println(\"fail.\");}\n" +
" | expr {System.out.println(\"pass: \"+$expr.text);}\n" +
@ -236,11 +246,77 @@ public class TestFullContextParsing extends BaseTest {
"";
String found = execParser("T.g4", grammar, "TParser", "TLexer", "prog", "a(i)<-x", true);
assertEquals("pass.\n", found);
assertEquals("pass: a(i)<-x\n", found);
String expecting =
"line 1:3 reportAttemptingFullContext d=3, input='a(i)'\n" +
"line 1:7 reportAmbiguity d=3: ambigAlts={2..3}, input='a(i)<-x'\n";
"line 1:7 reportAmbiguity d=3: ambigAlts={2, 3}, input='a(i)<-x'\n";
assertEquals(expecting, this.stderrDuringParse);
}
/**
 * Loop-free variant of testLoopsSimulateTailRecursion.
 *
 * The grammar's {@code prog} rule switches the interpreter to
 * PredictionMode.LL_EXACT_AMBIG_DETECTION in its {@code @init} action.
 * The test parses the input {@code a@} starting at {@code prog} and checks
 * both the text printed by the embedded action and the diagnostic lines
 * captured in {@code stderrDuringParse}.
 *
 * @throws Exception if running the generated parser fails
 */
@Test
public void testAmbiguityNoLoop() throws Exception {
	// simpler version of testLoopsSimulateTailRecursion, no loops
	String grammar =
		"grammar T;\n" +
		"prog\n" +
		"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
		" : expr expr {System.out.println(\"alt 1\");}\n" +
		" | expr\n" +
		" ;\n" +
		"expr: '@'\n" +
		" | ID '@'\n" +
		" | ID\n" +
		" ;\n" +
		"ID : [a-z]+ ;\n" +
		// The backslashes are doubled so that the grammar text contains the
		// escape sequences \r \n \t rather than raw CR/LF/TAB characters;
		// a raw line break would otherwise land inside the [...] set.
		// This matches the WS rule used by testExprAmbiguity.
		"WS : [ \\r\\n\\t]+ -> skip ;\n";
	String found = execParser("T.g4", grammar, "TParser", "TLexer", "prog", "a@", true);
	// Only the first alternative of prog has a print action.
	assertEquals("alt 1\n", found);
	// Expected diagnostics: an ambiguity between alts 1 and 2 at decision 0,
	// then a context sensitivity at decision 1.
	String expecting =
		"line 1:2 reportAttemptingFullContext d=0, input='a@'\n" +
		"line 1:2 reportAmbiguity d=0: ambigAlts={1, 2}, input='a@'\n" +
		"line 1:2 reportAttemptingFullContext d=1, input='a@'\n" +
		"line 1:2 reportContextSensitivity d=1, input='a@'\n";
	assertEquals(expecting, this.stderrDuringParse);
}
/**
 * Exercises ambiguity reporting on a hand-translated left-recursive
 * expression rule whose loop alternatives are guarded by precedence
 * predicates.
 *
 * The start rule {@code s} enables PredictionMode.LL_EXACT_AMBIG_DETECTION
 * in its {@code @init} action and prints the parse tree of {@code expr[0]}.
 * Two inputs are parsed with the same grammar; for each one the printed tree
 * and the diagnostics captured in {@code stderrDuringParse} are compared
 * against fixed expectations.
 *
 * @throws Exception if running the generated parser fails
 */
@Test
public void testExprAmbiguity() throws Exception {
	// translated left-recursive expr rule to test ambig detection
	final String exprGrammar =
		"grammar T;\n" +
		"s\n" +
		"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
		" : expr[0] {System.out.println($expr.ctx.toStringTree(this));} ;\n" +
		"\n" +
		"expr[int _p]\n" +
		" : ID\n" +
		" ( {5 >= $_p}? '*' expr[6]\n" +
		" | {4 >= $_p}? '+' expr[5]\n" +
		" )*\n" +
		" ;\n" +
		"\n" +
		"ID : [a-zA-Z]+ ; // match identifiers\n" +
		"WS : [ \\t\\r\\n]+ -> skip ; // toss out whitespace\n";

	// First input: a single '+' operator.
	final String sumTree =
		execParser("T.g4", exprGrammar, "TParser", "TLexer", "s", "a+b", true);
	assertEquals("(expr a + (expr b))\n", sumTree);
	assertEquals(
		"line 1:1 reportAttemptingFullContext d=1, input='+'\n" +
		"line 1:1 reportContextSensitivity d=1, input='+'\n",
		this.stderrDuringParse);

	// Second input: '+' followed by '*'; the '*' decision is expected to
	// be reported as an ambiguity between alts 1 and 2.
	final String mixedTree =
		execParser("T.g4", exprGrammar, "TParser", "TLexer", "s", "a+b*c", true);
	assertEquals("(expr a + (expr b * (expr c)))\n", mixedTree);
	assertEquals(
		"line 1:1 reportAttemptingFullContext d=1, input='+'\n" +
		"line 1:1 reportContextSensitivity d=1, input='+'\n" +
		"line 1:3 reportAttemptingFullContext d=1, input='*'\n" +
		"line 1:5 reportAmbiguity d=1: ambigAlts={1, 2}, input='*c'\n",
		this.stderrDuringParse);
}

View File

@ -330,8 +330,7 @@ public class TestLeftRecursion extends BaseTest {
result = execParser("Expr.g4", grammar, "ExprParser", "ExprLexer", "prog", "a+b*2\n", true);
assertEquals("line 1:1 reportAttemptingFullContext d=3, input='+'\n" +
"line 1:1 reportContextSensitivity d=3, input='+'\n" +
"line 1:3 reportAttemptingFullContext d=3, input='*'\n" +
"line 1:3 reportAmbiguity d=3: ambigAlts={1..2}, input='*'\n",
"line 1:3 reportAttemptingFullContext d=3, input='*'\n",
stderrDuringParse);
result = execParser("Expr.g4", grammar, "ExprParser", "ExprLexer", "prog", "(1+2)*3\n", true);

View File

@ -1,28 +0,0 @@
package org.antlr.v4.test;
import org.antlr.runtime.RecognitionException;
import org.antlr.v4.Tool;
import org.antlr.v4.codegen.CodeGenerator;
import org.antlr.v4.tool.LexerGrammar;
import org.junit.Test;
import org.stringtemplate.v4.ST;
/** Checks how attribute references inside lexer actions are translated. */
public class TestLexerAttributes extends BaseTest {
	/**
	 * Generates a lexer for a one-rule grammar whose action assigns
	 * {@code $type} and verifies the translated action text.
	 *
	 * The action is bracketed by '#' characters so that the translated
	 * text can be cut out of the rendered lexer source by position.
	 *
	 * @throws RecognitionException declared by the grammar construction
	 */
	@Test
	public void testSetType() throws RecognitionException {
		LexerGrammar lexerGrammar = new LexerGrammar(
			"lexer grammar T;\n" +
			"A : 'a' {#$type=101;#} ;\n"
		);

		// Process the grammar before creating the code generator.
		new Tool().process(lexerGrammar, false);

		CodeGenerator generator = new CodeGenerator(lexerGrammar);
		String rendered = generator.generateLexer().render();

		// Everything strictly between the first and the last '#'.
		int firstHash = rendered.indexOf('#');
		int lastHash = rendered.lastIndexOf('#');
		String translatedAction = rendered.substring(firstHash + 1, lastHash);

		assertEquals("_type = 101;", translatedAction);
	}
}

View File

@ -35,6 +35,24 @@ public class TestSemPredEvalParser extends BaseTest {
// TEST VALIDATING PREDS
/**
 * Parses the single token {@code x} with a grammar whose only ID
 * alternative is guarded by {@code {false}?} and whose other alternative
 * requires an INT, then checks the error message captured in
 * {@code stderrDuringParse}.
 *
 * @throws Exception if running the generated parser fails
 */
@Test
public void testSimpleValidate() throws Exception {
	final String predGrammar =
		"grammar T;\n" +
		"s : a ;\n" +
		"a : {false}? ID {System.out.println(\"alt 1\");}\n" +
		" | {true}? INT {System.out.println(\"alt 2\");}\n" +
		" ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"INT : '0'..'9'+;\n" +
		"WS : (' '|'\\n') {skip();} ;\n";

	// The returned standard output is not inspected; only stderr is asserted.
	execParser("T.g4", predGrammar, "TParser", "TLexer", "s",
			   "x", false);

	assertEquals("line 1:0 no viable alternative at input 'x'\n", stderrDuringParse);
}
@Test public void testSimpleValidate2() throws Exception {
String grammar =
"grammar T;\n" +
"s : a a a;\n" +
@ -129,16 +147,14 @@ public class TestSemPredEvalParser extends BaseTest {
}
@Test public void test2UnpredicatedAlts() throws Exception {
// We have n-2 predicates for n alternatives. We have no choice
// but to pick the first unpredicated alternative if the n-2
// predicates fail.
// this should call reportInsufficientPredicates()
// We have n-2 predicates for n alternatives. pick first alt
String grammar =
"grammar T;\n" +
"@header {" +
"import java.util.*;" +
"}" +
"s : a ';' a;\n" + // do 2x: once in ATN, next in DFA
"s : {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
" a ';' a;\n" + // do 2x: once in ATN, next in DFA
"a : ID {System.out.println(\"alt 1\");}\n" +
" | ID {System.out.println(\"alt 2\");}\n" +
" | {false}? ID {System.out.println(\"alt 3\");}\n" +
@ -154,23 +170,20 @@ public class TestSemPredEvalParser extends BaseTest {
"alt 1\n";
assertEquals(expecting, found);
assertEquals("line 1:0 reportAttemptingFullContext d=0, input='x'\n" +
"line 1:0 reportAmbiguity d=0: ambigAlts={1..2}, input='x'\n" +
"line 1:0 reportAmbiguity d=0: ambigAlts={1, 2}, input='x'\n" +
"line 1:3 reportAttemptingFullContext d=0, input='y'\n" +
"line 1:3 reportAmbiguity d=0: ambigAlts={1..2}, input='y'\n",
"line 1:3 reportAmbiguity d=0: ambigAlts={1, 2}, input='y'\n",
this.stderrDuringParse);
}
@Test public void test2UnpredicatedAltsAndOneOrthogonalAlt() throws Exception {
// We have n-2 predicates for n alternatives. We have no choice
// but to pick the first unpredicated alternative if the n-2
// predicates fail.
// this should call reportInsufficientPredicates()
String grammar =
"grammar T;\n" +
"@header {" +
"import java.util.*;" +
"}" +
"s : a ';' a ';' a;\n" +
"s : {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
" a ';' a ';' a;\n" +
"a : INT {System.out.println(\"alt 1\");}\n" +
" | ID {System.out.println(\"alt 2\");}\n" + // must pick this one for ID since pred is false
" | ID {System.out.println(\"alt 3\");}\n" +
@ -188,9 +201,9 @@ public class TestSemPredEvalParser extends BaseTest {
"alt 2\n";
assertEquals(expecting, found);
assertEquals("line 1:4 reportAttemptingFullContext d=0, input='x'\n" +
"line 1:4 reportAmbiguity d=0: ambigAlts={2..3}, input='x'\n" +
"line 1:4 reportAmbiguity d=0: ambigAlts={2, 3}, input='x'\n" +
"line 1:7 reportAttemptingFullContext d=0, input='y'\n" +
"line 1:7 reportAmbiguity d=0: ambigAlts={2..3}, input='y'\n",
"line 1:7 reportAmbiguity d=0: ambigAlts={2, 3}, input='y'\n",
this.stderrDuringParse);
}