forked from jasder/antlr
Merge branch 'new-conflicting-alts'
This commit is contained in:
commit
cb340b1e04
|
@ -31,32 +31,34 @@ package org.antlr.v4.runtime;
|
|||
|
||||
import org.antlr.v4.runtime.atn.ATNConfigSet;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
import org.antlr.v4.runtime.misc.Nullable;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/** How to emit recognition errors */
|
||||
public interface ANTLRErrorListener {
|
||||
/** Upon syntax error, notify any interested parties. This is not how to
|
||||
* recover from errors or compute error messages. The parser
|
||||
* ANTLRErrorStrategy specifies how to recover from syntax errors
|
||||
* and how to compute error messages. This listener's job is simply to
|
||||
* emit a computed message, though it has enough information to
|
||||
* create its own message in many cases.
|
||||
/** Upon syntax error, notify any interested parties. This is not
|
||||
* how to recover from errors or compute error messages. The
|
||||
* parser ANTLRErrorStrategy specifies how to recover from syntax
|
||||
* errors and how to compute error messages. This listener's job
|
||||
* is simply to emit a computed message, though it has enough
|
||||
* information to create its own message in many cases.
|
||||
*
|
||||
* The RecognitionException is non-null for all syntax errors except
|
||||
* when we discover mismatched token errors that we can recover from
|
||||
* in-line, without returning from the surrounding rule (via the
|
||||
* single token insertion and deletion mechanism).
|
||||
* The RecognitionException is non-null for all syntax errors
|
||||
* except when we discover mismatched token errors that we can
|
||||
* recover from in-line, without returning from the surrounding
|
||||
* rule (via the single token insertion and deletion mechanism).
|
||||
*
|
||||
* @param recognizer
|
||||
* What parser got the error. From this object, you
|
||||
* can access the context as well as the input stream.
|
||||
* What parser got the error. From this
|
||||
* object, you can access the context as well
|
||||
* as the input stream.
|
||||
* @param offendingSymbol
|
||||
* The offending token in the input token stream, unless recognizer
|
||||
* is a lexer (then it's null)
|
||||
* If no viable alternative error, e has token
|
||||
* at which we started production for the decision.
|
||||
* The offending token in the input token
|
||||
* stream, unless recognizer is a lexer (then it's null) If
|
||||
* no viable alternative error, e has token at which we
|
||||
* started production for the decision.
|
||||
* @param line
|
||||
* At what line in input does the error occur? This always refers to
|
||||
* stopTokenIndex
|
||||
|
@ -77,15 +79,18 @@ public interface ANTLRErrorListener {
|
|||
String msg,
|
||||
@Nullable RecognitionException e);
|
||||
|
||||
/** Called when the parser detects a true ambiguity: an input sequence can be matched
|
||||
* literally by two or more pass through the grammar. ANTLR resolves the ambiguity in
|
||||
* favor of the alternative appearing first in the grammar. The start and stop index are
|
||||
* zero-based absolute indices into the token stream. ambigAlts is a set of alternative numbers
|
||||
* that can match the input sequence. This method is only called when we are parsing with
|
||||
* full context.
|
||||
/** Called when the parser detects a true ambiguity: an input
|
||||
* sequence can be matched literally by two or more paths through
|
||||
* the grammar. ANTLR resolves the ambiguity in favor of the
|
||||
* alternative appearing first in the grammar. The start and stop
|
||||
* index are zero-based absolute indices into the token
|
||||
* stream. ambigAlts is a set of alternative numbers that can
|
||||
* match the input sequence. This method is only called when we
|
||||
* are parsing with full context.
|
||||
*/
|
||||
void reportAmbiguity(@NotNull Parser recognizer,
|
||||
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull BitSet ambigAlts,
|
||||
@NotNull ATNConfigSet configs);
|
||||
|
||||
void reportAttemptingFullContext(@NotNull Parser recognizer,
|
||||
|
@ -93,10 +98,11 @@ public interface ANTLRErrorListener {
|
|||
int startIndex, int stopIndex,
|
||||
@NotNull ATNConfigSet configs);
|
||||
|
||||
/** Called by the parser when it find a conflict that is resolved by retrying the parse
|
||||
* with full context. This is not a warning; it simply notifies you that your grammar
|
||||
* is more complicated than Strong LL can handle. The parser moved up to full context
|
||||
* parsing for that input sequence.
|
||||
/** Called by the parser when it finds a conflict that is resolved
|
||||
* by retrying the parse with full context. This is not a
|
||||
* warning; it simply notifies you that your grammar is more
|
||||
* complicated than Strong LL can handle. The parser moved up to
|
||||
* full context parsing for that input sequence.
|
||||
*/
|
||||
void reportContextSensitivity(@NotNull Parser recognizer,
|
||||
@NotNull DFA dfa,
|
||||
|
|
|
@ -30,10 +30,10 @@ package org.antlr.v4.runtime;
|
|||
|
||||
import org.antlr.v4.runtime.atn.ATNConfigSet;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Sam Harwell
|
||||
*/
|
||||
public class BaseErrorListener implements ANTLRErrorListener {
|
||||
|
@ -52,7 +52,7 @@ public class BaseErrorListener implements ANTLRErrorListener {
|
|||
DFA dfa,
|
||||
int startIndex,
|
||||
int stopIndex,
|
||||
IntervalSet ambigAlts,
|
||||
BitSet ambigAlts,
|
||||
ATNConfigSet configs)
|
||||
{
|
||||
}
|
||||
|
|
|
@ -32,18 +32,20 @@ package org.antlr.v4.runtime;
|
|||
import org.antlr.v4.runtime.atn.ATNConfigSet;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.runtime.misc.Interval;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
import java.util.BitSet;
|
||||
|
||||
public class DiagnosticErrorListener extends BaseErrorListener {
|
||||
@Override
|
||||
public void reportAmbiguity(@NotNull Parser recognizer,
|
||||
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull BitSet ambigAlts,
|
||||
@NotNull ATNConfigSet configs)
|
||||
{
|
||||
recognizer.notifyErrorListeners("reportAmbiguity d=" + dfa.decision +
|
||||
": ambigAlts=" + ambigAlts + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
": ambigAlts=" + ambigAlts + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -53,16 +55,18 @@ public class DiagnosticErrorListener extends BaseErrorListener {
|
|||
@NotNull ATNConfigSet configs)
|
||||
{
|
||||
recognizer.notifyErrorListeners("reportAttemptingFullContext d=" +
|
||||
dfa.decision + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
dfa.decision + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportContextSensitivity(@NotNull Parser recognizer, @NotNull DFA dfa,
|
||||
int startIndex, int stopIndex, @NotNull ATNConfigSet configs)
|
||||
public void reportContextSensitivity(@NotNull Parser recognizer,
|
||||
@NotNull DFA dfa,
|
||||
int startIndex, int stopIndex,
|
||||
@NotNull ATNConfigSet configs)
|
||||
{
|
||||
recognizer.notifyErrorListeners("reportContextSensitivity d=" +
|
||||
dfa.decision + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
dfa.decision + ", input='" +
|
||||
recognizer.getTokenStream().getText(Interval.of(startIndex, stopIndex)) + "'");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -30,12 +30,11 @@ package org.antlr.v4.runtime;
|
|||
|
||||
import org.antlr.v4.runtime.atn.ATNConfigSet;
|
||||
import org.antlr.v4.runtime.dfa.DFA;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author Sam Harwell
|
||||
*/
|
||||
public class ProxyErrorListener implements ANTLRErrorListener {
|
||||
|
@ -63,7 +62,7 @@ public class ProxyErrorListener implements ANTLRErrorListener {
|
|||
DFA dfa,
|
||||
int startIndex,
|
||||
int stopIndex,
|
||||
IntervalSet ambigAlts,
|
||||
BitSet ambigAlts,
|
||||
ATNConfigSet configs)
|
||||
{
|
||||
for (ANTLRErrorListener listener : delegates) {
|
||||
|
|
|
@ -71,6 +71,14 @@ public class ATNConfig {
|
|||
@NotNull
|
||||
public final SemanticContext semanticContext;
|
||||
|
||||
public ATNConfig(ATNConfig old) { // dup
|
||||
this.state = old.state;
|
||||
this.alt = old.alt;
|
||||
this.context = old.context;
|
||||
this.semanticContext = old.semanticContext;
|
||||
this.reachesIntoOuterContext = old.reachesIntoOuterContext;
|
||||
}
|
||||
|
||||
public ATNConfig(@NotNull ATNState state,
|
||||
int alt,
|
||||
@Nullable PredictionContext context)
|
||||
|
@ -93,15 +101,26 @@ public class ATNConfig {
|
|||
this(c, state, c.context, c.semanticContext);
|
||||
}
|
||||
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @NotNull SemanticContext semanticContext) {
|
||||
this(c, state, c.context, semanticContext);
|
||||
}
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
|
||||
@NotNull SemanticContext semanticContext)
|
||||
{
|
||||
this(c, state, c.context, semanticContext);
|
||||
}
|
||||
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @Nullable PredictionContext context) {
|
||||
public ATNConfig(@NotNull ATNConfig c,
|
||||
@NotNull SemanticContext semanticContext)
|
||||
{
|
||||
this(c, c.state, c.context, semanticContext);
|
||||
}
|
||||
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
|
||||
@Nullable PredictionContext context)
|
||||
{
|
||||
this(c, state, context, c.semanticContext);
|
||||
}
|
||||
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state, @Nullable PredictionContext context,
|
||||
public ATNConfig(@NotNull ATNConfig c, @NotNull ATNState state,
|
||||
@Nullable PredictionContext context,
|
||||
@NotNull SemanticContext semanticContext)
|
||||
{
|
||||
this.state = state;
|
||||
|
|
|
@ -31,9 +31,9 @@ package org.antlr.v4.runtime.atn;
|
|||
|
||||
import org.antlr.v4.runtime.misc.Array2DHashSet;
|
||||
import org.antlr.v4.runtime.misc.DoubleKeyMap;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
|
@ -280,7 +280,8 @@ public class ATNConfigSet implements Set<ATNConfig> {
|
|||
// TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
|
||||
// TODO: can we track conflicts as they are added to save scanning configs later?
|
||||
public int uniqueAlt;
|
||||
protected IntervalSet conflictingAlts;
|
||||
protected BitSet conflictingAlts;
|
||||
|
||||
// Used in parser and lexer. In lexer, it indicates we hit a pred
|
||||
// while computing a closure operation. Don't make a DFA state from this.
|
||||
public boolean hasSemanticContext;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,408 @@
|
|||
package org.antlr.v4.runtime.atn;
|
||||
|
||||
import org.antlr.v4.runtime.misc.FlexibleHashMap;
|
||||
import org.antlr.v4.runtime.misc.NotNull;
|
||||
|
||||
import java.util.BitSet;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
public enum PredictionMode {
|
||||
/** Do only local context prediction (SLL style) and using
|
||||
* heuristic which almost always works but is much faster
|
||||
* than precise answer.
|
||||
*/
|
||||
SLL,
|
||||
|
||||
/** Full LL(*) that always gets right answer. For speed
|
||||
* reasons, we terminate the prediction process when we know for
|
||||
* sure which alt to predict. We don't always know what
|
||||
* the ambiguity is in this mode.
|
||||
*/
|
||||
LL,
|
||||
|
||||
/** Tell the full LL prediction algorithm to pursue lookahead until
|
||||
* it has uniquely predicted an alternative without conflict or it's
|
||||
* certain that it's found an ambiguous input sequence. In this
|
||||
* mode, the prediction process will
|
||||
* continue looking for the exact ambiguous sequence even if
|
||||
* it has already figured out which alternative to predict.
|
||||
*/
|
||||
LL_EXACT_AMBIG_DETECTION;
|
||||
|
||||
/** A Map that uses just the state and the stack context as the key. */
|
||||
static class AltAndContextMap extends FlexibleHashMap<ATNConfig,BitSet> {
|
||||
/** Code is function of (s, _, ctx, _) */
|
||||
@Override
|
||||
public int hashCode(ATNConfig o) {
|
||||
int hashCode = 7;
|
||||
hashCode = 31 * hashCode + o.state.stateNumber;
|
||||
hashCode = 31 * hashCode + o.context.hashCode();
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(ATNConfig a, ATNConfig b) {
|
||||
if ( a==b ) return true;
|
||||
if ( a==null || b==null ) return false;
|
||||
if ( hashCode(a) != hashCode(b) ) return false;
|
||||
return a.state.stateNumber==b.state.stateNumber
|
||||
&& b.context.equals(b.context);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
SLL prediction termination.
|
||||
|
||||
There are two cases: the usual combined SLL+LL parsing and
|
||||
pure SLL parsing that has no fail over to full LL.
|
||||
|
||||
COMBINED SLL+LL PARSING
|
||||
|
||||
SLL can decide to give up any point, even immediately,
|
||||
failing over to full LL. To be as efficient as possible,
|
||||
though, SLL should fail over only when it's positive it can't get
|
||||
anywhere on more lookahead without seeing a conflict.
|
||||
|
||||
Assuming combined SLL+LL parsing, an SLL config set with only
|
||||
conflicting subsets should failover to full LL, even if the
|
||||
config sets don't resolve to the same alternative like {1,2}
|
||||
and {3,4}. If there is at least one nonconflicting set of
|
||||
configs, SLL could continue with the hopes that more lookahead
|
||||
will resolve via one of those nonconflicting configs.
|
||||
|
||||
Here's the prediction termination rule then: SLL (for SLL+LL
|
||||
parsing) stops when it sees only conflicting config subsets.
|
||||
In contrast, full LL keeps going when there is uncertainty.
|
||||
|
||||
HEURISTIC
|
||||
|
||||
As a heuristic, we stop prediction when we see any conflicting subset
|
||||
unless we see a state that only has one alternative associated with
|
||||
it. The single-alt-state thing lets prediction continue upon rules
|
||||
like (otherwise, it would admit defeat too soon):
|
||||
|
||||
// [12|1|[], 6|2|[], 12|2|[]].
|
||||
s : (ID | ID ID?) ';' ;
|
||||
|
||||
When the ATN simulation reaches the state before ';', it has a DFA
|
||||
state that looks like: [12|1|[], 6|2|[], 12|2|[]]. Naturally 12|1|[]
|
||||
and 12|2|[] conflict, but we cannot stop processing this node because
|
||||
alternative two has another way to continue, via [6|2|[]].
|
||||
|
||||
It also lets us continue for this rule:
|
||||
|
||||
// [1|1|[], 1|2|[], 8|3|[]]
|
||||
a : A | A | A B ;
|
||||
|
||||
After matching input A, we reach the stop state for rule A, state 1.
|
||||
State 8 is the state right before B. Clearly alternatives 1 and 2
|
||||
conflict and no amount of further lookahead will separate the two.
|
||||
However, alternative 3 will be able to continue and so we do not stop
|
||||
working on this state. In the previous example, we're concerned with
|
||||
states associated with the conflicting alternatives. Here alt 3 is not
|
||||
associated with the conflicting configs, but since we can continue
|
||||
looking for input reasonably, don't declare the state done.
|
||||
|
||||
PURE SLL PARSING
|
||||
|
||||
To handle pure SLL parsing, all we have to do is make sure that we
|
||||
combine stack contexts for configurations that differ only by semantic
|
||||
predicate. From there, we can do the usual SLL termination heuristic.
|
||||
|
||||
PREDICATES IN SLL+LL PARSING
|
||||
|
||||
SLL decisions don't evaluate predicates until after they reach DFA
|
||||
stop states because they need to create the DFA cache that
|
||||
works in all (semantic) situations. (In contrast, full LL
|
||||
evaluates predicates collected during start state computation
|
||||
so it can ignore predicates thereafter.) This means that SLL
|
||||
termination detection can totally ignore semantic predicates.
|
||||
|
||||
Of course, implementation-wise, ATNConfigSets combine stack
|
||||
contexts but not semantic predicate contexts so we might see
|
||||
two configs like this:
|
||||
|
||||
(s, 1, x, {}), (s, 1, x', {p})
|
||||
|
||||
Before testing these configurations against others, we have
|
||||
to merge x and x' (w/o modifying the existing configs). For
|
||||
example, we test (x+x')==x'' when looking for conflicts in
|
||||
the following configs.
|
||||
|
||||
(s, 1, x, {}), (s, 1, x', {p}), (s, 2, x'', {})
|
||||
|
||||
If the configuration set has predicates, which we can test
|
||||
quickly, this algorithm makes a copy of the configs and
|
||||
strip out all of the predicates so that a standard
|
||||
ATNConfigSet will merge everything ignoring
|
||||
predicates.
|
||||
*/
|
||||
public static boolean hasSLLConflictTerminatingPrediction(PredictionMode mode, @NotNull ATNConfigSet configs) {
|
||||
// pure SLL mode parsing
|
||||
if ( mode == PredictionMode.SLL ) {
|
||||
// Don't bother with combining configs from different semantic
|
||||
// contexts if we can fail over to full LL; costs more time
|
||||
// since we'll often fail over anyway.
|
||||
if ( configs.hasSemanticContext ) {
|
||||
// dup configs, tossing out semantic predicates
|
||||
ATNConfigSet dup = new ATNConfigSet();
|
||||
for (ATNConfig c : configs) {
|
||||
c = new ATNConfig(c,SemanticContext.NONE);
|
||||
dup.add(c);
|
||||
}
|
||||
configs = dup;
|
||||
}
|
||||
// now we have combined contexts for configs with dissimilar preds
|
||||
}
|
||||
|
||||
// pure SLL or combined SLL+LL mode parsing
|
||||
|
||||
Collection<BitSet> altsets = getConflictingAltSubsets(configs);
|
||||
boolean heuristic =
|
||||
hasConflictingAltSet(altsets) && !hasStateAssociatedWithOneAlt(configs);
|
||||
return heuristic;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Full LL prediction termination.
|
||||
|
||||
Can we stop looking ahead during ATN simulation or is there some
|
||||
uncertainty as to which alternative we will ultimately pick, after
|
||||
consuming more input? Even if there are partial conflicts, we might
|
||||
know that everything is going to resolve to the same minimum
|
||||
alt. That means we can stop since no more lookahead will change that
|
||||
fact. On the other hand, there might be multiple conflicts that
|
||||
resolve to different minimums. That means we need more look ahead to
|
||||
decide which of those alternatives we should predict.
|
||||
|
||||
The basic idea is to split the set of configurations, C, into
|
||||
conflicting (s, _, ctx, _) subsets and singleton subsets with
|
||||
non-conflicting configurations. Two config's conflict if they have
|
||||
identical state and rule stack contexts but different alternative
|
||||
numbers: (s, i, ctx, _), (s, j, ctx, _) for i!=j.
|
||||
|
||||
Reduce these config subsets to the set of possible alternatives. You
|
||||
can compute the alternative subsets in one go as follows:
|
||||
|
||||
A_s,ctx = {i | (s, i, ctx, _) in C holding s, ctx fixed}
|
||||
|
||||
Or in pseudo-code:
|
||||
|
||||
for c in C:
|
||||
map[c] U= c.alt # map hash/equals uses s and x, not alt and not pred
|
||||
|
||||
Then map.values is the set of A_s,ctx sets.
|
||||
|
||||
If |A_s,ctx|=1 then there is no conflict associated with s and ctx.
|
||||
|
||||
Reduce the subsets to singletons by choosing a minimum of each subset.
|
||||
If the union of these alternatives sets is a singleton, then no amount
|
||||
of more lookahead will help us. We will always pick that
|
||||
alternative. If, however, there is more than one alternative, then we
|
||||
are uncertain which alt to predict and must continue looking for
|
||||
resolution. We may or may not discover an ambiguity in the future,
|
||||
even if there are no conflicting subsets this round.
|
||||
|
||||
The biggest sin is to terminate early because it means we've made a
|
||||
decision but were uncertain as to the eventual outcome. We haven't
|
||||
used enough lookahead. On the other hand, announcing a conflict too
|
||||
late is no big deal; you will still have the conflict. It's just
|
||||
inefficient. It might even look until the end of file.
|
||||
|
||||
Semantic predicates for full LL aren't involved in this decision
|
||||
because the predicates are evaluated during start state computation.
|
||||
This set of configurations was derived from the initial subset with
|
||||
configurations holding false predicate stripped out.
|
||||
|
||||
CONFLICTING CONFIGS
|
||||
|
||||
Two configurations, (s, i, x) and (s, j, x'), conflict when i!=j but
|
||||
x = x'. Because we merge all (s, i, _) configurations together, that
|
||||
means that there are at most n configurations associated with state s
|
||||
for n possible alternatives in the decision. The merged stacks
|
||||
complicate the comparison of config contexts, x and x'. Sam checks to
|
||||
see if one is a subset of the other by calling merge and checking to
|
||||
see if the merged result is either x or x'. If the x associated with
|
||||
lowest alternative i is the superset, then i is the only possible
|
||||
prediction since the others resolve to min i as well. If, however, x
|
||||
is associated with j>i then at least one stack configuration for j is
|
||||
not in conflict with alt i. The algorithm should keep going, looking
|
||||
for more lookahead due to the uncertainty.
|
||||
|
||||
For simplicity, I'm doing a equality check between x and x' that lets
|
||||
the algorithm continue to consume lookahead longer than necessary.
|
||||
The reason I like the equality is of course the simplicity but also
|
||||
because that is the test you need to detect the alternatives that are
|
||||
actually in conflict.
|
||||
|
||||
CONTINUE/STOP RULE
|
||||
|
||||
Continue if union of resolved alt sets from nonconflicting and
|
||||
conflicting alt subsets has more than one alt. We are uncertain about
|
||||
which alternative to predict.
|
||||
|
||||
The complete set of alternatives, [i for (_,i,_)], tells us
|
||||
which alternatives are still in the running for the amount of input
|
||||
we've consumed at this point. The conflicting sets let us strip
|
||||
away configurations that won't lead to more states (because we
|
||||
resolve conflicts to the configuration with a minimum alternate for
|
||||
given conflicting set.)
|
||||
|
||||
CASES:
|
||||
|
||||
* no conflicts & > 1 alt in set => continue
|
||||
|
||||
* (s, 1, x), (s, 2, x), (s, 3, z)
|
||||
(s', 1, y), (s', 2, y)
|
||||
yields nonconflicting set {3} U conflicting sets min({1,2}) U min({1,2}) = {1,3}
|
||||
=> continue
|
||||
|
||||
* (s, 1, x), (s, 2, x),
|
||||
(s', 1, y), (s', 2, y)
|
||||
(s'', 1, z)
|
||||
yields nonconflicting set {1} U conflicting sets min({1,2}) U min({1,2}) = {1}
|
||||
=> stop and predict 1
|
||||
|
||||
* (s, 1, x), (s, 2, x),
|
||||
(s', 1, y), (s', 2, y)
|
||||
yields conflicting, reduced sets {1} U {1} = {1}
|
||||
=> stop and predict 1, can announce ambiguity {1,2}
|
||||
|
||||
* (s, 1, x), (s, 2, x)
|
||||
(s', 2, y), (s', 3, y)
|
||||
yields conflicting, reduced sets {1} U {2} = {1,2}
|
||||
=> continue
|
||||
|
||||
* (s, 1, x), (s, 2, x)
|
||||
(s', 3, y), (s', 4, y)
|
||||
yields conflicting, reduced sets {1} U {3} = {1,3}
|
||||
=> continue
|
||||
|
||||
EXACT AMBIGUITY DETECTION
|
||||
|
||||
If all states report the same conflicting alt set, then we know we
|
||||
have the real ambiguity set:
|
||||
|
||||
|A_i|>1 and A_i = A_j for all i, j.
|
||||
|
||||
In other words, we continue examining lookahead until all A_i have
|
||||
more than one alt and all A_i are the same. If A={{1,2}, {1,3}}, then
|
||||
regular LL prediction would terminate because the resolved set is
|
||||
{1}. To determine what the real ambiguity is, we have to know whether
|
||||
the ambiguity is between one and two or one and three so we keep
|
||||
going. We can only stop prediction when we need exact ambiguity
|
||||
detection when the sets look like A={{1,2}} or {{1,2},{1,2}} etc...
|
||||
*/
|
||||
public static int resolvesToJustOneViableAlt(Collection<BitSet> altsets) {
|
||||
return getSingleViableAlt(altsets);
|
||||
}
|
||||
|
||||
public static boolean allSubsetsConflict(Collection<BitSet> altsets) {
|
||||
return !hasNonConflictingAltSet(altsets);
|
||||
}
|
||||
|
||||
/** return (there exists len(A_i)==1 for some A_i in altsets A) */
|
||||
public static boolean hasNonConflictingAltSet(Collection<BitSet> altsets) {
|
||||
for (BitSet alts : altsets) {
|
||||
if ( alts.cardinality()==1 ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** return (there exists len(A_i)>1 for some A_i in altsets A) */
|
||||
public static boolean hasConflictingAltSet(Collection<BitSet> altsets) {
|
||||
for (BitSet alts : altsets) {
|
||||
if ( alts.cardinality()>1 ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static boolean allSubsetsEqual(Collection<BitSet> altsets) {
|
||||
Iterator<BitSet> it = altsets.iterator();
|
||||
BitSet first = it.next();
|
||||
while ( it.hasNext() ) {
|
||||
BitSet next = it.next();
|
||||
if ( !next.equals(first) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public static int getUniqueAlt(Collection<BitSet> altsets) {
|
||||
BitSet all = getAlts(altsets);
|
||||
if ( all.cardinality()==1 ) return all.nextSetBit(0);
|
||||
return ATN.INVALID_ALT_NUMBER;
|
||||
}
|
||||
|
||||
public static BitSet getAlts(Collection<BitSet> altsets) {
|
||||
BitSet all = new BitSet();
|
||||
for (BitSet alts : altsets) {
|
||||
all.or(alts);
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
/**
|
||||
* This function gets the conflicting alt subsets from a configuration set.
|
||||
* for c in configs:
|
||||
* map[c] U= c.alt # map hash/equals uses s and x, not alt and not pred
|
||||
*/
|
||||
public static Collection<BitSet> getConflictingAltSubsets(ATNConfigSet configs) {
|
||||
AltAndContextMap configToAlts = new AltAndContextMap();
|
||||
for (ATNConfig c : configs) {
|
||||
BitSet alts = configToAlts.get(c);
|
||||
if ( alts==null ) {
|
||||
alts = new BitSet();
|
||||
configToAlts.put(c, alts);
|
||||
}
|
||||
alts.set(c.alt);
|
||||
}
|
||||
return configToAlts.values();
|
||||
}
|
||||
|
||||
/** Get a map from state to alt subset from a configuration set.
|
||||
* for c in configs:
|
||||
* map[c.state] U= c.alt
|
||||
*/
|
||||
public static Map<ATNState, BitSet> getStateToAltMap(ATNConfigSet configs) {
|
||||
Map<ATNState, BitSet> m = new HashMap<ATNState, BitSet>();
|
||||
for (ATNConfig c : configs) {
|
||||
BitSet alts = m.get(c.state);
|
||||
if ( alts==null ) {
|
||||
alts = new BitSet();
|
||||
m.put(c.state, alts);
|
||||
}
|
||||
alts.set(c.alt);
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
public static boolean hasStateAssociatedWithOneAlt(ATNConfigSet configs) {
|
||||
Map<ATNState, BitSet> x = getStateToAltMap(configs);
|
||||
for (BitSet alts : x.values()) {
|
||||
if ( alts.cardinality()==1 ) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public static int getSingleViableAlt(Collection<BitSet> altsets) {
|
||||
BitSet viableAlts = new BitSet();
|
||||
for (BitSet alts : altsets) {
|
||||
int minAlt = alts.nextSetBit(0);
|
||||
viableAlts.set(minAlt);
|
||||
if ( viableAlts.cardinality()>1 ) { // more than 1 viable alt
|
||||
return ATN.INVALID_ALT_NUMBER;
|
||||
}
|
||||
}
|
||||
return viableAlts.nextSetBit(0);
|
||||
}
|
||||
|
||||
}
|
|
@ -38,6 +38,7 @@ import org.antlr.v4.runtime.Parser;
|
|||
import org.antlr.v4.runtime.ParserRuleContext;
|
||||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.TokenStream;
|
||||
import org.antlr.v4.runtime.atn.PredictionMode;
|
||||
|
||||
import javax.print.PrintException;
|
||||
import java.io.FileInputStream;
|
||||
|
@ -224,7 +225,7 @@ public class TestRig {
|
|||
}
|
||||
|
||||
if ( SLL ) {
|
||||
parser.getInterpreter().setSLL(true);
|
||||
parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
|
||||
}
|
||||
|
||||
parser.setTokenStream(tokens);
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
abc
|
|
@ -1,12 +1,5 @@
|
|||
grammar T;
|
||||
s : expr[0] ;
|
||||
|
||||
expr[int _p]
|
||||
: ID
|
||||
( {5 >= $_p}? '*' expr[6]
|
||||
| {4 >= $_p}? '+' expr[5]
|
||||
)*
|
||||
;
|
||||
|
||||
ID : [a-zA-Z]+ ; // match identifiers
|
||||
WS : [ \t\r\n]+ -> skip ; // toss out whitespace
|
||||
s@after {dumpDFA();}
|
||||
: ID | ID {;} ;
|
||||
ID : 'a'..'z'+ ;
|
||||
WS : (' '|'\t'|'\n')+ {skip();} ;
|
||||
|
|
|
@ -6,6 +6,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
|
|||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.atn.LexerATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.ParserATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.PredictionMode;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
@ -127,7 +128,7 @@ class TestJava {
|
|||
parser.setTokenStream(tokens);
|
||||
|
||||
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
|
||||
if ( SLL ) parser.getInterpreter().setSLL(true);
|
||||
if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
|
||||
// start parsing at the compilationUnit rule
|
||||
ParserRuleContext<Token> tree = parser.compilationUnit();
|
||||
if ( showTree ) tree.inspect(parser);
|
||||
|
|
|
@ -36,6 +36,7 @@ import org.antlr.v4.runtime.ParserRuleContext;
|
|||
import org.antlr.v4.runtime.Token;
|
||||
import org.antlr.v4.runtime.atn.LexerATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.ParserATNSimulator;
|
||||
import org.antlr.v4.runtime.atn.PredictionMode;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.ArrayList;
|
||||
|
@ -197,7 +198,7 @@ class TestJavaLR {
|
|||
System.out.println(ParserATNSimulator.predict_calls +" parser predict calls");
|
||||
System.out.println(ParserATNSimulator.retry_with_context +" retry_with_context after SLL conflict");
|
||||
System.out.println(ParserATNSimulator.retry_with_context_indicates_no_conflict +" retry sees no conflict");
|
||||
System.out.println(ParserATNSimulator.retry_with_context_predicts_same_as_alt +" retry predicts same alt as resolving conflict");
|
||||
System.out.println(ParserATNSimulator.retry_with_context_predicts_same_alt +" retry predicts same alt as resolving conflict");
|
||||
System.out.println(ParserATNSimulator.retry_with_context_from_dfa +" retry from DFA");
|
||||
}
|
||||
|
||||
|
@ -260,7 +261,7 @@ class TestJavaLR {
|
|||
JavaLRParser parser = new JavaLRParser(tokens);
|
||||
if ( diag ) parser.addErrorListener(new DiagnosticErrorListener());
|
||||
if ( bail ) parser.setErrorHandler(new BailErrorStrategy());
|
||||
if ( SLL ) parser.getInterpreter().setSLL(true);
|
||||
if ( SLL ) parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
|
||||
|
||||
// start parsing at the compilationUnit rule
|
||||
ParserRuleContext<Token> t = parser.compilationUnit();
|
||||
|
|
|
@ -411,7 +411,7 @@ public class TestATNParserPrediction extends BaseTest {
|
|||
checkDFAConstruction(lg, g, decision, inputs, dfa);
|
||||
}
|
||||
|
||||
@Test public void testAmbigDef() throws Exception {
|
||||
@Test public void testContinuePrediction() throws Exception {
|
||||
// Sam found prev def of ambiguity was too restrictive.
|
||||
// E.g., (13, 1, []), (13, 2, []), (12, 2, []) should not
|
||||
// be declared ambig since (12, 2, []) can take us to
|
||||
|
@ -429,45 +429,23 @@ public class TestATNParserPrediction extends BaseTest {
|
|||
int decision = 1;
|
||||
checkPredictedAlt(lg, g, decision, "a;", 1);
|
||||
checkPredictedAlt(lg, g, decision, "ab;", 2);
|
||||
}
|
||||
|
||||
// After matching these inputs for decision, what is DFA after each prediction?
|
||||
// String[] inputs = {
|
||||
// "34a",
|
||||
// "34ab",
|
||||
// "((34))a",
|
||||
// "((34))ab",
|
||||
// };
|
||||
// String[] dfa = {
|
||||
// "s0-INT->s1\n" +
|
||||
// "s1-'a'->s2\n" +
|
||||
// "s2-EOF->:s3=>1\n",
|
||||
//
|
||||
// "s0-INT->s1\n" +
|
||||
// "s1-'a'->s2\n" +
|
||||
// "s2-EOF->:s3=>1\n" +
|
||||
// "s2-'b'->:s4=>2\n",
|
||||
//
|
||||
// "s0-'('->s5\n" +
|
||||
// "s0-INT->s1\n" +
|
||||
// "s1-'a'->s2\n" +
|
||||
// "s2-EOF->:s3=>1\n" +
|
||||
// "s2-'b'->:s4=>2\n" +
|
||||
// "s5-'('->s6\n" +
|
||||
// "s6-INT->s7\n" +
|
||||
// "s7-')'->s8\n" +
|
||||
// "s8-')'->s1\n",
|
||||
//
|
||||
// "s0-'('->s5\n" +
|
||||
// "s0-INT->s1\n" +
|
||||
// "s1-'a'->s2\n" +
|
||||
// "s2-EOF->:s3=>1\n" +
|
||||
// "s2-'b'->:s4=>2\n" +
|
||||
// "s5-'('->s6\n" +
|
||||
// "s6-INT->s7\n" +
|
||||
// "s7-')'->s8\n" +
|
||||
// "s8-')'->s1\n",
|
||||
// };
|
||||
// checkDFAConstruction(lg, g, decision, inputs, dfa);
|
||||
/**
 * Prediction test for a rule whose first two alternatives are identical:
 * {@code a : ID | ID | ID SEMI}. For decision 0, input "a" is expected to
 * predict alt 1 and input "a;" is expected to predict alt 3.
 * NOTE(review): checkPredictedAlt is defined outside this view; presumably it
 * runs prediction for the given decision on the input and asserts the chosen
 * alternative -- confirm against its definition.
 */
@Test public void testContinuePrediction2() throws Exception {
|
||||
// ID is ambig for first two alts, but ID SEMI lets us move forward with alt 3
|
||||
LexerGrammar lg = new LexerGrammar(
|
||||
"lexer grammar L;\n" +
|
||||
"ID : 'a'..'z' ;\n" + // one char
|
||||
"SEMI : ';' ;\n"+
|
||||
"INT : '0'..'9'+ ;\n"
|
||||
);
|
||||
Grammar g = new Grammar(
|
||||
"parser grammar T;\n"+
|
||||
"tokens {ID,SEMI,INT}\n" +
|
||||
"a : ID | ID | ID SEMI ;\n");
|
||||
int decision = 0;
|
||||
checkPredictedAlt(lg, g, decision, "a", 1);
|
||||
checkPredictedAlt(lg, g, decision, "a;", 3);
|
||||
}
|
||||
|
||||
/** first check that the ATN predicts right alt.
|
||||
|
|
|
@ -41,7 +41,7 @@ import org.junit.Test;
|
|||
|
||||
*/
|
||||
public class TestFullContextParsing extends BaseTest {
|
||||
@Test public void testAmbigYieldsNonCtxSensitiveDFA() {
|
||||
@Test public void testAmbigYieldsCtxSensitiveDFA() {
|
||||
String grammar =
|
||||
"grammar T;\n"+
|
||||
"s" +
|
||||
|
@ -53,9 +53,9 @@ public class TestFullContextParsing extends BaseTest {
|
|||
"abc", true);
|
||||
String expecting =
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->:s1=>1\n"; // not ctx sensitive
|
||||
"s0-ID->s1^\n"; // ctx sensitive
|
||||
assertEquals(expecting, result);
|
||||
assertEquals("line 1:0 reportAmbiguity d=0: ambigAlts={1..2}, input='abc'\n",
|
||||
assertEquals("line 1:0 reportAttemptingFullContext d=0, input='abc'\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
@ -122,10 +122,11 @@ public class TestFullContextParsing extends BaseTest {
|
|||
String grammar =
|
||||
"grammar T;\n"+
|
||||
"s" +
|
||||
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
"@after {dumpDFA();}\n" +
|
||||
" : '{' stat* '}'" +
|
||||
" ;\n" +
|
||||
"stat: 'if' ID 'then' stat ('else' 'foo')?\n" +
|
||||
"stat: 'if' ID 'then' stat ('else' ID)?\n" +
|
||||
" | 'return'\n" +
|
||||
" ;" +
|
||||
"ID : 'a'..'z'+ ;\n"+
|
||||
|
@ -139,19 +140,6 @@ public class TestFullContextParsing extends BaseTest {
|
|||
assertEquals(expecting, result);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
"{ if x then if y then return else foo }";
|
||||
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
expecting =
|
||||
"Decision 1:\n" +
|
||||
"s0-'else'->s1^\n" +
|
||||
"s0-'}'->:s2=>2\n";
|
||||
assertEquals(expecting, result);
|
||||
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:38 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input = "{ if x then return else foo }";
|
||||
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
|
@ -169,15 +157,35 @@ public class TestFullContextParsing extends BaseTest {
|
|||
"line 1:19 reportContextSensitivity d=1, input='else'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input = "{ if x then return else foo }";
|
||||
input =
|
||||
"{ if x then if y then return else foo }";
|
||||
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
expecting =
|
||||
"Decision 1:\n" +
|
||||
"s0-'else'->s1^\n" +
|
||||
"s0-'}'->:s2=>2\n";
|
||||
assertEquals(expecting, result);
|
||||
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:38 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
// should not be ambiguous because the second 'else bar' clearly
|
||||
// indicates that the first else should match to the innermost if.
|
||||
// LL_EXACT_AMBIG_DETECTION makes us keep going to resolve
|
||||
|
||||
input =
|
||||
"{ if x then if y then return else foo else bar }";
|
||||
result = execParser("T.g4", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
expecting =
|
||||
"Decision 1:\n" +
|
||||
"s0-'else'->s1^\n";
|
||||
assertEquals(expecting, result);
|
||||
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:19 reportContextSensitivity d=1, input='else'\n",
|
||||
assertEquals("line 1:29 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:38 reportContextSensitivity d=1, input='elsefooelse'\n" +
|
||||
"line 1:38 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:38 reportContextSensitivity d=1, input='else'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
|
@ -193,7 +201,7 @@ public class TestFullContextParsing extends BaseTest {
|
|||
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:19 reportContextSensitivity d=1, input='else'\n" +
|
||||
"line 2:27 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 2:36 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
|
||||
"line 2:36 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
|
@ -209,7 +217,7 @@ public class TestFullContextParsing extends BaseTest {
|
|||
assertEquals("line 1:19 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 1:19 reportContextSensitivity d=1, input='else'\n" +
|
||||
"line 2:27 reportAttemptingFullContext d=1, input='else'\n" +
|
||||
"line 2:36 reportAmbiguity d=1: ambigAlts={1..2}, input='elsefoo}'\n",
|
||||
"line 2:36 reportAmbiguity d=1: ambigAlts={1, 2}, input='elsefoo}'\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
@ -221,7 +229,9 @@ public class TestFullContextParsing extends BaseTest {
|
|||
public void testLoopsSimulateTailRecursion() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"prog: expr_or_assign*;\n" +
|
||||
"prog\n" +
|
||||
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
" : expr_or_assign*;\n" +
|
||||
"expr_or_assign\n" +
|
||||
" : expr '++' {System.out.println(\"fail.\");}\n" +
|
||||
" | expr {System.out.println(\"pass: \"+$expr.text);}\n" +
|
||||
|
@ -236,11 +246,77 @@ public class TestFullContextParsing extends BaseTest {
|
|||
"";
|
||||
|
||||
String found = execParser("T.g4", grammar, "TParser", "TLexer", "prog", "a(i)<-x", true);
|
||||
assertEquals("pass.\n", found);
|
||||
assertEquals("pass: a(i)<-x\n", found);
|
||||
|
||||
String expecting =
|
||||
"line 1:3 reportAttemptingFullContext d=3, input='a(i)'\n" +
|
||||
"line 1:7 reportAmbiguity d=3: ambigAlts={2..3}, input='a(i)<-x'\n";
|
||||
"line 1:7 reportAmbiguity d=3: ambigAlts={2, 3}, input='a(i)<-x'\n";
|
||||
assertEquals(expecting, this.stderrDuringParse);
|
||||
}
|
||||
|
||||
/**
 * Parses "a@" with a grammar whose start rule {@code prog} has the two
 * alternatives {@code expr expr} and {@code expr}; the rule's init action
 * switches the interpreter to PredictionMode.LL_EXACT_AMBIG_DETECTION.
 * The test expects "alt 1" on stdout and, on stderr, a full-context attempt
 * followed by an ambiguity report for decision 0 and a full-context attempt
 * followed by a context-sensitivity report for decision 1.
 * NOTE(review): the trailing {@code true} passed to execParser presumably
 * enables diagnostic reporting -- confirm against BaseTest.execParser.
 */
@Test
|
||||
public void testAmbiguityNoLoop() throws Exception {
|
||||
// simpler version of testLoopsSimulateTailRecursion, no loops
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"prog\n" +
|
||||
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
" : expr expr {System.out.println(\"alt 1\");}\n" +
|
||||
" | expr\n" +
|
||||
" ;\n" +
|
||||
"expr: '@'\n" +
|
||||
" | ID '@'\n" +
|
||||
" | ID\n" +
|
||||
" ;\n" +
|
||||
"ID : [a-z]+ ;\n" +
|
||||
// NOTE: the single-backslash escapes below are processed by Java, so the grammar
// text receives raw CR/LF/TAB characters inside the set; testExprAmbiguity in
// this file passes the escapes through to the grammar with double backslashes.
"WS : [ \r\n\t]+ -> skip ;\n";
|
||||
|
||||
String found = execParser("T.g4", grammar, "TParser", "TLexer", "prog", "a@", true);
|
||||
assertEquals("alt 1\n", found);
|
||||
|
||||
String expecting =
|
||||
"line 1:2 reportAttemptingFullContext d=0, input='a@'\n" +
|
||||
"line 1:2 reportAmbiguity d=0: ambigAlts={1, 2}, input='a@'\n" +
|
||||
"line 1:2 reportAttemptingFullContext d=1, input='a@'\n" +
|
||||
"line 1:2 reportContextSensitivity d=1, input='a@'\n";
|
||||
assertEquals(expecting, this.stderrDuringParse);
|
||||
}
|
||||
|
||||
/**
 * Runs the same grammar on two inputs with the interpreter set to
 * PredictionMode.LL_EXACT_AMBIG_DETECTION by the start rule's init action.
 * The {@code expr} rule takes an int argument {@code _p} and guards its
 * '*' and '+' loop alternatives with predicates comparing constants to it.
 * <ul>
 * <li>"a+b": expects the tree "(expr a + (expr b))" on stdout and one
 *     full-context attempt plus one context-sensitivity report on stderr.</li>
 * <li>"a+b*c": expects "(expr a + (expr b * (expr c)))" and, in addition to
 *     the reports above, a second full-context attempt that ends in a
 *     reportAmbiguity for alts {1, 2} on input '*c'.</li>
 * </ul>
 * NOTE(review): the trailing {@code true} passed to execParser presumably
 * enables diagnostic reporting -- confirm against BaseTest.execParser.
 */
@Test
|
||||
public void testExprAmbiguity() throws Exception {
|
||||
// translated left-recursive expr rule to test ambig detection
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s\n" +
|
||||
"@init {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
" : expr[0] {System.out.println($expr.ctx.toStringTree(this));} ;\n" +
|
||||
"\n" +
|
||||
"expr[int _p]\n" +
|
||||
" : ID\n" +
|
||||
" ( {5 >= $_p}? '*' expr[6]\n" +
|
||||
" | {4 >= $_p}? '+' expr[5]\n" +
|
||||
" )*\n" +
|
||||
" ;\n" +
|
||||
"\n" +
|
||||
"ID : [a-zA-Z]+ ; // match identifiers\n" +
|
||||
"WS : [ \\t\\r\\n]+ -> skip ; // toss out whitespace\n";
|
||||
|
||||
String found = execParser("T.g4", grammar, "TParser", "TLexer", "s", "a+b", true);
|
||||
assertEquals("(expr a + (expr b))\n", found);
|
||||
|
||||
String expecting =
|
||||
"line 1:1 reportAttemptingFullContext d=1, input='+'\n" +
|
||||
"line 1:1 reportContextSensitivity d=1, input='+'\n";
|
||||
assertEquals(expecting, this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g4", grammar, "TParser", "TLexer", "s", "a+b*c", true);
|
||||
assertEquals("(expr a + (expr b * (expr c)))\n", found);
|
||||
|
||||
expecting =
|
||||
"line 1:1 reportAttemptingFullContext d=1, input='+'\n" +
|
||||
"line 1:1 reportContextSensitivity d=1, input='+'\n" +
|
||||
"line 1:3 reportAttemptingFullContext d=1, input='*'\n" +
|
||||
"line 1:5 reportAmbiguity d=1: ambigAlts={1, 2}, input='*c'\n";
|
||||
assertEquals(expecting, this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
|
|
@ -330,8 +330,7 @@ public class TestLeftRecursion extends BaseTest {
|
|||
result = execParser("Expr.g4", grammar, "ExprParser", "ExprLexer", "prog", "a+b*2\n", true);
|
||||
assertEquals("line 1:1 reportAttemptingFullContext d=3, input='+'\n" +
|
||||
"line 1:1 reportContextSensitivity d=3, input='+'\n" +
|
||||
"line 1:3 reportAttemptingFullContext d=3, input='*'\n" +
|
||||
"line 1:3 reportAmbiguity d=3: ambigAlts={1..2}, input='*'\n",
|
||||
"line 1:3 reportAttemptingFullContext d=3, input='*'\n",
|
||||
stderrDuringParse);
|
||||
|
||||
result = execParser("Expr.g4", grammar, "ExprParser", "ExprLexer", "prog", "(1+2)*3\n", true);
|
||||
|
|
|
@ -1,28 +0,0 @@
|
|||
package org.antlr.v4.test;
|
||||
|
||||
import org.antlr.runtime.RecognitionException;
|
||||
import org.antlr.v4.Tool;
|
||||
import org.antlr.v4.codegen.CodeGenerator;
|
||||
import org.antlr.v4.tool.LexerGrammar;
|
||||
import org.junit.Test;
|
||||
import org.stringtemplate.v4.ST;
|
||||
|
||||
/**
 * Checks how the {@code $type} attribute in a lexer action is translated by
 * code generation. The action text is wrapped in '#' markers so the
 * translated snippet can be cut out of the rendered lexer source.
 */
public class TestLexerAttributes extends BaseTest {
|
||||
@Test
|
||||
public void testSetType() throws RecognitionException {
|
||||
LexerGrammar g = new LexerGrammar(
|
||||
"lexer grammar T;\n" +
|
||||
"A : 'a' {#$type=101;#} ;\n"
|
||||
);
|
||||
Tool antlr = new Tool();
|
||||
antlr.process(g,false);
|
||||
CodeGenerator gen = new CodeGenerator(g);
|
||||
ST outputFileST = gen.generateLexer();
|
||||
String output = outputFileST.render();
|
||||
// Take everything between the first and the last '#' in the rendered output.
int start = output.indexOf('#');
|
||||
int end = output.lastIndexOf('#');
|
||||
String snippet = output.substring(start+1,end);
|
||||
assertEquals("_type = 101;", snippet);
|
||||
}
|
||||
|
||||
}
|
|
@ -35,6 +35,24 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
// TEST VALIDATING PREDS
|
||||
|
||||
/**
 * Parses the input "x" with a rule whose ID alternative is guarded by
 * {@code {false}?} and whose INT alternative is guarded by {@code {true}?}.
 * The test expects the error "line 1:0 no viable alternative at input 'x'"
 * on stderr. The stdout captured in {@code found} is not asserted.
 * NOTE(review): the trailing {@code false} passed to execParser presumably
 * disables diagnostic reporting -- confirm against BaseTest.execParser.
 */
@Test public void testSimpleValidate() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : a ;\n" +
|
||||
"a : {false}? ID {System.out.println(\"alt 1\");}\n" +
|
||||
" | {true}? INT {System.out.println(\"alt 2\");}\n" +
|
||||
" ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = execParser("T.g4", grammar, "TParser", "TLexer", "s",
|
||||
"x", false);
|
||||
|
||||
String expecting = "line 1:0 no viable alternative at input 'x'\n";
|
||||
assertEquals(expecting, stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testSimpleValidate2() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : a a a;\n" +
|
||||
|
@ -129,16 +147,14 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
}
|
||||
|
||||
@Test public void test2UnpredicatedAlts() throws Exception {
|
||||
// We have n-2 predicates for n alternatives. We have no choice
|
||||
// but to pick the first unpredicated alternative if the n-2
|
||||
// predicates fail.
|
||||
// this should call reportInsufficientPredicates()
|
||||
// We have n-2 predicates for n alternatives. pick first alt
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"@header {" +
|
||||
"import java.util.*;" +
|
||||
"}" +
|
||||
"s : a ';' a;\n" + // do 2x: once in ATN, next in DFA
|
||||
"s : {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
" a ';' a;\n" + // do 2x: once in ATN, next in DFA
|
||||
"a : ID {System.out.println(\"alt 1\");}\n" +
|
||||
" | ID {System.out.println(\"alt 2\");}\n" +
|
||||
" | {false}? ID {System.out.println(\"alt 3\");}\n" +
|
||||
|
@ -154,23 +170,20 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
"alt 1\n";
|
||||
assertEquals(expecting, found);
|
||||
assertEquals("line 1:0 reportAttemptingFullContext d=0, input='x'\n" +
|
||||
"line 1:0 reportAmbiguity d=0: ambigAlts={1..2}, input='x'\n" +
|
||||
"line 1:0 reportAmbiguity d=0: ambigAlts={1, 2}, input='x'\n" +
|
||||
"line 1:3 reportAttemptingFullContext d=0, input='y'\n" +
|
||||
"line 1:3 reportAmbiguity d=0: ambigAlts={1..2}, input='y'\n",
|
||||
"line 1:3 reportAmbiguity d=0: ambigAlts={1, 2}, input='y'\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void test2UnpredicatedAltsAndOneOrthogonalAlt() throws Exception {
|
||||
// We have n-2 predicates for n alternatives. We have no choice
|
||||
// but to pick the first unpredicated alternative if the n-2
|
||||
// predicates fail.
|
||||
// this should call reportInsufficientPredicates()
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"@header {" +
|
||||
"import java.util.*;" +
|
||||
"}" +
|
||||
"s : a ';' a ';' a;\n" +
|
||||
"s : {_interp.setPredictionMode(PredictionMode.LL_EXACT_AMBIG_DETECTION);}\n" +
|
||||
" a ';' a ';' a;\n" +
|
||||
"a : INT {System.out.println(\"alt 1\");}\n" +
|
||||
" | ID {System.out.println(\"alt 2\");}\n" + // must pick this one for ID since pred is false
|
||||
" | ID {System.out.println(\"alt 3\");}\n" +
|
||||
|
@ -188,9 +201,9 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
"alt 2\n";
|
||||
assertEquals(expecting, found);
|
||||
assertEquals("line 1:4 reportAttemptingFullContext d=0, input='x'\n" +
|
||||
"line 1:4 reportAmbiguity d=0: ambigAlts={2..3}, input='x'\n" +
|
||||
"line 1:4 reportAmbiguity d=0: ambigAlts={2, 3}, input='x'\n" +
|
||||
"line 1:7 reportAttemptingFullContext d=0, input='y'\n" +
|
||||
"line 1:7 reportAmbiguity d=0: ambigAlts={2..3}, input='y'\n",
|
||||
"line 1:7 reportAmbiguity d=0: ambigAlts={2, 3}, input='y'\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue