Forked from jasder/antlr

almost got new ATN engine working; separated .* nongreedy tests, reorg args on reporting methods

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9627]

parent fa3483a7fc
commit 5ad1505fdb
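For orientation while reading the diff below: the ambiguity and insufficient-predicate callbacks now receive the decision DFA, so an error strategy can report which decision the problem occurred in (via dfa.decision). A minimal sketch of a strategy built against the interfaces as they appear in this change — the class name is hypothetical, it mirrors the DiagnosticErrorStrategy edited below, and the import paths are a best-effort guess for this snapshot of the runtime:

import org.antlr.v4.runtime.BaseRecognizer;
import org.antlr.v4.runtime.DefaultErrorStrategy;
import org.antlr.v4.runtime.atn.ATNConfig;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.NotNull;
import org.antlr.v4.runtime.misc.OrderedHashSet;

/** Hypothetical example; mirrors DiagnosticErrorStrategy as changed in this commit. */
public class LoggingErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol> {
    @Override
    public void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
                                DFA dfa, int startIndex, int stopIndex,
                                @NotNull IntervalSet ambigAlts,
                                @NotNull OrderedHashSet<ATNConfig> configs)
    {
        // the DFA parameter is new in this change; dfa.decision identifies the decision point
        recognizer.notifyListeners("ambiguity in decision "+dfa.decision+
                                   " over alts "+ambigAlts+", input="+
                                   recognizer.getInputString(startIndex, stopIndex));
    }
}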
@ -120,8 +120,8 @@ public interface ANTLRErrorStrategy<Symbol> {
|
|||
* full context.
|
||||
*/
|
||||
void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs);
|
||||
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs);
|
||||
|
||||
/** Called by the parser when it detects an input sequence that can be matched by two paths
|
||||
* through the grammar. The difference between this and the reportAmbiguity method lies in
|
||||
|
@ -129,9 +129,9 @@ public interface ANTLRErrorStrategy<Symbol> {
|
|||
* we can't be sure if a conflict is an ambiguity or simply a weakness in the Strong LL parsing
|
||||
* strategy. If we are parsing with full context, this method is never called.
|
||||
*/
|
||||
void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs);
|
||||
// void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
// int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
// @NotNull OrderedHashSet<ATNConfig> configs);
|
||||
|
||||
/** Called by the parser when it finds a conflict that is resolved by retrying the parse
|
||||
* with full context. This is not a warning; it simply notifies you that your grammar
|
||||
|
@ -149,6 +149,7 @@ public interface ANTLRErrorStrategy<Symbol> {
|
|||
* if the predicates fail.
|
||||
*/
|
||||
void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
@NotNull DFA dfa,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull SemanticContext[] altToPred,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs);
|
||||
|
|
|
@ -192,7 +192,8 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
|
|||
SymbolStream<Symbol> tokens = recognizer.getInputStream();
|
||||
String input;
|
||||
if (tokens instanceof TokenStream) {
|
||||
input = ((TokenStream)tokens).toString(e.startToken, e.offendingToken);
|
||||
if ( e.startToken.getType()==Token.EOF ) input = "<EOF>";
|
||||
else input = ((TokenStream)tokens).toString(e.startToken, e.offendingToken);
|
||||
} else {
|
||||
input = "<unknown input>";
|
||||
}
|
||||
|
@ -556,15 +557,8 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
|
|||
|
||||
@Override
|
||||
public void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -576,6 +570,7 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
|
|||
|
||||
@Override
|
||||
public void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
@NotNull DFA dfa,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull SemanticContext[] altToPred,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
|
|
|
@ -41,17 +41,10 @@ import java.util.Arrays;
|
|||
public class DiagnosticErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol> {
|
||||
@Override
|
||||
public void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
recognizer.notifyListeners("reportAmbiguity " + ambigAlts + ":" + configs + ", input=" +
|
||||
recognizer.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
int startIndex, int stopIndex, IntervalSet ambigAlts, OrderedHashSet<ATNConfig> configs) {
|
||||
recognizer.notifyListeners("reportConflict " + ambigAlts + ":" + configs + ", input=" +
|
||||
recognizer.notifyListeners("reportAmbiguity d="+dfa.decision + ": "+ ambigAlts + ":" + configs + ", input=" +
|
||||
recognizer.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
|
||||
|
@ -59,17 +52,18 @@ public class DiagnosticErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol
|
|||
public void reportContextSensitivity(@NotNull BaseRecognizer<Symbol> recognizer, @NotNull DFA dfa,
|
||||
int startIndex, int stopIndex, @NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
recognizer.notifyListeners("reportContextSensitivity: " + configs + ", input=" +
|
||||
recognizer.notifyListeners("reportContextSensitivity d="+dfa.decision +": "+ configs + ", input=" +
|
||||
recognizer.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
|
||||
@NotNull DFA dfa,
|
||||
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
|
||||
@NotNull SemanticContext[] altToPred,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
recognizer.notifyListeners("reportInsufficientPredicates " + ambigAlts + ":" + Arrays.toString(altToPred) +
|
||||
recognizer.notifyListeners("reportInsufficientPredicates d="+dfa.decision +": " +ambigAlts + ":" + Arrays.toString(altToPred) +
|
||||
", " + configs + ", input=" + recognizer.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
}
|
||||
|
|
|
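The DiagnosticErrorStrategy edited above turns these callbacks into parser diagnostics such as "reportAmbiguity d=0: ...". A rough usage sketch for seeing that output while testing a grammar — hedged: the generated class names are hypothetical, and setErrorHandler is assumed as the counterpart of the getErrorHandler() calls that appear later in this diff:

// Hypothetical generated classes from a grammar named T; setErrorHandler is assumed.
TParser parser = new TParser(tokens);                          // tokens: a CommonTokenStream built elsewhere
parser.setErrorHandler(new DiagnosticErrorStrategy<Token>());  // route reports through notifyListeners
parser.s();                                                    // prints lines like "reportAmbiguity d=0: ..."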
@ -37,6 +37,7 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
|
|||
*/
|
||||
public class ATNConfigSet extends OrderedHashSet<ATNConfig> {
|
||||
// TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
|
||||
// TODO: can we track conflicts as they are added to save scanning configs later?
|
||||
public int uniqueAlt;
|
||||
public IntervalSet conflictingAlts;
|
||||
public boolean hasSemanticContext;
|
||||
|
|
|
@ -536,7 +536,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
if ( retry_debug ) System.out.println("ctx empty; no need to retry");
|
||||
// no point in retrying with ctx since it's same.
|
||||
// this implies that we have a true ambiguity
|
||||
reportAmbiguity(startIndex, input.index(), ambigAlts, reach);
|
||||
reportAmbiguity(dfa, startIndex, input.index(), ambigAlts, reach);
|
||||
resolveToProperAlt(decState, ambigAlts, reach);
|
||||
return ATN.INVALID_ALT_NUMBER;
|
||||
}
|
||||
|
@ -547,7 +547,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
|
||||
dfa.conflictSet = (OrderedHashSet<ATNConfig>)reach.clone(); // most recent set with conflict
|
||||
reportConflict(startIndex, input.index(), ambigAlts, reach);
|
||||
// reportConflict(startIndex, input.index(), ambigAlts, reach);
|
||||
resolveToProperAlt(decState, ambigAlts, reach);
|
||||
return ATN.INVALID_ALT_NUMBER;
|
||||
}
|
||||
|
@ -595,7 +595,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
{
|
||||
// We need at least n-1 predicates for n ambiguous alts
|
||||
if ( tooFewPredicates(altToPred) ) {
|
||||
reportInsufficientPredicates(startIndex, input.index(),
|
||||
reportInsufficientPredicates(dfa, startIndex, input.index(),
|
||||
ambigAlts, altToPred, reach);
|
||||
}
|
||||
List<DFAState.PredPrediction> predPredictions = getPredicatePredictions(ambigAlts, altToPred);
|
||||
|
@ -776,7 +776,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
|
||||
if ( ctx_dfa.conflictSet!=null ) {
|
||||
// System.out.println("retry gives ambig for "+input.toString(startIndex, input.index()));
|
||||
reportAmbiguity(startIndex, input.index(), getAmbiguousAlts(ctx_dfa.conflictSet), ctx_dfa.conflictSet);
|
||||
reportAmbiguity(dfa, startIndex, input.index(), getAmbiguousAlts(ctx_dfa.conflictSet), ctx_dfa.conflictSet);
|
||||
}
|
||||
else {
|
||||
// System.out.println("NO ambig for "+input.toString(startIndex, input.index()));
|
||||
|
@ -1006,16 +1006,16 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
return new ATNConfig(config, t.target, newContext);
|
||||
}
|
||||
|
||||
public void reportConflict(int startIndex, int stopIndex,
|
||||
@NotNull IntervalSet alts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
if ( debug || retry_debug ) {
|
||||
System.out.println("reportConflict "+alts+":"+configs+
|
||||
", input="+parser.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
if ( parser!=null ) parser.getErrorHandler().reportConflict(parser, startIndex, stopIndex, alts, configs);
|
||||
}
|
||||
// public void reportConflict(int startIndex, int stopIndex,
|
||||
// @NotNull IntervalSet alts,
|
||||
// @NotNull OrderedHashSet<ATNConfig> configs)
|
||||
// {
|
||||
// if ( debug || retry_debug ) {
|
||||
// System.out.println("reportConflict "+alts+":"+configs+
|
||||
// ", input="+parser.getInputString(startIndex, stopIndex));
|
||||
// }
|
||||
// if ( parser!=null ) parser.getErrorHandler().reportConflict(parser, startIndex, stopIndex, alts, configs);
|
||||
// }
|
||||
|
||||
public void reportContextSensitivity(DFA dfa, OrderedHashSet<ATNConfig> configs, int startIndex, int stopIndex) {
|
||||
if ( debug || retry_debug ) {
|
||||
|
@ -1026,7 +1026,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
|
||||
/** If context sensitive parsing, we know it's ambiguity not conflict */
|
||||
public void reportAmbiguity(int startIndex, int stopIndex,
|
||||
public void reportAmbiguity(@NotNull DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull IntervalSet ambigAlts,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
{
|
||||
|
@ -1035,11 +1035,11 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
ambigAlts+":"+configs+
|
||||
", input="+parser.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, startIndex, stopIndex,
|
||||
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, dfa, startIndex, stopIndex,
|
||||
ambigAlts, configs);
|
||||
}
|
||||
|
||||
public void reportInsufficientPredicates(int startIndex, int stopIndex,
|
||||
public void reportInsufficientPredicates(@NotNull DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull IntervalSet ambigAlts,
|
||||
@NotNull SemanticContext[] altToPred,
|
||||
@NotNull OrderedHashSet<ATNConfig> configs)
|
||||
|
@ -1050,7 +1050,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
parser.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
if ( parser!=null ) {
|
||||
parser.getErrorHandler().reportInsufficientPredicates(parser, startIndex, stopIndex, ambigAlts,
|
||||
parser.getErrorHandler().reportInsufficientPredicates(parser, dfa, startIndex, stopIndex, ambigAlts,
|
||||
altToPred, configs);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -226,7 +226,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
|
||||
ATNConfigSet fullCtxSet = execATNWithFullContext(s0_closure, input, startIndex, greedy);
|
||||
if ( fullCtxSet.conflictingAlts!=null ) {
|
||||
reportAmbiguity(startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
|
||||
reportAmbiguity(dfa, startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
|
||||
ctx_alt = fullCtxSet.conflictingAlts.getMinElement();
|
||||
}
|
||||
else {
|
||||
|
@ -356,6 +356,8 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
conflict
|
||||
conflict + preds
|
||||
|
||||
TODO: greedy + those
|
||||
|
||||
*/
|
||||
public int execATN(@NotNull DFA dfa, @NotNull DFAState s0,
|
||||
@NotNull SymbolStream<Symbol> input, int startIndex,
|
||||
|
@ -377,7 +379,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
|
||||
while (true) { // while more work
|
||||
ATNConfigSet reach = computeReachSet(previous, t, greedy);
|
||||
if ( reach.size()==0 ) noViableAlt(input, outerContext, previous, startIndex);
|
||||
if ( reach==null ) throw noViableAlt(input, outerContext, previous, startIndex);
|
||||
D = addDFAEdge(dfa, previous, t, reach); // always adding edge even if to a conflict state
|
||||
int predictedAlt = getUniqueAlt(reach);
|
||||
if ( predictedAlt!=ATN.INVALID_ALT_NUMBER ) {
|
||||
|
@ -389,9 +391,9 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
D.configset.conflictingAlts = getConflictingAlts(reach);
|
||||
if ( D.configset.conflictingAlts!=null ) {
|
||||
D.isAcceptState = true; // when ambig or ctx sens or nongreedy or .* loop hitting rule stop
|
||||
if ( decState.isGreedy ) {
|
||||
if ( greedy ) {
|
||||
if ( outerContext == ParserRuleContext.EMPTY ) {
|
||||
reportAmbiguity(startIndex, input.index(), D.configset.conflictingAlts, D.configset);
|
||||
reportAmbiguity(dfa, startIndex, input.index(), D.configset.conflictingAlts, D.configset);
|
||||
resolveToMinAlt(D, D.configset.conflictingAlts);
|
||||
}
|
||||
else {
|
||||
|
@ -399,7 +401,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
|
||||
fullCtxSet = execATNWithFullContext(s0_closure, input, startIndex, greedy);
|
||||
if ( fullCtxSet.conflictingAlts!=null ) {
|
||||
reportAmbiguity(startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
|
||||
reportAmbiguity(dfa, startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
|
||||
predictedAlt = fullCtxSet.conflictingAlts.getMinElement();
|
||||
resolveToMinAlt(D, fullCtxSet.conflictingAlts);
|
||||
}
|
||||
|
@ -410,17 +412,33 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
}
|
||||
else {
|
||||
// if we reached end of rule via exit branch and decision nongreedy, we matched
|
||||
// upon ambiguity for nongreedy, default to exit branch to avoid inf loop
|
||||
// this handles case where we find ambiguity that stops DFA construction
|
||||
// before a config hits rule stop state. Was leaving prediction blank.
|
||||
int exitAlt = 2;
|
||||
ATNConfig cstop = configWithAltAtStopState(reach, exitAlt);
|
||||
if ( cstop!=null ) {
|
||||
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
|
||||
return cstop.alt;
|
||||
}
|
||||
D.prediction = exitAlt;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( !greedy ) {
|
||||
int exitAlt = 2;
|
||||
if ( predictedAlt != ATN.INVALID_ALT_NUMBER && configWithAltAtStopState(reach, 1) ) {
|
||||
if ( debug ) System.out.println("nongreedy loop but unique alt "+D.configset.uniqueAlt+" at "+reach);
|
||||
// reaches end via .* means nothing after.
|
||||
D.isAcceptState = true;
|
||||
D.prediction = predictedAlt = exitAlt;
|
||||
}
|
||||
else {// if we reached end of rule via exit branch and decision nongreedy, we matched
|
||||
if ( configWithAltAtStopState(reach, exitAlt) ) {
|
||||
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
|
||||
D.isAcceptState = true;
|
||||
D.prediction = predictedAlt = exitAlt;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
ATNConfigSet configs = D.configset;
|
||||
if ( D.isCtxSensitive ) configs = fullCtxSet;
|
||||
if ( D.isAcceptState && configs.hasSemanticContext ) {
|
||||
|
@ -428,7 +446,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
predicateDFAState(D, configs, outerContext, nalts);
|
||||
if ( tooFewPredicates(D, outerContext, nalts) ) {
|
||||
IntervalSet conflictingAlts = getConflictingAltsFromConfigSet(configs);
|
||||
reportInsufficientPredicates(startIndex, input.index(),
|
||||
reportInsufficientPredicates(dfa, startIndex, input.index(),
|
||||
conflictingAlts,
|
||||
getPredsForAmbigAlts(conflictingAlts, configs, nalts),
|
||||
configs);
|
||||
|
@ -459,6 +477,9 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
int t = input.LA(1);
|
||||
while (true) { // while more work
|
||||
ATNConfigSet reach = computeReachSet(previous, t, greedy);
|
||||
if ( reach==null ) {
|
||||
parser.notifyListeners("ERROR: how can reach be empty after doing no-ctx ATN sim?");
|
||||
}
|
||||
reach.uniqueAlt = getUniqueAlt(reach);
|
||||
if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) return reach;
|
||||
reach.conflictingAlts = getConflictingAlts(reach);
|
||||
|
@ -470,7 +491,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
|
||||
protected ATNConfigSet computeReachSet(ATNConfigSet closure, int t, boolean greedy) {
|
||||
if ( debug ) System.out.println("in reach starting closure: " + closure);
|
||||
if ( debug ) System.out.println("in computeReachSet, starting closure: " + closure);
|
||||
ATNConfigSet reach = new ATNConfigSet();
|
||||
for (ATNConfig c : closure) {
|
||||
if ( debug ) System.out.println("testing "+getTokenName(t)+" at "+c.toString());
|
||||
|
@ -484,6 +505,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
}
|
||||
}
|
||||
if ( reach.size()==0 ) return null;
|
||||
return reach;
|
||||
}
|
||||
|
||||
|
@ -651,6 +673,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
if ( !closureBusy.add(config) ) return; // avoid infinite recursion
|
||||
|
||||
if ( config.state instanceof RuleStopState ) {
|
||||
if ( !greedy ) {
|
||||
// don't see past end of a rule for any nongreedy decision
|
||||
if ( debug ) System.out.println("NONGREEDY at stop state of "+
|
||||
getRuleName(config.state.ruleIndex));
|
||||
configs.add(config);
|
||||
return;
|
||||
}
|
||||
// We hit rule end. If we have context info, use it
|
||||
if ( config.context!=null && !config.context.isEmpty() ) {
|
||||
RuleContext newContext = config.context.parent; // "pop" invoking state
|
||||
|
@ -667,13 +696,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
else {
|
||||
// else if we have no context info, just chase follow links (if greedy)
|
||||
if ( !greedy ) {
|
||||
if ( debug ) System.out.println("NONGREEDY at stop state of "+
|
||||
getRuleName(config.state.ruleIndex));
|
||||
// don't pursue past end of a rule for any nongreedy decision
|
||||
configs.add(config);
|
||||
return;
|
||||
}
|
||||
// if ( !greedy ) {
|
||||
// if ( debug ) System.out.println("NONGREEDY at stop state of "+
|
||||
// getRuleName(config.state.ruleIndex));
|
||||
// // don't pursue past end of a rule for any nongreedy decision
|
||||
// configs.add(config);
|
||||
// return;
|
||||
// }
|
||||
if ( debug ) System.out.println("FALLING off rule "+
|
||||
getRuleName(config.state.ruleIndex));
|
||||
}
|
||||
|
@ -1080,18 +1109,17 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
|
||||
@Nullable
|
||||
public ATNConfig configWithAltAtStopState(@NotNull Collection<ATNConfig> configs, int alt) {
|
||||
public boolean configWithAltAtStopState(@NotNull Collection<ATNConfig> configs, int alt) {
|
||||
for (ATNConfig c : configs) {
|
||||
if ( c.alt == alt ) {
|
||||
if ( c.state.getClass() == RuleStopState.class ) {
|
||||
return c;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
protected DFAState addDFAEdge(@NotNull DFA dfa,
|
||||
@NotNull ATNConfigSet p,
|
||||
int t,
|
||||
|
@ -1149,7 +1177,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
}
|
||||
|
||||
/** If context sensitive parsing, we know it's ambiguity not conflict */
|
||||
public void reportAmbiguity(int startIndex, int stopIndex,
|
||||
public void reportAmbiguity(@NotNull DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull IntervalSet ambigAlts,
|
||||
@NotNull ATNConfigSet configs)
|
||||
{
|
||||
|
@ -1158,11 +1186,11 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
ambigAlts+":"+configs+
|
||||
", input="+parser.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, startIndex, stopIndex,
|
||||
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, dfa, startIndex, stopIndex,
|
||||
ambigAlts, configs);
|
||||
}
|
||||
|
||||
public void reportInsufficientPredicates(int startIndex, int stopIndex,
|
||||
public void reportInsufficientPredicates(@NotNull DFA dfa, int startIndex, int stopIndex,
|
||||
@NotNull IntervalSet ambigAlts,
|
||||
@NotNull SemanticContext[] altToPred,
|
||||
@NotNull ATNConfigSet configs)
|
||||
|
@ -1173,7 +1201,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
|
|||
parser.getInputString(startIndex, stopIndex));
|
||||
}
|
||||
if ( parser!=null ) {
|
||||
parser.getErrorHandler().reportInsufficientPredicates(parser, startIndex, stopIndex, ambigAlts,
|
||||
parser.getErrorHandler().reportInsufficientPredicates(parser, dfa, startIndex, stopIndex, ambigAlts,
|
||||
altToPred, configs);
|
||||
}
|
||||
}
|
||||
|
|
|
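Before the grammar and test changes below, it may help to restate the nongreedy handling added to execATN above. A condensed sketch of that logic, not a drop-in replacement (it omits the unique-alt shortcut and the debug output):

// In a nongreedy (...)* or (...)+ decision, alt 1 re-enters the loop body and
// alt 2 is the exit branch -- the convention the code above relies on.
if ( !greedy ) {
    int exitAlt = 2;
    // if any configuration for the exit branch has reached a rule stop state,
    // the loop can end here: mark the DFA state accepting and predict the exit.
    if ( configWithAltAtStopState(reach, exitAlt) ) {
        D.isAcceptState = true;
        D.prediction = predictedAlt = exitAlt;
    }
}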
@ -1,10 +1,10 @@
|
|||
grammar T;
|
||||
@header {import java.util.*;}
|
||||
s : a ';' a;
|
||||
a : ID {System.out.println("alt 1");}
|
||||
| ID {System.out.println("alt 2");}
|
||||
| {false}? ID {System.out.println("alt 3");}
|
||||
;
|
||||
s : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;
|
||||
tag : '<' .+ '>' ;
|
||||
header : 'x' 'y' ;
|
||||
EQ : '=' ;
|
||||
COMMA : ',' ;
|
||||
ID : 'a'..'z'+ ;
|
||||
STR : '"' (options {greedy=false;}:.)* '"' ;
|
||||
INT : '0'..'9'+;
|
||||
WS : (' '|'\n') {skip();} ;
|
||||
|
|
|
@ -0,0 +1,698 @@
|
|||
/*
|
||||
[The "BSD license"]
|
||||
Copyright (c) 2011 Terence Parr
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
3. The name of the author may not be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
package org.antlr.v4.test;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestNonGreedyLoops extends BaseTest {
|
||||
@Test public void testNongreedyLoopOnEndIsNop() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : any ID EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"any : .* ;\n"+
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x", true);
|
||||
assertEquals("x\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->:s1=>2\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"34 x", true);
|
||||
assertEquals("34x\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->:s1=>2\n", found);
|
||||
assertEquals("line 1:0 extraneous input '34' expecting ID\n", this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyPlusLoopOnEndIsNop() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : any ID EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"any : .+ ;\n"+ // .+ on end of rule always gives no viable alt. can't bypass but can't match
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x", true);
|
||||
assertEquals("x\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->:s1=>2\n", found);
|
||||
assertEquals("line 1:0 no viable alternative at input 'x'\n", this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopInOtherRule() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} | b {System.out.println(\"alt 2\");} ;\n" +
|
||||
"a : .* ID ;\n"+
|
||||
"b : .* INT ;\n"+
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x", true);
|
||||
assertEquals("alt 1\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->s1\n" +
|
||||
"s1-EOF->:s2=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-ID->:s1=>2\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"34", true);
|
||||
assertEquals("alt 2\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->s1\n" +
|
||||
"s1-EOF->:s2=>2\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-INT->:s1=>2\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"34 x", true);
|
||||
assertEquals("alt 1\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->s1\n" +
|
||||
"s1-ID->s2\n" +
|
||||
"s2-EOF->:s3=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"s0-ID->:s2=>2\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyPlusLoopInOtherRule() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} | b {System.out.println(\"alt 2\");} ;\n" +
|
||||
"a : .+ ID ;\n"+
|
||||
"b : .+ INT ;\n"+
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"2 3 x", true);
|
||||
assertEquals("alt 1\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->s1\n" +
|
||||
"s1-INT->s2\n" +
|
||||
"s2-ID->s3\n" +
|
||||
"s3-EOF->:s4=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"s0-ID->:s2=>2\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"2 3", true);
|
||||
assertEquals("alt 2\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->s1\n" +
|
||||
"s1-INT->s2\n" +
|
||||
"s2-EOF->:s3=>2\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-INT->:s1=>2\n", found);
|
||||
assertEquals("line 1:0 no viable alternative at input '2'\n", this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"a b c 3", true);
|
||||
assertEquals("alt 2\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->s1\n" +
|
||||
"s1-ID->s2\n" +
|
||||
"s2-INT->s3\n" +
|
||||
"s2-ID->s2\n" +
|
||||
"s3-EOF->:s4=>2\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-INT->:s2=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
assertEquals(null, this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopInOneAlt() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} EOF | b {System.out.println(\"alt 2\");} EOF ;\n" +
|
||||
"a : .* ;\n"+ // s comes here upon ID but then bypasses, error on EOF
|
||||
"b : INT ;\n"+
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x", true);
|
||||
assertEquals("alt 1\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-ID->:s1=>2\n", found);
|
||||
assertEquals("line 1:0 extraneous input 'x' expecting <EOF>\n", this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"34", true);
|
||||
assertEquals("alt 1\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-INT->s1\n" +
|
||||
"s1-EOF->:s2=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-INT->:s1=>2\n", found); // resolves INT EOF to alt 1 from s since ambig 'tween a and b
|
||||
assertEquals("line 1:2 reportAmbiguity d=0: {1..2}:[(1,1,[]), (1,2,[])],conflictingAlts={1..2}, input=34\n" +
|
||||
"line 1:0 extraneous input '34' expecting <EOF>\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : block EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"block : '{' .* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"{ }";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("{}\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'}'->:s1=>2\n", found);
|
||||
input =
|
||||
"{a b { }";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("{ab{}\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'{'->:s1=>1\n" +
|
||||
"s0-'}'->:s2=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
input =
|
||||
"{ } a 2 { }"; // FAILS to match since it terminates loop at first { }
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("", found); // should not print output; resync kills rest of input til '}' then returns normally
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoop() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( x=34 ) { } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if(x=34){};\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-')'->s2\n" +
|
||||
"s0-'='->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"s2-'{'->s3\n" +
|
||||
"s3-'}'->:s4=>2\n", found);
|
||||
input =
|
||||
"if ( ))) ) { } ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if()))){};\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-')'->s1\n" +
|
||||
"s1-'{'->s3\n" +
|
||||
"s1-')'->:s2=>1\n" +
|
||||
"s3-'}'->:s4=>2\n", found);
|
||||
input =
|
||||
"if (() { } a 2) { } ;"; // The first { } should match block so should stop
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("", found); // should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' (block|.)* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( x=34 ) { {return a} b 34 } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if(x=34){{returna}b34};\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-')'->s2\n" +
|
||||
"s0-'='->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"s2-'{'->s3\n" +
|
||||
"s3-'{'->s4\n" +
|
||||
"s4-'}'->:s5=>2\n" +
|
||||
"s4-ID->s4\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'{'->:s1=>1\n" +
|
||||
"s0-INT->:s2=>2\n" +
|
||||
"s0-ID->:s2=>2\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'{'->:s1=>1\n" +
|
||||
"s0-'}'->:s3=>2\n" +
|
||||
"s0-INT->:s2=>1\n" +
|
||||
"s0-ID->:s2=>1\n", found);
|
||||
|
||||
input =
|
||||
"if ( ()) ) { {return a} b 34 } ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if(())){{returna}b34};\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-')'->s2\n" +
|
||||
"s0-'('->:s1=>1\n" +
|
||||
"s2-'{'->s4\n" +
|
||||
"s2-')'->:s3=>1\n" +
|
||||
"s4-'{'->s5\n" +
|
||||
"s5-'}'->:s6=>2\n" +
|
||||
"s5-ID->s5\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'{'->:s1=>1\n" +
|
||||
"s0-INT->:s2=>2\n" +
|
||||
"s0-ID->:s2=>2\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'{'->:s1=>1\n" +
|
||||
"s0-'}'->:s3=>2\n" +
|
||||
"s0-INT->:s2=>1\n" +
|
||||
"s0-ID->:s2=>1\n", found);
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
|
||||
// EOF on end means LL(*) can identify when to stop the loop.
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : stat* ID '=' ID ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("x=1;a=b;\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->s1\n" +
|
||||
"s1-'='->s2\n" +
|
||||
"s2-INT->:s3=>1\n" +
|
||||
"s2-ID->s4\n" +
|
||||
"s4-';'->s5\n" +
|
||||
"s5-EOF->:s6=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-ID->:s1=>3\n", found);
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'if'->:s1=>1\n" +
|
||||
"s0-'return'->:s2=>1\n" +
|
||||
"s0-ID->s3\n" +
|
||||
"s3-'='->s4\n" +
|
||||
"s4-ID->s5\n" +
|
||||
"s5-';'->s6\n" +
|
||||
"s6-EOF->:s7=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'{'->:s2=>4\n" +
|
||||
"s0-'if'->:s1=>1\n" +
|
||||
"s0-'return'->:s4=>2\n" +
|
||||
"s0-ID->:s3=>3\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'{'->:s2=>1\n" +
|
||||
"s0-'return'->:s3=>1\n" +
|
||||
"s0-'}'->:s4=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
// can't match EOF to ID '=' '3' ';'
|
||||
assertEquals("line 1:9 no viable alternative at input '<EOF>'\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("line 1:14 no viable alternative at input '<EOF>'\n",
|
||||
this.stderrDuringParse);
|
||||
// should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNecessary() throws Exception {
|
||||
// stops scanning ahead at end of rule s since decision is nongreedy.
|
||||
// this says: "match statements until we see a=b; assignment; ignore any
|
||||
// statements that follow."
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"random : s ;" + // call s so s isn't followed by EOF directly
|
||||
"s @after {dumpDFA();} : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
|
||||
" {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b; x=y;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("x=1;a=b;\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->s1\n" +
|
||||
"s1-'='->s2\n" +
|
||||
"s2-INT->:s3=>1\n" +
|
||||
"s2-ID->s4\n" +
|
||||
"s4-';'->:s5=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-ID->:s1=>3\n", found); // ignores x=1 that follows first a=b assignment
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'if'->:s1=>1\n" +
|
||||
"s0-'return'->:s2=>1\n" +
|
||||
"s0-ID->s3\n" +
|
||||
"s3-'='->s4\n" +
|
||||
"s4-ID->s5\n" +
|
||||
"s5-';'->:s6=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'{'->:s2=>4\n" +
|
||||
"s0-'if'->:s1=>1\n" +
|
||||
"s0-'return'->:s4=>2\n" +
|
||||
"s0-ID->:s3=>3\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'{'->:s2=>1\n" +
|
||||
"s0-'return'->:s3=>1\n" +
|
||||
"s0-'}'->:s4=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match either stat
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
// can't match EOF to ID '=' '0' ';'
|
||||
assertEquals("line 1:9 no viable alternative at input '<EOF>'\n",
|
||||
this.stderrDuringParse);
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, true);
|
||||
assertEquals("x=1;a=b;\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-ID->s1\n" +
|
||||
"s1-'='->s2\n" +
|
||||
"s2-INT->:s3=>1\n" +
|
||||
"s2-ID->s4\n" +
|
||||
"s4-';'->:s5=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-ID->:s1=>3\n", found); // should not finish all input
|
||||
}
|
||||
|
||||
@Test public void testHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : (item)+ {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"item : tag | . ;\n" +
|
||||
"tag : '<' '/'? .* '>' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"<a>foo</a>", true);
|
||||
assertEquals("<a>foo</a>\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-EOF->:s3=>2\n" +
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"s0-ID->:s2=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'<'->s1\n" +
|
||||
"s0-ID->:s5=>2\n" +
|
||||
"s1-'/'->s2\n" +
|
||||
"s1-ID->s2\n" +
|
||||
"s2-'>'->s3\n" +
|
||||
"s2-ID->s2\n" +
|
||||
"s3-EOF->:s6=>1\n" +
|
||||
"s3-'<'->:s4=>1\n" +
|
||||
"s3-ID->s3\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'/'->:s2=>1\n" +
|
||||
"s0-ID->:s1=>2\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" +
|
||||
"s0-'>'->:s2=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
assertEquals("line 1:6 reportAmbiguity d=1: {1..2}:[(26,1,[14 6]), (33,1,[14 6]), (22,1,[14 6]), (20,1,[14 6]), (16,1,[6]), (1,1,[]), (22,2,[14 6]), (26,2,[14 6]), (33,2,[14 6]), (20,2,[14 6]), (16,2,[6]), (1,2,[])],conflictingAlts={1..2}, input=<a>foo<\n" +
|
||||
"line 1:10 reportAmbiguity d=1: {1..2}:[(35,1,[]), (35,2,[])],conflictingAlts={1..2}, input=</a>\n" +
|
||||
"line 1:7 reportAmbiguity d=2: {1..2}:[(26,1,[14 6]), (33,1,[14 6]), (26,2,[14 6]), (33,2,[14 6])],conflictingAlts={1..2}, input=/\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"<a></a>", true);
|
||||
assertEquals("<a></a>\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-EOF->:s2=>2\n" +
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'<'->s1\n" +
|
||||
"s1-'/'->s2\n" +
|
||||
"s1-ID->s2\n" +
|
||||
"s2-'>'->s3\n" +
|
||||
"s2-ID->s2\n" +
|
||||
"s3-EOF->:s5=>1\n" +
|
||||
"s3-'<'->:s4=>1\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'/'->:s2=>1\n" +
|
||||
"s0-ID->:s1=>2\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" +
|
||||
"s0-'>'->:s2=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"</b><a src=\"abc\", width=32>", true);
|
||||
assertEquals("</b><asrc=\"abc\",width=32>\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-EOF->:s2=>2\n" +
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'<'->s1\n" +
|
||||
"s1-'/'->s2\n" +
|
||||
"s1-ID->s2\n" +
|
||||
"s2-'>'->s3\n" +
|
||||
"s2-'='->s2\n" +
|
||||
"s2-','->s2\n" +
|
||||
"s2-ID->s2\n" +
|
||||
"s2-STR->s2\n" +
|
||||
"s2-INT->s2\n" +
|
||||
"s3-EOF->:s5=>1\n" +
|
||||
"s3-'<'->:s4=>1\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-'/'->:s1=>1\n" +
|
||||
"s0-ID->:s2=>2\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" +
|
||||
"s0-'>'->:s2=>2\n" +
|
||||
"s0-'='->:s1=>1\n" +
|
||||
"s0-','->:s1=>1\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"s0-STR->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n", found);
|
||||
}
|
||||
|
||||
/** lookahead prediction with '.' can be misleading since nongreedy. Lookahead
|
||||
* that sees into a non-greedy loop, thinks it is greedy.
|
||||
*/
|
||||
@Test
|
||||
public void testFindHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s @after {dumpDFA();} : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
|
||||
"tag : '<' .+ '>' ;\n" +
|
||||
"header : 'x' 'y' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
System.out.println(grammar);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
",=foo <a x= 3>32skidoo<a><img>", true);
|
||||
assertEquals("<ax=3>\n" +
|
||||
"<a>\n" +
|
||||
"<img>\n" +
|
||||
"Decision 0:\n" + // .*
|
||||
"s0-'<'->s2\n" +
|
||||
"s0-'='->:s1=>1\n" +
|
||||
"s0-','->:s1=>1\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"s2-ID->s3\n" +
|
||||
"s3-'x'->s4\n" +
|
||||
"s3-'>'->:s5=>2\n" +
|
||||
"s3-INT->s3\n" +
|
||||
"s4-'='->s3\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" + // (tag|header)
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" + // (...)*
|
||||
"s0-EOF->:s3=>2\n" +
|
||||
"s0-'<'->:s2=>1\n" +
|
||||
"s0-','->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" + // .+
|
||||
"s0-'x'->:s1=>1\n" +
|
||||
"s0-'>'->:s2=>2\n" +
|
||||
"s0-'='->:s1=>1\n" +
|
||||
"s0-ID->:s1=>1\n" +
|
||||
"s0-INT->:s1=>1\n", found);
|
||||
assertEquals(null,
|
||||
this.stderrDuringParse);
|
||||
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x x<a>", true);
|
||||
assertEquals("<a>\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'x'->s1\n" +
|
||||
"s0-'<'->s4\n" +
|
||||
"s1-'x'->:s2=>1\n" +
|
||||
"s1-'<'->:s3=>1\n" +
|
||||
"s4-ID->s5\n" +
|
||||
"s5-'>'->:s6=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-EOF->:s2=>2\n" +
|
||||
"s0-'x'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" +
|
||||
"s0-'>'->:s2=>2\n" +
|
||||
"s0-ID->:s1=>1\n", found);
|
||||
// gets line 1:3 no viable alternative at input '>'. Why??
|
||||
// oH! it sees .+ and figures it matches > so <> predicts tag CORRECT!
|
||||
// Seeing '.' in a lookahead prediction can be misleading!!
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
"x <><a>", true);
|
||||
assertEquals("null\n" +
|
||||
"<a>\n" +
|
||||
"Decision 0:\n" +
|
||||
"s0-'x'->s1\n" +
|
||||
"s0-'>'->:s6=>1\n" +
|
||||
"s0-'<'->s3\n" +
|
||||
"s1-'<'->:s2=>1\n" +
|
||||
"s3-'>'->s4\n" +
|
||||
"s3-ID->s4\n" +
|
||||
"s4-'>'->:s7=>2\n" +
|
||||
"s4-'<'->:s5=>2\n" +
|
||||
"\n" +
|
||||
"Decision 1:\n" +
|
||||
"s0-'<'->:s1=>1\n" +
|
||||
"\n" +
|
||||
"Decision 2:\n" +
|
||||
"s0-EOF->:s3=>2\n" +
|
||||
"s0-'x'->:s1=>1\n" +
|
||||
"s0-'>'->:s2=>1\n" +
|
||||
"\n" +
|
||||
"Decision 3:\n" +
|
||||
"s0-'>'->:s1=>2\n" +
|
||||
"s0-ID->:s2=>1\n", // doesn't match tag; null
|
||||
found);
|
||||
assertEquals("line 1:3 no viable alternative at input '>'\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
}
|
|
@ -147,265 +147,4 @@ public class TestParserExec extends BaseTest {
|
|||
assertEquals("a34c\n", found);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : block EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"block : '{' .* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"{ }";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("{}\n", found);
|
||||
input =
|
||||
"{a b { }";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("{ab{}\n", found);
|
||||
input =
|
||||
"{ } a 2 { }"; // FAILS to match since it terminates loop at first { }
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("", found); // should not print output; resync kills rest of input
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoop() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( x=34 ) { } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(x=34){};\n", found);
|
||||
input =
|
||||
"if ( ))) ) { } ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if()))){};\n", found);
|
||||
input =
|
||||
"if (() { } a 2) { } ;"; // The first { } should match block so should stop
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("", found); // should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' (block|.)* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input1 =
|
||||
"if ( x=34 ) { {return a} b 34 } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input1, false);
|
||||
assertEquals("if(x=34){{returna}b34};\n", found);
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
|
||||
// EOF on end means LL(*) can identify when to stop the loop.
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : stat* ID '=' ID ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found);
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' '3' ';'
|
||||
assertEquals("line 1:9 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("line 1:14 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
// should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNecessary() throws Exception {
|
||||
// stops scanning ahead at end of rule s since decision is nongreedy.
|
||||
// this says: "match statements until we see a=b; assignment; ignore any
|
||||
// statements that follow."
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"random : s ;" + // call s so s isn't followed by EOF directly
|
||||
"s : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
|
||||
" {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b; x=y;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found); // ignores x=1 that follows first a=b assignment
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match either stat
|
||||
execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' '0' ';'
|
||||
assertEquals("line 1:9 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found); // should not finish all input
|
||||
}
|
||||
|
||||
@Test public void testHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : tag+ {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"tag : '<' '/'? .* '>' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"<a>foo</a>", false);
|
||||
assertEquals("<a>foo</a>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"<a></a>", false);
|
||||
assertEquals("<a></a>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"</b><a src=\"abc\", width=32>", false);
|
||||
assertEquals("</b><asrc=\"abc\",width=32>\n", found);
|
||||
}
|
||||
|
||||
/** lookahead prediction with '.' can be misleading since nongreedy. Lookahead
|
||||
* that sees into a non-greedy loop, thinks it is greedy.
|
||||
*/
|
||||
@Test public void testFindHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
|
||||
"tag : '<' .+ '>' ;\n" +
|
||||
"header : 'x' 'y' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
",=foo <a x= 3>32skidoo<a><img>", false);
|
||||
assertEquals("<ax=3>\n" +
|
||||
"<a>\n" +
|
||||
"<img>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"x x<a>", false);
|
||||
assertEquals("<a>\n", found);
|
||||
// gets line 1:3 no viable alternative at input '>'. Why??
|
||||
// oH! it sees .+ and figures it matches > so <> predicts tag CORRECT!
|
||||
// Seeing '.' in a lookahead prediction can be misleading!!
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"x <><a>", false);
|
||||
assertEquals("null\n" + // doesn't match tag; null
|
||||
"<a>\n", found);
|
||||
}
|
||||
|
||||
/** See comment on testNongreedyLoopEndOfRuleStuffFollowing */
|
||||
@Test public void testNongreedyLoopEndOfRule() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' INT ')' .* ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( 34 ) a b";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34)ab\n", found);
|
||||
input =
|
||||
"if ( 34 ))) ) ( a = = b(";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34))))(a==b(\n", found);
|
||||
}
|
||||
|
||||
/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
|
||||
* since it immediately hits the end of the rule. Non-greedy loops
|
||||
* never consume more tokens than exist following the .* in that
|
||||
* same rule. So, in this case, the greedy loop always wins and it will
|
||||
* suck tokens until end of file. Unfortunately, the '.' in rule s
|
||||
* will not match, leading to a syntax error.
|
||||
*/
|
||||
@Test public void testNongreedyLoopEndOfRuleStuffFollowing() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat '.' {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' INT ')' .* ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( 34 ) a b .";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34)ab.\n", found);
|
||||
assertEquals("line 1:15 no viable alternative at input ''\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -155,8 +155,8 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
"alt 1\n" +
|
||||
"alt 1\n";
|
||||
assertEquals(expecting, found);
|
||||
assertEquals("line 1:0 reportAmbiguity {1..3}:[(6,1,[]), (6,2,[]), (6,3,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n" +
|
||||
"line 1:0 reportInsufficientPredicates {1..3}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,1,[],up=1), (1,1,[],up=1), (6,2,[],up=1), (1,2,[],up=1), (6,3,[],{1:0}?,up=1), (1,3,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n",
|
||||
assertEquals("line 1:0 reportAmbiguity d=0: {1..3}:[(6,1,[]), (6,2,[]), (6,3,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n" +
|
||||
"line 1:0 reportInsufficientPredicates d=0: {1..3}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,1,[],up=1), (1,1,[],up=1), (6,2,[],up=1), (1,2,[],up=1), (6,3,[],{1:0}?,up=1), (1,3,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
@ -187,8 +187,8 @@ public class TestSemPredEvalParser extends BaseTest {
|
|||
"alt 2\n" +
|
||||
"alt 2\n";
|
||||
assertEquals(expecting, found);
|
||||
assertEquals("line 1:4 reportAmbiguity {2..4}:[(10,2,[]), (10,3,[]), (10,4,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n" +
|
||||
"line 1:4 reportInsufficientPredicates {2..4}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,2,[],up=1), (10,2,[],up=1), (1,2,[],up=1), (6,3,[],up=1), (10,3,[],up=1), (1,3,[],up=1), (6,4,[],{1:0}?,up=1), (10,4,[],{1:0}?,up=1), (1,4,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n",
|
||||
assertEquals("line 1:4 reportAmbiguity d=0: {2..4}:[(10,2,[]), (10,3,[]), (10,4,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n" +
|
||||
"line 1:4 reportInsufficientPredicates d=0: {2..4}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,2,[],up=1), (10,2,[],up=1), (1,2,[],up=1), (6,3,[],up=1), (10,3,[],up=1), (1,3,[],up=1), (6,4,[],{1:0}?,up=1), (10,4,[],{1:0}?,up=1), (1,4,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n",
|
||||
this.stderrDuringParse);
|
||||
}
|
||||
|
||||
|
|