almost got new ATN engine working; separated .* nongreedy tests, reorg args on reporting methods

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9627]
This commit is contained in:
parrt 2011-12-15 11:03:41 -08:00
parent fa3483a7fc
commit 5ad1505fdb
10 changed files with 800 additions and 344 deletions

View File

@ -120,8 +120,8 @@ public interface ANTLRErrorStrategy<Symbol> {
* full context.
*/
void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs);
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs);
/** Called by the parser when it detects an input sequence that can be matched by two paths
* through the grammar. The difference between this and the reportAmbiguity method lies in
@ -129,9 +129,9 @@ public interface ANTLRErrorStrategy<Symbol> {
* we can't be sure if a conflict is an ambiguity or simply a weakness in the Strong LL parsing
* strategy. If we are parsing with full context, this method is never called.
*/
void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs);
// void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
// int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
// @NotNull OrderedHashSet<ATNConfig> configs);
/** Called by the parser when it finds a conflict that is resolved by retrying the parse
* with full context. This is not a warning; it simply notifies you that your grammar
@ -149,6 +149,7 @@ public interface ANTLRErrorStrategy<Symbol> {
* if the predicates fail.
*/
void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
@NotNull DFA dfa,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull SemanticContext[] altToPred,
@NotNull OrderedHashSet<ATNConfig> configs);

View File

@ -192,7 +192,8 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
SymbolStream<Symbol> tokens = recognizer.getInputStream();
String input;
if (tokens instanceof TokenStream) {
input = ((TokenStream)tokens).toString(e.startToken, e.offendingToken);
if ( e.startToken.getType()==Token.EOF ) input = "<EOF>";
else input = ((TokenStream)tokens).toString(e.startToken, e.offendingToken);
} else {
input = "<unknown input>";
}
@ -556,15 +557,8 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
@Override
public void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
{
}
@Override
public void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
{
}
@ -576,6 +570,7 @@ public class DefaultErrorStrategy<Symbol> implements ANTLRErrorStrategy<Symbol>
@Override
public void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
@NotNull DFA dfa,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull SemanticContext[] altToPred,
@NotNull OrderedHashSet<ATNConfig> configs)

View File

@ -41,17 +41,10 @@ import java.util.Arrays;
public class DiagnosticErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol> {
@Override
public void reportAmbiguity(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
DFA dfa, int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
{
recognizer.notifyListeners("reportAmbiguity " + ambigAlts + ":" + configs + ", input=" +
recognizer.getInputString(startIndex, stopIndex));
}
@Override
public void reportConflict(@NotNull BaseRecognizer<Symbol> recognizer,
int startIndex, int stopIndex, IntervalSet ambigAlts, OrderedHashSet<ATNConfig> configs) {
recognizer.notifyListeners("reportConflict " + ambigAlts + ":" + configs + ", input=" +
recognizer.notifyListeners("reportAmbiguity d="+dfa.decision + ": "+ ambigAlts + ":" + configs + ", input=" +
recognizer.getInputString(startIndex, stopIndex));
}
@ -59,17 +52,18 @@ public class DiagnosticErrorStrategy<Symbol> extends DefaultErrorStrategy<Symbol
public void reportContextSensitivity(@NotNull BaseRecognizer<Symbol> recognizer, @NotNull DFA dfa,
int startIndex, int stopIndex, @NotNull OrderedHashSet<ATNConfig> configs)
{
recognizer.notifyListeners("reportContextSensitivity: " + configs + ", input=" +
recognizer.notifyListeners("reportContextSensitivity d="+dfa.decision +": "+ configs + ", input=" +
recognizer.getInputString(startIndex, stopIndex));
}
@Override
public void reportInsufficientPredicates(@NotNull BaseRecognizer<Symbol> recognizer,
@NotNull DFA dfa,
int startIndex, int stopIndex, @NotNull IntervalSet ambigAlts,
@NotNull SemanticContext[] altToPred,
@NotNull OrderedHashSet<ATNConfig> configs)
{
recognizer.notifyListeners("reportInsufficientPredicates " + ambigAlts + ":" + Arrays.toString(altToPred) +
recognizer.notifyListeners("reportInsufficientPredicates d="+dfa.decision +": " +ambigAlts + ":" + Arrays.toString(altToPred) +
", " + configs + ", input=" + recognizer.getInputString(startIndex, stopIndex));
}
}

View File

@ -37,6 +37,7 @@ import org.antlr.v4.runtime.misc.OrderedHashSet;
*/
public class ATNConfigSet extends OrderedHashSet<ATNConfig> {
// TODO: these fields make me pretty uncomfortable but nice to pack up info together, saves recomputation
// TODO: can we track conflicts as they are added to save scanning configs later?
public int uniqueAlt;
public IntervalSet conflictingAlts;
public boolean hasSemanticContext;

View File

@ -536,7 +536,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
if ( retry_debug ) System.out.println("ctx empty; no need to retry");
// no point in retrying with ctx since it's same.
// this implies that we have a true ambiguity
reportAmbiguity(startIndex, input.index(), ambigAlts, reach);
reportAmbiguity(dfa, startIndex, input.index(), ambigAlts, reach);
resolveToProperAlt(decState, ambigAlts, reach);
return ATN.INVALID_ALT_NUMBER;
}
@ -547,7 +547,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
}
dfa.conflictSet = (OrderedHashSet<ATNConfig>)reach.clone(); // most recent set with conflict
reportConflict(startIndex, input.index(), ambigAlts, reach);
// reportConflict(startIndex, input.index(), ambigAlts, reach);
resolveToProperAlt(decState, ambigAlts, reach);
return ATN.INVALID_ALT_NUMBER;
}
@ -595,7 +595,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
{
// We need at least n-1 predicates for n ambiguous alts
if ( tooFewPredicates(altToPred) ) {
reportInsufficientPredicates(startIndex, input.index(),
reportInsufficientPredicates(dfa, startIndex, input.index(),
ambigAlts, altToPred, reach);
}
List<DFAState.PredPrediction> predPredictions = getPredicatePredictions(ambigAlts, altToPred);
@ -776,7 +776,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
if ( ctx_dfa.conflictSet!=null ) {
// System.out.println("retry gives ambig for "+input.toString(startIndex, input.index()));
reportAmbiguity(startIndex, input.index(), getAmbiguousAlts(ctx_dfa.conflictSet), ctx_dfa.conflictSet);
reportAmbiguity(dfa, startIndex, input.index(), getAmbiguousAlts(ctx_dfa.conflictSet), ctx_dfa.conflictSet);
}
else {
// System.out.println("NO ambig for "+input.toString(startIndex, input.index()));
@ -1006,16 +1006,16 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
return new ATNConfig(config, t.target, newContext);
}
public void reportConflict(int startIndex, int stopIndex,
@NotNull IntervalSet alts,
@NotNull OrderedHashSet<ATNConfig> configs)
{
if ( debug || retry_debug ) {
System.out.println("reportConflict "+alts+":"+configs+
", input="+parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) parser.getErrorHandler().reportConflict(parser, startIndex, stopIndex, alts, configs);
}
// public void reportConflict(int startIndex, int stopIndex,
// @NotNull IntervalSet alts,
// @NotNull OrderedHashSet<ATNConfig> configs)
// {
// if ( debug || retry_debug ) {
// System.out.println("reportConflict "+alts+":"+configs+
// ", input="+parser.getInputString(startIndex, stopIndex));
// }
// if ( parser!=null ) parser.getErrorHandler().reportConflict(parser, startIndex, stopIndex, alts, configs);
// }
public void reportContextSensitivity(DFA dfa, OrderedHashSet<ATNConfig> configs, int startIndex, int stopIndex) {
if ( debug || retry_debug ) {
@ -1026,7 +1026,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
}
/** If context sensitive parsing, we know it's ambiguity not conflict */
public void reportAmbiguity(int startIndex, int stopIndex,
public void reportAmbiguity(@NotNull DFA dfa, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull OrderedHashSet<ATNConfig> configs)
{
@ -1035,11 +1035,11 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
ambigAlts+":"+configs+
", input="+parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, startIndex, stopIndex,
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, dfa, startIndex, stopIndex,
ambigAlts, configs);
}
public void reportInsufficientPredicates(int startIndex, int stopIndex,
public void reportInsufficientPredicates(@NotNull DFA dfa, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull SemanticContext[] altToPred,
@NotNull OrderedHashSet<ATNConfig> configs)
@ -1050,7 +1050,7 @@ public class ParserATNSimulator<Symbol> extends ATNSimulator {
parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) {
parser.getErrorHandler().reportInsufficientPredicates(parser, startIndex, stopIndex, ambigAlts,
parser.getErrorHandler().reportInsufficientPredicates(parser, dfa, startIndex, stopIndex, ambigAlts,
altToPred, configs);
}
}

View File

@ -226,7 +226,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
ATNConfigSet fullCtxSet = execATNWithFullContext(s0_closure, input, startIndex, greedy);
if ( fullCtxSet.conflictingAlts!=null ) {
reportAmbiguity(startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
reportAmbiguity(dfa, startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
ctx_alt = fullCtxSet.conflictingAlts.getMinElement();
}
else {
@ -356,6 +356,8 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
conflict
conflict + preds
TODO: greedy + those
*/
public int execATN(@NotNull DFA dfa, @NotNull DFAState s0,
@NotNull SymbolStream<Symbol> input, int startIndex,
@ -377,7 +379,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
while (true) { // while more work
ATNConfigSet reach = computeReachSet(previous, t, greedy);
if ( reach.size()==0 ) noViableAlt(input, outerContext, previous, startIndex);
if ( reach==null ) throw noViableAlt(input, outerContext, previous, startIndex);
D = addDFAEdge(dfa, previous, t, reach); // always adding edge even if to a conflict state
int predictedAlt = getUniqueAlt(reach);
if ( predictedAlt!=ATN.INVALID_ALT_NUMBER ) {
@ -389,9 +391,9 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
D.configset.conflictingAlts = getConflictingAlts(reach);
if ( D.configset.conflictingAlts!=null ) {
D.isAcceptState = true; // when ambig or ctx sens or nongreedy or .* loop hitting rule stop
if ( decState.isGreedy ) {
if ( greedy ) {
if ( outerContext == ParserRuleContext.EMPTY ) {
reportAmbiguity(startIndex, input.index(), D.configset.conflictingAlts, D.configset);
reportAmbiguity(dfa, startIndex, input.index(), D.configset.conflictingAlts, D.configset);
resolveToMinAlt(D, D.configset.conflictingAlts);
}
else {
@ -399,7 +401,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ATNConfigSet s0_closure = computeStartState(dfa.atnStartState, outerContext, greedy);
fullCtxSet = execATNWithFullContext(s0_closure, input, startIndex, greedy);
if ( fullCtxSet.conflictingAlts!=null ) {
reportAmbiguity(startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
reportAmbiguity(dfa, startIndex, input.index(), fullCtxSet.conflictingAlts, fullCtxSet);
predictedAlt = fullCtxSet.conflictingAlts.getMinElement();
resolveToMinAlt(D, fullCtxSet.conflictingAlts);
}
@ -410,17 +412,33 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
}
else {
// if we reached end of rule via exit branch and decision nongreedy, we matched
// upon ambiguity for nongreedy, default to exit branch to avoid inf loop
// this handles case where we find ambiguity that stops DFA construction
// before a config hits rule stop state. Was leaving prediction blank.
int exitAlt = 2;
ATNConfig cstop = configWithAltAtStopState(reach, exitAlt);
if ( cstop!=null ) {
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
return cstop.alt;
}
D.prediction = exitAlt;
}
}
}
if ( !greedy ) {
int exitAlt = 2;
if ( predictedAlt != ATN.INVALID_ALT_NUMBER && configWithAltAtStopState(reach, 1) ) {
if ( debug ) System.out.println("nongreedy loop but unique alt "+D.configset.uniqueAlt+" at "+reach);
// reaches end via .* means nothing after.
D.isAcceptState = true;
D.prediction = predictedAlt = exitAlt;
}
else {// if we reached end of rule via exit branch and decision nongreedy, we matched
if ( configWithAltAtStopState(reach, exitAlt) ) {
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
D.isAcceptState = true;
D.prediction = predictedAlt = exitAlt;
}
}
}
ATNConfigSet configs = D.configset;
if ( D.isCtxSensitive ) configs = fullCtxSet;
if ( D.isAcceptState && configs.hasSemanticContext ) {
@ -428,7 +446,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
predicateDFAState(D, configs, outerContext, nalts);
if ( tooFewPredicates(D, outerContext, nalts) ) {
IntervalSet conflictingAlts = getConflictingAltsFromConfigSet(configs);
reportInsufficientPredicates(startIndex, input.index(),
reportInsufficientPredicates(dfa, startIndex, input.index(),
conflictingAlts,
getPredsForAmbigAlts(conflictingAlts, configs, nalts),
configs);
@ -459,6 +477,9 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
int t = input.LA(1);
while (true) { // while more work
ATNConfigSet reach = computeReachSet(previous, t, greedy);
if ( reach==null ) {
parser.notifyListeners("ERROR: how can reach be empty after doing no-ctx ATN sim?");
}
reach.uniqueAlt = getUniqueAlt(reach);
if ( reach.uniqueAlt!=ATN.INVALID_ALT_NUMBER ) return reach;
reach.conflictingAlts = getConflictingAlts(reach);
@ -470,7 +491,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
protected ATNConfigSet computeReachSet(ATNConfigSet closure, int t, boolean greedy) {
if ( debug ) System.out.println("in reach starting closure: " + closure);
if ( debug ) System.out.println("in computeReachSet, starting closure: " + closure);
ATNConfigSet reach = new ATNConfigSet();
for (ATNConfig c : closure) {
if ( debug ) System.out.println("testing "+getTokenName(t)+" at "+c.toString());
@ -484,6 +505,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
}
}
if ( reach.size()==0 ) return null;
return reach;
}
@ -651,6 +673,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
if ( !closureBusy.add(config) ) return; // avoid infinite recursion
if ( config.state instanceof RuleStopState ) {
if ( !greedy ) {
// don't see past end of a rule for any nongreedy decision
if ( debug ) System.out.println("NONGREEDY at stop state of "+
getRuleName(config.state.ruleIndex));
configs.add(config);
return;
}
// We hit rule end. If we have context info, use it
if ( config.context!=null && !config.context.isEmpty() ) {
RuleContext newContext = config.context.parent; // "pop" invoking state
@ -667,13 +696,13 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
else {
// else if we have no context info, just chase follow links (if greedy)
if ( !greedy ) {
if ( debug ) System.out.println("NONGREEDY at stop state of "+
getRuleName(config.state.ruleIndex));
// don't purse past end of a rule for any nongreedy decision
configs.add(config);
return;
}
// if ( !greedy ) {
// if ( debug ) System.out.println("NONGREEDY at stop state of "+
// getRuleName(config.state.ruleIndex));
// // don't pursue past end of a rule for any nongreedy decision
// configs.add(config);
// return;
// }
if ( debug ) System.out.println("FALLING off rule "+
getRuleName(config.state.ruleIndex));
}
@ -1080,18 +1109,17 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
@Nullable
public ATNConfig configWithAltAtStopState(@NotNull Collection<ATNConfig> configs, int alt) {
public boolean configWithAltAtStopState(@NotNull Collection<ATNConfig> configs, int alt) {
for (ATNConfig c : configs) {
if ( c.alt == alt ) {
if ( c.state.getClass() == RuleStopState.class ) {
return c;
return true;
}
}
}
return null;
return false;
}
protected DFAState addDFAEdge(@NotNull DFA dfa,
@NotNull ATNConfigSet p,
int t,
@ -1149,7 +1177,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
}
/** If context sensitive parsing, we know it's ambiguity not conflict */
public void reportAmbiguity(int startIndex, int stopIndex,
public void reportAmbiguity(@NotNull DFA dfa, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull ATNConfigSet configs)
{
@ -1158,11 +1186,11 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
ambigAlts+":"+configs+
", input="+parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, startIndex, stopIndex,
if ( parser!=null ) parser.getErrorHandler().reportAmbiguity(parser, dfa, startIndex, stopIndex,
ambigAlts, configs);
}
public void reportInsufficientPredicates(int startIndex, int stopIndex,
public void reportInsufficientPredicates(@NotNull DFA dfa, int startIndex, int stopIndex,
@NotNull IntervalSet ambigAlts,
@NotNull SemanticContext[] altToPred,
@NotNull ATNConfigSet configs)
@ -1173,7 +1201,7 @@ public class v2ParserATNSimulator<Symbol> extends ATNSimulator {
parser.getInputString(startIndex, stopIndex));
}
if ( parser!=null ) {
parser.getErrorHandler().reportInsufficientPredicates(parser, startIndex, stopIndex, ambigAlts,
parser.getErrorHandler().reportInsufficientPredicates(parser, dfa, startIndex, stopIndex, ambigAlts,
altToPred, configs);
}
}

View File

@ -1,10 +1,10 @@
grammar T;
@header {import java.util.*;}
s : a ';' a;
a : ID {System.out.println("alt 1");}
| ID {System.out.println("alt 2");}
| {false}? ID {System.out.println("alt 3");}
;
s : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;
tag : '<' .+ '>' ;
header : 'x' 'y' ;
EQ : '=' ;
COMMA : ',' ;
ID : 'a'..'z'+ ;
STR : '"' (options {greedy=false;}:.)* '"' ;
INT : '0'..'9'+;
WS : (' '|'\n') {skip();} ;

View File

@ -0,0 +1,698 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.antlr.v4.test;
import org.junit.Test;
public class TestNonGreedyLoops extends BaseTest {
/**
 * Exercises a wildcard star loop that is the last element of its rule
 * ({@code any}) while the invoking rule {@code s} requires {@code ID EOF}
 * right after it. For both inputs the expected DFA dump for decision 0
 * predicts alt 2 on the very first token.
 */
@Test public void testNongreedyLoopOnEndIsNop() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : any ID EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "any : .* ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Case 1: a lone ID; nothing is expected on stderr.
    final String expectedForId =
        "x\n"
        + "Decision 0:\n"
        + "s0-ID->:s1=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "x", true);
    assertEquals(expectedForId, output);
    assertEquals(null, this.stderrDuringParse);

    // Case 2: INT then ID; alt 2 is still predicted on INT, and the parser is
    // expected to report the INT as extraneous input.
    final String expectedForIntId =
        "34x\n"
        + "Decision 0:\n"
        + "s0-INT->:s1=>2\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "34 x", true);
    assertEquals(expectedForIntId, output);
    assertEquals("line 1:0 extraneous input '34' expecting ID\n", this.stderrDuringParse);
}
/**
 * Same shape as the star-loop variant, but rule {@code any} uses a wildcard
 * plus loop. The expected outcome for input {@code x} is a
 * "no viable alternative" error on stderr.
 */
@Test public void testNongreedyPlusLoopOnEndIsNop() throws Exception {
    // A plus loop at the end of a rule always yields no viable alt: it cannot be
    // bypassed, yet it cannot match either.
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : any ID EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "any : .+ ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    final String expectedOutput =
        "x\n"
        + "Decision 0:\n"
        + "s0-ID->:s1=>2\n";

    final String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "x", true);

    assertEquals(expectedOutput, output);
    assertEquals("line 1:0 no viable alternative at input 'x'\n", this.stderrDuringParse);
}
/**
 * Rule {@code s} chooses between rules {@code a} and {@code b}, each of which
 * starts with a wildcard star loop and ends in a different token type
 * (ID vs. INT). Three inputs are parsed; each is expected to print the chosen
 * alternative plus the DFA dump, with nothing on stderr.
 */
@Test public void testNongreedyLoopInOtherRule() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} | b {System.out.println(\"alt 2\");} ;\n"
        + "a : .* ID ;\n"
        + "b : .* INT ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Input "x": a single ID selects alt 1 of s.
    final String expectedForId =
        "alt 1\n"
        + "Decision 0:\n"
        + "s0-ID->s1\n"
        + "s1-EOF->:s2=>1\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-ID->:s1=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "x", true);
    assertEquals(expectedForId, output);
    assertEquals(null, this.stderrDuringParse);

    // Input "34": a single INT selects alt 2 of s.
    final String expectedForInt =
        "alt 2\n"
        + "Decision 0:\n"
        + "s0-INT->s1\n"
        + "s1-EOF->:s2=>2\n"
        + "\n"
        + "Decision 2:\n"
        + "s0-INT->:s1=>2\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "34", true);
    assertEquals(expectedForInt, output);
    assertEquals(null, this.stderrDuringParse);

    // Input "34 x": INT followed by ID selects alt 1 of s.
    final String expectedForIntId =
        "alt 1\n"
        + "Decision 0:\n"
        + "s0-INT->s1\n"
        + "s1-ID->s2\n"
        + "s2-EOF->:s3=>1\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-INT->:s1=>1\n"
        + "s0-ID->:s2=>2\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "34 x", true);
    assertEquals(expectedForIntId, output);
    assertEquals(null, this.stderrDuringParse);
}
/**
 * Variant of the "loop in other rule" test where rules {@code a} and
 * {@code b} begin with a wildcard plus loop instead of a star loop.
 * Three inputs are parsed and compared against the expected printed
 * alternative, DFA dump and stderr content.
 */
@Test public void testNongreedyPlusLoopInOtherRule() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} | b {System.out.println(\"alt 2\");} ;\n"
        + "a : .+ ID ;\n"
        + "b : .+ INT ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Input "2 3 x": ends in ID, so alt 1 of s is expected; stderr stays empty.
    final String expectedEndingInId =
        "alt 1\n"
        + "Decision 0:\n"
        + "s0-INT->s1\n"
        + "s1-INT->s2\n"
        + "s2-ID->s3\n"
        + "s3-EOF->:s4=>1\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-INT->:s1=>1\n"
        + "s0-ID->:s2=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "2 3 x", true);
    assertEquals(expectedEndingInId, output);
    assertEquals(null, this.stderrDuringParse);

    // Input "2 3": alt 2 of s is expected, together with a no-viable-alt
    // message on stderr.
    final String expectedTwoInts =
        "alt 2\n"
        + "Decision 0:\n"
        + "s0-INT->s1\n"
        + "s1-INT->s2\n"
        + "s2-EOF->:s3=>2\n"
        + "\n"
        + "Decision 2:\n"
        + "s0-INT->:s1=>2\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "2 3", true);
    assertEquals(expectedTwoInts, output);
    assertEquals("line 1:0 no viable alternative at input '2'\n", this.stderrDuringParse);

    // Input "a b c 3": several IDs then an INT; alt 2 of s, stderr empty.
    final String expectedIdsThenInt =
        "alt 2\n"
        + "Decision 0:\n"
        + "s0-ID->s1\n"
        + "s1-ID->s2\n"
        + "s2-INT->s3\n"
        + "s2-ID->s2\n"
        + "s3-EOF->:s4=>2\n"
        + "\n"
        + "Decision 2:\n"
        + "s0-INT->:s2=>2\n"
        + "s0-ID->:s1=>1\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "a b c 3", true);
    assertEquals(expectedIdsThenInt, output);
    assertEquals(null, this.stderrDuringParse);
}
/**
 * Rule {@code s} picks between {@code a EOF} and {@code b EOF}, where only
 * {@code a} is a wildcard star loop and {@code b} is a single INT.
 * Both inputs are expected to print "alt 1" and to produce an
 * extraneous-input error; the INT input additionally reports an ambiguity.
 */
@Test public void testNongreedyLoopInOneAlt() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : a {System.out.println(\"alt 1\");} EOF | b {System.out.println(\"alt 2\");} EOF ;\n"
        // s enters rule a upon ID, but the loop is then bypassed, giving an error at EOF
        + "a : .* ;\n"
        + "b : INT ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Input "x".
    final String expectedForId =
        "alt 1\n"
        + "Decision 0:\n"
        + "s0-ID->:s1=>1\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-ID->:s1=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "x", true);
    assertEquals(expectedForId, output);
    assertEquals("line 1:0 extraneous input 'x' expecting <EOF>\n", this.stderrDuringParse);

    // Input "34": INT EOF resolves to alt 1 from s because it is ambiguous
    // between a and b.
    final String expectedForInt =
        "alt 1\n"
        + "Decision 0:\n"
        + "s0-INT->s1\n"
        + "s1-EOF->:s2=>1\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-INT->:s1=>2\n";
    final String expectedErrorsForInt =
        "line 1:2 reportAmbiguity d=0: {1..2}:[(1,1,[]), (1,2,[])],conflictingAlts={1..2}, input=34\n"
        + "line 1:0 extraneous input '34' expecting <EOF>\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", "34", true);
    assertEquals(expectedForInt, output);
    assertEquals(expectedErrorsForInt, this.stderrDuringParse);
}
/**
 * Rule {@code block} wraps a wildcard star loop in curly braces and is
 * followed by EOF in rule {@code s}. Three inputs are parsed; only the
 * standard output of the parse is checked here, not stderr.
 */
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : block EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "block : '{' .* '}' ;\n"
        + "EQ : '=' ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Empty block.
    final String emptyBlock = "{ }";
    final String expectedEmptyBlock =
        "{}\n"
        + "Decision 0:\n"
        + "s0-'}'->:s1=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", emptyBlock, true);
    assertEquals(expectedEmptyBlock, output);

    // Block containing IDs and a nested opening brace.
    final String nestedOpen = "{a b { }";
    final String expectedNestedOpen =
        "{ab{}\n"
        + "Decision 0:\n"
        + "s0-'{'->:s1=>1\n"
        + "s0-'}'->:s2=>2\n"
        + "s0-ID->:s1=>1\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", nestedOpen, true);
    assertEquals(expectedNestedOpen, output);

    // FAILS to match since the loop terminates at the first { }.
    final String twoBlocks = "{ } a 2 { }";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", twoBlocks, true);
    // No output should be printed; resync kills the rest of the input until '}'
    // and then returns normally.
    assertEquals("", output);
}
/**
 * Rule {@code ifstat} has a wildcard star loop between the parentheses of an
 * {@code if} and is followed by an empty {@code block}. Three inputs are
 * parsed; only the standard output of the parse is checked.
 */
@Test public void testNongreedyLoop() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "ifstat : 'if' '(' .* ')' block ;\n"
        + "block : '{' '}' ;\n"
        + "EQ : '=' ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // Ordinary condition between the parentheses.
    final String simpleCondition = "if ( x=34 ) { } ;";
    final String expectedSimpleCondition =
        "if(x=34){};\n"
        + "Decision 0:\n"
        + "s0-')'->s2\n"
        + "s0-'='->:s1=>1\n"
        + "s0-INT->:s1=>1\n"
        + "s0-ID->:s1=>1\n"
        + "s2-'{'->s3\n"
        + "s3-'}'->:s4=>2\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", simpleCondition, true);
    assertEquals(expectedSimpleCondition, output);

    // Extra closing parentheses inside the condition.
    final String extraParens = "if ( ))) ) { } ;";
    final String expectedExtraParens =
        "if()))){};\n"
        + "Decision 0:\n"
        + "s0-')'->s1\n"
        + "s1-'{'->s3\n"
        + "s1-')'->:s2=>1\n"
        + "s3-'}'->:s4=>2\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", extraParens, true);
    assertEquals(expectedExtraParens, output);

    // The first { } should match block, so the loop should stop there.
    final String earlyBlock = "if (() { } a 2) { } ;";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", earlyBlock, true);
    // The parse should not get far enough to print any output.
    assertEquals("", output);
}
/**
 * Like the plain "nongreedy loop" test, except that rule {@code block} now
 * contains its own loop over nested blocks or wildcards. Two inputs are
 * parsed and the printed text plus DFA dump for decisions 0, 1 and 2 are
 * compared against the expected strings.
 */
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "ifstat : 'if' '(' .* ')' block ;\n"
        + "block : '{' (block|.)* '}' ;\n"
        + "EQ : '=' ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // The expected dump for decisions 1 and 2 is the same for both inputs.
    final String expectedDecisions1And2 =
        "\n"
        + "Decision 1:\n"
        + "s0-'{'->:s1=>1\n"
        + "s0-INT->:s2=>2\n"
        + "s0-ID->:s2=>2\n"
        + "\n"
        + "Decision 2:\n"
        + "s0-'{'->:s1=>1\n"
        + "s0-'}'->:s3=>2\n"
        + "s0-INT->:s2=>1\n"
        + "s0-ID->:s2=>1\n";

    // Ordinary condition, nested block in the body.
    final String simpleCondition = "if ( x=34 ) { {return a} b 34 } ;";
    final String expectedSimpleCondition =
        "if(x=34){{returna}b34};\n"
        + "Decision 0:\n"
        + "s0-')'->s2\n"
        + "s0-'='->:s1=>1\n"
        + "s0-INT->:s1=>1\n"
        + "s0-ID->:s1=>1\n"
        + "s2-'{'->s3\n"
        + "s3-'{'->s4\n"
        + "s4-'}'->:s5=>2\n"
        + "s4-ID->s4\n"
        + expectedDecisions1And2;
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", simpleCondition, true);
    assertEquals(expectedSimpleCondition, output);

    // Parentheses inside the condition, same body.
    final String parensInCondition = "if ( ()) ) { {return a} b 34 } ;";
    final String expectedParensInCondition =
        "if(())){{returna}b34};\n"
        + "Decision 0:\n"
        + "s0-')'->s2\n"
        + "s0-'('->:s1=>1\n"
        + "s2-'{'->s4\n"
        + "s2-')'->:s3=>1\n"
        + "s4-'{'->s5\n"
        + "s5-'}'->:s6=>2\n"
        + "s5-ID->s5\n"
        + expectedDecisions1And2;
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", parensInCondition, true);
    assertEquals(expectedParensInCondition, output);
}
/**
 * Rule {@code s} is a loop over {@code stat} followed by an ID-to-ID
 * assignment and EOF. Because EOF terminates the rule, LL(*) can identify
 * when to stop the loop without a nongreedy option. Two inputs are expected
 * to parse and print; two more are expected to end in a
 * "no viable alternative" error at EOF.
 */
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
    final String grammarText =
        "grammar T;\n"
        + "s @after {dumpDFA();} : stat* ID '=' ID ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n"
        + "stat : 'if' '(' INT ')' stat\n"
        + " | 'return' INT ';'\n"
        + " | ID '=' (INT|ID) ';'\n"
        + " | block\n"
        + " ;\n"
        + "block : '{' stat* '}' ;\n"
        + "EQ : '=' ;\n"
        + "INT : '0'..'9'+ ;\n"
        + "ID : 'a'..'z'+ ;\n"
        + "WS : (' '|'\\n')+ {skip();} ;\n";

    // One statement followed by the closing ID-to-ID assignment.
    final String twoAssignments = "x=1; a=b;";
    final String expectedTwoAssignments =
        "x=1;a=b;\n"
        + "Decision 0:\n"
        + "s0-ID->s1\n"
        + "s1-'='->s2\n"
        + "s2-INT->:s3=>1\n"
        + "s2-ID->s4\n"
        + "s4-';'->s5\n"
        + "s5-EOF->:s6=>2\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-ID->:s1=>3\n";
    String output = execParser("T.g", grammarText, "TParser", "TLexer", "s", twoAssignments, true);
    assertEquals(expectedTwoAssignments, output);

    // if, nested blocks and return statements before the closing assignment.
    final String mixedStatements = "if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
    final String expectedMixedStatements =
        "if(1){x=3;{return4;}}return99;abc=def;\n"
        + "Decision 0:\n"
        + "s0-'if'->:s1=>1\n"
        + "s0-'return'->:s2=>1\n"
        + "s0-ID->s3\n"
        + "s3-'='->s4\n"
        + "s4-ID->s5\n"
        + "s5-';'->s6\n"
        + "s6-EOF->:s7=>2\n"
        + "\n"
        + "Decision 1:\n"
        + "s0-'{'->:s2=>4\n"
        + "s0-'if'->:s1=>1\n"
        + "s0-'return'->:s4=>2\n"
        + "s0-ID->:s3=>3\n"
        + "\n"
        + "Decision 2:\n"
        + "s0-'{'->:s2=>1\n"
        + "s0-'return'->:s3=>1\n"
        + "s0-'}'->:s4=>2\n"
        + "s0-ID->:s1=>1\n";
    output = execParser("T.g", grammarText, "TParser", "TLexer", "s", mixedStatements, true);
    assertEquals(expectedMixedStatements, output);

    // FAILS to match since the last element cannot be matched:
    // EOF cannot be matched to ID '=' '3' ';'.
    final String lastIsIntAssignment = "x=1; a=3;";
    execParser("T.g", grammarText, "TParser", "TLexer", "s", lastIsIntAssignment, true);
    assertEquals("line 1:9 no viable alternative at input '<EOF>'\n",
                 this.stderrDuringParse);

    // FAILS to match since the last element cannot be matched; the parse
    // should not get far enough to print output.
    final String trailingIntAssignment = "x=1; a=b; z=3;";
    execParser("T.g", grammarText, "TParser", "TLexer", "s", trailingIntAssignment, true);
    assertEquals("line 1:14 no viable alternative at input '<EOF>'\n",
                 this.stderrDuringParse);
}
@Test public void testStatLoopNongreedyNecessary() throws Exception {
	// The stat loop in rule s is explicitly nongreedy, so lookahead stops
	// scanning at the end of rule s. In effect the rule says: "match
	// statements until the first ID '=' ID ';' assignment shows up and
	// leave anything after it alone."
	final String grammar =
		"grammar T;\n" +
		"random : s ;" + // call s so s isn't followed by EOF directly
		"s @after {dumpDFA();} : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
		" {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"stat : 'if' '(' INT ')' stat\n" +
		" | 'return' INT ';'\n" +
		" | ID '=' (INT|ID) ';'\n" +
		" | block\n" +
		" ;\n" +
		"block : '{' stat* '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// Printed text plus DFA dump expected whenever the parse ends right
	// after "x=1; a=b;" (used by the first and the last case below).
	final String stopsAfterFirstIdAssignment =
		"x=1;a=b;\n" +
		"Decision 0:\n" +
		"s0-ID->s1\n" +
		"s1-'='->s2\n" +
		"s2-INT->:s3=>1\n" +
		"s2-ID->s4\n" +
		"s4-';'->:s5=>2\n" +
		"\n" +
		"Decision 1:\n" +
		"s0-ID->:s1=>3\n";

	// Case 1: the x=y; that follows the first a=b; assignment is ignored.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "x=1; a=b; x=y;", true);
	assertEquals(stopsAfterFirstIdAssignment, output);

	// Case 2: nested statements; the whole input is consumed.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;", true);
	assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n" +
				 "Decision 0:\n" +
				 "s0-'if'->:s1=>1\n" +
				 "s0-'return'->:s2=>1\n" +
				 "s0-ID->s3\n" +
				 "s3-'='->s4\n" +
				 "s4-ID->s5\n" +
				 "s5-';'->:s6=>2\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'{'->:s2=>4\n" +
				 "s0-'if'->:s1=>1\n" +
				 "s0-'return'->:s4=>2\n" +
				 "s0-ID->:s3=>3\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-'{'->:s2=>1\n" +
				 "s0-'return'->:s3=>1\n" +
				 "s0-'}'->:s4=>2\n" +
				 "s0-ID->:s1=>1\n", output);

	// Case 3: no ID '=' ID ';' anywhere, so the parse FAILS; only the
	// error message is checked and the parser output is discarded.
	execParser("T.g", grammar, "TParser", "TLexer", "s",
			   "x=1; a=3;", true);
	assertEquals("line 1:9 no viable alternative at input '<EOF>'\n",
				 this.stderrDuringParse);

	// Case 4: stops at a=b; and ignores z=3; (does not consume all input).
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"x=1; a=b; z=3;", true);
	assertEquals(stopsAfterFirstIdAssignment, output);
}
@Test public void testHTMLTags() throws Exception {
	// Rule s matches one or more items, each either a tag or any single
	// token; after s finishes the DFAs are dumped and the matched text is
	// printed. Three inputs are parsed and the printed text plus DFA dump
	// is compared for each.
	final String grammar =
		"grammar T;\n" +
		"s @after {dumpDFA();} : (item)+ {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"item : tag | . ;\n" +
		"tag : '<' '/'? .* '>' ;\n" +
		"EQ : '=' ;\n" +
		"COMMA : ',' ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
		"INT : '0'..'9'+;\n" +
		"WS : (' '|'\\n') {skip();} ;\n";

	// Input 1: text between an open and a close tag. The ambiguity
	// reports written to stderr are checked as well.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "<a>foo</a>", true);
	assertEquals("<a>foo</a>\n" +
				 "Decision 0:\n" +
				 "s0-EOF->:s3=>2\n" +
				 "s0-'<'->:s1=>1\n" +
				 "s0-ID->:s2=>1\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'<'->s1\n" +
				 "s0-ID->:s5=>2\n" +
				 "s1-'/'->s2\n" +
				 "s1-ID->s2\n" +
				 "s2-'>'->s3\n" +
				 "s2-ID->s2\n" +
				 "s3-EOF->:s6=>1\n" +
				 "s3-'<'->:s4=>1\n" +
				 "s3-ID->s3\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-'/'->:s2=>1\n" +
				 "s0-ID->:s1=>2\n" +
				 "\n" +
				 "Decision 3:\n" +
				 "s0-'>'->:s2=>2\n" +
				 "s0-ID->:s1=>1\n", output);
	assertEquals("line 1:6 reportAmbiguity d=1: {1..2}:[(26,1,[14 6]), (33,1,[14 6]), (22,1,[14 6]), (20,1,[14 6]), (16,1,[6]), (1,1,[]), (22,2,[14 6]), (26,2,[14 6]), (33,2,[14 6]), (20,2,[14 6]), (16,2,[6]), (1,2,[])],conflictingAlts={1..2}, input=<a>foo<\n" +
				 "line 1:10 reportAmbiguity d=1: {1..2}:[(35,1,[]), (35,2,[])],conflictingAlts={1..2}, input=</a>\n" +
				 "line 1:7 reportAmbiguity d=2: {1..2}:[(26,1,[14 6]), (33,1,[14 6]), (26,2,[14 6]), (33,2,[14 6])],conflictingAlts={1..2}, input=/\n",
				 this.stderrDuringParse);

	// Input 2: an open tag immediately followed by its close tag.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"<a></a>", true);
	assertEquals("<a></a>\n" +
				 "Decision 0:\n" +
				 "s0-EOF->:s2=>2\n" +
				 "s0-'<'->:s1=>1\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'<'->s1\n" +
				 "s1-'/'->s2\n" +
				 "s1-ID->s2\n" +
				 "s2-'>'->s3\n" +
				 "s2-ID->s2\n" +
				 "s3-EOF->:s5=>1\n" +
				 "s3-'<'->:s4=>1\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-'/'->:s2=>1\n" +
				 "s0-ID->:s1=>2\n" +
				 "\n" +
				 "Decision 3:\n" +
				 "s0-'>'->:s2=>2\n" +
				 "s0-ID->:s1=>1\n", output);

	// Input 3: a close tag followed by a tag carrying attributes.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"</b><a src=\"abc\", width=32>", true);
	assertEquals("</b><asrc=\"abc\",width=32>\n" +
				 "Decision 0:\n" +
				 "s0-EOF->:s2=>2\n" +
				 "s0-'<'->:s1=>1\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'<'->s1\n" +
				 "s1-'/'->s2\n" +
				 "s1-ID->s2\n" +
				 "s2-'>'->s3\n" +
				 "s2-'='->s2\n" +
				 "s2-','->s2\n" +
				 "s2-ID->s2\n" +
				 "s2-STR->s2\n" +
				 "s2-INT->s2\n" +
				 "s3-EOF->:s5=>1\n" +
				 "s3-'<'->:s4=>1\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-'/'->:s1=>1\n" +
				 "s0-ID->:s2=>2\n" +
				 "\n" +
				 "Decision 3:\n" +
				 "s0-'>'->:s2=>2\n" +
				 "s0-'='->:s1=>1\n" +
				 "s0-','->:s1=>1\n" +
				 "s0-ID->:s1=>1\n" +
				 "s0-STR->:s1=>1\n" +
				 "s0-INT->:s1=>1\n", output);
}
/** Finds tags embedded in arbitrary token soup and prints the text of each
 *  tag found, then checks the printed text, the DFA dump and stderr.
 *
 *  Lookahead prediction involving '.' can be misleading here: lookahead
 *  that sees into a non-greedy loop treats it as if it were greedy.
 */
@Test
public void testFindHTMLTags() throws Exception {
	String grammar =
		"grammar T;\n" +
		"s @after {dumpDFA();} : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
		"tag : '<' .+ '>' ;\n" +
		"header : 'x' 'y' ;\n" +
		"EQ : '=' ;\n" +
		"COMMA : ',' ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
		"INT : '0'..'9'+;\n" +
		"WS : (' '|'\\n') {skip();} ;\n";

	// Case 1: three tags separated by other tokens; nothing is expected
	// on stderr.
	String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
							  ",=foo <a x= 3>32skidoo<a><img>", true);
	assertEquals("<ax=3>\n" +
				 "<a>\n" +
				 "<img>\n" +
				 "Decision 0:\n" + // .*
				 "s0-'<'->s2\n" +
				 "s0-'='->:s1=>1\n" +
				 "s0-','->:s1=>1\n" +
				 "s0-ID->:s1=>1\n" +
				 "s0-INT->:s1=>1\n" +
				 "s2-ID->s3\n" +
				 "s3-'x'->s4\n" +
				 "s3-'>'->:s5=>2\n" +
				 "s3-INT->s3\n" +
				 "s4-'='->s3\n" +
				 "\n" +
				 "Decision 1:\n" + // (tag|header)
				 "s0-'<'->:s1=>1\n" +
				 "\n" +
				 "Decision 2:\n" + // (...)*
				 "s0-EOF->:s3=>2\n" +
				 "s0-'<'->:s2=>1\n" +
				 "s0-','->:s1=>1\n" +
				 "s0-INT->:s1=>1\n" +
				 "\n" +
				 "Decision 3:\n" + // .+
				 "s0-'x'->:s1=>1\n" +
				 "s0-'>'->:s2=>2\n" +
				 "s0-'='->:s1=>1\n" +
				 "s0-ID->:s1=>1\n" +
				 "s0-INT->:s1=>1\n", found);
	assertEquals(null,
				 this.stderrDuringParse);

	// Case 2: two 'x' tokens (the first token of header) precede the
	// only tag.
	found = execParser("T.g", grammar, "TParser", "TLexer", "s",
					   "x x<a>", true);
	assertEquals("<a>\n" +
				 "Decision 0:\n" +
				 "s0-'x'->s1\n" +
				 "s0-'<'->s4\n" +
				 "s1-'x'->:s2=>1\n" +
				 "s1-'<'->:s3=>1\n" +
				 "s4-ID->s5\n" +
				 "s5-'>'->:s6=>2\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'<'->:s1=>1\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-EOF->:s2=>2\n" +
				 "s0-'x'->:s1=>1\n" +
				 "\n" +
				 "Decision 3:\n" +
				 "s0-'>'->:s2=>2\n" +
				 "s0-ID->:s1=>1\n", found);

	// Case 3: reports "line 1:3 no viable alternative at input '>'".
	// Prediction sees .+ and figures it can match '>', so "<>" predicts
	// tag, which is correct for prediction but tag then does not match;
	// the action prints null for it. Seeing '.' in a lookahead prediction
	// can be misleading.
	found = execParser("T.g", grammar, "TParser", "TLexer", "s",
					   "x <><a>", true);
	assertEquals("null\n" + // doesn't match tag; null
				 "<a>\n" +
				 "Decision 0:\n" +
				 "s0-'x'->s1\n" +
				 "s0-'>'->:s6=>1\n" +
				 "s0-'<'->s3\n" +
				 "s1-'<'->:s2=>1\n" +
				 "s3-'>'->s4\n" +
				 "s3-ID->s4\n" +
				 "s4-'>'->:s7=>2\n" +
				 "s4-'<'->:s5=>2\n" +
				 "\n" +
				 "Decision 1:\n" +
				 "s0-'<'->:s1=>1\n" +
				 "\n" +
				 "Decision 2:\n" +
				 "s0-EOF->:s3=>2\n" +
				 "s0-'x'->:s1=>1\n" +
				 "s0-'>'->:s2=>1\n" +
				 "\n" +
				 "Decision 3:\n" +
				 "s0-'>'->:s1=>2\n" +
				 "s0-ID->:s2=>1\n",
				 found);
	assertEquals("line 1:3 no viable alternative at input '>'\n",
				 this.stderrDuringParse);
}
}

View File

@ -147,265 +147,4 @@ public class TestParserExec extends BaseTest {
assertEquals("a34c\n", found);
}
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
	// Rule block wraps a .* loop in braces; rule s requires EOF right
	// after the block and prints everything consumed so far.
	final String grammar =
		"grammar T;\n" +
		"s : block EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"block : '{' .* '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// Empty block.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "{ }", false);
	assertEquals("{}\n", output);

	// A '{' inside the block is just another token for the .* loop.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"{a b { }", false);
	assertEquals("{ab{}\n", output);

	// FAILS to match since the loop terminates at the first { }.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"{ } a 2 { }", false);
	assertEquals("", output); // should not print output; resync kills rest of input
}
@Test public void testNongreedyLoop() throws Exception {
	// ifstat has a .* loop between '(' and ')' followed by an empty block;
	// rule s prints the consumed text once ifstat ';' EOF has matched.
	final String grammar =
		"grammar T;\n" +
		"s : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"ifstat : 'if' '(' .* ')' block ;\n" +
		"block : '{' '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// Ordinary condition.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "if ( x=34 ) { } ;", false);
	assertEquals("if(x=34){};\n", output);

	// Extra ')' tokens inside the condition.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if ( ))) ) { } ;", false);
	assertEquals("if()))){};\n", output);

	// The first { } should match block, so the loop should stop there.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if (() { } a 2) { } ;", false);
	assertEquals("", output); // should not finish to print output
}
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
	// The .* loop in ifstat is followed by block, which itself contains a
	// (block|.)* loop, so nested braces are possible inside the block.
	final String grammar =
		"grammar T;\n" +
		"s : ifstat ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"ifstat : 'if' '(' .* ')' block ;\n" +
		"block : '{' (block|.)* '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// One nested block followed by more tokens inside the outer block.
	final String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
									 "if ( x=34 ) { {return a} b 34 } ;", false);
	assertEquals("if(x=34){{returna}b34};\n", output);
}
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
	// With EOF at the end of rule s, LL(*) can identify when to stop the
	// stat* loop, so no nongreedy option is needed.
	final String grammar =
		"grammar T;\n" +
		"s : stat* ID '=' ID ';' EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"stat : 'if' '(' INT ')' stat\n" +
		" | 'return' INT ';'\n" +
		" | ID '=' (INT|ID) ';'\n" +
		" | block\n" +
		" ;\n" +
		"block : '{' stat* '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// One statement, then the closing ID '=' ID ';'.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "x=1; a=b;", false);
	assertEquals("x=1;a=b;\n", output);

	// Nested statements before the closing assignment.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;", false);
	assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", output);

	// FAILS to match since it can't match the last element:
	// EOF can't be matched against ID '=' '3' ';'.
	execParser("T.g", grammar, "TParser", "TLexer", "s",
			   "x=1; a=3;", false);
	assertEquals("line 1:9 no viable alternative at input ''\n",
				 this.stderrDuringParse);

	// FAILS to match since it can't match the last element; the parse
	// should not get as far as printing output.
	execParser("T.g", grammar, "TParser", "TLexer", "s",
			   "x=1; a=b; z=3;", false);
	assertEquals("line 1:14 no viable alternative at input ''\n",
				 this.stderrDuringParse);
}
@Test public void testStatLoopNongreedyNecessary() throws Exception {
	// The stat loop in rule s is explicitly nongreedy, so lookahead stops
	// scanning at the end of rule s. In effect the rule says: "match
	// statements until the first ID '=' ID ';' assignment shows up and
	// leave anything after it alone."
	final String grammar =
		"grammar T;\n" +
		"random : s ;" + // call s so s isn't followed by EOF directly
		"s : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
		" {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"stat : 'if' '(' INT ')' stat\n" +
		" | 'return' INT ';'\n" +
		" | ID '=' (INT|ID) ';'\n" +
		" | block\n" +
		" ;\n" +
		"block : '{' stat* '}' ;\n"+
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// The x=y; that follows the first a=b; assignment is ignored.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "x=1; a=b; x=y;", false);
	assertEquals("x=1;a=b;\n", output);

	// Nested statements; the whole input is consumed.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;", false);
	assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", output);

	// No ID '=' ID ';' anywhere, so the parse FAILS; only the error
	// message is checked and the parser output is discarded.
	execParser("T.g", grammar, "TParser", "TLexer", "s",
			   "x=1; a=3;", false);
	assertEquals("line 1:9 no viable alternative at input ''\n",
				 this.stderrDuringParse);

	// Stops at a=b; and ignores z=3; (should not finish all input).
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"x=1; a=b; z=3;", false);
	assertEquals("x=1;a=b;\n", output);
}
@Test public void testHTMLTags() throws Exception {
	// Rule a matches one or more tags and then prints the consumed text;
	// each tag is '<', an optional '/', a .* loop and '>'.
	final String grammar =
		"grammar T;\n" +
		"a : tag+ {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"tag : '<' '/'? .* '>' ;\n" +
		"EQ : '=' ;\n" +
		"COMMA : ',' ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
		"INT : '0'..'9'+;\n" +
		"WS : (' '|'\\n') {skip();} ;\n";

	// Text between an open and a close tag.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "a",
							   "<a>foo</a>", false);
	assertEquals("<a>foo</a>\n", output);

	// Open tag immediately followed by its close tag.
	output = execParser("T.g", grammar, "TParser", "TLexer", "a",
						"<a></a>", false);
	assertEquals("<a></a>\n", output);

	// Close tag followed by a tag carrying attributes.
	output = execParser("T.g", grammar, "TParser", "TLexer", "a",
						"</b><a src=\"abc\", width=32>", false);
	assertEquals("</b><asrc=\"abc\",width=32>\n", output);
}
/** Finds tags embedded in arbitrary tokens and prints the text of each one.
 *
 *  Lookahead prediction involving '.' can be misleading here: lookahead
 *  that sees into a non-greedy loop treats it as if it were greedy.
 */
@Test public void testFindHTMLTags() throws Exception {
	final String grammar =
		"grammar T;\n" +
		"a : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
		"tag : '<' .+ '>' ;\n" +
		"header : 'x' 'y' ;\n" +
		"EQ : '=' ;\n" +
		"COMMA : ',' ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
		"INT : '0'..'9'+;\n" +
		"WS : (' '|'\\n') {skip();} ;\n";

	// Three tags separated by other tokens.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "a",
							   ",=foo <a x= 3>32skidoo<a><img>", false);
	assertEquals("<ax=3>\n" +
				 "<a>\n" +
				 "<img>\n", output);

	// Two 'x' tokens (the first token of header) precede the only tag.
	output = execParser("T.g", grammar, "TParser", "TLexer", "a",
						"x x<a>", false);
	assertEquals("<a>\n", output);

	// This input gets "line 1:3 no viable alternative at input '>'".
	// Prediction sees .+ and figures it can match '>', so "<>" predicts
	// tag, which is correct for prediction but tag then does not match.
	// Seeing '.' in a lookahead prediction can be misleading.
	output = execParser("T.g", grammar, "TParser", "TLexer", "a",
						"x <><a>", false);
	assertEquals("null\n" + // doesn't match tag; null
				 "<a>\n", output);
}
/** A .* loop sitting at the very end of rule ifstat, with only EOF after
 *  the rule invocation. See comment on
 *  testNongreedyLoopEndOfRuleStuffFollowing.
 */
@Test public void testNongreedyLoopEndOfRule() throws Exception {
	final String grammar =
		"grammar T;\n" +
		"s : ifstat EOF {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"ifstat : 'if' '(' INT ')' .* ;\n" +
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	// Two identifiers trail the condition.
	String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
							   "if ( 34 ) a b", false);
	assertEquals("if(34)ab\n", output);

	// A mix of parentheses, identifiers and '=' trails the condition.
	output = execParser("T.g", grammar, "TParser", "TLexer", "s",
						"if ( 34 ))) ) ( a = = b(", false);
	assertEquals("if(34))))(a==b(\n", output);
}
/** When .* sits at the end of a rule, no tokens predict the exit branch of
 *  the loop, because the exit immediately reaches the end of the rule.
 *  Non-greedy loops never consume more tokens than exist following the .*
 *  within that same rule. Here that means the greedy loop always wins and
 *  keeps consuming tokens until end of file. Unfortunately, the '.' in
 *  rule s then cannot match, which leads to a syntax error.
 */
@Test public void testNongreedyLoopEndOfRuleStuffFollowing() throws Exception {
	final String grammar =
		"grammar T;\n" +
		"s : ifstat '.' {System.out.println(_input.toString(0,_input.index()-1));} ;\n" +
		"ifstat : 'if' '(' INT ')' .* ;\n" +
		"EQ : '=' ;\n" +
		"INT : '0'..'9'+ ;\n" +
		"ID : 'a'..'z'+ ;\n" +
		"WS : (' '|'\\n')+ {skip();} ;\n";

	final String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
									 "if ( 34 ) a b .", false);
	// Both the printed text and the syntax error on stderr are checked.
	assertEquals("if(34)ab.\n", output);
	assertEquals("line 1:15 no viable alternative at input ''\n",
				 this.stderrDuringParse);
}
}

View File

@ -155,8 +155,8 @@ public class TestSemPredEvalParser extends BaseTest {
"alt 1\n" +
"alt 1\n";
assertEquals(expecting, found);
assertEquals("line 1:0 reportAmbiguity {1..3}:[(6,1,[]), (6,2,[]), (6,3,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n" +
"line 1:0 reportInsufficientPredicates {1..3}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,1,[],up=1), (1,1,[],up=1), (6,2,[],up=1), (1,2,[],up=1), (6,3,[],{1:0}?,up=1), (1,3,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n",
assertEquals("line 1:0 reportAmbiguity d=0: {1..3}:[(6,1,[]), (6,2,[]), (6,3,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n" +
"line 1:0 reportInsufficientPredicates d=0: {1..3}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,1,[],up=1), (1,1,[],up=1), (6,2,[],up=1), (1,2,[],up=1), (6,3,[],{1:0}?,up=1), (1,3,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={1..3}, input=x\n",
this.stderrDuringParse);
}
@ -187,8 +187,8 @@ public class TestSemPredEvalParser extends BaseTest {
"alt 2\n" +
"alt 2\n";
assertEquals(expecting, found);
assertEquals("line 1:4 reportAmbiguity {2..4}:[(10,2,[]), (10,3,[]), (10,4,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n" +
"line 1:4 reportInsufficientPredicates {2..4}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,2,[],up=1), (10,2,[],up=1), (1,2,[],up=1), (6,3,[],up=1), (10,3,[],up=1), (1,3,[],up=1), (6,4,[],{1:0}?,up=1), (10,4,[],{1:0}?,up=1), (1,4,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n",
assertEquals("line 1:4 reportAmbiguity d=0: {2..4}:[(10,2,[]), (10,3,[]), (10,4,[],{1:0}?)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n" +
"line 1:4 reportInsufficientPredicates d=0: {2..4}:[{-1:-1}?, {-1:-1}?, {-1:-1}?, {-1:-1}?, {1:0}?], [(6,2,[],up=1), (10,2,[],up=1), (1,2,[],up=1), (6,3,[],up=1), (10,3,[],up=1), (1,3,[],up=1), (6,4,[],{1:0}?,up=1), (10,4,[],{1:0}?,up=1), (1,4,[],{1:0}?,up=1)],hasSemanticContext=true,conflictingAlts={2..4}, input=x\n",
this.stderrDuringParse);
}