got parser nongreedy loops going

[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9032]
This commit is contained in:
parrt 2011-08-12 19:51:12 -08:00
parent f44c49a8b8
commit 085dd05bf1
25 changed files with 495 additions and 131 deletions

View File

@ -235,7 +235,7 @@ public class BufferedTokenStream implements TokenStream {
}
public String toString(int start, int stop) {
if ( start<0 || stop<0 ) return null;
if ( start<0 || stop<0 ) return "";
if ( p == -1 ) setup();
if ( stop>=tokens.size() ) stop = tokens.size()-1;
StringBuffer buf = new StringBuffer();

View File

@ -57,8 +57,8 @@ public class NoViableAltException extends RecognitionException {
if ( recognizer!=null ) {
TokenStream tokens = recognizer.input;
String bad = tokens.toString(startIndex, index);
return "NoViableAltException(input=\""+bad+"\" last token type is "+getUnexpectedType();
return "NoViableAltException(input=\""+bad+"\" last token type is "+getUnexpectedType()+")";
}
return "NoViableAltException(last token type is "+getUnexpectedType();
return "NoViableAltException(last token type is "+getUnexpectedType()+")";
}
}

View File

@ -121,9 +121,11 @@ public abstract class ATNSimulator {
int ndecisions = toInt(data[p++]);
for (int i=1; i<=ndecisions; i++) {
int s = toInt(data[p++]);
int isGreedy = toInt(data[p++]);
DecisionState decState = (DecisionState)atn.states.get(s);
atn.decisionToState.add((DecisionState) decState);
atn.decisionToState.add(decState);
decState.decision = i-1;
decState.isGreedy = isGreedy==1;
}
return atn;
}

View File

@ -39,6 +39,8 @@ public class DecisionState extends ATNState {
public int decision = -1;
public boolean isGreedy = true;
@Override
public int getNumberOfTransitions() { return transitions.size(); }

View File

@ -29,7 +29,7 @@
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.RuleContext;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.*;
@ -51,10 +51,9 @@ public class LL1Analyzer {
// System.out.println("LOOK("+s.stateNumber+")");
if ( s==null ) return null;
IntervalSet[] look = new IntervalSet[s.getNumberOfTransitions()+1];
Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
for (int alt=1; alt<=s.getNumberOfTransitions(); alt++) {
look[alt] = new IntervalSet();
lookBusy.clear();
Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
_LOOK(s.transition(alt - 1).target, RuleContext.EMPTY, look[alt], lookBusy);
}
return look;
@ -85,7 +84,7 @@ public class LL1Analyzer {
int n = s.getNumberOfTransitions();
for (int i=0; i<n; i++) {
Transition t = s.transition(i);
if ( t instanceof RuleTransition ) {
if ( t.getClass() == RuleTransition.class ) {
RuleContext newContext =
new RuleContext(ctx, s.stateNumber, t.target.stateNumber);
_LOOK(t.target, newContext, look, lookBusy);
@ -93,6 +92,9 @@ public class LL1Analyzer {
else if ( t.isEpsilon() ) {
_LOOK(t.target, ctx, look, lookBusy);
}
else if ( t.getClass() == WildcardTransition.class ) {
look.addAll( IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType) );
}
else {
// System.out.println("adding "+ t);
IntervalSet set = t.label();

View File

@ -138,7 +138,7 @@ public class LexerATNSimulator extends ATNSimulator {
t = input.LA(1);
}
if ( prevAcceptState==null ) {
System.out.println("!!! no viable alt in dfa");
if ( debug ) System.out.println("!!! no viable alt in dfa");
return -1;
}
if ( recog!=null ) {
@ -188,7 +188,7 @@ public class LexerATNSimulator extends ATNSimulator {
// if we reach lexer accept state, toss out any configs in rest
// of configs work list associated with this rule (config.alt);
// that rule is done. this is how we cut off nongreedy .+ loops.
deleteConfigsForAlt(closure, ci, c.alt);
deleteWildcardConfigsForAlt(closure, ci, c.alt);
// int j=ci+1;
// while ( j<closure.size() ) {
// ATNConfig c2 = closure.get(j);
@ -252,7 +252,7 @@ public class LexerATNSimulator extends ATNSimulator {
else System.out.println("ACTION "+ruleIndex+":"+ruleIndex);
}
int actionIndex = atn.ruleToActionIndex[ruleIndex];
if ( actionIndex>=0 ) recog.action(null, ruleIndex, actionIndex);
if ( actionIndex>=0 && recog!=null ) recog.action(null, ruleIndex, actionIndex);
return ttype;
}
@ -290,14 +290,14 @@ public class LexerATNSimulator extends ATNSimulator {
return null;
}
public void deleteConfigsForAlt(OrderedHashSet<ATNConfig> closure, int ci, int alt) {
public void deleteWildcardConfigsForAlt(OrderedHashSet<ATNConfig> closure, int ci, int alt) {
int j=ci+1;
while ( j<closure.size() ) {
ATNConfig c = closure.get(j);
boolean isWildcard = c.state.getClass() == ATNState.class &&
c.state.transition(0).getClass() == WildcardTransition.class;
if ( c.alt == alt && isWildcard ) {
System.out.println("kill "+c);
// System.out.println("kill "+c);
closure.remove(j);
}
else j++;

View File

@ -64,8 +64,6 @@ public class ParserATNSimulator extends ATNSimulator {
*/
protected RuleContext outerContext;
protected Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
public ParserATNSimulator(ATN atn) {
super(atn);
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
@ -89,7 +87,7 @@ public class ParserATNSimulator extends ATNSimulator {
ATNState startState = atn.decisionToState.get(decision);
decisionToDFA[decision] = dfa = new DFA(startState);
dfa.decision = decision;
return predictATN(dfa, input, decision, outerContext, false);
return predictATN(dfa, input, outerContext, false);
}
else {
//dump(dfa);
@ -102,7 +100,6 @@ public class ParserATNSimulator extends ATNSimulator {
}
public int predictATN(DFA dfa, TokenStream input,
int decision,
RuleContext outerContext,
boolean useContext)
{
@ -114,7 +111,7 @@ public class ParserATNSimulator extends ATNSimulator {
RuleContext ctx = RuleContext.EMPTY;
if ( useContext ) ctx = outerContext;
OrderedHashSet<ATNConfig> s0_closure =
computeStartState(dfa.atnStartState, ctx);
computeStartState(dfa.decision, dfa.atnStartState, ctx);
dfa.s0 = addDFAState(dfa, s0_closure);
if ( prevAccept!=null ) {
dfa.s0.isAcceptState = true;
@ -138,7 +135,7 @@ public class ParserATNSimulator extends ATNSimulator {
public int matchATN(TokenStream input, ATNState startState) {
DFA dfa = new DFA(startState);
RuleContext ctx = RuleContext.EMPTY;
OrderedHashSet<ATNConfig> s0_closure = computeStartState(startState, ctx);
OrderedHashSet<ATNConfig> s0_closure = computeStartState(dfa.decision, startState, ctx);
return execATN(input, dfa, input.index(), s0_closure, false);
}
@ -222,7 +219,7 @@ public class ParserATNSimulator extends ATNSimulator {
t = input.LA(1);
}
if ( prevAcceptState==null ) {
System.out.println("!!! no viable alt in dfa");
if ( debug ) System.out.println("!!! no viable alt in dfa");
return -1;
}
if ( dfa_debug ) System.out.println("DFA decision "+dfa.decision+
@ -236,7 +233,7 @@ public class ParserATNSimulator extends ATNSimulator {
OrderedHashSet<ATNConfig> s0,
boolean useContext)
{
if ( debug ) System.out.println("ATN decision "+dfa.decision+" exec LA(1)=="+input.LT(1));
if ( debug ) System.out.println("execATN decision "+dfa.decision+" exec LA(1)=="+input.LT(1));
ATN_failover++;
OrderedHashSet<ATNConfig> closure = new OrderedHashSet<ATNConfig>();
@ -250,6 +247,10 @@ public class ParserATNSimulator extends ATNSimulator {
return prevAccept.alt;
}
DecisionState decState = null;
if ( atn.decisionToState.size()>0 ) decState = atn.decisionToState.get(dfa.decision);
if ( debug ) System.out.println("decision state = "+decState);
prevAccept = null;
prevAcceptIndex = -1;
OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();
@ -265,7 +266,8 @@ public class ParserATNSimulator extends ATNSimulator {
Transition trans = c.state.transition(ti);
ATNState target = getReachableTarget(trans, t);
if ( target!=null ) {
closure(new ATNConfig(c, target), reach);
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
closure(new ATNConfig(c, target), reach, decState, closureBusy);
}
}
}
@ -277,7 +279,7 @@ public class ParserATNSimulator extends ATNSimulator {
ATNState loc = atn.states.get(outerContext.s);
String rname = "n/a";
if ( parser !=null ) rname = parser.getRuleNames()[loc.ruleIndex];
System.out.println("AMBIG in "+rname+" for alt "+ambigAlts+" upon "+
System.out.println("AMBIG dec "+dfa.decision+" in "+rname+" for alt "+ambigAlts+" upon "+
input.toString(startIndex, input.index()));
System.out.println("REACH="+reach);
}
@ -289,7 +291,16 @@ public class ParserATNSimulator extends ATNSimulator {
// System.out.println("AMBIG orig="+outerContext.toString((BaseRecognizer)recog)+" for alt "+ambigAlts+" upon "+
// input.toString(startIndex, input.index()));
if ( !userWantsCtxSensitive || useContext ) {
resolveToMinAlt(reach, ambigAlts);
// resolve ambiguity
if ( decState.isGreedy ) {
// if greedy, resolve in favor of alt coming first
resolveToMinAlt(reach, ambigAlts);
}
else {
// if nongreedy loop, always pick exit branch to match
// what follows instead of re-entering loop
resolveNongreedyToExitBranch(reach, ambigAlts);
}
}
else {
return retryWithContext(input, dfa, startIndex, outerContext,
@ -309,6 +320,18 @@ public class ParserATNSimulator extends ATNSimulator {
return uniqueAlt;
}
if ( decState!=null && !decState.isGreedy ) {
// if we reached end of rule via exit branch, we matched
int exitAlt = 2;
ATNConfig cstop = configWithAltAtStopState(reach, exitAlt);
if ( cstop!=null ) {
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
prevAccept = cstop;
prevAcceptIndex = input.index();
break;
}
}
if ( reach.size()==0 ) {
break;
}
@ -323,7 +346,7 @@ public class ParserATNSimulator extends ATNSimulator {
OrderedHashSet<ATNConfig> tmp = reach;
reach = closure;
closure = tmp;
reach.clear(); // THIS MIGHT BE SLOW! kills each element; realloc might be faster
reach.clear(); // TODO: THIS MIGHT BE SLOW! kills each element; realloc might be faster
} while ( true );
if ( prevAccept==null ) {
@ -339,7 +362,7 @@ public class ParserATNSimulator extends ATNSimulator {
protected int resolveToMinAlt(OrderedHashSet<ATNConfig> reach, Set<Integer> ambigAlts) {
int min = getMinAlt(ambigAlts);
// if predicting, create DFA accept state for resolved alt
// create DFA accept state for resolved alt
ambigAlts.remove(min);
// kill dead alts so we don't chase them ever
killAlts(ambigAlts, reach);
@ -347,6 +370,17 @@ public class ParserATNSimulator extends ATNSimulator {
return min;
}
protected int resolveNongreedyToExitBranch(OrderedHashSet<ATNConfig> reach, Set<Integer> ambigAlts) {
// exit branch is alt 2 always; alt 1 is entry or loopback branch
// since we're predicting, create DFA accept state for exit alt
int exitAlt = 2;
ambigAlts.remove(exitAlt);
// kill dead alts so we don't chase them ever
killAlts(ambigAlts, reach);
if ( debug ) System.out.println("RESOLVED TO "+reach);
return exitAlt;
}
public int retryWithContext(TokenStream input,
DFA dfa,
int startIndex,
@ -373,7 +407,7 @@ public class ParserATNSimulator extends ATNSimulator {
// if it comes back with conflict, we have a true ambiguity
input.seek(startIndex); // rewind
DFA ctx_dfa = new DFA(dfa.atnStartState);
int ctx_alt = predictATN(ctx_dfa, input, dfa.decision, originalContext, true);
int ctx_alt = predictATN(ctx_dfa, input, originalContext, true);
if ( debug ) System.out.println("retry predicts "+ctx_alt+" vs "+getMinAlt(ambigAlts)+
" with conflict="+ctx_dfa.conflict+
" dfa="+ctx_dfa);
@ -408,16 +442,20 @@ public class ParserATNSimulator extends ATNSimulator {
return predictedAlt;
}
public OrderedHashSet<ATNConfig> computeStartState(ATNState p, RuleContext ctx) {
public OrderedHashSet<ATNConfig> computeStartState(int decision, ATNState p, RuleContext ctx) {
RuleContext initialContext = ctx; // always at least the implicit call to start rule
OrderedHashSet<ATNConfig> configs = new OrderedHashSet<ATNConfig>();
prevAccept = null; // might reach end rule; track
prevAcceptIndex = -1;
DecisionState decState = null;
if ( atn.decisionToState.size()>0 ) decState = atn.decisionToState.get(decision);
for (int i=0; i<p.getNumberOfTransitions(); i++) {
ATNState target = p.transition(i).target;
ATNConfig c = new ATNConfig(target, i+1, initialContext);
closure(c, configs);
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
closure(c, configs, decState, closureBusy);
}
return configs;
@ -426,7 +464,6 @@ public class ParserATNSimulator extends ATNSimulator {
public ATNState getReachableTarget(Transition trans, int ttype) {
if ( trans instanceof AtomTransition ) {
AtomTransition at = (AtomTransition)trans;
// boolean not = trans instanceof NotAtomTransition;
if ( at.label == ttype ) {
return at.target;
}
@ -438,18 +475,16 @@ public class ParserATNSimulator extends ATNSimulator {
return st.target;
}
}
// TODO else if ( trans instanceof WildcardTransition && t!=Token.EOF ) {
// ATNConfig targetConfig = new ATNConfig(c, trans.target);
// closure(input, targetConfig, reach);
// }
else if ( trans instanceof RangeTransition ) {
RangeTransition rt = (RangeTransition)trans;
if ( ttype>=rt.from && ttype<=rt.to ) return rt.target;
}
else if ( trans instanceof WildcardTransition && ttype!=Token.EOF ) {
return trans.target;
}
return null;
}
protected void closure(ATNConfig config, OrderedHashSet<ATNConfig> configs) {
closureBusy.clear();
closure(config, configs, closureBusy);
}
/* TODO: If we are doing predicates, there is no point in pursuing
closure operations if we reach a DFA state that uniquely predicts
alternative. We will not be caching that DFA state and it is a
@ -459,6 +494,7 @@ public class ParserATNSimulator extends ATNSimulator {
protected void closure(ATNConfig config,
OrderedHashSet<ATNConfig> configs,
DecisionState decState,
Set<ATNConfig> closureBusy)
{
if ( debug ) System.out.println("closure("+config+")");
@ -478,11 +514,19 @@ public class ParserATNSimulator extends ATNSimulator {
// gotten that context AFTER having fallen off a rule.
// Make sure we track that we are now out of context.
c.reachesIntoOuterContext = config.reachesIntoOuterContext;
closure(c, configs, closureBusy);
closure(c, configs, decState, closureBusy);
return;
}
else {
// else if we have no context info, just chase follow links
// else if we have no context info, just chase follow links (if greedy)
if ( decState!=null && !decState.isGreedy ) {
if ( debug ) System.out.println("nongreedy decision state = "+decState);
if ( debug ) System.out.println("NONGREEDY at stop state of "+
parser.getRuleNames()[config.state.ruleIndex]);
// don't pursue past end of a rule for any nongreedy decision
configs.add(config);
return;
}
if ( debug ) System.out.println("FALLING off rule "+
parser.getRuleNames()[config.state.ruleIndex]);
}
@ -504,7 +548,7 @@ public class ParserATNSimulator extends ATNSimulator {
// preds if this is > 0.
c.reachesIntoOuterContext++;
}
closure(c, configs, closureBusy);
closure(c, configs, decState, closureBusy);
}
}
}
@ -611,13 +655,15 @@ public class ParserATNSimulator extends ATNSimulator {
return alt;
}
public RuleContext getCurrentExecContext(ATNConfig config) {
RuleContext ctx = config.context; // use context created after entry into interp
if ( ctx == RuleContext.EMPTY ) {
if ( config.reachesIntoOuterContext==0 ) ctx = outerContext;
else ctx = null; // no context if we in outer context
public ATNConfig configWithAltAtStopState(Collection<ATNConfig> configs, int alt) {
for (ATNConfig c : configs) {
if ( c.alt == alt ) {
if ( c.state.getClass() == RuleStopState.class ) {
return c;
}
}
}
return ctx;
return null;
}
public Set<Integer> getAmbiguousAlts(OrderedHashSet<ATNConfig> configs) {

View File

@ -1,17 +1,7 @@
grammar T;
options {output=AST;}
s : e_[0] EOF ;
e_[int _p]
: e_primary { }
( {19 >= $_p}? '['^ e_[0] ']'! )*
;
e_primary
: INT
| 'new'^ ID ('[' INT ']')+
;
ID : ('a'..'z'|'A'..'Z'|'_'|'$')+;
s : ifstat '.' {System.out.println(input.toString(0,input.index()-1));} ;
ifstat : 'if' '(' INT ')' .* ;
EQ : '=' ;
INT : '0'..'9'+ ;
WS : (' '|'\n') {skip();} ;
ID : 'a'..'z'+ ;
WS : (' '|'\n')+ {skip();} ;

View File

@ -11,7 +11,7 @@ class TestJava {
public static long lexerTime = 0;
public static boolean profile = false;
public static JavaLexer lexer;
// public static JavaParser parser = null;
public static JavaParser parser = null;
public static void main(String[] args) {
try {
@ -98,13 +98,13 @@ class TestJava {
if ( true ) {
// Create a parser that reads from the scanner
// if ( parser==null ) {
// //parser = new JavaParser(tokens);
//// parser.getInterpreter().setContextSensitive(true);
// }
//parser.setTokenStream(tokens);
if ( parser==null ) {
parser = new JavaParser(tokens);
// parser.getInterpreter().setContextSensitive(true);
}
parser.setTokenStream(tokens);
// start parsing at the compilationUnit rule
// parser.compilationUnit();
parser.compilationUnit();
//System.err.println("finished "+f);
}
}

View File

@ -4,6 +4,10 @@ public class TestT {
public static void main(String[] args) throws Exception {
TLexer t = new TLexer(new ANTLRFileStream(args[0]));
CommonTokenStream tokens = new CommonTokenStream(t);
tokens.fill();
for (Object tok : tokens.getTokens()) {
System.out.println(tok);
}
TParser p = new TParser(tokens);
ParserRuleContext ret = p.s();
// System.out.println(((Tree)ret.tree).toStringTree());

View File

@ -270,7 +270,7 @@ case <i>:
StarBlock(choice, alts, sync) ::= <<
int _alt<choice.uniqueID> = _interp.adaptivePredict(input,<choice.decision>,_ctx);
while ( _alt<choice.uniqueID>!=<choice.exitAlt> ) {
while ( _alt<choice.uniqueID>!=<choice.exitAlt> && _alt<choice.uniqueID>!=-1 ) {
switch ( _alt<choice.uniqueID> ) {
<alts:{alt|
case <i>:
@ -293,7 +293,7 @@ case <i>:
<error>
}
_alt<choice.uniqueID> = _interp.adaptivePredict(input,<choice.decision>,_ctx);
} while ( _alt<choice.uniqueID>!=<choice.exitAlt> );
} while ( _alt<choice.uniqueID>!=<choice.exitAlt> && _alt<choice.uniqueID>!=-1 );
>>
Sync(s) ::= "sync(<s.expecting.name>);"

View File

@ -240,13 +240,13 @@ public class Tool {
g.implicitLexer = lexerg;
lexerg.implicitLexerOwner = g;
processNonCombinedGrammar(lexerg, gencode);
System.out.println("lexer tokens="+lexerg.tokenNameToTypeMap);
System.out.println("lexer strings="+lexerg.stringLiteralToTypeMap);
// System.out.println("lexer tokens="+lexerg.tokenNameToTypeMap);
// System.out.println("lexer strings="+lexerg.stringLiteralToTypeMap);
}
}
if ( g.implicitLexer!=null ) g.importVocab(g.implicitLexer);
System.out.println("tokens="+g.tokenNameToTypeMap);
System.out.println("strings="+g.stringLiteralToTypeMap);
// System.out.println("tokens="+g.tokenNameToTypeMap);
// System.out.println("strings="+g.stringLiteralToTypeMap);
processNonCombinedGrammar(g, gencode);
}

View File

@ -178,8 +178,9 @@ public class ATNSerializer {
}
int ndecisions = atn.decisionToState.size();
data.add(ndecisions);
for (ATNState decStartState : atn.decisionToState) {
for (DecisionState decStartState : atn.decisionToState) {
data.add(decStartState.stateNumber);
data.add(decStartState.isGreedy?1:0);
}
return data;
}
@ -239,7 +240,8 @@ public class ATNSerializer {
int ndecisions = ATNSimulator.toInt(data[p++]);
for (int i=1; i<=ndecisions; i++) {
int s = ATNSimulator.toInt(data[p++]);
buf.append((i-1)+":"+s+"\n");
int isGreedy = ATNSimulator.toInt(data[p++]);
buf.append((i-1)+":"+s+" "+isGreedy+"\n");
}
return buf.toString();
}

View File

@ -93,7 +93,7 @@ public class ParserATNFactory implements ATNFactory {
epsilon(blk.right, stop);
Handle h = new Handle(start, stop);
ATNPrinter ser = new ATNPrinter(g, h.left);
System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
// System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
ruleAST.atnState = start;
return h;
}
@ -370,7 +370,8 @@ public class ParserATNFactory implements ATNFactory {
epsilon(blkEnd, loop); // blk can see loop back
BlockAST blkAST = (BlockAST)plusAST.getChild(0);
if ( !g.isLexer() || isGreedy(blkAST) ) {
loop.isGreedy = isGreedy(blkAST);
if ( !g.isLexer() || loop.isGreedy ) {
epsilon(loop, blkStart); // loop back to start
epsilon(loop, end); // or exit
}
@ -405,7 +406,8 @@ public class ParserATNFactory implements ATNFactory {
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
BlockAST blkAST = (BlockAST)starAST.getChild(0);
if ( !g.isLexer() || isGreedy(blkAST) ) {
entry.isGreedy = isGreedy(blkAST);
if ( !g.isLexer() || entry.isGreedy ) {
epsilon(entry, blkStart); // loop enter edge (alt 1)
epsilon(entry, end); // bypass loop edge (alt 2)
}

View File

@ -177,14 +177,15 @@ grammarSpec
: ^( GRAMMAR ID {grammarName=$ID.text;} DOC_COMMENT?
{discoverGrammar((GrammarRootAST)$GRAMMAR, $ID);}
prequelConstructs
{finishPrequels($prequelConstructs.start);}
{finishPrequels($prequelConstructs.firstOne);}
rules mode*
{finishGrammar((GrammarRootAST)$GRAMMAR, $ID);}
)
;
prequelConstructs
: prequelConstruct*
prequelConstructs returns [GrammarAST firstOne=null]
: {$firstOne=$start;} prequelConstruct+
|
;
prequelConstruct
@ -205,8 +206,8 @@ boolean block = inContext("BLOCK ...");
}
: ^(a=ASSIGN ID optionValue)
{
if ( rule ) ruleOption($ID, $optionValue.v);
else if ( block ) blockOption($ID, $optionValue.v);
if ( block ) blockOption($ID, $optionValue.v); // most specific first
else if ( rule ) ruleOption($ID, $optionValue.v);
else grammarOption($ID, $optionValue.v);
}
;

View File

@ -291,10 +291,13 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
return t;
}
/**
* Match (RULE ID (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*)))
*/
public static boolean hasImmediateRecursiveRuleRefs(GrammarAST t, String ruleName) {
if ( t==null ) return false;
for (GrammarAST rref : t.getNodesWithType(RULE_REF)) {
if ( rref.getText().equals(ruleName) ) return true;
if ( rref.getChildIndex()==0 && rref.getText().equals(ruleName) ) return true;
}
return false;
}

View File

@ -59,6 +59,8 @@ public void otherAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {}
public void setReturnValues(GrammarAST t) {}
}
@rulecatch { }
// TODO: can get parser errors for not matching pattern; make them go away
public
rec_rule returns [boolean isLeftRec]

View File

@ -170,9 +170,11 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
@Override
public void finishPrequels(GrammarAST firstPrequel) {
List<GrammarAST> options = firstPrequel.getNodesWithType(OPTIONS);
List<GrammarAST> imports = firstPrequel.getNodesWithType(IMPORT);
List<GrammarAST> tokens = firstPrequel.getNodesWithType(TOKENS);
if ( firstPrequel==null ) return;
GrammarAST parent = (GrammarAST)firstPrequel.parent;
List<GrammarAST> options = parent.getAllChildrenWithType(OPTIONS);
List<GrammarAST> imports = parent.getAllChildrenWithType(IMPORT);
List<GrammarAST> tokens = parent.getAllChildrenWithType(TOKENS);
checkNumPrequels(options, imports, tokens);
}

View File

@ -103,6 +103,10 @@ public class ErrorManager {
messageST.add("exception", msg.e);
messageST.add("stackTrace", msg.e.getStackTrace());
}
else {
messageST.add("exception", null); // avoid ST error msg
messageST.add("stackTrace", null);
}
boolean locationValid = false;
if (msg.line != -1) {

View File

@ -70,6 +70,17 @@ public class GrammarAST extends CommonTree {
return getNodesWithType(IntervalSet.of(ttype));
}
public List<GrammarAST> getAllChildrenWithType(int type) {
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
for (int i = 0; children!=null && i < children.size(); i++) {
Tree t = (Tree) children.get(i);
if ( t.getType()==type ) {
nodes.add((GrammarAST)t);
}
}
return nodes;
}
public List<GrammarAST> getNodesWithType(IntervalSet types) {
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
List<GrammarAST> work = new LinkedList<GrammarAST>();

View File

@ -98,7 +98,7 @@ public class GrammarTransformPipeline {
isLeftRec = leftRecursiveRuleWalker.rec_rule();
}
catch (RecognitionException re) {
tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "bad ast structure", re);
isLeftRec = false; // didn't match; oh well
}
if ( !isLeftRec ) return;

View File

@ -499,18 +499,24 @@ public class TestATNParserPrediction extends BaseTest {
ATN atn = f.createATN();
DOTGenerator dot = new DOTGenerator(g);
System.out.println(dot.getDOT(atn.ruleToStartState[g.getRule("a").index]));
Rule r = g.getRule("b");
Rule r = g.getRule("a");
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("b");
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("e");
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("ifstat");
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
r = g.getRule("block");
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
// Check ATN prediction
ParserATNSimulator interp = new ParserATNSimulator(atn);
TokenStream input = new IntTokenStream(types);
ATNState startState = atn.decisionToState.get(decision);
DFA dfa = new DFA(startState);
int alt = interp.predictATN(dfa, input, decision, RuleContext.EMPTY, false);
int alt = interp.predictATN(dfa, input, RuleContext.EMPTY, false);
System.out.println(dot.getDOT(dfa, false));
@ -554,7 +560,7 @@ public class TestATNParserPrediction extends BaseTest {
DFA dfa = new DFA(startState);
// Rule r = g.getRule(ruleName);
//ATNState startState = atn.ruleToStartState.get(r);
interp.predictATN(dfa, input, 0, ctx, false);
interp.predictATN(dfa, input, ctx, false);
}
catch (NoViableAltException nvae) {
nvae.printStackTrace(System.err);

View File

@ -107,7 +107,7 @@ public class TestATNSerialization extends BaseTest {
"8->2 EPSILON 0,0,0\n" +
"8->4 EPSILON 0,0,0\n" +
"9->1 EPSILON 0,0,0\n" +
"0:8\n";
"0:8 1\n";
ATN atn = createATN(g);
String result = ATNSerializer.getDecoded(g, atn);
assertEquals(expecting, result);
@ -155,7 +155,7 @@ public class TestATNSerialization extends BaseTest {
"14->4 EPSILON 0,0,0\n" +
"14->8 EPSILON 0,0,0\n" +
"15->1 EPSILON 0,0,0\n" +
"0:14\n";
"0:14 1\n";
ATN atn = createATN(g);
String result = ATNSerializer.getDecoded(g, atn);
assertEquals(expecting, result);
@ -190,7 +190,7 @@ public class TestATNSerialization extends BaseTest {
"7->8 EPSILON 0,0,0\n" +
"8->9 ATOM 4,0,0\n" +
"9->1 EPSILON 0,0,0\n" +
"0:6\n";
"0:6 1\n";
ATN atn = createATN(g);
String result = ATNSerializer.getDecoded(g, atn);
assertEquals(expecting, result);
@ -254,7 +254,7 @@ public class TestATNSerialization extends BaseTest {
"6->2 EPSILON 0,0,0\n" +
"7->8 ATOM 98,0,0\n" +
"8->4 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -277,7 +277,7 @@ public class TestATNSerialization extends BaseTest {
"1->3 EPSILON 0,0,0\n" +
"3->4 RANGE 48,57,0\n" +
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -309,8 +309,8 @@ public class TestATNSerialization extends BaseTest {
"7->5 EPSILON 0,0,0\n" +
"7->8 EPSILON 0,0,0\n" +
"8->2 EPSILON 0,0,0\n" +
"0:0\n" +
"1:7\n";
"0:0 1\n" +
"1:7 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -357,7 +357,7 @@ public class TestATNSerialization extends BaseTest {
"12->13 ATOM 99,0,0\n" +
"13->14 EPSILON 0,0,0\n" +
"14->6 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -381,7 +381,7 @@ public class TestATNSerialization extends BaseTest {
"1->3 EPSILON 0,0,0\n" +
"3->4 NOT_SET 0,0,0\n" +
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -405,7 +405,7 @@ public class TestATNSerialization extends BaseTest {
"1->3 EPSILON 0,0,0\n" +
"3->4 SET 0,0,0\n" +
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -429,7 +429,7 @@ public class TestATNSerialization extends BaseTest {
"1->3 EPSILON 0,0,0\n" +
"3->4 NOT_SET 0,0,0\n" +
"4->2 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -490,9 +490,9 @@ public class TestATNSerialization extends BaseTest {
"18->19 WILDCARD 0,0,0\n" +
"19->20 EPSILON 0,0,0\n" +
"20->7 EPSILON 0,0,0\n" +
"0:0\n" +
"1:1\n" +
"2:12\n";
"0:0 1\n" +
"1:1 1\n" +
"2:12 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -521,7 +521,7 @@ public class TestATNSerialization extends BaseTest {
"4->5 EPSILON 0,0,0\n" +
"5->6 NOT_SET 1,0,0\n" +
"6->2 EPSILON 0,0,0\n" +
"0:0\n";
"0:0 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);
@ -570,9 +570,9 @@ public class TestATNSerialization extends BaseTest {
"12->6 EPSILON 0,0,0\n" +
"13->14 ATOM 99,0,0\n" +
"14->8 EPSILON 0,0,0\n" +
"0:0\n" +
"1:1\n" +
"2:2\n";
"0:0 1\n" +
"1:1 1\n" +
"2:2 1\n";
ATN atn = createATN(lg);
String result = ATNSerializer.getDecoded(lg, atn);
assertEquals(expecting, result);

View File

@ -69,7 +69,7 @@ public class TestIntervalSet extends BaseTest {
@Test public void testSimpleAnd() throws Exception {
IntervalSet s = IntervalSet.of(10,20);
IntervalSet s2 = IntervalSet.of(13,15);
String expecting = "13..15";
String expecting = "{13..15}";
String result = (s.and(s2)).toString();
assertEquals(result, expecting);
}
@ -157,12 +157,12 @@ public class TestIntervalSet extends BaseTest {
@Test public void testSubtractOfOverlappingRangeFromLeft() throws Exception {
IntervalSet s = IntervalSet.of(10,20);
IntervalSet s2 = IntervalSet.of(5,11);
String expecting = "12..20";
String expecting = "{12..20}";
String result = (s.subtract(s2)).toString();
assertEquals(result, expecting);
IntervalSet s3 = IntervalSet.of(5,10);
expecting = "11..20";
expecting = "{11..20}";
result = (s.subtract(s3)).toString();
assertEquals(result, expecting);
}
@ -170,12 +170,12 @@ public class TestIntervalSet extends BaseTest {
@Test public void testSubtractOfOverlappingRangeFromRight() throws Exception {
IntervalSet s = IntervalSet.of(10,20);
IntervalSet s2 = IntervalSet.of(15,25);
String expecting = "10..14";
String expecting = "{10..14}";
String result = (s.subtract(s2)).toString();
assertEquals(result, expecting);
IntervalSet s3 = IntervalSet.of(20,25);
expecting = "10..19";
expecting = "{10..19}";
result = (s.subtract(s3)).toString();
assertEquals(result, expecting);
}
@ -193,7 +193,7 @@ public class TestIntervalSet extends BaseTest {
s.add(30,40);
s.add(50,60); // s has 3 ranges now: 10..20, 30..40, 50..60
IntervalSet s2 = IntervalSet.of(5,55); // covers one and touches 2nd range
String expecting = "56..60";
String expecting = "{56..60}";
String result = (s.subtract(s2)).toString();
assertEquals(result, expecting);
@ -309,7 +309,7 @@ public class TestIntervalSet extends BaseTest {
@Test public void testComplement3() throws Exception {
IntervalSet s = IntervalSet.of(1,96);
s.add(99, Lexer.MAX_CHAR_VALUE);
String expecting = "97..98";
String expecting = "{97..98}";
String result = (s.complement(1, Lexer.MAX_CHAR_VALUE)).toString();
assertEquals(result, expecting);
}
@ -319,7 +319,7 @@ public class TestIntervalSet extends BaseTest {
IntervalSet s = IntervalSet.of(0,41);
s.add(42);
s.add(43,65534);
String expecting = "0..65534";
String expecting = "{0..65534}";
String result = s.toString();
assertEquals(result, expecting);
}
@ -328,7 +328,7 @@ public class TestIntervalSet extends BaseTest {
IntervalSet s = IntervalSet.of(43,65534);
s.add(42);
s.add(0,41);
String expecting = "0..65534";
String expecting = "{0..65534}";
String result = s.toString();
assertEquals(result, expecting);
}
@ -340,7 +340,7 @@ public class TestIntervalSet extends BaseTest {
s.add(0,9);
s.add(43,65534);
s.add(11,41);
String expecting = "0..65534";
String expecting = "{0..65534}";
String result = s.toString();
assertEquals(result, expecting);
}
@ -349,7 +349,7 @@ public class TestIntervalSet extends BaseTest {
IntervalSet s = IntervalSet.of(1,10);
s.add(20,30);
s.add(5,25); // overlaps two!
String expecting = "1..30";
String expecting = "{1..30}";
String result = s.toString();
assertEquals(result, expecting);
}

View File

@ -2,12 +2,43 @@ package org.antlr.v4.test;
import org.junit.Test;
/** Test parser execution.
*
* For the non-greedy stuff, the rule is that .* or any other non-greedy loop
* (any + or * loop that has an alternative with '.' in it is automatically
* non-greedy) never sees past the end of the rule containing that loop.
* There is no automatic way to detect when the exit branch of a non-greedy
* loop has seen enough input to determine how much the loop should consume
* yet still allow matching the entire input. Of course, this is extremely
* inefficient, particularly for things like
*
* block : '{' (block|.)* '}' ;
*
* that need only see one symbol to know when it hits a '}'. So, I
* came up with a practical solution. During prediction, the ATN
simulator never falls off the end of a rule to compute the global
* FOLLOW. Instead, we terminate the loop, choosing the exit branch.
* Otherwise, we predict to reenter the loop. For example, input
* "{ foo }" will allow the loop to match foo, but that's it. During
* prediction, the ATN simulator will see that '}' reaches the end of a
* rule that contains a non-greedy loop and stop prediction. It will choose
* the exit branch of the inner loop. So, the way in which you construct
* the rule containing a non-greedy loop dictates how far it will scan ahead.
* Include everything after the non-greedy loop that you know it must scan
in order to properly make a prediction decision. These beasts are tricky,
so be careful; don't liberally sprinkle them around your code.
*
* To simulate filter mode, use ( .* (pattern1|pattern2|...) )*
*
* Nongreedy loops match as much input as possible while still allowing
* the remaining input to match.
*/
public class TestParserExec extends BaseTest {
@Test public void testBasic() throws Exception {
String grammar =
"grammar T;\n" +
"a : ID INT {System.out.println(input);} ;\n" +
"a : ID INT {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"INT : '0'..'9'+;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -20,7 +51,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAPlus() throws Exception {
String grammar =
"grammar T;\n" +
"a : ID+ {System.out.println(input);} ;\n" +
"a : ID+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -33,7 +64,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAorAPlus() throws Exception {
String grammar =
"grammar T;\n" +
"a : (ID|ID)+ {System.out.println(input);} ;\n" +
"a : (ID|ID)+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -45,7 +76,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAStar() throws Exception {
String grammar =
"grammar T;\n" +
"a : ID* {System.out.println(input);} ;\n" +
"a : ID* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -61,7 +92,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAorAStar() throws Exception {
String grammar =
"grammar T;\n" +
"a : (ID|ID)* {System.out.println(input);} ;\n" +
"a : (ID|ID)* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -76,7 +107,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAorBPlus() throws Exception {
String grammar =
"grammar T;\n" +
"a : (ID|INT{;})+ {System.out.println(input);} ;\n" +
"a : (ID|INT{;})+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"INT : '0'..'9'+;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -89,7 +120,7 @@ public class TestParserExec extends BaseTest {
@Test public void testAorBStar() throws Exception {
String grammar =
"grammar T;\n" +
"a : (ID|INT{;})* {System.out.println(input);} ;\n" +
"a : (ID|INT{;})* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ID : 'a'..'z'+ ;\n" +
"INT : '0'..'9'+;\n" +
"WS : (' '|'\\n') {skip();} ;\n";
@ -102,4 +133,258 @@ public class TestParserExec extends BaseTest {
assertEquals("a34c\n", found);
}
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
    // Rule block contains a nongreedy .* loop; during prediction it must
    // not scan past the end of the rule that contains it.
    String grammar =
        "grammar T;\n" +
        "s : block EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
        "block : '{' .* '}' ;\n" +
        "EQ : '=' ;\n" +
        "INT : '0'..'9'+ ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "WS : (' '|'\\n')+ {skip();} ;\n";
    // Each row is {parser input, expected output}.
    String[][] cases = {
        {"{ }", "{}\n"},
        {"{a b { } ;", "{ab{}\n"},
        // FAILS to match since it terminates loop at first { };
        // should not print output; resync kills rest of input
        {"{ } a 2) { } ;", ""},
    };
    for (String[] c : cases) {
        String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                                  c[0], false);
        assertEquals(c[1], found);
    }
}
// The nongreedy .* in ifstat stops at a ')' that lets the rest of the rule
// (block, then ';' EOF in the caller) match; the assertions below show it
// consuming extra ')' tokens when needed and failing cleanly when it cannot.
@Test public void testNongreedyLoop() throws Exception {
String grammar =
"grammar T;\n" +
"s : ifstat ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ifstat : 'if' '(' .* ')' block ;\n" +
"block : '{' '}' ;\n"+
"EQ : '=' ;\n" +
"INT : '0'..'9'+ ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n')+ {skip();} ;\n";
String input =
"if ( x=34 ) { } ;";
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("if(x=34){};\n", found);
// the .* loop absorbs the stray ')))' tokens; per the expected output,
// only the final ')' before '{' exits the loop
input =
"if ( ))) ) { } ;";
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("if()))){};\n", found);
input =
"if (() { } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("", found); // should not finish to print output
}
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
    // The outer nongreedy .* in ifstat must work while rule block itself
    // contains another nongreedy (block|.)* loop.
    String grammar =
        "grammar T;\n" +
        "s : ifstat ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
        "ifstat : 'if' '(' .* ')' block ;\n" +
        "block : '{' (block|.)* '}' ;\n" +
        "EQ : '=' ;\n" +
        "INT : '0'..'9'+ ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "WS : (' '|'\\n')+ {skip();} ;\n";
    String src = "if ( x=34 ) { {return a} b 34 } ;";
    String output = execParser("T.g", grammar, "TParser", "TLexer", "s",
                               src, false);
    assertEquals("if(x=34){{returna}b34};\n", output);
}
/** EOF on end means LL(*) can identify when to stop the stat* loop, so no
 *  nongreedy option is needed here.  The last two inputs cannot match the
 *  trailing ID '=' ID ';' and therefore report a no-viable-token error
 *  instead of printing the matched input. */
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
    String grammar =
        "grammar T;\n" +
        "s : stat* ID '=' ID ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
        "stat : 'if' '(' INT ')' stat\n" +
        " | 'return' INT ';'\n" +
        " | ID '=' (INT|ID) ';'\n" +
        " | block\n" +
        " ;\n" +
        "block : '{' stat* '}' ;\n" +
        "EQ : '=' ;\n" +
        "INT : '0'..'9'+ ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "WS : (' '|'\\n')+ {skip();} ;\n";
    String input = "x=1; a=b;";
    // was "String found = null;" followed by assignment -- dead store removed
    String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                              input, false);
    assertEquals("x=1;a=b;\n", found);
    input = "if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
    input = "x=1; a=3;"; // FAILS to match since it can't match last element
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    // can't match EOF to ID '=' '0' ';'
    assertEquals("no viable token at input [@8,9:9='<EOF>',<-1>,1:9], index 8\n", found);
    input = "x=1; a=b; z=3;"; // FAILS to match since it can't match last element
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    assertEquals("no viable token at input [@12,14:14='<EOF>',<-1>,1:14], index 12\n", found); // should not finish to print output
}
/** Rule s is not followed directly by EOF (rule random calls it), so the
 *  greedy=false stat loop stops scanning ahead at the end of rule s:
 *  match statements only until the first ID '=' ID ';' assignment and
 *  ignore any statements that follow. */
@Test public void testStatLoopNongreedyNecessary() throws Exception {
    String grammar =
        "grammar T;\n" +
        "random : s ;" + // call s so s isn't followed by EOF directly
        "s : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
        " {System.out.println(input.toString(0,input.index()-1));} ;\n" +
        "stat : 'if' '(' INT ')' stat\n" +
        " | 'return' INT ';'\n" +
        " | ID '=' (INT|ID) ';'\n" +
        " | block\n" +
        " ;\n" +
        "block : '{' stat* '}' ;\n" +
        "EQ : '=' ;\n" +
        "INT : '0'..'9'+ ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "WS : (' '|'\\n')+ {skip();} ;\n";
    String input = "x=1; a=b; x=y;";
    // was "String found = null;" followed by assignment -- dead store removed
    String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                              input, false);
    // ignores the x=y; that follows the first a=b; assignment
    // (original comment said "ignores x=1", which precedes it)
    assertEquals("x=1;a=b;\n", found);
    input = "if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
    input = "x=1; a=3;"; // FAILS to match since it can't match either stat
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    // can't match EOF to ID '=' '0' ';'
    assertEquals("no viable token at input [@8,9:9='<EOF>',<-1>,1:9], index 8\n", found);
    input = "x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
    found = execParser("T.g", grammar, "TParser", "TLexer", "s",
                       input, false);
    assertEquals("x=1;a=b;\n", found); // should not finish all input
}
/** Nongreedy .* inside rule tag stops at a '>' that lets parsing continue.
 *  NOTE(review): for the first input only "&lt;a&gt;" is printed (see the
 *  assertion) -- presumably the parse stops after the first tag when the
 *  rest of the input cannot match tag+; confirm against the runtime. */
@Test public void testHTMLTags() throws Exception {
    String grammar =
        "grammar T;\n" +
        "a : tag+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
        "tag : '<' '/'? .* '>' ;\n" +
        "EQ : '=' ;\n" +
        "COMMA : ',' ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
        "INT : '0'..'9'+;\n" +
        "WS : (' '|'\\n') {skip();} ;\n";
    // was "String found = null;" followed by assignment -- dead store removed
    String found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                              "<a>foo</a>", false);
    assertEquals("<a>\n", found);
    found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                       "<a></a>", false);
    assertEquals("<a></a>\n", found);
    found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                       "</b><a src=\"abc\", width=32>", false);
    assertEquals("</b><asrc=\"abc\",width=32>\n", found);
}
/** Lookahead prediction with '.' can be misleading since nongreedy.
 *  Lookahead that sees into a non-greedy loop thinks it is greedy.
 *  Simulates filter mode via the ( .* (tag|header) )* idiom described in
 *  the class comment. */
@Test public void testFindHTMLTags() throws Exception {
    String grammar =
        "grammar T;\n" +
        "a : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
        "tag : '<' .+ '>' ;\n" +
        "header : 'x' 'y' ;\n" +
        "EQ : '=' ;\n" +
        "COMMA : ',' ;\n" +
        "ID : 'a'..'z'+ ;\n" +
        "STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
        "INT : '0'..'9'+;\n" +
        "WS : (' '|'\\n') {skip();} ;\n";
    // was "String found = null;" followed by assignment -- dead store removed
    String found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                              ",=foo <a x= 3>32skidoo<a><img>", false);
    assertEquals("<ax=3>\n" +
                 "<a>\n" +
                 "<img>\n", found);
    found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                       "x x<a>", false);
    assertEquals("<a>\n", found);
    // Gets "line 1:3 no viable alternative at input '>'" -- prediction sees
    // .+ and figures it matches '>', so '<>' predicts tag (correct!).
    // Seeing '.' in a lookahead prediction can be misleading.
    found = execParser("T.g", grammar, "TParser", "TLexer", "a",
                       "x <><a>", false);
    assertEquals("null\n" + // doesn't match tag; null
                 "<a>\n", found);
}
/** See comment on testNongreedyLoopEndOfRuleStuffFollowing.  Here EOF
 *  follows ifstat in rule s, so the trailing .* in ifstat can consume
 *  everything remaining on the input and the parse still succeeds. */
@Test public void testNongreedyLoopEndOfRule() throws Exception {
String grammar =
"grammar T;\n" +
"s : ifstat EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ifstat : 'if' '(' INT ')' .* ;\n" +
"EQ : '=' ;\n" +
"INT : '0'..'9'+ ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n')+ {skip();} ;\n";
String input =
"if ( 34 ) a b";
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("if(34)ab\n", found);
// even garbage after the ')' is swallowed by the trailing .*
input =
"if ( 34 ))) ) ( a = = b( ;";
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
assertEquals("if(34))))(a==b(\n", found);
}
/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
 * since it immediately hits the end of the rule. Non-greedy loops
 * never consume more tokens than exist following the .* in that
 * same rule. So, in this case, the greedy loop always wins and it will
 * suck tokens until end of file. Unfortunately, the '.' in rule s
 * will not match, leading to a syntax error.
 */
@Test public void testNongreedyLoopEndOfRuleStuffFollowing() throws Exception {
String grammar =
"grammar T;\n" +
"s : ifstat '.' {System.out.println(input.toString(0,input.index()-1));} ;\n" +
"ifstat : 'if' '(' INT ')' .* ;\n" +
"EQ : '=' ;\n" +
"INT : '0'..'9'+ ;\n" +
"ID : 'a'..'z'+ ;\n" +
"WS : (' '|'\\n')+ {skip();} ;\n";
String input =
"if ( 34 ) a b .";
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
input, false);
// the .* eats through the final '.' to EOF, so rule s cannot match '.'
// and the parser reports a no-viable-token error at EOF
assertEquals("no viable token at input [@7,15:15='<EOF>',<-1>,1:15], index 7\n", found);
}
}