got parser nongreedy loops going
[git-p4: depot-paths = "//depot/code/antlr4/main/": change = 9032]
This commit is contained in:
parent
f44c49a8b8
commit
085dd05bf1
|
@ -235,7 +235,7 @@ public class BufferedTokenStream implements TokenStream {
|
|||
}
|
||||
|
||||
public String toString(int start, int stop) {
|
||||
if ( start<0 || stop<0 ) return null;
|
||||
if ( start<0 || stop<0 ) return "";
|
||||
if ( p == -1 ) setup();
|
||||
if ( stop>=tokens.size() ) stop = tokens.size()-1;
|
||||
StringBuffer buf = new StringBuffer();
|
||||
|
|
|
@ -57,8 +57,8 @@ public class NoViableAltException extends RecognitionException {
|
|||
if ( recognizer!=null ) {
|
||||
TokenStream tokens = recognizer.input;
|
||||
String bad = tokens.toString(startIndex, index);
|
||||
return "NoViableAltException(input=\""+bad+"\" last token type is "+getUnexpectedType();
|
||||
return "NoViableAltException(input=\""+bad+"\" last token type is "+getUnexpectedType()+")";
|
||||
}
|
||||
return "NoViableAltException(last token type is "+getUnexpectedType();
|
||||
return "NoViableAltException(last token type is "+getUnexpectedType()+")";
|
||||
}
|
||||
}
|
||||
|
|
|
@ -121,9 +121,11 @@ public abstract class ATNSimulator {
|
|||
int ndecisions = toInt(data[p++]);
|
||||
for (int i=1; i<=ndecisions; i++) {
|
||||
int s = toInt(data[p++]);
|
||||
int isGreedy = toInt(data[p++]);
|
||||
DecisionState decState = (DecisionState)atn.states.get(s);
|
||||
atn.decisionToState.add((DecisionState) decState);
|
||||
atn.decisionToState.add(decState);
|
||||
decState.decision = i-1;
|
||||
decState.isGreedy = isGreedy==1;
|
||||
}
|
||||
return atn;
|
||||
}
|
||||
|
|
|
@ -39,6 +39,8 @@ public class DecisionState extends ATNState {
|
|||
|
||||
public int decision = -1;
|
||||
|
||||
public boolean isGreedy = true;
|
||||
|
||||
@Override
|
||||
public int getNumberOfTransitions() { return transitions.size(); }
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@
|
|||
|
||||
package org.antlr.v4.runtime.atn;
|
||||
|
||||
import org.antlr.v4.runtime.RuleContext;
|
||||
import org.antlr.v4.runtime.*;
|
||||
import org.antlr.v4.runtime.misc.IntervalSet;
|
||||
|
||||
import java.util.*;
|
||||
|
@ -51,10 +51,9 @@ public class LL1Analyzer {
|
|||
// System.out.println("LOOK("+s.stateNumber+")");
|
||||
if ( s==null ) return null;
|
||||
IntervalSet[] look = new IntervalSet[s.getNumberOfTransitions()+1];
|
||||
Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
|
||||
for (int alt=1; alt<=s.getNumberOfTransitions(); alt++) {
|
||||
look[alt] = new IntervalSet();
|
||||
lookBusy.clear();
|
||||
Set<ATNConfig> lookBusy = new HashSet<ATNConfig>();
|
||||
_LOOK(s.transition(alt - 1).target, RuleContext.EMPTY, look[alt], lookBusy);
|
||||
}
|
||||
return look;
|
||||
|
@ -85,7 +84,7 @@ public class LL1Analyzer {
|
|||
int n = s.getNumberOfTransitions();
|
||||
for (int i=0; i<n; i++) {
|
||||
Transition t = s.transition(i);
|
||||
if ( t instanceof RuleTransition ) {
|
||||
if ( t.getClass() == RuleTransition.class ) {
|
||||
RuleContext newContext =
|
||||
new RuleContext(ctx, s.stateNumber, t.target.stateNumber);
|
||||
_LOOK(t.target, newContext, look, lookBusy);
|
||||
|
@ -93,6 +92,9 @@ public class LL1Analyzer {
|
|||
else if ( t.isEpsilon() ) {
|
||||
_LOOK(t.target, ctx, look, lookBusy);
|
||||
}
|
||||
else if ( t.getClass() == WildcardTransition.class ) {
|
||||
look.addAll( IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, atn.maxTokenType) );
|
||||
}
|
||||
else {
|
||||
// System.out.println("adding "+ t);
|
||||
IntervalSet set = t.label();
|
||||
|
|
|
@ -138,7 +138,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
t = input.LA(1);
|
||||
}
|
||||
if ( prevAcceptState==null ) {
|
||||
System.out.println("!!! no viable alt in dfa");
|
||||
if ( debug ) System.out.println("!!! no viable alt in dfa");
|
||||
return -1;
|
||||
}
|
||||
if ( recog!=null ) {
|
||||
|
@ -188,7 +188,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
// if we reach lexer accept state, toss out any configs in rest
|
||||
// of configs work list associated with this rule (config.alt);
|
||||
// that rule is done. this is how we cut off nongreedy .+ loops.
|
||||
deleteConfigsForAlt(closure, ci, c.alt);
|
||||
deleteWildcardConfigsForAlt(closure, ci, c.alt);
|
||||
// int j=ci+1;
|
||||
// while ( j<closure.size() ) {
|
||||
// ATNConfig c2 = closure.get(j);
|
||||
|
@ -252,7 +252,7 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
else System.out.println("ACTION "+ruleIndex+":"+ruleIndex);
|
||||
}
|
||||
int actionIndex = atn.ruleToActionIndex[ruleIndex];
|
||||
if ( actionIndex>=0 ) recog.action(null, ruleIndex, actionIndex);
|
||||
if ( actionIndex>=0 && recog!=null ) recog.action(null, ruleIndex, actionIndex);
|
||||
return ttype;
|
||||
}
|
||||
|
||||
|
@ -290,14 +290,14 @@ public class LexerATNSimulator extends ATNSimulator {
|
|||
return null;
|
||||
}
|
||||
|
||||
public void deleteConfigsForAlt(OrderedHashSet<ATNConfig> closure, int ci, int alt) {
|
||||
public void deleteWildcardConfigsForAlt(OrderedHashSet<ATNConfig> closure, int ci, int alt) {
|
||||
int j=ci+1;
|
||||
while ( j<closure.size() ) {
|
||||
ATNConfig c = closure.get(j);
|
||||
boolean isWildcard = c.state.getClass() == ATNState.class &&
|
||||
c.state.transition(0).getClass() == WildcardTransition.class;
|
||||
if ( c.alt == alt && isWildcard ) {
|
||||
System.out.println("kill "+c);
|
||||
// System.out.println("kill "+c);
|
||||
closure.remove(j);
|
||||
}
|
||||
else j++;
|
||||
|
|
|
@ -64,8 +64,6 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
*/
|
||||
protected RuleContext outerContext;
|
||||
|
||||
protected Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
|
||||
|
||||
public ParserATNSimulator(ATN atn) {
|
||||
super(atn);
|
||||
ctxToDFAs = new HashMap<RuleContext, DFA[]>();
|
||||
|
@ -89,7 +87,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
ATNState startState = atn.decisionToState.get(decision);
|
||||
decisionToDFA[decision] = dfa = new DFA(startState);
|
||||
dfa.decision = decision;
|
||||
return predictATN(dfa, input, decision, outerContext, false);
|
||||
return predictATN(dfa, input, outerContext, false);
|
||||
}
|
||||
else {
|
||||
//dump(dfa);
|
||||
|
@ -102,7 +100,6 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
}
|
||||
|
||||
public int predictATN(DFA dfa, TokenStream input,
|
||||
int decision,
|
||||
RuleContext outerContext,
|
||||
boolean useContext)
|
||||
{
|
||||
|
@ -114,7 +111,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
RuleContext ctx = RuleContext.EMPTY;
|
||||
if ( useContext ) ctx = outerContext;
|
||||
OrderedHashSet<ATNConfig> s0_closure =
|
||||
computeStartState(dfa.atnStartState, ctx);
|
||||
computeStartState(dfa.decision, dfa.atnStartState, ctx);
|
||||
dfa.s0 = addDFAState(dfa, s0_closure);
|
||||
if ( prevAccept!=null ) {
|
||||
dfa.s0.isAcceptState = true;
|
||||
|
@ -138,7 +135,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
public int matchATN(TokenStream input, ATNState startState) {
|
||||
DFA dfa = new DFA(startState);
|
||||
RuleContext ctx = RuleContext.EMPTY;
|
||||
OrderedHashSet<ATNConfig> s0_closure = computeStartState(startState, ctx);
|
||||
OrderedHashSet<ATNConfig> s0_closure = computeStartState(dfa.decision, startState, ctx);
|
||||
return execATN(input, dfa, input.index(), s0_closure, false);
|
||||
}
|
||||
|
||||
|
@ -222,7 +219,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
t = input.LA(1);
|
||||
}
|
||||
if ( prevAcceptState==null ) {
|
||||
System.out.println("!!! no viable alt in dfa");
|
||||
if ( debug ) System.out.println("!!! no viable alt in dfa");
|
||||
return -1;
|
||||
}
|
||||
if ( dfa_debug ) System.out.println("DFA decision "+dfa.decision+
|
||||
|
@ -236,7 +233,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
OrderedHashSet<ATNConfig> s0,
|
||||
boolean useContext)
|
||||
{
|
||||
if ( debug ) System.out.println("ATN decision "+dfa.decision+" exec LA(1)=="+input.LT(1));
|
||||
if ( debug ) System.out.println("execATN decision "+dfa.decision+" exec LA(1)=="+input.LT(1));
|
||||
ATN_failover++;
|
||||
OrderedHashSet<ATNConfig> closure = new OrderedHashSet<ATNConfig>();
|
||||
|
||||
|
@ -250,6 +247,10 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return prevAccept.alt;
|
||||
}
|
||||
|
||||
DecisionState decState = null;
|
||||
if ( atn.decisionToState.size()>0 ) decState = atn.decisionToState.get(dfa.decision);
|
||||
if ( debug ) System.out.println("decision state = "+decState);
|
||||
|
||||
prevAccept = null;
|
||||
prevAcceptIndex = -1;
|
||||
OrderedHashSet<ATNConfig> reach = new OrderedHashSet<ATNConfig>();
|
||||
|
@ -265,7 +266,8 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
Transition trans = c.state.transition(ti);
|
||||
ATNState target = getReachableTarget(trans, t);
|
||||
if ( target!=null ) {
|
||||
closure(new ATNConfig(c, target), reach);
|
||||
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
|
||||
closure(new ATNConfig(c, target), reach, decState, closureBusy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -277,7 +279,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
ATNState loc = atn.states.get(outerContext.s);
|
||||
String rname = "n/a";
|
||||
if ( parser !=null ) rname = parser.getRuleNames()[loc.ruleIndex];
|
||||
System.out.println("AMBIG in "+rname+" for alt "+ambigAlts+" upon "+
|
||||
System.out.println("AMBIG dec "+dfa.decision+" in "+rname+" for alt "+ambigAlts+" upon "+
|
||||
input.toString(startIndex, input.index()));
|
||||
System.out.println("REACH="+reach);
|
||||
}
|
||||
|
@ -289,7 +291,16 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
// System.out.println("AMBIG orig="+outerContext.toString((BaseRecognizer)recog)+" for alt "+ambigAlts+" upon "+
|
||||
// input.toString(startIndex, input.index()));
|
||||
if ( !userWantsCtxSensitive || useContext ) {
|
||||
resolveToMinAlt(reach, ambigAlts);
|
||||
// resolve ambiguity
|
||||
if ( decState.isGreedy ) {
|
||||
// if greedy, resolve in favor of alt coming first
|
||||
resolveToMinAlt(reach, ambigAlts);
|
||||
}
|
||||
else {
|
||||
// if nongreedy loop, always pick exit branch to match
|
||||
// what follows instead of re-entering loop
|
||||
resolveNongreedyToExitBranch(reach, ambigAlts);
|
||||
}
|
||||
}
|
||||
else {
|
||||
return retryWithContext(input, dfa, startIndex, outerContext,
|
||||
|
@ -309,6 +320,18 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return uniqueAlt;
|
||||
}
|
||||
|
||||
if ( decState!=null && !decState.isGreedy ) {
|
||||
// if we reached end of rule via exit branch, we matched
|
||||
int exitAlt = 2;
|
||||
ATNConfig cstop = configWithAltAtStopState(reach, exitAlt);
|
||||
if ( cstop!=null ) {
|
||||
if ( debug ) System.out.println("nongreedy at stop state for exit branch");
|
||||
prevAccept = cstop;
|
||||
prevAcceptIndex = input.index();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( reach.size()==0 ) {
|
||||
break;
|
||||
}
|
||||
|
@ -323,7 +346,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
OrderedHashSet<ATNConfig> tmp = reach;
|
||||
reach = closure;
|
||||
closure = tmp;
|
||||
reach.clear(); // THIS MIGHT BE SLOW! kills each element; realloc might be faster
|
||||
reach.clear(); // TODO: THIS MIGHT BE SLOW! kills each element; realloc might be faster
|
||||
} while ( true );
|
||||
|
||||
if ( prevAccept==null ) {
|
||||
|
@ -339,7 +362,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
|
||||
protected int resolveToMinAlt(OrderedHashSet<ATNConfig> reach, Set<Integer> ambigAlts) {
|
||||
int min = getMinAlt(ambigAlts);
|
||||
// if predicting, create DFA accept state for resolved alt
|
||||
// create DFA accept state for resolved alt
|
||||
ambigAlts.remove(min);
|
||||
// kill dead alts so we don't chase them ever
|
||||
killAlts(ambigAlts, reach);
|
||||
|
@ -347,6 +370,17 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return min;
|
||||
}
|
||||
|
||||
protected int resolveNongreedyToExitBranch(OrderedHashSet<ATNConfig> reach, Set<Integer> ambigAlts) {
|
||||
// exit branch is alt 2 always; alt 1 is entry or loopback branch
|
||||
// since we're predicting, create DFA accept state for exit alt
|
||||
int exitAlt = 2;
|
||||
ambigAlts.remove(exitAlt);
|
||||
// kill dead alts so we don't chase them ever
|
||||
killAlts(ambigAlts, reach);
|
||||
if ( debug ) System.out.println("RESOLVED TO "+reach);
|
||||
return exitAlt;
|
||||
}
|
||||
|
||||
public int retryWithContext(TokenStream input,
|
||||
DFA dfa,
|
||||
int startIndex,
|
||||
|
@ -373,7 +407,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
// if it comes back with conflict, we have a true ambiguity
|
||||
input.seek(startIndex); // rewind
|
||||
DFA ctx_dfa = new DFA(dfa.atnStartState);
|
||||
int ctx_alt = predictATN(ctx_dfa, input, dfa.decision, originalContext, true);
|
||||
int ctx_alt = predictATN(ctx_dfa, input, originalContext, true);
|
||||
if ( debug ) System.out.println("retry predicts "+ctx_alt+" vs "+getMinAlt(ambigAlts)+
|
||||
" with conflict="+ctx_dfa.conflict+
|
||||
" dfa="+ctx_dfa);
|
||||
|
@ -408,16 +442,20 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return predictedAlt;
|
||||
}
|
||||
|
||||
public OrderedHashSet<ATNConfig> computeStartState(ATNState p, RuleContext ctx) {
|
||||
public OrderedHashSet<ATNConfig> computeStartState(int decision, ATNState p, RuleContext ctx) {
|
||||
RuleContext initialContext = ctx; // always at least the implicit call to start rule
|
||||
OrderedHashSet<ATNConfig> configs = new OrderedHashSet<ATNConfig>();
|
||||
prevAccept = null; // might reach end rule; track
|
||||
prevAcceptIndex = -1;
|
||||
|
||||
DecisionState decState = null;
|
||||
if ( atn.decisionToState.size()>0 ) decState = atn.decisionToState.get(decision);
|
||||
|
||||
for (int i=0; i<p.getNumberOfTransitions(); i++) {
|
||||
ATNState target = p.transition(i).target;
|
||||
ATNConfig c = new ATNConfig(target, i+1, initialContext);
|
||||
closure(c, configs);
|
||||
Set<ATNConfig> closureBusy = new HashSet<ATNConfig>();
|
||||
closure(c, configs, decState, closureBusy);
|
||||
}
|
||||
|
||||
return configs;
|
||||
|
@ -426,7 +464,6 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
public ATNState getReachableTarget(Transition trans, int ttype) {
|
||||
if ( trans instanceof AtomTransition ) {
|
||||
AtomTransition at = (AtomTransition)trans;
|
||||
// boolean not = trans instanceof NotAtomTransition;
|
||||
if ( at.label == ttype ) {
|
||||
return at.target;
|
||||
}
|
||||
|
@ -438,18 +475,16 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return st.target;
|
||||
}
|
||||
}
|
||||
// TODO else if ( trans instanceof WildcardTransition && t!=Token.EOF ) {
|
||||
// ATNConfig targetConfig = new ATNConfig(c, trans.target);
|
||||
// closure(input, targetConfig, reach);
|
||||
// }
|
||||
else if ( trans instanceof RangeTransition ) {
|
||||
RangeTransition rt = (RangeTransition)trans;
|
||||
if ( ttype>=rt.from && ttype<=rt.to ) return rt.target;
|
||||
}
|
||||
else if ( trans instanceof WildcardTransition && ttype!=Token.EOF ) {
|
||||
return trans.target;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
protected void closure(ATNConfig config, OrderedHashSet<ATNConfig> configs) {
|
||||
closureBusy.clear();
|
||||
closure(config, configs, closureBusy);
|
||||
}
|
||||
|
||||
/* TODO: If we are doing predicates, there is no point in pursuing
|
||||
closure operations if we reach a DFA state that uniquely predicts
|
||||
alternative. We will not be caching that DFA state and it is a
|
||||
|
@ -459,6 +494,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
|
||||
protected void closure(ATNConfig config,
|
||||
OrderedHashSet<ATNConfig> configs,
|
||||
DecisionState decState,
|
||||
Set<ATNConfig> closureBusy)
|
||||
{
|
||||
if ( debug ) System.out.println("closure("+config+")");
|
||||
|
@ -478,11 +514,19 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
// gotten that context AFTER having fallen off a rule.
|
||||
// Make sure we track that we are now out of context.
|
||||
c.reachesIntoOuterContext = config.reachesIntoOuterContext;
|
||||
closure(c, configs, closureBusy);
|
||||
closure(c, configs, decState, closureBusy);
|
||||
return;
|
||||
}
|
||||
else {
|
||||
// else if we have no context info, just chase follow links
|
||||
// else if we have no context info, just chase follow links (if greedy)
|
||||
if ( decState!=null && !decState.isGreedy ) {
|
||||
if ( debug ) System.out.println("nongreedy decision state = "+decState);
|
||||
if ( debug ) System.out.println("NONGREEDY at stop state of "+
|
||||
parser.getRuleNames()[config.state.ruleIndex]);
|
||||
// don't pursue past end of a rule for any nongreedy decision
|
||||
configs.add(config);
|
||||
return;
|
||||
}
|
||||
if ( debug ) System.out.println("FALLING off rule "+
|
||||
parser.getRuleNames()[config.state.ruleIndex]);
|
||||
}
|
||||
|
@ -504,7 +548,7 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
// preds if this is > 0.
|
||||
c.reachesIntoOuterContext++;
|
||||
}
|
||||
closure(c, configs, closureBusy);
|
||||
closure(c, configs, decState, closureBusy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -611,13 +655,15 @@ public class ParserATNSimulator extends ATNSimulator {
|
|||
return alt;
|
||||
}
|
||||
|
||||
public RuleContext getCurrentExecContext(ATNConfig config) {
|
||||
RuleContext ctx = config.context; // use context created after entry into interp
|
||||
if ( ctx == RuleContext.EMPTY ) {
|
||||
if ( config.reachesIntoOuterContext==0 ) ctx = outerContext;
|
||||
else ctx = null; // no context if we in outer context
|
||||
public ATNConfig configWithAltAtStopState(Collection<ATNConfig> configs, int alt) {
|
||||
for (ATNConfig c : configs) {
|
||||
if ( c.alt == alt ) {
|
||||
if ( c.state.getClass() == RuleStopState.class ) {
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ctx;
|
||||
return null;
|
||||
}
|
||||
|
||||
public Set<Integer> getAmbiguousAlts(OrderedHashSet<ATNConfig> configs) {
|
||||
|
|
|
@ -1,17 +1,7 @@
|
|||
grammar T;
|
||||
options {output=AST;}
|
||||
|
||||
s : e_[0] EOF ;
|
||||
|
||||
e_[int _p]
|
||||
: e_primary { }
|
||||
( {19 >= $_p}? '['^ e_[0] ']'! )*
|
||||
;
|
||||
e_primary
|
||||
: INT
|
||||
| 'new'^ ID ('[' INT ']')+
|
||||
;
|
||||
|
||||
ID : ('a'..'z'|'A'..'Z'|'_'|'$')+;
|
||||
s : ifstat '.' {System.out.println(input.toString(0,input.index()-1));} ;
|
||||
ifstat : 'if' '(' INT ')' .* ;
|
||||
EQ : '=' ;
|
||||
INT : '0'..'9'+ ;
|
||||
WS : (' '|'\n') {skip();} ;
|
||||
ID : 'a'..'z'+ ;
|
||||
WS : (' '|'\n')+ {skip();} ;
|
||||
|
|
|
@ -11,7 +11,7 @@ class TestJava {
|
|||
public static long lexerTime = 0;
|
||||
public static boolean profile = false;
|
||||
public static JavaLexer lexer;
|
||||
// public static JavaParser parser = null;
|
||||
public static JavaParser parser = null;
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
|
@ -98,13 +98,13 @@ class TestJava {
|
|||
|
||||
if ( true ) {
|
||||
// Create a parser that reads from the scanner
|
||||
// if ( parser==null ) {
|
||||
// //parser = new JavaParser(tokens);
|
||||
//// parser.getInterpreter().setContextSensitive(true);
|
||||
// }
|
||||
//parser.setTokenStream(tokens);
|
||||
if ( parser==null ) {
|
||||
parser = new JavaParser(tokens);
|
||||
// parser.getInterpreter().setContextSensitive(true);
|
||||
}
|
||||
parser.setTokenStream(tokens);
|
||||
// start parsing at the compilationUnit rule
|
||||
// parser.compilationUnit();
|
||||
parser.compilationUnit();
|
||||
//System.err.println("finished "+f);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,10 @@ public class TestT {
|
|||
public static void main(String[] args) throws Exception {
|
||||
TLexer t = new TLexer(new ANTLRFileStream(args[0]));
|
||||
CommonTokenStream tokens = new CommonTokenStream(t);
|
||||
tokens.fill();
|
||||
for (Object tok : tokens.getTokens()) {
|
||||
System.out.println(tok);
|
||||
}
|
||||
TParser p = new TParser(tokens);
|
||||
ParserRuleContext ret = p.s();
|
||||
// System.out.println(((Tree)ret.tree).toStringTree());
|
||||
|
|
|
@ -270,7 +270,7 @@ case <i>:
|
|||
|
||||
StarBlock(choice, alts, sync) ::= <<
|
||||
int _alt<choice.uniqueID> = _interp.adaptivePredict(input,<choice.decision>,_ctx);
|
||||
while ( _alt<choice.uniqueID>!=<choice.exitAlt> ) {
|
||||
while ( _alt<choice.uniqueID>!=<choice.exitAlt> && _alt<choice.uniqueID>!=-1 ) {
|
||||
switch ( _alt<choice.uniqueID> ) {
|
||||
<alts:{alt|
|
||||
case <i>:
|
||||
|
@ -293,7 +293,7 @@ case <i>:
|
|||
<error>
|
||||
}
|
||||
_alt<choice.uniqueID> = _interp.adaptivePredict(input,<choice.decision>,_ctx);
|
||||
} while ( _alt<choice.uniqueID>!=<choice.exitAlt> );
|
||||
} while ( _alt<choice.uniqueID>!=<choice.exitAlt> && _alt<choice.uniqueID>!=-1 );
|
||||
>>
|
||||
|
||||
Sync(s) ::= "sync(<s.expecting.name>);"
|
||||
|
|
|
@ -240,13 +240,13 @@ public class Tool {
|
|||
g.implicitLexer = lexerg;
|
||||
lexerg.implicitLexerOwner = g;
|
||||
processNonCombinedGrammar(lexerg, gencode);
|
||||
System.out.println("lexer tokens="+lexerg.tokenNameToTypeMap);
|
||||
System.out.println("lexer strings="+lexerg.stringLiteralToTypeMap);
|
||||
// System.out.println("lexer tokens="+lexerg.tokenNameToTypeMap);
|
||||
// System.out.println("lexer strings="+lexerg.stringLiteralToTypeMap);
|
||||
}
|
||||
}
|
||||
if ( g.implicitLexer!=null ) g.importVocab(g.implicitLexer);
|
||||
System.out.println("tokens="+g.tokenNameToTypeMap);
|
||||
System.out.println("strings="+g.stringLiteralToTypeMap);
|
||||
// System.out.println("tokens="+g.tokenNameToTypeMap);
|
||||
// System.out.println("strings="+g.stringLiteralToTypeMap);
|
||||
processNonCombinedGrammar(g, gencode);
|
||||
}
|
||||
|
||||
|
|
|
@ -178,8 +178,9 @@ public class ATNSerializer {
|
|||
}
|
||||
int ndecisions = atn.decisionToState.size();
|
||||
data.add(ndecisions);
|
||||
for (ATNState decStartState : atn.decisionToState) {
|
||||
for (DecisionState decStartState : atn.decisionToState) {
|
||||
data.add(decStartState.stateNumber);
|
||||
data.add(decStartState.isGreedy?1:0);
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
@ -239,7 +240,8 @@ public class ATNSerializer {
|
|||
int ndecisions = ATNSimulator.toInt(data[p++]);
|
||||
for (int i=1; i<=ndecisions; i++) {
|
||||
int s = ATNSimulator.toInt(data[p++]);
|
||||
buf.append((i-1)+":"+s+"\n");
|
||||
int isGreedy = ATNSimulator.toInt(data[p++]);
|
||||
buf.append((i-1)+":"+s+" "+isGreedy+"\n");
|
||||
}
|
||||
return buf.toString();
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ public class ParserATNFactory implements ATNFactory {
|
|||
epsilon(blk.right, stop);
|
||||
Handle h = new Handle(start, stop);
|
||||
ATNPrinter ser = new ATNPrinter(g, h.left);
|
||||
System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
|
||||
// System.out.println(ruleAST.toStringTree()+":\n"+ser.asString());
|
||||
ruleAST.atnState = start;
|
||||
return h;
|
||||
}
|
||||
|
@ -370,7 +370,8 @@ public class ParserATNFactory implements ATNFactory {
|
|||
epsilon(blkEnd, loop); // blk can see loop back
|
||||
|
||||
BlockAST blkAST = (BlockAST)plusAST.getChild(0);
|
||||
if ( !g.isLexer() || isGreedy(blkAST) ) {
|
||||
loop.isGreedy = isGreedy(blkAST);
|
||||
if ( !g.isLexer() || loop.isGreedy ) {
|
||||
epsilon(loop, blkStart); // loop back to start
|
||||
epsilon(loop, end); // or exit
|
||||
}
|
||||
|
@ -405,7 +406,8 @@ public class ParserATNFactory implements ATNFactory {
|
|||
StarLoopbackState loop = (StarLoopbackState)newState(StarLoopbackState.class, starAST);
|
||||
|
||||
BlockAST blkAST = (BlockAST)starAST.getChild(0);
|
||||
if ( !g.isLexer() || isGreedy(blkAST) ) {
|
||||
entry.isGreedy = isGreedy(blkAST);
|
||||
if ( !g.isLexer() || entry.isGreedy ) {
|
||||
epsilon(entry, blkStart); // loop enter edge (alt 1)
|
||||
epsilon(entry, end); // bypass loop edge (alt 2)
|
||||
}
|
||||
|
|
|
@ -177,14 +177,15 @@ grammarSpec
|
|||
: ^( GRAMMAR ID {grammarName=$ID.text;} DOC_COMMENT?
|
||||
{discoverGrammar((GrammarRootAST)$GRAMMAR, $ID);}
|
||||
prequelConstructs
|
||||
{finishPrequels($prequelConstructs.start);}
|
||||
{finishPrequels($prequelConstructs.firstOne);}
|
||||
rules mode*
|
||||
{finishGrammar((GrammarRootAST)$GRAMMAR, $ID);}
|
||||
)
|
||||
;
|
||||
|
||||
prequelConstructs
|
||||
: prequelConstruct*
|
||||
prequelConstructs returns [GrammarAST firstOne=null]
|
||||
: {$firstOne=$start;} prequelConstruct+
|
||||
|
|
||||
;
|
||||
|
||||
prequelConstruct
|
||||
|
@ -205,8 +206,8 @@ boolean block = inContext("BLOCK ...");
|
|||
}
|
||||
: ^(a=ASSIGN ID optionValue)
|
||||
{
|
||||
if ( rule ) ruleOption($ID, $optionValue.v);
|
||||
else if ( block ) blockOption($ID, $optionValue.v);
|
||||
if ( block ) blockOption($ID, $optionValue.v); // most specific first
|
||||
else if ( rule ) ruleOption($ID, $optionValue.v);
|
||||
else grammarOption($ID, $optionValue.v);
|
||||
}
|
||||
;
|
||||
|
|
|
@ -291,10 +291,13 @@ public class LeftRecursiveRuleAnalyzer extends LeftRecursiveRuleWalker {
|
|||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* Match (RULE ID (BLOCK (ALT .*) (ALT RULE_REF[self] .*) (ALT .*)))
|
||||
*/
|
||||
public static boolean hasImmediateRecursiveRuleRefs(GrammarAST t, String ruleName) {
|
||||
if ( t==null ) return false;
|
||||
for (GrammarAST rref : t.getNodesWithType(RULE_REF)) {
|
||||
if ( rref.getText().equals(ruleName) ) return true;
|
||||
if ( rref.getChildIndex()==0 && rref.getText().equals(ruleName) ) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -59,6 +59,8 @@ public void otherAlt(GrammarAST altTree, GrammarAST rewriteTree, int alt) {}
|
|||
public void setReturnValues(GrammarAST t) {}
|
||||
}
|
||||
|
||||
@rulecatch { }
|
||||
|
||||
// TODO: can get parser errors for not matching pattern; make them go away
|
||||
public
|
||||
rec_rule returns [boolean isLeftRec]
|
||||
|
|
|
@ -170,9 +170,11 @@ public class BasicSemanticChecks extends GrammarTreeVisitor {
|
|||
|
||||
@Override
|
||||
public void finishPrequels(GrammarAST firstPrequel) {
|
||||
List<GrammarAST> options = firstPrequel.getNodesWithType(OPTIONS);
|
||||
List<GrammarAST> imports = firstPrequel.getNodesWithType(IMPORT);
|
||||
List<GrammarAST> tokens = firstPrequel.getNodesWithType(TOKENS);
|
||||
if ( firstPrequel==null ) return;
|
||||
GrammarAST parent = (GrammarAST)firstPrequel.parent;
|
||||
List<GrammarAST> options = parent.getAllChildrenWithType(OPTIONS);
|
||||
List<GrammarAST> imports = parent.getAllChildrenWithType(IMPORT);
|
||||
List<GrammarAST> tokens = parent.getAllChildrenWithType(TOKENS);
|
||||
checkNumPrequels(options, imports, tokens);
|
||||
}
|
||||
|
||||
|
|
|
@ -103,6 +103,10 @@ public class ErrorManager {
|
|||
messageST.add("exception", msg.e);
|
||||
messageST.add("stackTrace", msg.e.getStackTrace());
|
||||
}
|
||||
else {
|
||||
messageST.add("exception", null); // avoid ST error msg
|
||||
messageST.add("stackTrace", null);
|
||||
}
|
||||
|
||||
boolean locationValid = false;
|
||||
if (msg.line != -1) {
|
||||
|
|
|
@ -70,6 +70,17 @@ public class GrammarAST extends CommonTree {
|
|||
return getNodesWithType(IntervalSet.of(ttype));
|
||||
}
|
||||
|
||||
public List<GrammarAST> getAllChildrenWithType(int type) {
|
||||
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
|
||||
for (int i = 0; children!=null && i < children.size(); i++) {
|
||||
Tree t = (Tree) children.get(i);
|
||||
if ( t.getType()==type ) {
|
||||
nodes.add((GrammarAST)t);
|
||||
}
|
||||
}
|
||||
return nodes;
|
||||
}
|
||||
|
||||
public List<GrammarAST> getNodesWithType(IntervalSet types) {
|
||||
List<GrammarAST> nodes = new ArrayList<GrammarAST>();
|
||||
List<GrammarAST> work = new LinkedList<GrammarAST>();
|
||||
|
|
|
@ -98,7 +98,7 @@ public class GrammarTransformPipeline {
|
|||
isLeftRec = leftRecursiveRuleWalker.rec_rule();
|
||||
}
|
||||
catch (RecognitionException re) {
|
||||
tool.errMgr.toolError(ErrorType.INTERNAL_ERROR, "bad ast structure", re);
|
||||
isLeftRec = false; // didn't match; oh well
|
||||
}
|
||||
if ( !isLeftRec ) return;
|
||||
|
||||
|
|
|
@ -499,18 +499,24 @@ public class TestATNParserPrediction extends BaseTest {
|
|||
ATN atn = f.createATN();
|
||||
|
||||
DOTGenerator dot = new DOTGenerator(g);
|
||||
System.out.println(dot.getDOT(atn.ruleToStartState[g.getRule("a").index]));
|
||||
Rule r = g.getRule("b");
|
||||
|
||||
Rule r = g.getRule("a");
|
||||
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
|
||||
r = g.getRule("b");
|
||||
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
|
||||
r = g.getRule("e");
|
||||
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
|
||||
r = g.getRule("ifstat");
|
||||
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
|
||||
r = g.getRule("block");
|
||||
if ( r!=null) System.out.println(dot.getDOT(atn.ruleToStartState[r.index]));
|
||||
|
||||
// Check ATN prediction
|
||||
ParserATNSimulator interp = new ParserATNSimulator(atn);
|
||||
TokenStream input = new IntTokenStream(types);
|
||||
ATNState startState = atn.decisionToState.get(decision);
|
||||
DFA dfa = new DFA(startState);
|
||||
int alt = interp.predictATN(dfa, input, decision, RuleContext.EMPTY, false);
|
||||
int alt = interp.predictATN(dfa, input, RuleContext.EMPTY, false);
|
||||
|
||||
System.out.println(dot.getDOT(dfa, false));
|
||||
|
||||
|
@ -554,7 +560,7 @@ public class TestATNParserPrediction extends BaseTest {
|
|||
DFA dfa = new DFA(startState);
|
||||
// Rule r = g.getRule(ruleName);
|
||||
//ATNState startState = atn.ruleToStartState.get(r);
|
||||
interp.predictATN(dfa, input, 0, ctx, false);
|
||||
interp.predictATN(dfa, input, ctx, false);
|
||||
}
|
||||
catch (NoViableAltException nvae) {
|
||||
nvae.printStackTrace(System.err);
|
||||
|
|
|
@ -107,7 +107,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"8->2 EPSILON 0,0,0\n" +
|
||||
"8->4 EPSILON 0,0,0\n" +
|
||||
"9->1 EPSILON 0,0,0\n" +
|
||||
"0:8\n";
|
||||
"0:8 1\n";
|
||||
ATN atn = createATN(g);
|
||||
String result = ATNSerializer.getDecoded(g, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -155,7 +155,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"14->4 EPSILON 0,0,0\n" +
|
||||
"14->8 EPSILON 0,0,0\n" +
|
||||
"15->1 EPSILON 0,0,0\n" +
|
||||
"0:14\n";
|
||||
"0:14 1\n";
|
||||
ATN atn = createATN(g);
|
||||
String result = ATNSerializer.getDecoded(g, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -190,7 +190,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"7->8 EPSILON 0,0,0\n" +
|
||||
"8->9 ATOM 4,0,0\n" +
|
||||
"9->1 EPSILON 0,0,0\n" +
|
||||
"0:6\n";
|
||||
"0:6 1\n";
|
||||
ATN atn = createATN(g);
|
||||
String result = ATNSerializer.getDecoded(g, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -254,7 +254,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"6->2 EPSILON 0,0,0\n" +
|
||||
"7->8 ATOM 98,0,0\n" +
|
||||
"8->4 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -277,7 +277,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->4 RANGE 48,57,0\n" +
|
||||
"4->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -309,8 +309,8 @@ public class TestATNSerialization extends BaseTest {
|
|||
"7->5 EPSILON 0,0,0\n" +
|
||||
"7->8 EPSILON 0,0,0\n" +
|
||||
"8->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n" +
|
||||
"1:7\n";
|
||||
"0:0 1\n" +
|
||||
"1:7 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -357,7 +357,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"12->13 ATOM 99,0,0\n" +
|
||||
"13->14 EPSILON 0,0,0\n" +
|
||||
"14->6 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -381,7 +381,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->4 NOT_SET 0,0,0\n" +
|
||||
"4->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -405,7 +405,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->4 SET 0,0,0\n" +
|
||||
"4->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -429,7 +429,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"1->3 EPSILON 0,0,0\n" +
|
||||
"3->4 NOT_SET 0,0,0\n" +
|
||||
"4->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -490,9 +490,9 @@ public class TestATNSerialization extends BaseTest {
|
|||
"18->19 WILDCARD 0,0,0\n" +
|
||||
"19->20 EPSILON 0,0,0\n" +
|
||||
"20->7 EPSILON 0,0,0\n" +
|
||||
"0:0\n" +
|
||||
"1:1\n" +
|
||||
"2:12\n";
|
||||
"0:0 1\n" +
|
||||
"1:1 1\n" +
|
||||
"2:12 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -521,7 +521,7 @@ public class TestATNSerialization extends BaseTest {
|
|||
"4->5 EPSILON 0,0,0\n" +
|
||||
"5->6 NOT_SET 1,0,0\n" +
|
||||
"6->2 EPSILON 0,0,0\n" +
|
||||
"0:0\n";
|
||||
"0:0 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
@ -570,9 +570,9 @@ public class TestATNSerialization extends BaseTest {
|
|||
"12->6 EPSILON 0,0,0\n" +
|
||||
"13->14 ATOM 99,0,0\n" +
|
||||
"14->8 EPSILON 0,0,0\n" +
|
||||
"0:0\n" +
|
||||
"1:1\n" +
|
||||
"2:2\n";
|
||||
"0:0 1\n" +
|
||||
"1:1 1\n" +
|
||||
"2:2 1\n";
|
||||
ATN atn = createATN(lg);
|
||||
String result = ATNSerializer.getDecoded(lg, atn);
|
||||
assertEquals(expecting, result);
|
||||
|
|
|
@ -69,7 +69,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
@Test public void testSimpleAnd() throws Exception {
|
||||
IntervalSet s = IntervalSet.of(10,20);
|
||||
IntervalSet s2 = IntervalSet.of(13,15);
|
||||
String expecting = "13..15";
|
||||
String expecting = "{13..15}";
|
||||
String result = (s.and(s2)).toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -157,12 +157,12 @@ public class TestIntervalSet extends BaseTest {
|
|||
@Test public void testSubtractOfOverlappingRangeFromLeft() throws Exception {
|
||||
IntervalSet s = IntervalSet.of(10,20);
|
||||
IntervalSet s2 = IntervalSet.of(5,11);
|
||||
String expecting = "12..20";
|
||||
String expecting = "{12..20}";
|
||||
String result = (s.subtract(s2)).toString();
|
||||
assertEquals(result, expecting);
|
||||
|
||||
IntervalSet s3 = IntervalSet.of(5,10);
|
||||
expecting = "11..20";
|
||||
expecting = "{11..20}";
|
||||
result = (s.subtract(s3)).toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -170,12 +170,12 @@ public class TestIntervalSet extends BaseTest {
|
|||
@Test public void testSubtractOfOverlappingRangeFromRight() throws Exception {
|
||||
IntervalSet s = IntervalSet.of(10,20);
|
||||
IntervalSet s2 = IntervalSet.of(15,25);
|
||||
String expecting = "10..14";
|
||||
String expecting = "{10..14}";
|
||||
String result = (s.subtract(s2)).toString();
|
||||
assertEquals(result, expecting);
|
||||
|
||||
IntervalSet s3 = IntervalSet.of(20,25);
|
||||
expecting = "10..19";
|
||||
expecting = "{10..19}";
|
||||
result = (s.subtract(s3)).toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -193,7 +193,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
s.add(30,40);
|
||||
s.add(50,60); // s has 3 ranges now: 10..20, 30..40, 50..60
|
||||
IntervalSet s2 = IntervalSet.of(5,55); // covers one and touches 2nd range
|
||||
String expecting = "56..60";
|
||||
String expecting = "{56..60}";
|
||||
String result = (s.subtract(s2)).toString();
|
||||
assertEquals(result, expecting);
|
||||
|
||||
|
@ -309,7 +309,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
@Test public void testComplement3() throws Exception {
|
||||
IntervalSet s = IntervalSet.of(1,96);
|
||||
s.add(99, Lexer.MAX_CHAR_VALUE);
|
||||
String expecting = "97..98";
|
||||
String expecting = "{97..98}";
|
||||
String result = (s.complement(1, Lexer.MAX_CHAR_VALUE)).toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -319,7 +319,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
IntervalSet s = IntervalSet.of(0,41);
|
||||
s.add(42);
|
||||
s.add(43,65534);
|
||||
String expecting = "0..65534";
|
||||
String expecting = "{0..65534}";
|
||||
String result = s.toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -328,7 +328,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
IntervalSet s = IntervalSet.of(43,65534);
|
||||
s.add(42);
|
||||
s.add(0,41);
|
||||
String expecting = "0..65534";
|
||||
String expecting = "{0..65534}";
|
||||
String result = s.toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -340,7 +340,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
s.add(0,9);
|
||||
s.add(43,65534);
|
||||
s.add(11,41);
|
||||
String expecting = "0..65534";
|
||||
String expecting = "{0..65534}";
|
||||
String result = s.toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
@ -349,7 +349,7 @@ public class TestIntervalSet extends BaseTest {
|
|||
IntervalSet s = IntervalSet.of(1,10);
|
||||
s.add(20,30);
|
||||
s.add(5,25); // overlaps two!
|
||||
String expecting = "1..30";
|
||||
String expecting = "{1..30}";
|
||||
String result = s.toString();
|
||||
assertEquals(result, expecting);
|
||||
}
|
||||
|
|
|
@ -2,12 +2,43 @@ package org.antlr.v4.test;
|
|||
|
||||
import org.junit.Test;
|
||||
|
||||
/** Test parser execution.
|
||||
*
|
||||
* For the non-greedy stuff, the rule is that .* or any other non-greedy loop
|
||||
* (any + or * loop that has an alternative with '.' in it is automatically
|
||||
* non-greedy) never sees past the end of the rule containing that loop.
|
||||
* There is no automatic way to detect when the exit branch of a non-greedy
|
||||
* loop has seen enough input to determine how much the loop should consume
|
||||
* yet still allow matching the entire input. Of course, this is extremely
|
||||
* inefficient, particularly for things like
|
||||
*
|
||||
* block : '{' (block|.)* '}' ;
|
||||
*
|
||||
* that need only see one symbol to know when it hits a '}'. So, I
|
||||
* came up with a practical solution. During prediction, the ATN
|
||||
* simulator never falls off the end of a rule to compute the global
|
||||
* FOLLOW. Instead, we terminate the loop, choosing the exit branch.
|
||||
* Otherwise, we predict to reenter the loop. For example, input
|
||||
* "{ foo }" will allow the loop to match foo, but that's it. During
|
||||
* prediction, the ATN simulator will see that '}' reaches the end of a
|
||||
* rule that contains a non-greedy loop and stop prediction. It will choose
|
||||
* the exit branch of the inner loop. So, the way in which you construct
|
||||
* the rule containing a non-greedy loop dictates how far it will scan ahead.
|
||||
* Include everything after the non-greedy loop that you know it must scan
|
||||
* in order to properly make a prediction decision. These beasts are tricky,
|
||||
* so be careful. Don't liberally sprinkle them around your code.
|
||||
*
|
||||
* To simulate filter mode, use ( .* (pattern1|pattern2|...) )*
|
||||
*
|
||||
* Nongreedy loops match as much input as possible while still allowing
|
||||
* the remaining input to match.
|
||||
*/
|
||||
public class TestParserExec extends BaseTest {
|
||||
|
||||
@Test public void testBasic() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : ID INT {System.out.println(input);} ;\n" +
|
||||
"a : ID INT {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
@ -20,7 +51,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAPlus() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : ID+ {System.out.println(input);} ;\n" +
|
||||
"a : ID+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
|
@ -33,7 +64,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAorAPlus() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : (ID|ID)+ {System.out.println(input);} ;\n" +
|
||||
"a : (ID|ID)+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
|
@ -45,7 +76,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAStar() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : ID* {System.out.println(input);} ;\n" +
|
||||
"a : ID* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
|
@ -61,7 +92,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAorAStar() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : (ID|ID)* {System.out.println(input);} ;\n" +
|
||||
"a : (ID|ID)* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
|
@ -76,7 +107,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAorBPlus() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : (ID|INT{;})+ {System.out.println(input);} ;\n" +
|
||||
"a : (ID|INT{;})+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
@ -89,7 +120,7 @@ public class TestParserExec extends BaseTest {
|
|||
@Test public void testAorBStar() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : (ID|INT{;})* {System.out.println(input);} ;\n" +
|
||||
"a : (ID|INT{;})* {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
@ -102,4 +133,258 @@ public class TestParserExec extends BaseTest {
|
|||
assertEquals("a34c\n", found);
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopCantSeeEOF() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : block EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"block : '{' .* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"{ }";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("{}\n", found);
|
||||
input =
|
||||
"{a b { } ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("{ab{}\n", found);
|
||||
input =
|
||||
"{ } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("", found); // should not print output; resync kills rest of input
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoop() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( x=34 ) { } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(x=34){};\n", found);
|
||||
input =
|
||||
"if ( ))) ) { } ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if()))){};\n", found);
|
||||
input =
|
||||
"if (() { } a 2) { } ;"; // FAILS to match since it terminates loop at first { }
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("", found); // should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testNongreedyLoopPassingThroughAnotherNongreedy() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' .* ')' block ;\n" +
|
||||
"block : '{' (block|.)* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input1 =
|
||||
"if ( x=34 ) { {return a} b 34 } ;";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input1, false);
|
||||
assertEquals("if(x=34){{returna}b34};\n", found);
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNotNecessary() throws Exception {
|
||||
// EOF on end means LL(*) can identify when to stop the loop.
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : stat* ID '=' ID ';' EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found);
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match last element
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' ID ';'
|
||||
assertEquals("no viable token at input [@8,9:9='<EOF>',<-1>,1:9], index 8\n", found);
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // FAILS to match since it can't match last element
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("no viable token at input [@12,14:14='<EOF>',<-1>,1:14], index 12\n", found); // should not finish to print output
|
||||
}
|
||||
|
||||
@Test public void testStatLoopNongreedyNecessary() throws Exception {
|
||||
// stops scanning ahead at end of rule s since decision is nongreedy.
|
||||
// this says: "match statements until we see a=b; assignment; ignore any
|
||||
// statements that follow."
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"random : s ;" + // call s so s isn't followed by EOF directly
|
||||
"s : (options {greedy=false;} : stat)* ID '=' ID ';'\n" +
|
||||
" {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"stat : 'if' '(' INT ')' stat\n" +
|
||||
" | 'return' INT ';'\n" +
|
||||
" | ID '=' (INT|ID) ';'\n" +
|
||||
" | block\n" +
|
||||
" ;\n" +
|
||||
"block : '{' stat* '}' ;\n"+
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"x=1; a=b; x=y;";
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found); // ignores x=y that follows first a=b assignment
|
||||
input =
|
||||
"if ( 1 ) { x=3; { return 4; } } return 99; abc=def;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(1){x=3;{return4;}}return99;abc=def;\n", found);
|
||||
input =
|
||||
"x=1; a=3;"; // FAILS to match since it can't match either stat
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
// can't match EOF to ID '=' ID ';'
|
||||
assertEquals("no viable token at input [@8,9:9='<EOF>',<-1>,1:9], index 8\n", found);
|
||||
input =
|
||||
"x=1; a=b; z=3;"; // stops at a=b; ignores z=3;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("x=1;a=b;\n", found); // should not finish all input
|
||||
}
|
||||
|
||||
@Test public void testHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : tag+ {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"tag : '<' '/'? .* '>' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"<a>foo</a>", false);
|
||||
assertEquals("<a>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"<a></a>", false);
|
||||
assertEquals("<a></a>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"</b><a src=\"abc\", width=32>", false);
|
||||
assertEquals("</b><asrc=\"abc\",width=32>\n", found);
|
||||
}
|
||||
|
||||
/** Lookahead prediction with '.' can be misleading since it is nongreedy. Lookahead
|
||||
* that sees into a non-greedy loop thinks it is greedy.
|
||||
*/
|
||||
@Test public void testFindHTMLTags() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"a : ( .* (tag {System.out.println($tag.text);} |header) )* EOF;\n" +
|
||||
"tag : '<' .+ '>' ;\n" +
|
||||
"header : 'x' 'y' ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"COMMA : ',' ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"STR : '\"' (options {greedy=false;}:.)* '\"' ;\n" +
|
||||
"INT : '0'..'9'+;\n" +
|
||||
"WS : (' '|'\\n') {skip();} ;\n";
|
||||
|
||||
String found = null;
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
",=foo <a x= 3>32skidoo<a><img>", false);
|
||||
assertEquals("<ax=3>\n" +
|
||||
"<a>\n" +
|
||||
"<img>\n", found);
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"x x<a>", false);
|
||||
assertEquals("<a>\n", found);
|
||||
// gets line 1:3 no viable alternative at input '>'. Why??
|
||||
// Oh! It sees .+ and figures it matches > so <> predicts tag CORRECT!
|
||||
// Seeing '.' in a lookahead prediction can be misleading!!
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "a",
|
||||
"x <><a>", false);
|
||||
assertEquals("null\n" + // doesn't match tag; null
|
||||
"<a>\n", found);
|
||||
}
|
||||
|
||||
/** See comment on testNongreedyLoopEndOfRuleStuffFollowing */
|
||||
@Test public void testNongreedyLoopEndOfRule() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat EOF {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' INT ')' .* ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( 34 ) a b";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34)ab\n", found);
|
||||
input =
|
||||
"if ( 34 ))) ) ( a = = b( ;";
|
||||
found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("if(34))))(a==b(\n", found);
|
||||
}
|
||||
|
||||
/** When .* is on the end of a rule, no tokens predict the exit branch of the loop
|
||||
* since it immediately hits the end of the rule. Non-greedy loops
|
||||
* never consume more tokens than exist following the .* up to the end of that
|
||||
* same rule. So, in this case, the greedy loop always wins and it will
|
||||
* suck tokens until end of file. Unfortunately, the '.' in rule s
|
||||
* will not match, leading to a syntax error.
|
||||
*/
|
||||
@Test public void testNongreedyLoopEndOfRuleStuffFollowing() throws Exception {
|
||||
String grammar =
|
||||
"grammar T;\n" +
|
||||
"s : ifstat '.' {System.out.println(input.toString(0,input.index()-1));} ;\n" +
|
||||
"ifstat : 'if' '(' INT ')' .* ;\n" +
|
||||
"EQ : '=' ;\n" +
|
||||
"INT : '0'..'9'+ ;\n" +
|
||||
"ID : 'a'..'z'+ ;\n" +
|
||||
"WS : (' '|'\\n')+ {skip();} ;\n";
|
||||
String input =
|
||||
"if ( 34 ) a b .";
|
||||
String found = execParser("T.g", grammar, "TParser", "TLexer", "s",
|
||||
input, false);
|
||||
assertEquals("no viable token at input [@7,15:15='<EOF>',<-1>,1:15], index 7\n", found);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue