forked from jasder/antlr
Fixed a bug related to semantic predicates in the lexer and generally cleaned up variable and method names in the simulator. I moved all of the predicates to the right side of lexer rules in the unit tests. Later, we should ensure that predicates only occur on the right edge of lexer rules. We should also note that the rule has not yet been accepted when a predicate runs, so we can't test things like getText(); we have to use raw indexes into the character stream instead. In the lexer simulator, the addDFAState() method no longer tries to compute whether there is a predicate in the configurations; that information has already been set into the ATNConfigSet by the getEpsilonTarget() method. [I should also point out that I have not tested the Java parsing in a while and now it hits a landmine on a number of common Java files in the JDK :(. I'm not sure where that crept in.]
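For reference, the right-edge convention the tests now follow looks like the updated T.g4 in this commit; a minimal sketch of such a lexer grammar (ANTLR v4 syntax):

    lexer grammar T;

    A  : 'a' {false}? ; // predicate on the right edge, after the chars are matched; rule is effectively disabled
    B  : 'a' ;          // wins when A's predicate fails
    WS : [ \n] ;

With this layout the predicate fires only after the rule has matched its characters, which is why the predicate must work from raw character-stream indexes rather than getText().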
parent d3c238569f
commit b255509e96
@@ -43,6 +43,8 @@ public class ATNConfigSet extends OrderedHashSet<ATNConfig> {

// TODO: can we track conflicts as they are added to save scanning configs later?
public int uniqueAlt;
public IntervalSet conflictingAlts;
// Used in parser and lexer. In lexer, it indicates we hit a pred
// while computing a closure operation. Don't make a DFA state from this.
public boolean hasSemanticContext;
public boolean dipsIntoOuterContext;

@@ -58,11 +58,11 @@ public class LexerATNSimulator extends ATNSimulator {

/** When we hit an accept state in either the DFA or the ATN, we
* have to notify the character stream to start buffering characters
* via mark() and record the current state. The current state includes
* the current index into the input, the current line, and current
* character position in that line. Note that the Lexer is tracking
* the starting line and characterization of the token. These
* variables track the state of the simulator when it hits an accept state.
* via mark() and record the current state. The current sim state
* includes the current index into the input, the current line,
* and current character position in that line. Note that the Lexer is
* tracking the starting line and characterization of the token. These
* variables track the "state" of the simulator when it hits an accept state.
*
* We track these variables separately for the DFA and ATN simulation
* because the DFA simulation often has to fail over to the ATN

@@ -71,18 +71,18 @@ public class LexerATNSimulator extends ATNSimulator {
* then the ATN does the accept and the DFA simulator that invoked it
* can simply return the predicted token type.
*/
protected static class ExecState {
protected static class SimState {
protected int index = -1;
protected int line = 0;
protected int charPos = -1;
protected DFAState state;
protected DFAState dfaState;
protected ATNConfig config;

protected void reset() {
index = -1;
line = 0;
charPos = -1;
state = null;
dfaState = null;
config = null;
}
}

@@ -109,7 +109,7 @@ public class LexerATNSimulator extends ATNSimulator {

/** Used during DFA/ATN exec to record the most recent accept configuration info */
@NotNull
protected final ExecState prevAccept = new ExecState();
protected final SimState prevAccept = new SimState();

public static int ATN_failover = 0;
public static int match_calls = 0;

@@ -184,7 +184,7 @@ public class LexerATNSimulator extends ATNSimulator {
ATNState startState = atn.modeToStartState.get(mode);

if ( debug ) {
System.out.format("mode %d start: %s\n", mode, startState);
System.out.format("matchATN mode %d start: %s\n", mode, startState);
}

ATNConfigSet s0_closure = computeStartState(input, startState);

@@ -236,7 +236,7 @@ public class LexerATNSimulator extends ATNSimulator {
System.out.format("accept; predict %d in state %d\n", s.prediction, s.stateNumber);
}

markAcceptState(prevAccept, input, s);
captureSimState(prevAccept, input, s);
// keep going unless we're at EOF; check if something else could match
// EOF never in DFA
if ( t==CharStream.EOF ) break;

@@ -247,7 +247,7 @@ public class LexerATNSimulator extends ATNSimulator {
t = input.LA(1);
}

ATNConfigSet reach = prevAccept.state != null ? prevAccept.state.configset : null;
ATNConfigSet reach = prevAccept.dfaState != null ? prevAccept.dfaState.configset : null;
return failOrAccept(prevAccept, input, reach, t);
}

@@ -265,7 +265,7 @@ public class LexerATNSimulator extends ATNSimulator {

while ( true ) { // while more work
if ( debug ) {
System.out.format("in reach starting closure: %s\n", closure);
System.out.format("execATN loop starting closure: %s\n", closure);
}

// As we move src->trg, src->trg, we keep track of the previous trg to

@@ -295,6 +295,10 @@ public class LexerATNSimulator extends ATNSimulator {
break;
}
else if (target != null) {
if ( debug ) {
System.out.println("reuse state "+s.stateNumber+
" edge to "+target.stateNumber);
}
reach = target.configset;
}
}

@@ -317,18 +321,18 @@ public class LexerATNSimulator extends ATNSimulator {
if (from != null) {
addDFAEdge(from, t, ERROR);
}
break;
break; // stop when we can't match any more char
}

// Did we hit a stop state during reach op?
processAcceptStates(input, reach);
processAcceptConfigs(input, reach);

// Add an edge from s to target DFA found/created for reach
target = addDFAEdge(s, t, reach);
}
else if (target.isAcceptState) {
traceAcceptState(target.prediction);
markAcceptState(prevAccept, input, target);
captureSimState(prevAccept, input, target);
}

consume(input);

@@ -342,16 +346,16 @@ public class LexerATNSimulator extends ATNSimulator {
return failOrAccept(prevAccept, input, closure, t);
}

protected int failOrAccept(ExecState prevAccept, CharStream input,
protected int failOrAccept(SimState prevAccept, CharStream input,
ATNConfigSet reach, int t)
{
if (prevAccept.state != null) {
int ruleIndex = prevAccept.state.lexerRuleIndex;
int actionIndex = prevAccept.state.lexerActionIndex;
if (prevAccept.dfaState != null) {
int ruleIndex = prevAccept.dfaState.lexerRuleIndex;
int actionIndex = prevAccept.dfaState.lexerActionIndex;
accept(input, ruleIndex, actionIndex,
prevAccept.index, prevAccept.line, prevAccept.charPos);
tracePredict(prevAccept.state.prediction);
return prevAccept.state.prediction;
tracePredict(prevAccept.dfaState.prediction);
return prevAccept.dfaState.prediction;
}
else if (prevAccept.config != null) {
int ruleIndex = prevAccept.config.state.ruleIndex;

@@ -389,23 +393,33 @@ public class LexerATNSimulator extends ATNSimulator {
}
}

protected void processAcceptStates(@NotNull CharStream input, @NotNull ATNConfigSet reach) {
protected void processAcceptConfigs(@NotNull CharStream input, @NotNull ATNConfigSet reach) {
if ( debug ) {
System.out.format("processAcceptConfigs: reach=%s, prevAccept=%s, prevIndex=%d\n",
reach, prevAccept.config, prevAccept.index);
}
for (int ci=0; ci<reach.size(); ci++) {
ATNConfig c = reach.get(ci);
if ( c.state instanceof RuleStopState) {
if ( debug ) {
System.out.format("in reach we hit accept state %s index %d, reach=%s, prevAccept=%s, prevIndex=%d\n",
c, input.index(), reach, prevAccept.config, prevAccept.index);
System.out.format("processAcceptConfigs: hit accept config %s index %d\n",
c, input.index());
}

int index = input.index();
if ( index > prevAccept.index ) {
traceAcceptState(c.alt);
// will favor prev accept at same index so "int" is keyword not ID
markAcceptState(prevAccept, input, c);
if ( debug ) {
System.out.format("mark %s @ index=%d, %d:%d\n", c, index, prevAccept.line, prevAccept.charPos);
if ( prevAccept.index>=0 ) {
System.out.println("processAcceptConfigs: found longer token");
}
}
// condition > not >= will favor prev accept at same index.
// This way, "int" is keyword not ID if listed first.
traceAcceptState(c.alt);
if ( debug ) {
System.out.format("markExecSettings for %s @ index=%d, line %d:%d\n", c, index, prevAccept.line, prevAccept.charPos);
}
captureSimState(prevAccept, input, reach, c);
}

// if we reach lexer accept state, toss out any configs in rest

@@ -546,13 +560,17 @@ public class LexerATNSimulator extends ATNSimulator {
ATNState p = config.state;
for (int i=0; i<p.getNumberOfTransitions(); i++) {
Transition t = p.transition(i);
ATNConfig c = getEpsilonTarget(config, t);
ATNConfig c = getEpsilonTarget(config, t, configs);
if ( c!=null ) closure(c, configs);
}
}

// side-effect: can alter configs.hasSemanticContext
@Nullable
public ATNConfig getEpsilonTarget(@NotNull ATNConfig config, @NotNull Transition t) {
public ATNConfig getEpsilonTarget(@NotNull ATNConfig config,
@NotNull Transition t,
@NotNull ATNConfigSet configs)
{
ATNState p = config.state;
ATNConfig c = null;
if ( t.getClass() == RuleTransition.class ) {

@@ -565,7 +583,29 @@ public class LexerATNSimulator extends ATNSimulator {
System.out.format("Predicates cannot be evaluated without a recognizer; assuming true.\n");
}

/* Track traversing semantic predicates. If we traverse,
we cannot add a DFA state for this "reach" computation
because the DFA would not test the predicate again in the
future. Rather than creating collections of semantic predicates
like v3 and testing them on prediction, v4 will test them on the
fly all the time using the ATN not the DFA. This is slower but
semantically it's not used that often. One of the key elements to
this predicate mechanism is not adding DFA states that see
predicates immediately afterwards in the ATN. For example,

a : ID {p1}? | ID {p2}? ;

should create the start state for rule 'a' (to save start state
competition), but should not create target of ID state. The
collection of ATN states the following ID references includes
states reached by traversing predicates. Since this is when we
test them, we cannot cash the DFA state target of ID.
*/
PredicateTransition pt = (PredicateTransition)t;
if ( debug ) {
System.out.println("EVAL rule "+pt.ruleIndex+":"+pt.predIndex);
}
configs.hasSemanticContext = true;
if ( recog == null || recog.sempred(null, pt.ruleIndex, pt.predIndex) ) {
c = new ATNConfig(config, t.target, pt.getPredicate());
}

@@ -603,20 +643,27 @@ public class LexerATNSimulator extends ATNSimulator {
return ttype;
}

protected void markAcceptState(@NotNull ExecState state, @NotNull CharStream input, @NotNull DFAState dfaState) {
state.index = input.index();
state.line = line;
state.charPos = charPositionInLine;
state.config = null;
state.state = dfaState;
protected void captureSimState(@NotNull SimState settings,
@NotNull CharStream input,
@NotNull DFAState dfaState)
{
settings.index = input.index();
settings.line = line;
settings.charPos = charPositionInLine;
settings.config = null;
settings.dfaState = dfaState;
}

protected void markAcceptState(@NotNull ExecState state, @NotNull CharStream input, @NotNull ATNConfig config) {
state.index = input.index();
state.line = line;
state.charPos = charPositionInLine;
state.config = config;
state.state = null;
protected void captureSimState(@NotNull SimState settings,
@NotNull CharStream input,
@NotNull ATNConfigSet ATNConfigs,
@NotNull ATNConfig config)
{
settings.index = input.index();
settings.line = line;
settings.charPos = charPositionInLine;
settings.config = config;
settings.dfaState = null;
}

protected DFAState addDFAEdge(@NotNull DFAState from,

@@ -630,12 +677,13 @@ public class LexerATNSimulator extends ATNSimulator {
return to;
}

// System.out.println("MOVE "+p+" -> "+q+" upon "+getTokenName(t));
if (from == null || to == null) {
return to;
}

if ( debug ) System.out.println("EDGE "+from+" -> "+to+" upon "+((char)t));
addDFAEdge(from, t, to);

return to;
}

@@ -645,37 +693,19 @@ public class LexerATNSimulator extends ATNSimulator {
// make room for tokens 1..n and -1 masquerading as index 0
p.edges = new DFAState[MAX_DFA_EDGE+1]; // TODO: make adaptive
}
// if ( t==Token.EOF ) {
// System.out.println("state "+p+" has EOF edge");
// t = 0;
// }
p.edges[t] = q; // connect
}

/** Add a new DFA state if there isn't one with this set of
configurations already. This method also detects the first
configuration containing an ATN rule stop state. Later, when
traversing the DFA, we will know which rule to accept. Also, we
detect if any of the configurations derived from traversing a
semantic predicate. If so, we cannot add a DFA state for this
because the DFA would not test the predicate again in the
future. Rather than creating collections of semantic predicates
like v3 and testing them on prediction, v4 will test them on the
fly all the time using the ATN not the DFA. This is slower but
semantically it's not used that often. One of the key elements to
this predicate mechanism is not adding DFA states that see
predicates immediately afterwards in the ATN. For example,

a : ID {p1}? | ID {p2}? ;

should create the start state for rule 'a' (to save start state
competition), but should not create target of ID state. The
collection of ATN states the following ID references includes
states reached by traversing predicates. Since this is when we
test them, we cannot cash the DFA state target of ID.
traversing the DFA, we will know which rule to accept.
*/
@Nullable
protected DFAState addDFAState(@NotNull ATNConfigSet configs) {
// If we eval'd a predicate while filling configs, mustn't create DFA state
if ( configs.hasSemanticContext ) return null;

DFAState proposed = new DFAState(configs);
DFAState existing = dfa[mode].states.get(proposed);
if ( existing!=null ) return existing;

@@ -683,15 +713,10 @@ public class LexerATNSimulator extends ATNSimulator {
DFAState newState = proposed;

ATNConfig firstConfigWithRuleStopState = null;
boolean traversedPredicate = false;
for (ATNConfig c : configs) {
if ( firstConfigWithRuleStopState==null &&
c.state instanceof RuleStopState )
{
if ( c.state instanceof RuleStopState ) {
firstConfigWithRuleStopState = c;
}
if ( c.semanticContext!=null && c.semanticContext!=SemanticContext.NONE ) {
traversedPredicate = true;
break;
}
}

@@ -702,8 +727,6 @@ public class LexerATNSimulator extends ATNSimulator {
newState.prediction = atn.ruleToTokenType[newState.lexerRuleIndex];
}

if ( traversedPredicate ) return null; // cannot cache

newState.stateNumber = dfa[mode].states.size();
newState.configset = new ATNConfigSet();
newState.configset.addAll(configs);

@@ -1,9 +1,5 @@
grammar T;
s : r=e ;
e : e '(' INT ')'
| INT
;
MULT: '*' ;
ADD : '+' ;
INT : [0-9]+ ;
WS : [ \t\n]+ -> skip ;
lexer grammar T;

A : 'a' {false}? ;
B : 'a' ;
WS : [ \n] ;

@@ -1,86 +0,0 @@
/*
[The "BSD license"]
Copyright (c) 2011 Terence Parr
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

import org.antlr.v4.runtime.ANTLRFileStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.tree.ParseTreeWalker;

public class TestA2 {
/** An example listener that squirrels away a return value in a field
* called v that we get added to the expression context objects
* by adding a return value to rule e. This is a version of A.g4
* that performs actions during the parse with user-defined actions.
* AND, we pass in a listener that gets executed during the parse
* and we use a listener on a tree walk that executes after the parse.
* So, it affect, we compute the result of the expression 3 times.
*/
public static class Do extends A2BaseListener {
A2Parser p;
public Do(A2Parser p) { this.p = p; }
@Override
public void exitAdd(A2Parser.AddContext ctx) {
ctx.v = ctx.e(0).v + ctx.e(1).v;
System.out.println("Add: " + ctx.v);
}

@Override
public void exitInt(A2Parser.IntContext ctx) {
ctx.v = Integer.valueOf(ctx.INT().getSymbol().getText());
System.out.println("Int: "+ctx.v);
}

@Override
public void exitMult(A2Parser.MultContext ctx) {
ctx.v = ctx.e(0).v * ctx.e(1).v;
System.out.println("Mult: " + ctx.v);
}

@Override
public void exitParens(A2Parser.ParensContext ctx) {
ctx.v = ctx.e().v;
System.out.println("Parens: "+ctx.v);
}
}
public static void main(String[] args) throws Exception {
A2Lexer lexer = new A2Lexer(new ANTLRFileStream(args[0]));
CommonTokenStream tokens = new CommonTokenStream(lexer);
A2Parser p = new A2Parser(tokens);
p.setBuildParseTree(true);
ParserRuleContext<Token> t = p.s();
System.out.println("tree = "+t.toStringTree(p));

ParseTreeWalker walker = new ParseTreeWalker();
Do doer = new Do(p);
walker.walk(doer, t);
A2Parser.EContext ectx = (A2Parser.EContext)t.getChild(0);
System.out.println("result from tree walk = "+ ectx.v);
}
}

@@ -5,11 +5,9 @@ import org.antlr.v4.runtime.CommonTokenStream;
public class TestT {
public static void main(String[] args) throws Exception {
CharStream input = new ANTLRFileStream(args[0]);
TLexer lex = new TLexer(input);
T lex = new T(input);
CommonTokenStream tokens = new CommonTokenStream(lex);
TParser parser = new TParser(tokens);

parser.setBuildParseTree(true);
parser.s();
tokens.fill();
System.out.println(tokens.getTokens());
}
}

@@ -1,6 +1,5 @@
import org.antlr.v4.runtime.ANTLRFileStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.ParserRuleContext;

public class TestU {
public static void main(String[] args) throws Exception {

@@ -8,7 +7,7 @@ public class TestU {
CommonTokenStream tokens = new CommonTokenStream(t);
UParser p = new UParser(tokens);
p.setBuildParseTree(true);
ParserRuleContext r = p.s();
System.out.println(r.toStringTree(p));
// ParserRuleContext r = p.s();
// System.out.println(r.toStringTree(p));
}
}

@@ -1,13 +1,32 @@
grammar U;
s @after {System.out.println($ctx.toStringTree(this));} : e EOF ;
e : e '.' ID
| e '.' 'this'
| '-' e
| e '*' e
| e ('+'|'-') e
| INT
| ID
;
ID : 'a'..'z'+ ;
INT : '0'..'9'+ ;
WS : (' '|'\n') {skip();} ;

@members {public static boolean java5 = true;}

prog: ( enumDecl
| stat
)*
EOF
;

enumDecl
: {java5}? 'enum' ID '{' ID (',' ID)* '}'
;

args
: arg (',' arg )*
;

arg
: INT
;

stat: ID '=' expr ';' ;

expr: ID {System.out.println("ID "+$ID.text);}
| {!java5}? 'enum' {System.out.println("ID enum");}
| INT
;

ID : [a-zA-Z]+ ;
INT : [0-9]+ ;
WS : [ \t\n\r]+ -> skip ;

@@ -1,6 +1,5 @@
package org.antlr.v4.test;

import org.junit.Ignore;
import org.junit.Test;

public class TestSemPredEvalLexer extends BaseTest {

@@ -8,31 +7,15 @@ public class TestSemPredEvalLexer extends BaseTest {
@Test public void testDisableRule() throws Exception {
String grammar =
"lexer grammar L;\n"+
"E1 : {false}? 'enum' ;\n" +
"E2 : {true}? 'enum' ;\n" + // winner not E1 or ID
"E1 : 'enum' {false}? ;\n" +
"E2 : 'enum' {true}? ;\n" + // winner not E1 or ID
"ID : 'a'..'z'+ ;\n"+
"WS : (' '|'\\n') {skip();} ;";
String found = execLexer("L.g4", grammar, "L", "enum abc", true);
String expecting =
"[@0,0:3='enum',<2>,1:0]\n" +
"[@1,5:7='abc',<3>,1:5]\n" +
"[@2,8:7='<EOF>',<-1>,1:8]\n"; // no dfa since preds on left edge
assertEquals(expecting, found);
}

@Test public void testDisableRuleAfterMatch() throws Exception {
String grammar =
"lexer grammar L;\n"+
"E1 : 'enum' {false}? ;\n" +
"E2 : 'enum' {true}? ;\n" + // winner not E1 or ID
"ID : 'a'..'z'+ ;\n"+
"WS : (' '|'\\n') {skip();} ;";
String found = execLexer("L.g4", grammar, "L", "enum abc enum", true);
String expecting =
"[@0,0:3='enum',<2>,1:0]\n" +
"[@1,5:7='abc',<3>,1:5]\n" +
"[@2,9:12='enum',<2>,1:9]\n" +
"[@3,13:12='<EOF>',<-1>,1:13]\n" +
"[@2,8:7='<EOF>',<-1>,1:8]\n" +
"s0-' '->:s4=>4\n" +
"s0-'a'->:s5=>3\n" +
"s0-'e'->:s1=>3\n" +

@@ -60,25 +43,51 @@ public class TestSemPredEvalLexer extends BaseTest {
"s0-'e'->:s1=>2\n" +
":s1=>2-'n'->:s2=>2\n" +
":s2=>2-'u'->:s3=>2\n" +
":s3=>2-'m'->:s4=>2\n" +
":s4=>2-'b'->:s4=>2\n" +
":s4=>2-'c'->:s4=>2\n";
":s4=>2-'c'->:s4=>2\n"; // no 'm'-> transition...conflicts with pred
assertEquals(expecting, found);
}

@Ignore
public void testMatchNChar() throws Exception { // can't do locals yet
@Test public void testIDnotEnum() throws Exception {
String grammar =
"lexer grammar L;\n"+
"B : {int n=0;} ({n<=2}? DIGIT {n++})+ ;\n" +
"fragment DIGIT : '0'..'9' ;\n"+
"ENUM : [a-z]+ {false}? ;\n" +
"ID : [a-z]+ ;\n"+
"WS : (' '|'\\n') {skip();} ;";
String found = execLexer("L.g4", grammar, "L", "1234 56", true);
String found = execLexer("L.g4", grammar, "L", "enum abc enum", true);
String expecting =
"[@0,0:3='enum',<4>,1:0]\n" +
"[@1,5:7='abc',<5>,1:5]\n" +
"[@2,8:8='<EOF>',<-1>,1:8]\n"; // no dfa since preds on left edge
"[@0,0:3='enum',<2>,1:0]\n" +
"[@1,5:7='abc',<2>,1:5]\n" +
"[@2,9:12='enum',<2>,1:9]\n" +
"[@3,13:12='<EOF>',<-1>,1:13]\n" +
"s0-' '->:s2=>3\n"; // no DFA for enum/id. all paths lead to pred.
assertEquals(expecting, found);
}

@Test public void testIndent() throws Exception {
String grammar =
"lexer grammar L;\n"+
"ID : [a-z]+ ;\n"+
"INDENT : [ \\t]+ {_tokenStartCharPositionInLine==0}? ;"+
"NL : '\\n' ;"+
"WS : [ \\t]+ ;";
String found = execLexer("L.g4", grammar, "L", "abc\n def \n", true);
String expecting =
"[@0,0:2='abc',<1>,1:0]\n" + // ID
"[@1,3:3='\\n',<3>,1:3]\n" + // NL
"[@2,4:5=' ',<2>,2:0]\n" + // INDENT
"[@3,6:8='def',<1>,2:2]\n" + // ID
"[@4,9:10=' ',<4>,2:5]\n" + // WS
"[@5,11:11='\\n',<3>,2:7]\n" +
"[@6,12:11='<EOF>',<-1>,3:8]\n" +
"s0-'\n" +
"'->:s2=>3\n" +
"s0-'a'->:s1=>1\n" +
"s0-'d'->:s1=>1\n" +
":s1=>1-'b'->:s1=>1\n" +
":s1=>1-'c'->:s1=>1\n" +
":s1=>1-'e'->:s1=>1\n" +
":s1=>1-'f'->:s1=>1\n";
assertEquals(expecting, found);
}
}