recursive rule bug in lexer; the lexer ATN simulator was not checking for empty stack at rule stop states.
parent 1b60543207
commit 262a331a5b
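For context on the fix in the hunks below: when a token rule can invoke itself (for example a nested-brace ACTION rule), the lexer ATN simulator must only treat a rule stop state as the end of the token when the rule-invocation stack is empty; with a non-empty stack it has to pop and keep matching. The standalone sketch below is not ANTLR code; NestedBraceDemo is a made-up class that uses a plain nesting counter as a stand-in for the prediction-context stack, just to show why skipping the empty-stack check truncates the token.

// Hypothetical illustration only; not ANTLR code.
public class NestedBraceDemo {
	// Returns the length of the brace token starting at position 0, or -1 if none.
	// When requireEmptyStack is false we accept at the first '}', mimicking the
	// buggy behavior of stopping at a rule stop state with a non-empty stack.
	static int matchAction(String input, boolean requireEmptyStack) {
		if (input.isEmpty() || input.charAt(0) != '{') return -1;
		int depth = 0; // stand-in for the rule-invocation (prediction context) stack
		for (int i = 0; i < input.length(); i++) {
			char c = input.charAt(i);
			if (c == '{') depth++;
			else if (c == '}') {
				depth--;
				if (!requireEmptyStack || depth == 0) return i + 1;
			}
		}
		return -1; // ran off the input without closing every '{'
	}

	public static void main(String[] args) {
		String input = "{{x}\n}";
		System.out.println("without the check: " + matchAction(input, false)); // 4, i.e. "{{x}"
		System.out.println("with the check:    " + matchAction(input, true));  // 6, the whole input
	}
}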
@@ -332,6 +332,22 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
		this._token = _token;
	}

	public void setType(int ttype) {
		_type = ttype;
	}

	public int getType() {
		return _type;
	}

	public void setChannel(int channel) {
		_channel = channel;
	}

	public int getChannel() {
		return _channel;
	}

	public String[] getModeNames() {
		return null;
	}
@@ -436,7 +436,7 @@ public class LexerATNSimulator extends ATNSimulator {
		// that rule is done. this is how we cut off nongreedy .+ loops.
		reach = deleteWildcardConfigsForAlt(reach, ci, c.alt);

		// move to next char, looking for longer match
		// (we continue processing if there are states in reach)
	}
}
@@ -583,6 +583,13 @@ public class LexerATNSimulator extends ATNSimulator {
for (SingletonPredictionContext ctx : config.context) {
	if ( !ctx.isEmpty() ) {
		PredictionContext newContext = ctx.parent; // "pop" invoking state
		if ( ctx.invokingState==PredictionContext.EMPTY_FULL_CTX_INVOKING_STATE ) {
			// we have no context info. Don't pursue.
			if ( debug ) System.out.println("FALLING off token "+
											recog.getRuleNames()[config.state.ruleIndex]);
			configs.add(config);
			continue;
		}
		ATNState invokingState = atn.states.get(ctx.invokingState);
		RuleTransition rt = (RuleTransition)invokingState.transition(0);
		ATNState retState = rt.followState;
@@ -0,0 +1,2 @@
{{x}
}
@@ -1,6 +1,25 @@
grammar A;
lexer grammar A;

s : INT { System.out.println($start.getText());} ;
/*
For input

INT : [0-9]+ ;
WS : [ \t\n]+ -> skip ;
{{x}
}

This matches {{x} and then thinks that it can stop because it can match that
without going into the recursive call. The context for the stop state in ACTION
is (2,1,[[$, 6 $]]) so it deletes everything else associated with this token.
Seems like we should favor the first alternative, but we can't do that within
a single rule.

weird though that this one works

STRING : '"' ( '\\' '"' | . )* '"' ;

wouldn't it get to the end of the rule also by the wild-card route?
Maybe it's a simple order of operations or order in which i process the
alternatives?
*/
ACTION : '{' ( ACTION | . )* '}' ;
WS : [ \r\t\n]+ -> skip ;
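The comment block in A.g4 above describes the failure on the checked-in {{x} input. As one way to see the behavior end to end, here is a small driver in the same style as the TestA.java added later in this commit, but fed the nested-brace input directly; the class name NestedActionCheck is made up here, and A is assumed to be the lexer generated from A.g4. With the rule-stop-state fix, the expectation is a single ACTION token covering the whole input rather than one that stops after {{x}.

import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenFactory;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.UnbufferedCharStream;

import java.io.ByteArrayInputStream;

public class NestedActionCheck {
	public static void main(String[] args) throws Exception {
		// Same input as the new test file: {{x} followed by a newline and }
		CharStream input = new UnbufferedCharStream(
				new ByteArrayInputStream("{{x}\n}".getBytes()));
		A lex = new A(input);                          // lexer generated from A.g4
		lex.setTokenFactory(new CommonTokenFactory(true));
		CommonTokenStream tokens = new CommonTokenStream(lex);
		tokens.fill();
		System.out.println(tokens.getTokens());        // expect one ACTION token spanning the input
	}
}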
@@ -1,2 +1,6 @@
lexer grammar T;
A : 'a';
grammar T;

s : INT { System.out.println($start.getText());} ;

INT : [0-9]+ {$type = 3; String x = $text; $channel, $mode} ;
WS : [ \t\n]+ -> skip ;
@@ -0,0 +1,26 @@
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.CommonTokenFactory;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.UnbufferedCharStream;

import java.io.FileInputStream;
import java.io.InputStream;

public class TestA {
	public static void main(String[] args) throws Exception {
		String inputFile = null;
		if ( args.length>0 ) inputFile = args[0];
		InputStream is = System.in;
		if ( inputFile!=null ) {
			is = new FileInputStream(inputFile);
		}
		CharStream input = new UnbufferedCharStream(is);

		A lex = new A(input);
		lex.setTokenFactory(new CommonTokenFactory(true));

		CommonTokenStream tokens = new CommonTokenStream(lex);
		tokens.fill();
		System.out.println(tokens.getTokens());
	}
}