Fix recursive rule bug in lexer: the lexer ATN simulator was not checking for an empty stack at rule stop states.

2012-09-23 18:04:46 -07:00 · 2012-09-23 18:04:46 -07:00 · 262a331a5b
parent 1b60543207
commit 262a331a5b
6 changed files with 81 additions and 7 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@ -332,6 +332,22 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 		this._token = _token;
 	}

+	public void setType(int ttype) {
+		_type = ttype;
+	}
+
+	public int getType() {
+		return _type;
+	}
+
+	public void setChannel(int channel) {
+		_channel = channel;
+	}
+
+	public int getChannel() {
+		return _channel;
+	}
+
 	public String[] getModeNames() {
 		return null;
 	}
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -436,7 +436,7 @@ public class LexerATNSimulator extends ATNSimulator {
 				// that rule is done. this is how we cut off nongreedy .+ loops.
 				reach = deleteWildcardConfigsForAlt(reach, ci, c.alt);

-				 // move to next char, looking for longer match
+			 	// move to next char, looking for longer match
 				// (we continue processing if there are states in reach)
 			}
 		}
@ -583,6 +583,13 @@ public class LexerATNSimulator extends ATNSimulator {
 				for (SingletonPredictionContext ctx : config.context) {
 					if ( !ctx.isEmpty() ) {
 						PredictionContext newContext = ctx.parent; // "pop" invoking state
+						if ( ctx.invokingState==PredictionContext.EMPTY_FULL_CTX_INVOKING_STATE ) {
+							// we have no context info. Don't pursue.
+							if ( debug ) System.out.println("FALLING off token "+
+														    recog.getRuleNames()[config.state.ruleIndex]);
+							configs.add(config);
+							continue;
+						}
 						ATNState invokingState = atn.states.get(ctx.invokingState);
 						RuleTransition rt = (RuleTransition)invokingState.transition(0);
 						ATNState retState = rt.followState;
--- a/tool/playground/A-input
+++ b/tool/playground/A-input
@ -0,0 +1,2 @@
+{{x}
+}
--- a/tool/playground/A.g4
+++ b/tool/playground/A.g4
@ -1,6 +1,25 @@
-grammar A;
+lexer grammar A;

-s : INT { System.out.println($start.getText());} ;
+/*
+For input 

-INT : [0-9]+ ;
-WS : [ \t\n]+ -> skip ;
+{{x}
+}
+
+This matches {{x} and then thinks that it can stop because it can match that
+without going into the recursive call. The context for the stop state in ACTION
+is (2,1,[[$, 6 $]]) so it deletes everything else associated with this token.
+Seems like we should favor the first alternative, but we can't do that within
+a single rule.
+
+It is weird, though, that this one works:
+
+STRING : '"' ( '\\' '"' | . )* '"' ;
+
+Wouldn't it also get to the end of the rule by the wildcard route?
+Maybe it's a simple order of operations, or the order in which I process the
+alternatives?
+
+*/
+ACTION : '{' ( ACTION | . )* '}' ;
+WS     : [ \r\t\n]+ -> skip ;
--- a/tool/playground/T.g
+++ b/tool/playground/T.g
@ -1,2 +1,6 @@
-lexer grammar T;
-A : 'a';
+grammar T;
+
+s : INT { System.out.println($start.getText());} ;
+
+INT : [0-9]+ {$type = 3; String x = $text; $channel, $mode} ;
+WS : [ \t\n]+ -> skip ;
--- a/tool/playground/TestA.java
+++ b/tool/playground/TestA.java
@ -0,0 +1,26 @@
+import org.antlr.v4.runtime.CharStream;
+import org.antlr.v4.runtime.CommonTokenFactory;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.UnbufferedCharStream;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+public class TestA {
+	public static void main(String[] args) throws Exception {
+		String inputFile = null;
+		if ( args.length>0 ) inputFile = args[0];
+		InputStream is = System.in;
+		if ( inputFile!=null ) {
+			is = new FileInputStream(inputFile);
+		}
+		CharStream input = new UnbufferedCharStream(is);
+
+		A lex = new A(input);
+		lex.setTokenFactory(new CommonTokenFactory(true));
+
+		CommonTokenStream tokens = new CommonTokenStream(lex);
+		tokens.fill();
+		System.out.println(tokens.getTokens());
+	}
+}