Add getSpeculativeText(), which returns the text matched so far in a lexer rule. Use it in predicates, not actions. Add unit tests.

2012-06-07 18:31:36 -07:00 · 2012-06-07 18:31:36 -07:00 · abc0e2ef87
parent 018e3c03e8
commit abc0e2ef87
3 changed files with 38 additions and 3 deletions
--- a/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/Lexer.java
@ -287,7 +287,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
 			return _text;
 		}
 		return getInterpreter().getText(_input);
-//		return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
+	}
+
+	/** Get the text from start of token to current lookahead char.
+	 *  Use this in predicates to test text matched so far in a lexer rule.
+	 */
+	public String getSpeculativeText() {
+		return getInterpreter().getSpeculativeText(_input);
 	}

 	/** Set the complete text of this token; it wipes any previous
--- a/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
+++ b/runtime/Java/src/org/antlr/v4/runtime/atn/LexerATNSimulator.java
@ -739,13 +739,24 @@ public class LexerATNSimulator extends ATNSimulator {
 		return dfa[mode];
 	}

-	/** Get the text of the current token. */
+	/** Get the text of the current token. Call this from a lexer *action*,
+	 *  not from a predicate; predicates should use getSpeculativeText().
+	 */
 	@NotNull
 	public String getText(@NotNull CharStream input) {
 		// index is first lookahead char, don't include.
 		return input.getText(Interval.of(startIndex, input.index()-1));
 	}

+	/** Get the text from start of token to current lookahead char.
+	 *  Use this in predicates to test text matched so far in a lexer rule.
+	 */
+	@NotNull
+	public String getSpeculativeText(@NotNull CharStream input) {
+		// unlike getText(), the interval ends at input.index(), so that char is included.
+		return input.getText(Interval.of(startIndex, input.index()));
+	}
+
 	public int getLine() {
 		return line;
 	}
--- a/tool/test/org/antlr/v4/test/TestSemPredEvalLexer.java
+++ b/tool/test/org/antlr/v4/test/TestSemPredEvalLexer.java
@ -64,15 +64,33 @@ public class TestSemPredEvalLexer extends BaseTest {
 		assertEquals(expecting, found);
 	}

+	@Test public void testEnumNotID() throws Exception {
+		String grammar =
+			"lexer grammar L;\n"+
+			"ENUM : [a-z]+ {getSpeculativeText().equals(\"enum\")}? ;\n" +
+			"ID   : [a-z]+ ;\n"+
+			"WS : (' '|'\\n') {skip();} ;";
+		String found = execLexer("L.g4", grammar, "L", "enum abc enum", true);
+		String expecting =
+			"[@0,0:3='enum',<1>,1:0]\n" +
+			"[@1,5:7='abc',<2>,1:5]\n" +
+			"[@2,9:12='enum',<1>,1:9]\n" +
+			"[@3,13:12='<EOF>',<-1>,1:13]\n" +
+			"s0-' '->:s1=>3\n"; // no DFA for enum/id. all paths lead to pred.
+		assertEquals(expecting, found);
+	}
+
 	@Test public void testIndent() throws Exception {
 		String grammar =
 			"lexer grammar L;\n"+
 			"ID : [a-z]+ ;\n"+
-			"INDENT : [ \\t]+ {_tokenStartCharPositionInLine==0}? ;"+
+			"INDENT : [ \\t]+ {_tokenStartCharPositionInLine==0}? \n" +
+			"         {System.out.println(\"INDENT\");}  ;"+
 			"NL     : '\\n' ;"+
 			"WS     : [ \\t]+ ;";
 		String found = execLexer("L.g4", grammar, "L", "abc\n  def  \n", true);
 		String expecting =
+			"INDENT\n" +                        // action output
 			"[@0,0:2='abc',<1>,1:0]\n" +		// ID
 			"[@1,3:3='\\n',<3>,1:3]\n" +  		// NL
 			"[@2,4:5='  ',<2>,2:0]\n" +			// INDENT