Add getSpeculativeText(), which returns the text matched so far in a lexer rule. Use it in predicates, not in actions. Add unit tests.

This commit is contained in:
Terence Parr 2012-06-07 18:31:36 -07:00
parent 018e3c03e8
commit abc0e2ef87
3 changed files with 38 additions and 3 deletions

View File

@ -287,7 +287,13 @@ public abstract class Lexer extends Recognizer<Integer, LexerATNSimulator>
return _text;
}
return getInterpreter().getText(_input);
// return ((CharStream)input).substring(tokenStartCharIndex,getCharIndex()-1);
}
/**
 * Return the text from the start of the current token to the current
 * lookahead char, as computed by the interpreter for {@code _input}.
 * Intended for lexer predicates that need to test the text matched so far
 * in a rule.
 *
 * @return whatever the interpreter's {@code getSpeculativeText(_input)} yields
 */
public String getSpeculativeText() {
    final String matchedSoFar = getInterpreter().getSpeculativeText(_input);
    return matchedSoFar;
}
/** Set the complete text of this token; it wipes any previous

View File

@ -739,13 +739,24 @@ public class LexerATNSimulator extends ATNSimulator {
return dfa[mode];
}
/** Get the text of the current token. */
/**
 * Get the text of the current token. Meant to be called from an *action*
 * in a lexer rule, not from a predicate.
 *
 * @param input the character stream the token text is read from
 * @return the text covered by the interval running from {@code startIndex}
 *         to one position before {@code input.index()}
 */
@NotNull
public String getText(@NotNull CharStream input) {
    // input.index() is the first lookahead char, so stop one short of it.
    final int lastMatchedIndex = input.index() - 1;
    final Interval tokenSpan = Interval.of(startIndex, lastMatchedIndex);
    return input.getText(tokenSpan);
}
/**
 * Get the text from the start of the token through the current lookahead
 * char. Use this in predicates to test the text matched so far in a lexer
 * rule.
 *
 * @param input the character stream the text is read from
 * @return the text covered by the interval running from {@code startIndex}
 *         to {@code input.index()}
 */
@NotNull
public String getSpeculativeText(@NotNull CharStream input) {
    // The interval ends at input.index() itself (no "-1"), so the char at
    // the current index is part of the returned text.
    // NOTE(review): presumably predicates are evaluated before that char
    // has been consumed -- confirm against the simulator's match loop.
    final int currentIndex = input.index();
    final Interval speculativeSpan = Interval.of(startIndex, currentIndex);
    return input.getText(speculativeSpan);
}
/** Return the current value of this simulator's {@code line} field. */
public int getLine() {
    return this.line;
}

View File

@ -64,15 +64,33 @@ public class TestSemPredEvalLexer extends BaseTest {
assertEquals(expecting, found);
}
/**
 * ENUM and ID both match {@code [a-z]+}; ENUM is additionally gated by a
 * predicate comparing {@code getSpeculativeText()} to "enum". The expected
 * token dump has both "enum" words as token type 1 and "abc" as type 2.
 */
@Test public void testEnumNotID() throws Exception {
    final String input = "enum abc enum";
    final String lexerGrammar =
        "lexer grammar L;\n"+
        "ENUM : [a-z]+ {getSpeculativeText().equals(\"enum\")}? ;\n" +
        "ID : [a-z]+ ;\n"+
        "WS : (' '|'\\n') {skip();} ;";
    final String expected =
        "[@0,0:3='enum',<1>,1:0]\n" +
        "[@1,5:7='abc',<2>,1:5]\n" +
        "[@2,9:12='enum',<1>,1:9]\n" +
        "[@3,13:12='<EOF>',<-1>,1:13]\n" +
        "s0-' '->:s1=>3\n"; // no DFA for enum/id. all paths lead to pred.
    final String actual = execLexer("L.g4", lexerGrammar, "L", input, true);
    assertEquals(expected, actual);
}
@Test public void testIndent() throws Exception {
String grammar =
"lexer grammar L;\n"+
"ID : [a-z]+ ;\n"+
"INDENT : [ \\t]+ {_tokenStartCharPositionInLine==0}? ;"+
"INDENT : [ \\t]+ {_tokenStartCharPositionInLine==0}? \n" +
" {System.out.println(\"INDENT\");} ;"+
"NL : '\\n' ;"+
"WS : [ \\t]+ ;";
String found = execLexer("L.g4", grammar, "L", "abc\n def \n", true);
String expecting =
"INDENT\n" + // action output
"[@0,0:2='abc',<1>,1:0]\n" + // ID
"[@1,3:3='\\n',<3>,1:3]\n" + // NL
"[@2,4:5=' ',<2>,2:0]\n" + // INDENT